#include <time.h>
#include "EnvUtility.hh"
#include "PConfig.h"

#if defined(__GNU_STDC_OLD) && !defined(ios_base)
#define ios_base ios
#endif

using namespace std;

// clapack packing conventionns
inline int 
RC(int row, int col, int size) {
  return ((row)+(size)*(col));
}

int calc_rho(int offset,int correlation_width,float threshold,float *rp_signal,float *ip_signal,
	     int nenv_chan,float **rp_env,float **ip_env,float *rho2_pairwise,complx *A,complx *B,float *modx2sum)
{ 
  int k,n,row,col;

  n=nenv_chan; /* Size of matrix */

  /*************************************************************/
  /*                                                           */
  /*   Construct averages over band length correlation_width   */
  /*                                                           */
  /*************************************************************/ 
 
  /* Initialise all sums to zero ... */

  for (col=0;col<n;col++) {
    for (row=0;row<=col;row++) { /* as we only need give the upper triangular part of the matrix */
 	 A[RC(row,col,n)].r=A[RC(row,col,n)].i=0.0;
    }
  }
  for (row=0;row<n;row++) {
      B[row].r=B[row].i=0.0;
    }
  *modx2sum=0.0;

  /* ... and now form sums over bandwidth correlation_width */
  for (k=offset;k<offset+correlation_width;k++) {
    
    for (col=0;col<n;col++) {
	  for (row=0;row<=col;row++) {  /* as we only need give the upper triangular part of the matrix */
	    A[RC(row,col,n)].r+=rp_env[row][k]*rp_env[col][k]+ip_env[row][k]*ip_env[col][k]; /* Re(Y_p^* Y_q) */
	    A[RC(row,col,n)].i+=rp_env[row][k]*ip_env[col][k]-ip_env[row][k]*rp_env[col][k]; /* Im(Y_p^* Y_q) */
	  }
    }
    
    for (row=0;row<n;row++) {
      B[row].r+=rp_signal[k]*rp_env[row][k]+ip_signal[k]*ip_env[row][k];  /* Re(Y_row^* X) */
      B[row].i+=ip_signal[k]*rp_env[row][k]-rp_signal[k]*ip_env[row][k];  /* Im(Y_row^* X) */
    }
    *modx2sum+=rp_signal[k]*rp_signal[k]+ip_signal[k]*ip_signal[k];
  }

  /*  do we threshold */
  for (col=0;col<n;col++) {
    for (row=0;row<=col;row++) {  /* as we only need give the upper triangular part of the matrix */
      
      if ( (B[col].r*B[col].r+B[col].i*B[col].i) < threshold*A[RC(col,col,n)].r*(*modx2sum) ) 
	B[col].r=B[col].i=0.0; 
   
      if ( (A[RC(row,col,n)].r*A[RC(row,col,n)].r+A[RC(row,col,n)].i*A[RC(row,col,n)].i) <
	   threshold*A[RC(row,row,n)].r*A[RC(col,col,n)].r ) 
	A[RC(row,col,n)].r=A[RC(row,col,n)].i=0.0;
    }
        
  }

  
  /* Reduction in variance using the best r for just the single channel (after thresholding) */ 
  for (row=0;row<nenv_chan;row++) {
    rho2_pairwise[row]=(B[row].r*B[row].r+B[row].i*B[row].i)/(A[RC(row,row,n)].r*(*modx2sum));
  }                           

  return(0);
}
  
int chan_clean(int offset,int correlation_width,float threshold,float *rho2,float *rp_signal,float *ip_signal,
	       int nenv_chan,float **rp_env,float **ip_env,float *rp_clean,float *ip_clean,complx *A,complx *B,
	       float modx2sum,complx *R,complx *work,integer lwork,integer *ipivot)
{ 
  /*******************************************************************************/
  /*          NB f2c.h defines a data types complx and integer (= long int)     */
  /*                   which is used by clapack routines                         */
  /*******************************************************************************/
  float rho2i;
  char s;
  int k,row;
  integer info,n,lda,ldb,nrhs;

  /* Our equation is    y^p_i^* y^q_i r_q = x_i y^p_i*    */
  /* The lapack matrix convention is that the n x n matrix A_pq 
     is stored in a 1-dimensional array of size n^2 with
                A_pq = A[p+n*q]                           
     i.e we cycle fast down the rows of the first column 
     and then slowly across to the next column. Eg packing is as
                 ( 0   3   6 )
		 ( 1   4   7 )
		 ( 2   5   8 )                            */



  /* set lapack parameters for chesv  */

  /*************************************************************/
  /*                                                           */
  /*   Do not change these values without checking that        */
  /*    the changes do not affect the memory allocation        */
  /*                in the calling program                     */
  /*                                                           */
  /*************************************************************/ 


  n=(integer) nenv_chan; /* Size of matrix */
  lda=(integer) nenv_chan;
  ldb=(integer) nenv_chan;
  nrhs=1;    
  s='U';   /* We give the upper triangular part of the matrix */


  
  /*         Call to the lapack routine chesv to solve the equation AR=B        */
  /*   Set up so that R holds the value B on input and the solution on output   */
 
  for (row=0;row<nenv_chan;row++) {
    R[row].r=B[row].r;
    R[row].i=B[row].i;
  }


  /* Call to lapack routine chesv_    */
			                                                  
  chesv_(&s,&n,&nrhs,A,&lda,ipivot,R,&ldb,work,&lwork,&info);
  if (info != 0) {
    cerr << "# " << __FILE__ << ": Non-zero completion code " << info << "returned by chesv" << endl;
  }

  /* Calculate the significance measure |rho|^2  */

  *rho2=0.0;
  rho2i=0.0;
  for (row=0;row<nenv_chan;row++) {
    *rho2+=(R[row].r*B[row].r+R[row].i*B[row].i);
    rho2i+=(R[row].i*B[row].r-R[row].r*B[row].i);
  }    
  *rho2/=modx2sum;
  rho2i/=modx2sum;
  if ( fabs(rho2i) > 0.00001 ) {
    cerr << "# " << __FILE__ << "line " << __LINE__  << ": Round-off problem:  offset = " << offset << "  rho2i = " << rho2i << " (should be 0)" << endl; 
  }
   
  if ( *rho2 < threshold )  
     for (row=0;row<nenv_chan;row++) {
       R[row].r=R[row].i=0.0;
     }

  /* Calculate the `cleaned' signal  */
    
  for (k=offset;k<offset+correlation_width;k++) {
    rp_clean[k]=rp_signal[k];
    ip_clean[k]=ip_signal[k];
    for (row=0;row<nenv_chan;row++) {
      rp_clean[k]-=(R[row].r*rp_env[row][k]-R[row].i*ip_env[row][k]);
      ip_clean[k]-=(R[row].r*ip_env[row][k]+R[row].i*rp_env[row][k]);
    }
  }
  
  return(0);
}

void read_binary_fft(char *filename, unsigned int length, float *rp_fft, 
		     float *ip_fft, float *delta_f)
{
  /*************************************************************************************/
  /*    Read fft data from binary file: the first line contains the  frequency spacing */		 
  /*         then follow lines containing the real and imaginary part                  */
  /*************************************************************************************/

  ifstream ifs(filename, ios_base::in | ios_base::binary);
  ifsopen_check(ifs,filename);

  cerr << "# " << __FILE__ << ": Reading " << filename << endl;
 
  // first read  the frequency spacing 
  ifs.read((char *)delta_f,sizeof(float));
  if (ifs.gcount() != sizeof(*delta_f)) {
    cerr << "# EnvUtility: Error reading delta_f from " << filename << endl;
  }

  ifs.read((char*)rp_fft, length*sizeof(float)); 
  if (ifs.gcount() != int(length*sizeof(rp_fft))) {
    cerr << "# EnvUtility: Error reading real part of fft data from " 
	 << filename << endl;
  }

  ifs.read((char*)ip_fft,length*sizeof(float)); 
  if (ifs.gcount() != int(length*sizeof(ip_fft))) {
    cerr << "# EnvUtility: Error reading imaginary part of fft data from " 
	 << filename << endl;
  }

  ifs.close();
  rp_fft[0]=ip_fft[0]=0.0; /* set dc signal to 0 */  
}


void write_fft(char *filename,int length,float *rp_fft,float *ip_fft,float delta_f)
{
  /*************************************************************************************/
  /* Write fft data to an ascii file: the first line contains the  frequency spacing   */		 
  /*         then follow lines containing the real and imaginary part                  */
  /*************************************************************************************/  
  ofstream ofs;
 
  ofs.open(filename, ios_base::out);
  if (!ofs) {
    cerr << "# " << __FILE__ << ": unable to open file " << filename << endl;
    abort();
  }
  ofs.setf(ios_base::scientific);
  
  cerr << "# " <<  __FILE__ << ": Writing " << filename << endl;

  ofs << delta_f << endl;

  //   We fill the DC component with data from the first frequency bin 
  //   so that we can do lin-log plots without complaints.
  //   Note that the DC component is never used and is set to zero 
  //   by read_fft - still there should be a better way of doing this!  

  ofs << rp_fft[1] << "\t" << ip_fft[1] << endl;
  for (int i=1;i<length;i++) {
    ofs << rp_fft[i] << "\t" << ip_fft[i] << endl;
  }
  ofs.close();
}

void write_binary_fft(char *filename,int length,float *rp_fft,float *ip_fft,float delta_f)
{
  /*************************************************************************************/
  /* Write fft data to an ascii file: the first line contains the  frequency spacing   */		 
  /*         then follow lines containing the real and imaginary part                  */
  /*************************************************************************************/  
  ofstream ofs;
 
  ofs.open(filename, ios_base::out | ios_base::binary);
  if ( ofs.bad() ) {
    cerr << "# EnvUtility: unable to open file " << filename << endl;
    abort();
  }
  ofs.setf(ios_base::scientific);

  cerr << "# EnvUtility: Writing " << filename << endl;

  ofs.write((char*)(&delta_f),sizeof(float));
  ofs.write((char *)rp_fft,length*sizeof(float));
  ofs.write((char *)ip_fft,length*sizeof(float));

  ofs.close();
}

void write_rho2(ofstream &ofs,float freq,float rho2,int nenv_chan,float *rho2_pairwise)
{
  /*************************************************************************************/
  /*                     Write the list of rho values to file                          */		 
  /*************************************************************************************/
  int chan;

  ofs << freq << "\t\t" << rho2 << "\t\t";
  // fprintf(fp,"%.3f\t\t%.3f\t\t",freq,rho2); 
  for (chan=0;chan<nenv_chan;chan++) { 
    ofs << rho2_pairwise[chan] << " ";
    // fprintf(fp,"%.3f ",rho2_pairwise[chan]); 
  }
  ofs << endl;
  //fprintf(fp,"\n"); 
}


void fopen_check(FILE *fp,char *filename) 
{
  /*************************************************************************************/
  /*            Checks to see if a file has been opened properly                       */
  /*             and if not write an appropriate error message                         */
  /*************************************************************************************/
  if ( fp == NULL ) {
    fprintf(stderr,"Problems opening %s\n",filename);
    exit(1);    
  }    
}

void ifsopen_check(ifstream &ifs,const char *filename) 
{
  /*************************************************************************************/
  /*            Checks to see if a file has been opened properly                       */
  /*             and if not write an appropriate error message                         */
  /*************************************************************************************/

  if ( ifs.bad() ) {
    cerr << "Problems opening " << filename << endl;
    abort();    
  }    
}


int chesv_(char *uplo, integer *n, integer *nrhs, complx *a,
	 integer *lda, integer *ipiv, complx *b, integer *ldb, complx *work,
	 integer *lwork, integer *info)
{
/*  -- LAPACK driver routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CHESV computes the solution to a complex system of linear equations   
       A * X = B,   
    where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS   
    matrices.   

    The diagonal pivoting method is used to factor A as   
       A = U * D * U**H,  if UPLO = 'U', or   
       A = L * D * L**H,  if UPLO = 'L',   
    where U (or L) is a product of permutation and unit upper (lower)   
    triangular matrices, and D is Hermitian and block diagonal with   
    1-by-1 and 2-by-2 diagonal blocks.  The factored form of A is then   
    used to solve the system of equations A * X = B.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The number of linear equations, i.e., the order of the   
            matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrix B.  NRHS >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading 
  
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower 
  
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   

            On exit, if INFO = 0, the block diagonal matrix D and the   
            multipliers used to obtain the factor U or L from the   
            factorization A = U*D*U**H or A = L*D*L**H as computed by   
            CHETRF.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (output) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D, as 
  
            determined by CHETRF.  If IPIV(k) > 0, then rows and columns 
  
            k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1   
            diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,   
            then rows and columns k-1 and -IPIV(k) were interchanged and 
  
            D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and 
  
            IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and   
            -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2   
            diagonal block.   

    B       (input/output) COMPLEX array, dimension (LDB,NRHS)   
            On entry, the N-by-NRHS right hand side matrix B.   
            On exit, if INFO = 0, the N-by-NRHS solution matrix X.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The length of WORK.  LWORK >= 1, and for best performance   
            LWORK >= N*NB, where NB is the optimal blocksize for   
            CHETRF.   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   
            > 0: if INFO = i, D(i,i) is exactly zero.  The factorization 
  
                 has been completed, but the block diagonal matrix D is   
                 exactly singular, so the solution could not be computed. 
  

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* System generated locals */
  // integer a_dim1, a_offset, b_dim1, b_offset;
  integer i__1;


#define IPIV(I) ipiv[(I)-1]
#define WORK(I) work[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

    *info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    } else if (*lwork < 1) {
	*info = -10;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHESV ", &i__1);
	return 0;
    }

/*     Compute the factorization A = U*D*U' or A = L*D*L'. */

    chetrf_(uplo, n, &A(1,1), lda, &IPIV(1), &WORK(1), lwork, info);
    if (*info == 0) {

/*        Solve the system A*X = B, overwriting B with X. */

	chetrs_(uplo, n, nrhs, &A(1,1), lda, &IPIV(1), &B(1,1), ldb, info);

    }
    return 0;

/*     End of CHESV */

} /* chesv_ */



int chetf2_(char *uplo, integer *n, complx *a, integer *lda,
	 integer *ipiv, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CHETF2 computes the factorization of a complex Hermitian matrix A   
    using the Bunch-Kaufman diagonal pivoting method:   

       A = U*D*U'  or  A = L*D*L'   

    where U (or L) is a product of permutation and unit upper (lower)   
    triangular matrices, U' is the conjugate transpose of U, and D is   
    Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. 
  

    This is the unblocked version of the algorithm, calling Level 2 BLAS. 
  

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the upper or lower triangular part of the   
            Hermitian matrix A is stored:   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading 
  
            n-by-n upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading n-by-n lower triangular part of A contains the lower 
  
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   

            On exit, the block diagonal matrix D and the multipliers used 
  
            to obtain the factor U or L (see below for further details). 
  

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (output) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D.   
            If IPIV(k) > 0, then rows and columns k and IPIV(k) were   
            interchanged and D(k,k) is a 1-by-1 diagonal block.   
            If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and   
            columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) 
  
            is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =   
            IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were   
            interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -k, the k-th argument had an illegal value   
            > 0: if INFO = k, D(k,k) is exactly zero.  The factorization 
  
                 has been completed, but the block diagonal matrix D is   
                 exactly singular, and division by zero will occur if it 
  
                 is used to solve a system of equations.   

    Further Details   
    ===============   

    If UPLO = 'U', then A = U*D*U', where   
       U = P(n)*U(n)* ... *P(k)U(k)* ...,   
    i.e., U is a product of terms P(k)*U(k), where k decreases from n to 
  
    1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1   
    and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as   
    defined by IPIV(k), and U(k) is a unit upper triangular matrix, such 
  
    that if the diagonal block D(k) is of order s (s = 1 or 2), then   

               (   I    v    0   )   k-s   
       U(k) =  (   0    I    0   )   s   
               (   0    0    I   )   n-k   
                  k-s   s   n-k   

    If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).   
    If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), 
  
    and A(k,k), and v overwrites A(1:k-2,k-1:k).   

    If UPLO = 'L', then A = L*D*L', where   
       L = P(1)*L(1)* ... *P(k)*L(k)* ...,   
    i.e., L is a product of terms P(k)*L(k), where k increases from 1 to 
  
    n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1   
    and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as   
    defined by IPIV(k), and L(k) is a unit lower triangular matrix, such 
  
    that if the diagonal block D(k) is of order s (s = 1 or 2), then   

               (   I    0     0   )  k-1   
       L(k) =  (   0    I     0   )  s   
               (   0    v     I   )  n-k-s+1   
                  k-1   s  n-k-s+1   

    If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).   
    If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),   
    and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1=0, i__1;
    /* integer a_offset; */
    sreal r__1, r__2, r__3, r__4;
    doublereal d__1;
    complx q__1;
 
    /* Local variables */

    static integer imax, jmax;
    static sreal c;
    static integer j, k;
    static complx s, t;
    static sreal alpha;
    static integer kstep;
    static logical upper;
    static sreal r1, r2;
    static integer kk, kp;
    static sreal absakk;

    static sreal colmax, rowmax;



#define IPIV(I) ipiv[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHETF2", &i__1);
	return 0;
    }

/*     Initialize ALPHA for use in choosing pivot block size. */

    alpha = (sqrt(17.f) + 1.f) / 8.f;

    if (upper) {

/*        Factorize A as U*D*U' using the upper triangle of A   

          K is the main loop index, decreasing from N to 1 in steps of
   
          1 or 2 */

	k = *n;
L10:

/*        If K < 1, exit from loop */

	if (k < 1) {
	    goto L50;
	}
	kstep = 1;

/*        Determine rows and columns to be interchanged and whether   
          a 1-by-1 or 2-by-2 pivot block will be used */

	i__1 = k + k * a_dim1;
	absakk = (r__1 = A(k,k).r, fabs(r__1));

/*        IMAX is the row-index of the largest off-diagonal element in
   
          column K, and COLMAX is its absolute value */

	if (k > 1) {
	    i__1 = k - 1;
	    imax = icamax_(&i__1, &A(1,k), &c__1);
	    i__1 = imax + k * a_dim1;
	    colmax = (r__1 = A(imax,k).r, fabs(r__1)) + (r__2 = r_imag(&A(imax,k)), fabs(r__2));
	} else {
	    colmax = 0.f;
	}

	if (dmax(absakk,colmax) == 0.f) {

/*           Column K is zero: set INFO and continue */

	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	    i__1 = k + k * a_dim1;
	    //i__2 = k + k * a_dim1;
	    d__1 = A(k,k).r;
	    A(k,k).r = d__1, A(k,k).i = 0.f;
	} else {
	    if (absakk >= alpha * colmax) {

/*              no interchange, use 1-by-1 pivot block */

		kp = k;
	    } else {

/*              JMAX is the column-index of the largest off-di
agonal   
                element in row IMAX, and ROWMAX is its absolut
e value */

		i__1 = k - imax;
		jmax = imax + icamax_(&i__1, &A(imax,imax+1), 
			lda);
		i__1 = imax + jmax * a_dim1;
		rowmax = (r__1 = A(imax,jmax).r, fabs(r__1)) + (r__2 = r_imag(&A(imax,jmax)), fabs(r__2));
		if (imax > 1) {
		    i__1 = imax - 1;
		    jmax = icamax_(&i__1, &A(1,imax), &c__1);
/* Computing MAX */
		    i__1 = jmax + imax * a_dim1;
		    r__3 = rowmax, r__4 = (r__1 = A(jmax,imax).r, fabs(r__1)) + (
			    r__2 = r_imag(&A(jmax,imax)), fabs(
			    r__2));
		    rowmax = dmax(r__3,r__4);
		}

		if (absakk >= alpha * colmax * (colmax / rowmax)) {

/*                 no interchange, use 1-by-1 pivot block 
*/

		    kp = k;
		} else /* if(complicated condition) */ {
		    i__1 = imax + imax * a_dim1;
		    if ((r__1 = A(imax,imax).r, fabs(r__1)) >= alpha * rowmax) {

/*                 interchange rows and columns K and 
IMAX, use 1-by-1   
                   pivot block */

			kp = imax;
		    } else {

/*                 interchange rows and columns K-1 an
d IMAX, use 2-by-2   
                   pivot block */

			kp = imax;
			kstep = 2;
		    }
		}
	    }

	    kk = k - kstep + 1;
	    if (kp != kk) {

/*              Interchange rows and columns KK and KP in the 
leading   
                submatrix A(1:k,1:k) */

		i__1 = kp - 1;
		cswap_(&i__1, &A(1,kk), &c__1, &A(1,kp),
			 &c__1);
		i__1 = kk - 1;
		for (j = kp + 1; j <= kk-1; ++j) {
		    r_cnjg(&q__1, &A(j,kk));
		    t.r = q__1.r, t.i = q__1.i;
		    //i__2 = j + kk * a_dim1;
		    r_cnjg(&q__1, &A(kp,j));
		    A(j,kk).r = q__1.r, A(j,kk).i = q__1.i;
		    //i__2 = kp + j * a_dim1;
		    A(kp,j).r = t.r, A(kp,j).i = t.i;
/* L20: */
		}
		i__1 = kp + kk * a_dim1;
		r_cnjg(&q__1, &A(kp,kk));
		A(kp,kk).r = q__1.r, A(kp,kk).i = q__1.i;
		i__1 = kk + kk * a_dim1;
		r1 = A(kk,kk).r;
		i__1 = kk + kk * a_dim1;
		//i__2 = kp + kp * a_dim1;
		d__1 = A(kp,kp).r;
		A(kk,kk).r = d__1, A(kk,kk).i = 0.f;
		i__1 = kp + kp * a_dim1;
		A(kp,kp).r = r1, A(kp,kp).i = 0.f;
		if (kstep == 2) {
		    i__1 = k + k * a_dim1;
		    //i__2 = k + k * a_dim1;
		    d__1 = A(k,k).r;
		    A(k,k).r = d__1, A(k,k).i = 0.f;
		    i__1 = k - 1 + k * a_dim1;
		    t.r = A(k-1,k).r, t.i = A(k-1,k).i;
		    i__1 = k - 1 + k * a_dim1;
		    //i__2 = kp + k * a_dim1;
		    A(k-1,k).r = A(kp,k).r, A(k-1,k).i = A(kp,k).i;
		    i__1 = kp + k * a_dim1;
		    A(kp,k).r = t.r, A(kp,k).i = t.i;
		}
	    } else {
		i__1 = k + k * a_dim1;
		//i__2 = k + k * a_dim1;
		d__1 = A(k,k).r;
		A(k,k).r = d__1, A(k,k).i = 0.f;
		if (kstep == 2) {
		    i__1 = k - 1 + (k - 1) * a_dim1;
		    //i__2 = k - 1 + (k - 1) * a_dim1;
		    d__1 = A(k-1,k-1).r;
		    A(k-1,k-1).r = d__1, A(k-1,k-1).i = 0.f;
		}
	    }

/*           Update the leading submatrix */

	    if (kstep == 1) {

/*              1-by-1 pivot block D(k): column k now holds   

                W(k) = U(k)*D(k)   

                where U(k) is the k-th column of U   

                Perform a rank-1 update of A(1:k-1,1:k-1) as 
  

                A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k
)' */

		i__1 = k + k * a_dim1;
		r1 = 1.f / A(k,k).r;
		i__1 = k - 1;
		r__1 = -(doublereal)r1;
		cher_(uplo, &i__1, &r__1, &A(1,k), &c__1, &A(1,1), lda);

/*              Store U(k) in column k */

		i__1 = k - 1;
		csscal_(&i__1, &r1, &A(1,k), &c__1);
	    } else {

/*              2-by-2 pivot block D(k): columns k and k-1 now
 hold   

                ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)   

                where U(k) and U(k-1) are the k-th and (k-1)-t
h columns   
                of U   

                Perform a rank-2 update of A(1:k-2,1:k-2) as 
  

                A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
   
                   = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(
k) )'   

                Convert this to two rank-1 updates by using th
e eigen-   
                decomposition of D(k) */

		claev2_(&A(k-1,k-1), &A(k-1,k), 
			&A(k,k), &r1, &r2, &c, &s);
		r1 = 1.f / r1;
		r2 = 1.f / r2;
		i__1 = k - 2;
		crot_(&i__1, &A(1,k-1), &c__1, &A(1,k), &c__1, &c, &s);
		i__1 = k - 2;
		r__1 = -(doublereal)r1;
		cher_(uplo, &i__1, &r__1, &A(1,k-1), &c__1, &A(1,1), lda);
		i__1 = k - 2;
		r__1 = -(doublereal)r2;
		cher_(uplo, &i__1, &r__1, &A(1,k), &c__1, &A(1,1), lda);

/*              Store U(k) and U(k-1) in columns k and k-1 */

		i__1 = k - 2;
		csscal_(&i__1, &r1, &A(1,k-1), &c__1);
		i__1 = k - 2;
		csscal_(&i__1, &r2, &A(1,k), &c__1);
		i__1 = k - 2;
		q__1.r = -(doublereal)s.r, q__1.i = -(doublereal)s.i;
		crot_(&i__1, &A(1,k-1), &c__1, &A(1,k), &c__1, &c, &q__1);
	    }
	}

/*        Store details of the interchanges in IPIV */

	if (kstep == 1) {
	    IPIV(k) = kp;
	} else {
	    IPIV(k) = -kp;
	    IPIV(k - 1) = -kp;
	}

/*        Decrease K and return to the start of the main loop */

	k -= kstep;
	goto L10;

    } else {

/*        Factorize A as L*D*L' using the lower triangle of A   

          K is the main loop index, increasing from 1 to N in steps of
   
          1 or 2 */

	k = 1;
L30:

/*        If K > N, exit from loop */

	if (k > *n) {
	    goto L50;
	}
	kstep = 1;

/*        Determine rows and columns to be interchanged and whether   
          a 1-by-1 or 2-by-2 pivot block will be used */

	i__1 = k + k * a_dim1;
	absakk = (r__1 = A(k,k).r, fabs(r__1));

/*        IMAX is the row-index of the largest off-diagonal element in
   
          column K, and COLMAX is its absolute value */

	if (k < *n) {
	    i__1 = *n - k;
	    imax = k + icamax_(&i__1, &A(k+1,k), &c__1);
	    i__1 = imax + k * a_dim1;
	    colmax = (r__1 = A(imax,k).r, fabs(r__1)) + (r__2 = r_imag(&A(imax,k)), fabs(r__2));
	} else {
	    colmax = 0.f;
	}

	if (dmax(absakk,colmax) == 0.f) {

/*           Column K is zero: set INFO and continue */

	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	    i__1 = k + k * a_dim1;
	    //i__2 = k + k * a_dim1;
	    d__1 = A(k,k).r;
	    A(k,k).r = d__1, A(k,k).i = 0.f;
	} else {
	    if (absakk >= alpha * colmax) {

/*              no interchange, use 1-by-1 pivot block */

		kp = k;
	    } else {

/*              JMAX is the column-index of the largest off-di
agonal   
                element in row IMAX, and ROWMAX is its absolut
e value */

		i__1 = imax - k;
		jmax = k - 1 + icamax_(&i__1, &A(imax,k), lda);
		i__1 = imax + jmax * a_dim1;
		rowmax = (r__1 = A(imax,jmax).r, fabs(r__1)) + (r__2 = r_imag(&A(imax,jmax)), fabs(r__2));
		if (imax < *n) {
		    i__1 = *n - imax;
		    jmax = imax + icamax_(&i__1, &A(imax+1,imax),
			     &c__1);
/* Computing MAX */
		    i__1 = jmax + imax * a_dim1;
		    r__3 = rowmax, r__4 = (r__1 = A(jmax,imax).r, fabs(r__1)) + (
			    r__2 = r_imag(&A(jmax,imax)), fabs(
			    r__2));
		    rowmax = dmax(r__3,r__4);
		}

		if (absakk >= alpha * colmax * (colmax / rowmax)) {

/*                 no interchange, use 1-by-1 pivot block 
*/

		    kp = k;
		} else /* if(complicated condition) */ {
		    i__1 = imax + imax * a_dim1;
		    if ((r__1 = A(imax,imax).r, fabs(r__1)) >= alpha * rowmax) {

/*                 interchange rows and columns K and 
IMAX, use 1-by-1   
                   pivot block */

			kp = imax;
		    } else {

/*                 interchange rows and columns K+1 an
d IMAX, use 2-by-2   
                   pivot block */

			kp = imax;
			kstep = 2;
		    }
		}
	    }

	    kk = k + kstep - 1;
	    if (kp != kk) {

/*              Interchange rows and columns KK and KP in the 
trailing   
                submatrix A(k:n,k:n) */

		if (kp < *n) {
		    i__1 = *n - kp;
		    cswap_(&i__1, &A(kp+1,kk), &c__1, &A(kp+1,kp), &c__1);
		}
		i__1 = kp - 1;
		for (j = kk + 1; j <= kp-1; ++j) {
		    r_cnjg(&q__1, &A(j,kk));
		    t.r = q__1.r, t.i = q__1.i;
		    //i__2 = j + kk * a_dim1;
		    r_cnjg(&q__1, &A(kp,j));
		    A(j,kk).r = q__1.r, A(j,kk).i = q__1.i;
		    //i__2 = kp + j * a_dim1;
		    A(kp,j).r = t.r, A(kp,j).i = t.i;
/* L40: */
		}
		i__1 = kp + kk * a_dim1;
		r_cnjg(&q__1, &A(kp,kk));
		A(kp,kk).r = q__1.r, A(kp,kk).i = q__1.i;
		i__1 = kk + kk * a_dim1;
		r1 = A(kk,kk).r;
		i__1 = kk + kk * a_dim1;
		//i__2 = kp + kp * a_dim1;
		d__1 = A(kp,kp).r;
		A(kk,kk).r = d__1, A(kk,kk).i = 0.f;
		i__1 = kp + kp * a_dim1;
		A(kp,kp).r = r1, A(kp,kp).i = 0.f;
		if (kstep == 2) {
		    i__1 = k + k * a_dim1;
		    //i__2 = k + k * a_dim1;
		    d__1 = A(k,k).r;
		    A(k,k).r = d__1, A(k,k).i = 0.f;
		    i__1 = k + 1 + k * a_dim1;
		    t.r = A(k+1,k).r, t.i = A(k+1,k).i;
		    i__1 = k + 1 + k * a_dim1;
		    //i__2 = kp + k * a_dim1;
		    A(k+1,k).r = A(kp,k).r, A(k+1,k).i = A(kp,k).i;
		    i__1 = kp + k * a_dim1;
		    A(kp,k).r = t.r, A(kp,k).i = t.i;
		}
	    } else {
		i__1 = k + k * a_dim1;
		//i__2 = k + k * a_dim1;
		d__1 = A(k,k).r;
		A(k,k).r = d__1, A(k,k).i = 0.f;
		if (kstep == 2) {
		    i__1 = k + 1 + (k + 1) * a_dim1;
		    //i__2 = k + 1 + (k + 1) * a_dim1;
		    d__1 = A(k+1,k+1).r;
		    A(k+1,k+1).r = d__1, A(k+1,k+1).i = 0.f;
		}
	    }

/*           Update the trailing submatrix */

	    if (kstep == 1) {

/*              1-by-1 pivot block D(k): column k now holds   

                W(k) = L(k)*D(k)   

                where L(k) is the k-th column of L */

		if (k < *n) {

/*                 Perform a rank-1 update of A(k+1:n,k+1:
n) as   

                   A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/
D(k))*W(k)' */

		    i__1 = k + k * a_dim1;
		    r1 = 1.f / A(k,k).r;
		    i__1 = *n - k;
		    r__1 = -(doublereal)r1;
		    cher_(uplo, &i__1, &r__1, &A(k+1,k), &c__1, &
			    A(k+1,k+1), lda);

/*                 Store L(k) in column K */

		    i__1 = *n - k;
		    csscal_(&i__1, &r1, &A(k+1,k), &c__1);
		}
	    } else {

/*              2-by-2 pivot block D(k): columns K and K+1 now
 hold   

                ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)   

                where L(k) and L(k+1) are the k-th and (k+1)-t
h columns   
                of L */

		if (k < *n - 1) {

/*                 Perform a rank-2 update of A(k+2:n,k+2:
n) as   

                   A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(
k+1) )'   
                      = A - ( W(k) W(k+1) )*inv(D(k))*( W(
k) W(k+1) )'   

                   Convert this to two rank-1 updates by u
sing the eigen-   
                   decomposition of D(k) */

		    r_cnjg(&q__1, &A(k+1,k));
		    claev2_(&A(k,k), &q__1, &A(k+1,k+1), &r1, &r2, &c, &s);
		    r1 = 1.f / r1;
		    r2 = 1.f / r2;
		    i__1 = *n - k - 1;
		    crot_(&i__1, &A(k+2,k), &c__1, &A(k+2,k+1), &c__1, &c, &s);
		    i__1 = *n - k - 1;
		    r__1 = -(doublereal)r1;
		    cher_(uplo, &i__1, &r__1, &A(k+2,k), &c__1, &
			    A(k+2,k+2), lda);
		    i__1 = *n - k - 1;
		    r__1 = -(doublereal)r2;
		    cher_(uplo, &i__1, &r__1, &A(k+2,k+1), &
			    c__1, &A(k+2,k+2), lda);

/*                 Store L(k) and L(k+1) in columns k and 
k+1 */

		    i__1 = *n - k - 1;
		    csscal_(&i__1, &r1, &A(k+2,k), &c__1);
		    i__1 = *n - k - 1;
		    csscal_(&i__1, &r2, &A(k+2,k+1), &c__1);
		    i__1 = *n - k - 1;
		    q__1.r = -(doublereal)s.r, q__1.i = -(doublereal)s.i;
		    crot_(&i__1, &A(k+2,k), &c__1, &A(k+2,k+1), &c__1, &c, &q__1);
		}
	    }
	}

/*        Store details of the interchanges in IPIV */

	if (kstep == 1) {
	    IPIV(k) = kp;
	} else {
	    IPIV(k) = -kp;
	    IPIV(k + 1) = -kp;
	}

/*        Increase K and return to the start of the main loop */

	k += kstep;
	goto L30;

    }

L50:
    return 0;

/*     End of CHETF2 */

} /* chetf2_ */


int chetrf_(char *uplo, integer *n, complx *a, integer *lda,
	 integer *ipiv, complx *work, integer *lwork, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CHETRF computes the factorization of a complex Hermitian matrix A   
    using the Bunch-Kaufman diagonal pivoting method.  The form of the   
    factorization is   

       A = U*D*U**H  or  A = L*D*L**H   

    where U (or L) is a product of permutation and unit upper (lower)   
    triangular matrices, and D is Hermitian and block diagonal with   
    1-by-1 and 2-by-2 diagonal blocks.   

    This is the blocked version of the algorithm, calling Level 3 BLAS.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading 
  
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower 
  
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   

            On exit, the block diagonal matrix D and the multipliers used 
  
            to obtain the factor U or L (see below for further details). 
  

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (output) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D.   
            If IPIV(k) > 0, then rows and columns k and IPIV(k) were   
            interchanged and D(k,k) is a 1-by-1 diagonal block.   
            If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and   
            columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) 
  
            is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =   
            IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were   
            interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The length of WORK.  LWORK >=1.  For best performance   
            LWORK >= N*NB, where NB is the block size returned by ILAENV. 
  

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization 
  
                  has been completed, but the block diagonal matrix D is 
  
                  exactly singular, and division by zero will occur if it 
  
                  is used to solve a system of equations.   

    Further Details   
    ===============   

    If UPLO = 'U', then A = U*D*U', where   
       U = P(n)*U(n)* ... *P(k)U(k)* ...,   
    i.e., U is a product of terms P(k)*U(k), where k decreases from n to 
  
    1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1   
    and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as   
    defined by IPIV(k), and U(k) is a unit upper triangular matrix, such 
  
    that if the diagonal block D(k) is of order s (s = 1 or 2), then   

               (   I    v    0   )   k-s   
       U(k) =  (   0    I    0   )   s   
               (   0    0    I   )   n-k   
                  k-s   s   n-k   

    If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).   
    If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), 
  
    and A(k,k), and v overwrites A(1:k-2,k-1:k).   

    If UPLO = 'L', then A = L*D*L', where   
       L = P(1)*L(1)* ... *P(k)*L(k)* ...,   
    i.e., L is a product of terms P(k)*L(k), where k increases from 1 to 
  
    n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1   
    and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as   
    defined by IPIV(k), and L(k) is a unit lower triangular matrix, such 
  
    that if the diagonal block D(k) is of order s (s = 1 or 2), then   

               (   I    0     0   )  k-1   
       L(k) =  (   0    I     0   )  s   
               (   0    v     I   )  n-k-s+1   
                  k-1   s  n-k-s+1   

    If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).   
    If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),   
    and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__2 = 2;
    
    /* System generated locals */
    integer i__1, i__2;
    /* integer a_dim1, a_offset; */
    /* Local variables */
    static integer j, k;
    static integer nbmin, iinfo;
    static logical upper;
    static integer kb, nb;
    static integer ldwork, iws;



#define IPIV(I) ipiv[(I)-1]
#define WORK(I) work[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    } else if (*lwork < 1) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHETRF", &i__1);
	return 0;
    }

/*     Determine the block size */

    nb = ilaenv_(&c__1, "CHETRF", uplo, n, &c_n1, &c_n1, &c_n1, 6L, 1L);
    nbmin = 2;
    ldwork = *n;
    if (nb > 1 && nb < *n) {
	iws = ldwork * nb;
	if (*lwork < iws) {
/* Computing MAX */
	    i__1 = *lwork / ldwork;
	    nb = max(i__1,1);
/* Computing MAX */
	    i__1 = 2, i__2 = ilaenv_(&c__2, "CHETRF", uplo, n, &c_n1, &c_n1, &
		    c_n1, 6L, 1L);
	    nbmin = max(i__1,i__2);
	}
    } else {
	iws = 1;
    }
    if (nb < nbmin) {
	nb = *n;
    }

    if (upper) {

/*        Factorize A as U*D*U' using the upper triangle of A   

          K is the main loop index, decreasing from N to 1 in steps of
   
          KB, where KB is the number of columns factorized by CLAHEF; 
  
          KB is either NB or NB-1, or K for the last block */

	k = *n;
L10:

/*        If K < 1, exit from loop */

	if (k < 1) {
	    goto L40;
	}

	if (k > nb) {

/*           Factorize columns k-kb+1:k of A and use blocked code 
to   
             update columns 1:k-kb */

	    clahef_(uplo, &k, &nb, &kb, &A(1,1), lda, &IPIV(1), &WORK(1),
		     n, &iinfo);
	} else {

/*           Use unblocked code to factorize columns 1:k of A */

	    chetf2_(uplo, &k, &A(1,1), lda, &IPIV(1), &iinfo);
	    kb = k;
	}

/*        Set INFO on the first occurrence of a zero pivot */

	if (*info == 0 && iinfo > 0) {
	    *info = iinfo;
	}

/*        Decrease K and return to the start of the main loop */

	k -= kb;
	goto L10;

    } else {

/*        Factorize A as L*D*L' using the lower triangle of A   

          K is the main loop index, increasing from 1 to N in steps of
   
          KB, where KB is the number of columns factorized by CLAHEF; 
  
          KB is either NB or NB-1, or N-K+1 for the last block */

	k = 1;
L20:

/*        If K > N, exit from loop */

	if (k > *n) {
	    goto L40;
	}

	if (k <= *n - nb) {

/*           Factorize columns k:k+kb-1 of A and use blocked code 
to   
             update columns k+kb:n */

	    i__1 = *n - k + 1;
	    clahef_(uplo, &i__1, &nb, &kb, &A(k,k), lda, &IPIV(k), 
		    &WORK(1), n, &iinfo);
	} else {

/*           Use unblocked code to factorize columns k:n of A */

	    i__1 = *n - k + 1;
	    chetf2_(uplo, &i__1, &A(k,k), lda, &IPIV(k), &iinfo);
	    kb = *n - k + 1;
	}

/*        Set INFO on the first occurrence of a zero pivot */

	if (*info == 0 && iinfo > 0) {
	    *info = iinfo + k - 1;
	}

/*        Adjust IPIV */

	i__1 = k + kb - 1;
	for (j = k; j <= k+kb-1; ++j) {
	    if (IPIV(j) > 0) {
		IPIV(j) = IPIV(j) + k - 1;
	    } else {
		IPIV(j) = IPIV(j) - k + 1;
	    }
/* L30: */
	}

/*        Increase K and return to the start of the main loop */

	k += kb;
	goto L20;

    }

L40:
    WORK(1).r = (sreal) iws, WORK(1).i = 0.f;
    return 0;

/*     End of CHETRF */

} /* chetrf_ */


int chetrs_(char *uplo, integer *n, integer *nrhs, complx *
	a, integer *lda, integer *ipiv, complx *b, integer *ldb, integer *
	info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CHETRS solves a system of linear equations A*X = B with a complex   
    Hermitian matrix A using the factorization A = U*D*U**H or   
    A = L*D*L**H computed by CHETRF.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the details of the factorization are stored 
  
            as an upper or lower triangular matrix.   
            = 'U':  Upper triangular, form is A = U*D*U**H;   
            = 'L':  Lower triangular, form is A = L*D*L**H.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrix B.  NRHS >= 0.   

    A       (input) COMPLEX array, dimension (LDA,N)   
            The block diagonal matrix D and the multipliers used to   
            obtain the factor U or L as computed by CHETRF.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (input) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D   
            as determined by CHETRF.   

    B       (input/output) COMPLEX array, dimension (LDB,NRHS)   
            On entry, the right hand side matrix B.   
            On exit, the solution matrix X.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    ===================================================================== 
  


    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
  //static complx c_b1 = {float(1),float(0)};
  static complx c_b1;
  c_b1.r=1.0;
  c_b1.i=0.0;
  static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1=0, i__1;
    /* integer a_offset, b_offset; */
    complx q__1, q__2, q__3;
 
    /* Local variables */
    static complx akm1k;
    static integer j, k;
    static sreal s;
    static complx denom;
    static logical upper;
    static complx ak, bk;
    static integer kp;
    static complx akm1, bkm1;



#define IPIV(I) ipiv[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHETRS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	return 0;
    }

    if (upper) {

/*        Solve A*X = B, where A = U*D*U'.   

          First solve U*D*X = B, overwriting B with X.   

          K is the main loop index, decreasing from N to 1 in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = *n;
L10:

/*        If K < 1, exit from loop. */

	if (k < 1) {
	    goto L30;
	}

	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Interchange rows K and IPIV(K). */

	    kp = IPIV(k);
	    if (kp != k) {
		cswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }

/*           Multiply by inv(U(K)), where U(K) is the transformati
on   
             stored in column K of A. */

	    i__1 = k - 1;
	    q__1.r = -1.f, q__1.i = 0.f;
	    cgeru_(&i__1, nrhs, &q__1, &A(1,k), &c__1, &B(k,1), ldb, &B(1,1), ldb);

/*           Multiply by the inverse of the diagonal block. */

	    i__1 = k + k * a_dim1;
	    s = 1.f / A(k,k).r;
	    csscal_(nrhs, &s, &B(k,1), ldb);
	    --k;
	} else {

/*           2 x 2 diagonal block   

             Interchange rows K-1 and -IPIV(K). */

	    kp = -IPIV(k);
	    if (kp != k - 1) {
		cswap_(nrhs, &B(k-1,1), ldb, &B(kp,1), ldb);
	    }

/*           Multiply by inv(U(K)), where U(K) is the transformati
on   
             stored in columns K-1 and K of A. */

	    i__1 = k - 2;
	    q__1.r = -1.f, q__1.i = 0.f;
	    cgeru_(&i__1, nrhs, &q__1, &A(1,k), &c__1, &B(k,1), ldb, &B(1,1), ldb);
	    i__1 = k - 2;
	    q__1.r = -1.f, q__1.i = 0.f;
	    cgeru_(&i__1, nrhs, &q__1, &A(1,k-1), &c__1, &B(k-1,1), ldb, &B(1,1), ldb);

/*           Multiply by the inverse of the diagonal block. */

	    i__1 = k - 1 + k * a_dim1;
	    akm1k.r = A(k-1,k).r, akm1k.i = A(k-1,k).i;
	    c_div(&q__1, &A(k-1,k-1), &akm1k);
	    akm1.r = q__1.r, akm1.i = q__1.i;
	    r_cnjg(&q__2, &akm1k);
	    c_div(&q__1, &A(k,k), &q__2);
	    ak.r = q__1.r, ak.i = q__1.i;
	    q__2.r = akm1.r * ak.r - akm1.i * ak.i, q__2.i = akm1.r * ak.i + 
		    akm1.i * ak.r;
	    q__1.r = q__2.r - 1.f, q__1.i = q__2.i + 0.f;
	    denom.r = q__1.r, denom.i = q__1.i;
	    i__1 = *nrhs;
	    for (j = 1; j <= *nrhs; ++j) {
		c_div(&q__1, &B(k-1,j), &akm1k);
		bkm1.r = q__1.r, bkm1.i = q__1.i;
		r_cnjg(&q__2, &akm1k);
		c_div(&q__1, &B(k,j), &q__2);
		bk.r = q__1.r, bk.i = q__1.i;
		//i__2 = k - 1 + j * b_dim1;
		q__3.r = ak.r * bkm1.r - ak.i * bkm1.i, q__3.i = ak.r * 
			bkm1.i + ak.i * bkm1.r;
		q__2.r = q__3.r - bk.r, q__2.i = q__3.i - bk.i;
		c_div(&q__1, &q__2, &denom);
		B(k-1,j).r = q__1.r, B(k-1,j).i = q__1.i;
		//i__2 = k + j * b_dim1;
		q__3.r = akm1.r * bk.r - akm1.i * bk.i, q__3.i = akm1.r * 
			bk.i + akm1.i * bk.r;
		q__2.r = q__3.r - bkm1.r, q__2.i = q__3.i - bkm1.i;
		c_div(&q__1, &q__2, &denom);
		B(k,j).r = q__1.r, B(k,j).i = q__1.i;
/* L20: */
	    }
	    k += -2;
	}

	goto L10;
L30:

/*        Next solve U'*X = B, overwriting B with X.   

          K is the main loop index, increasing from 1 to N in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
L40:

/*        If K > N, exit from loop. */

	if (k > *n) {
	    goto L50;
	}

	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Multiply by inv(U'(K)), where U(K) is the transformat
ion   
             stored in column K of A. */

	    if (k > 1) {
		clacgv_(nrhs, &B(k,1), ldb);
		i__1 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &B(1,1)
			, ldb, &A(1,k), &c__1, &c_b1, &B(k,1), ldb);
		clacgv_(nrhs, &B(k,1), ldb);
	    }

/*           Interchange rows K and IPIV(K). */

	    kp = IPIV(k);
	    if (kp != k) {
		cswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }
	    ++k;
	} else {

/*           2 x 2 diagonal block   

             Multiply by inv(U'(K+1)), where U(K+1) is the transfo
rmation   
             stored in columns K and K+1 of A. */

	    if (k > 1) {
		clacgv_(nrhs, &B(k,1), ldb);
		i__1 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &B(1,1)
			, ldb, &A(1,k), &c__1, &c_b1, &B(k,1), ldb);
		clacgv_(nrhs, &B(k,1), ldb);

		clacgv_(nrhs, &B(k+1,1), ldb);
		i__1 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &B(1,1)
			, ldb, &A(1,k+1), &c__1, &c_b1, &B(k+1,1), ldb);
		clacgv_(nrhs, &B(k+1,1), ldb);
	    }

/*           Interchange rows K and -IPIV(K). */

	    kp = -IPIV(k);
	    if (kp != k) {
		cswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }
	    k += 2;
	}

	goto L40;
L50:

	;
    } else {

/*        Solve A*X = B, where A = L*D*L'.   

          First solve L*D*X = B, overwriting B with X.   

          K is the main loop index, increasing from 1 to N in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
L60:

/*        If K > N, exit from loop. */

	if (k > *n) {
	    goto L80;
	}

	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Interchange rows K and IPIV(K). */

	    kp = IPIV(k);
	    if (kp != k) {
		cswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }

/*           Multiply by inv(L(K)), where L(K) is the transformati
on   
             stored in column K of A. */

	    if (k < *n) {
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		cgeru_(&i__1, nrhs, &q__1, &A(k+1,k), &c__1, &B(k,1), ldb, &B(k+1,1), ldb);
	    }

/*           Multiply by the inverse of the diagonal block. */

	    i__1 = k + k * a_dim1;
	    s = 1.f / A(k,k).r;
	    csscal_(nrhs, &s, &B(k,1), ldb);
	    ++k;
	} else {

/*           2 x 2 diagonal block   

             Interchange rows K+1 and -IPIV(K). */

	    kp = -IPIV(k);
	    if (kp != k + 1) {
		cswap_(nrhs, &B(k+1,1), ldb, &B(kp,1), ldb);
	    }

/*           Multiply by inv(L(K)), where L(K) is the transformati
on   
             stored in columns K and K+1 of A. */

	    if (k < *n - 1) {
		i__1 = *n - k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgeru_(&i__1, nrhs, &q__1, &A(k+2,k), &c__1, &B(k,1), ldb, &B(k+2,1), ldb);
		i__1 = *n - k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgeru_(&i__1, nrhs, &q__1, &A(k+2,k+1), &
			c__1, &B(k+1,1), ldb, &B(k+2,1), 
			ldb);
	    }

/*           Multiply by the inverse of the diagonal block. */

	    i__1 = k + 1 + k * a_dim1;
	    akm1k.r = A(k+1,k).r, akm1k.i = A(k+1,k).i;
	    r_cnjg(&q__2, &akm1k);
	    c_div(&q__1, &A(k,k), &q__2);
	    akm1.r = q__1.r, akm1.i = q__1.i;
	    c_div(&q__1, &A(k+1,k+1), &akm1k);
	    ak.r = q__1.r, ak.i = q__1.i;
	    q__2.r = akm1.r * ak.r - akm1.i * ak.i, q__2.i = akm1.r * ak.i + 
		    akm1.i * ak.r;
	    q__1.r = q__2.r - 1.f, q__1.i = q__2.i + 0.f;
	    denom.r = q__1.r, denom.i = q__1.i;
	    i__1 = *nrhs;
	    for (j = 1; j <= *nrhs; ++j) {
		r_cnjg(&q__2, &akm1k);
		c_div(&q__1, &B(k,j), &q__2);
		bkm1.r = q__1.r, bkm1.i = q__1.i;
		c_div(&q__1, &B(k+1,j), &akm1k);
		bk.r = q__1.r, bk.i = q__1.i;
		//i__2 = k + j * b_dim1;
		q__3.r = ak.r * bkm1.r - ak.i * bkm1.i, q__3.i = ak.r * 
			bkm1.i + ak.i * bkm1.r;
		q__2.r = q__3.r - bk.r, q__2.i = q__3.i - bk.i;
		c_div(&q__1, &q__2, &denom);
		B(k,j).r = q__1.r, B(k,j).i = q__1.i;
		//i__2 = k + 1 + j * b_dim1;
		q__3.r = akm1.r * bk.r - akm1.i * bk.i, q__3.i = akm1.r * 
			bk.i + akm1.i * bk.r;
		q__2.r = q__3.r - bkm1.r, q__2.i = q__3.i - bkm1.i;
		c_div(&q__1, &q__2, &denom);
		B(k+1,j).r = q__1.r, B(k+1,j).i = q__1.i;
/* L70: */
	    }
	    k += 2;
	}

	goto L60;
L80:

/*        Next solve L'*X = B, overwriting B with X.   

          K is the main loop index, decreasing from N to 1 in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = *n;
L90:

/*        If K < 1, exit from loop. */

	if (k < 1) {
	    goto L100;
	}

	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Multiply by inv(L'(K)), where L(K) is the transformat
ion   
             stored in column K of A. */

	    if (k < *n) {
		clacgv_(nrhs, &B(k,1), ldb);
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &B(k+1,1), ldb, &A(k+1,k), &c__1, &c_b1, &
			B(k,1), ldb);
		clacgv_(nrhs, &B(k,1), ldb);
	    }

/*           Interchange rows K and IPIV(K). */

	    kp = IPIV(k);
	    if (kp != k) {
		cswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }
	    --k;
	} else {

/*           2 x 2 diagonal block   

             Multiply by inv(L'(K-1)), where L(K-1) is the transfo
rmation   
             stored in columns K-1 and K of A. */

	    if (k < *n) {
		clacgv_(nrhs, &B(k,1), ldb);
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &B(k+1,1), ldb, &A(k+1,k), &c__1, &c_b1, &
			B(k,1), ldb);
		clacgv_(nrhs, &B(k,1), ldb);

		clacgv_(nrhs, &B(k-1,1), ldb);
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &B(k+1,1), ldb, &A(k+1,k-1), &c__1, &
			c_b1, &B(k-1,1), ldb);
		clacgv_(nrhs, &B(k-1,1), ldb);
	    }

/*           Interchange rows K and -IPIV(K). */

	    kp = -IPIV(k);
	    if (kp != k) {
		cswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }
	    k += -2;
	}

	goto L90;
L100:
	;
    }

    return 0;

/*     End of CHETRS */

} /* chetrs_ */

int clacgv_(integer *n, complx *x, integer *incx)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1992   


    Purpose   
    =======   

    CLACGV conjugates a complex vector of length N.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The length of the vector X.  N >= 0.   

    X       (input/output) COMPLEX array, dimension   
                           (1+(N-1)*abs(INCX))   
            On entry, the vector of length N to be conjugated.   
            On exit, X is overwritten with conjg(X).   

    INCX    (input) INTEGER   
            The spacing between successive elements of X.   

   ===================================================================== 
  


    
   Parameter adjustments   
       Function Body */
    /* System generated locals */
    //integer i__1, i__2;
    complx q__1;
    /* Local variables */
    static integer ioff, i;


#define X(I) x[(I)-1]


    if (*incx == 1) {
        //i__1 = *n;
	for (i = 1; i <= *n; ++i) {
	    //i__2 = i;
	    r_cnjg(&q__1, &X(i));
	    X(i).r = q__1.r, X(i).i = q__1.i;
/* L10: */
	}
    } else {
	ioff = 1;
	if (*incx < 0) {
	    ioff = 1 - (*n - 1) * *incx;
	}
	//i__1 = *n;
	for (i = 1; i <= *n; ++i) {
	    //i__2 = ioff;
	    r_cnjg(&q__1, &X(ioff));
	    X(ioff).r = q__1.r, X(ioff).i = q__1.i;
	    ioff += *incx;
/* L20: */
	}
    }
    return 0;

/*     End of CLACGV */

} /* clacgv_ */

int claev2_(complx *a, complx *b, complx *c, sreal *rt1, 
	sreal *rt2, sreal *cs1, complx *sn1)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1992   


    Purpose   
    =======   

    CLAEV2 computes the eigendecomposition of a 2-by-2 Hermitian matrix   
       [  A         B  ]   
       [  CONJG(B)  C  ].   
    On return, RT1 is the eigenvalue of larger absolute value, RT2 is the 
  
    eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right 
  
    eigenvector for RT1, giving the decomposition   

    [ CS1  CONJG(SN1) ] [    A     B ] [ CS1 -CONJG(SN1) ] = [ RT1  0  ] 
  
    [-SN1     CS1     ] [ CONJG(B) C ] [ SN1     CS1     ]   [  0  RT2 ]. 
  

    Arguments   
    =========   

    A      (input) COMPLEX   
           The (1,1) element of the 2-by-2 matrix.   

    B      (input) COMPLEX   
           The (1,2) element and the conjugate of the (2,1) element of   
           the 2-by-2 matrix.   

    C      (input) COMPLEX   
           The (2,2) element of the 2-by-2 matrix.   

    RT1    (output) REAL   
           The eigenvalue of larger absolute value.   

    RT2    (output) REAL   
           The eigenvalue of smaller absolute value.   

    CS1    (output) REAL   
    SN1    (output) COMPLEX   
           The vector (CS1, SN1) is a unit right eigenvector for RT1.   

    Further Details   
    ===============   

    RT1 is accurate to a few ulps barring over/underflow.   

    RT2 may be inaccurate if there is massive cancellation in the   
    determinant A*C-B*B; higher precision or correctly rounded or   
    correctly truncated arithmetic would be needed to compute RT2   
    accurately in all cases.   

    CS1 and SN1 are accurate to a few ulps barring over/underflow.   

    Overflow is possible only if RT1 is within a factor of 5 of overflow. 
  
    Underflow is harmless if the input data is 0 or exceeds   
       underflow_threshold / macheps.   

   ===================================================================== 
*/
    /* System generated locals */
    sreal r__1, r__2, r__3;
    doublereal d__1;
    complx q__1, q__2;
    /* Local variables */
    static sreal t;
    static complx w;



    if (c_abs(b) == 0.f) {
	w.r = 1.f, w.i = 0.f;
    } else {
	r_cnjg(&q__2, b);
	d__1 = c_abs(b);
	q__1.r = q__2.r / d__1, q__1.i = q__2.i / d__1;
	w.r = q__1.r, w.i = q__1.i;
    }
    r__1 = a->r;
    r__2 = c_abs(b);
    r__3 = c->r;
    slaev2_(&r__1, &r__2, &r__3, rt1, rt2, cs1, &t);
    q__1.r = t * w.r, q__1.i = t * w.i;
    sn1->r = q__1.r, sn1->i = q__1.i;
    return 0;

/*     End of CLAEV2 */

} /* claev2_ */

int clahef_(char *uplo, integer *n, integer *nb, integer *kb,
	 complx *a, integer *lda, integer *ipiv, complx *w, integer *ldw, 
	integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CLAHEF computes a partial factorization of a complex Hermitian   
    matrix A using the Bunch-Kaufman diagonal pivoting method. The   
    partial factorization has the form:   

    A  =  ( I  U12 ) ( A11  0  ) (  I    0   )  if UPLO = 'U', or:   
          ( 0  U22 ) (  0   D  ) ( U12' U22' )   

    A  =  ( L11  0 ) (  D   0  ) ( L11' L21' )  if UPLO = 'L'   
          ( L21  I ) (  0  A22 ) (  0    I   )   

    where the order of D is at most NB. The actual order is returned in   
    the argument KB, and is either NB or NB-1, or N if N <= NB.   
    Note that U' denotes the conjugate transpose of U.   

    CLAHEF is an auxiliary routine called by CHETRF. It uses blocked code 
  
    (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or 
  
    A22 (if UPLO = 'L').   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the upper or lower triangular part of the   
            Hermitian matrix A is stored:   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NB      (input) INTEGER   
            The maximum number of columns of the matrix A that should be 
  
            factored.  NB should be at least 2 to allow for 2-by-2 pivot 
  
            blocks.   

    KB      (output) INTEGER   
            The number of columns of A that were actually factored.   
            KB is either NB-1 or NB, or N if N <= NB.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading 
  
            n-by-n upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading n-by-n lower triangular part of A contains the lower 
  
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   
            On exit, A contains details of the partial factorization.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (output) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D.   
            If UPLO = 'U', only the last KB elements of IPIV are set;   
            if UPLO = 'L', only the first KB elements are set.   

            If IPIV(k) > 0, then rows and columns k and IPIV(k) were   
            interchanged and D(k,k) is a 1-by-1 diagonal block.   
            If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and   
            columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) 
  
            is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =   
            IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were   
            interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.   

    W       (workspace) COMPLEX array, dimension (LDW,NB)   

    LDW     (input) INTEGER   
            The leading dimension of the array W.  LDW >= max(1,N).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            > 0: if INFO = k, D(k,k) is exactly zero.  The factorization 
  
                 has been completed, but the block diagonal matrix D is   
                 exactly singular.   

    ===================================================================== 
  


    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static complx c_b1 = {1.f,0.f};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1=0, w_dim1=0, i__1, i__2, i__3, i__4, i__5;
    /* integer a_offset, w_offset; */
    sreal r__1, r__2, r__3, r__4;
    doublereal d__1;
    complx q__1, q__2, q__3, q__4;
    /* Local variables */
    static integer imax, jmax, j, k;
    static sreal t, alpha;

    static integer kstep;
    static sreal r1;
    static complx d11, d21, d22;
    static integer jb, jj, kk, jp, kp;
    static sreal absakk;
    static integer kw;
    static sreal colmax, rowmax;
    static integer kkw;



#define IPIV(I) ipiv[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define W(I,J) w[(I)-1 + ((J)-1)* ( *ldw)]

    *info = 0;

/*     Initialize ALPHA for use in choosing pivot block size. */

    alpha = (sqrt(17.f) + 1.f) / 8.f;

    if (lsame_(uplo, "U")) {

/*        Factorize the trailing columns of A using the upper triangle
   
          of A and working backwards, and compute the matrix W = U12*D
   
          for use in updating A11 (note that conjg(W) is actually stor
ed)   

          K is the main loop index, decreasing from N in steps of 1 or
 2   

          KW is the column of W which corresponds to column K of A */

	k = *n;
L10:
	kw = *nb + k - *n;

/*        Exit from loop */

	if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
	    goto L30;
	}

/*        Copy column K of A to column KW of W and update it */

	i__1 = k - 1;
	ccopy_(&i__1, &A(1,k), &c__1, &W(1,kw), &c__1);
	i__1 = k + kw * w_dim1;
	i__2 = k + k * a_dim1;
	d__1 = A(k,k).r;
	W(k,kw).r = d__1, W(k,kw).i = 0.f;
	if (k < *n) {
	    i__1 = *n - k;
	    q__1.r = -1.f, q__1.i = 0.f;
	    cgemv_("No transpose", &k, &i__1, &q__1, &A(1,k+1),
		     lda, &W(k,kw+1), ldw, &c_b1, &W(1,kw), &c__1);
	    i__1 = k + kw * w_dim1;
	    i__2 = k + kw * w_dim1;
	    d__1 = W(k,kw).r;
	    W(k,kw).r = d__1, W(k,kw).i = 0.f;
	}

	kstep = 1;

/*        Determine rows and columns to be interchanged and whether   
          a 1-by-1 or 2-by-2 pivot block will be used */

	i__1 = k + kw * w_dim1;
	absakk = (r__1 = W(k,kw).r, fabs(r__1));

/*        IMAX is the row-index of the largest off-diagonal element in
   
          column K, and COLMAX is its absolute value */

	if (k > 1) {
	    i__1 = k - 1;
	    imax = icamax_(&i__1, &W(1,kw), &c__1);
	    i__1 = imax + kw * w_dim1;
	    colmax = (r__1 = W(imax,kw).r, fabs(r__1)) + (r__2 = r_imag(&W(imax,kw)), fabs(r__2));
	} else {
	    colmax = 0.f;
	}

	if (dmax(absakk,colmax) == 0.f) {

/*           Column K is zero: set INFO and continue */

	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	    i__1 = k + k * a_dim1;
	    i__2 = k + k * a_dim1;
	    d__1 = A(k,k).r;
	    A(k,k).r = d__1, A(k,k).i = 0.f;
	} else {
	    if (absakk >= alpha * colmax) {

/*              no interchange, use 1-by-1 pivot block */

		kp = k;
	    } else {

/*              Copy column IMAX to column KW-1 of W and updat
e it */

		i__1 = imax - 1;
		ccopy_(&i__1, &A(1,imax), &c__1, &W(1,kw-1), &c__1);
		i__1 = imax + (kw - 1) * w_dim1;
		i__2 = imax + imax * a_dim1;
		d__1 = A(imax,imax).r;
		W(imax,kw-1).r = d__1, W(imax,kw-1).i = 0.f;
		i__1 = k - imax;
		ccopy_(&i__1, &A(imax,imax+1), lda, &W(imax+1,kw-1), &c__1);
		i__1 = k - imax;
		clacgv_(&i__1, &W(imax+1,kw-1), &c__1);
		if (k < *n) {
		    i__1 = *n - k;
		    q__1.r = -1.f, q__1.i = 0.f;
		    cgemv_("No transpose", &k, &i__1, &q__1, &A(1,k+1), lda, &W(imax,kw+1), 
			    ldw, &c_b1, &W(1,kw-1), &c__1)
			    ;
		    i__1 = imax + (kw - 1) * w_dim1;
		    i__2 = imax + (kw - 1) * w_dim1;
		    d__1 = W(imax,kw-1).r;
		    W(imax,kw-1).r = d__1, W(imax,kw-1).i = 0.f;
		}

/*              JMAX is the column-index of the largest off-di
agonal   
                element in row IMAX, and ROWMAX is its absolut
e value */

		i__1 = k - imax;
		jmax = imax + icamax_(&i__1, &W(imax+1,kw-1),
			 &c__1);
		i__1 = jmax + (kw - 1) * w_dim1;
		rowmax = (r__1 = W(jmax,kw-1).r, fabs(r__1)) + (r__2 = r_imag(&W(jmax,kw-1)), fabs(r__2));
		if (imax > 1) {
		    i__1 = imax - 1;
		    jmax = icamax_(&i__1, &W(1,kw-1), &c__1);
/* Computing MAX */
		    i__1 = jmax + (kw - 1) * w_dim1;
		    r__3 = rowmax, r__4 = (r__1 = W(jmax,kw-1).r, fabs(r__1)) + (
			    r__2 = r_imag(&W(jmax,kw-1)), fabs(
			    r__2));
		    rowmax = dmax(r__3,r__4);
		}

		if (absakk >= alpha * colmax * (colmax / rowmax)) {

/*                 no interchange, use 1-by-1 pivot block 
*/

		    kp = k;
		} else /* if(complicated condition) */ {
		    i__1 = imax + (kw - 1) * w_dim1;
		    if ((r__1 = W(imax,kw-1).r, fabs(r__1)) >= alpha * rowmax) {

/*                 interchange rows and columns K and 
IMAX, use 1-by-1   
                   pivot block */

			kp = imax;

/*                 copy column KW-1 of W to column KW 
*/

			ccopy_(&k, &W(1,kw-1), &c__1, &W(1,kw), &c__1);
		    } else {

/*                 interchange rows and columns K-1 an
d IMAX, use 2-by-2   
                   pivot block */

			kp = imax;
			kstep = 2;
		    }
		}
	    }

	    kk = k - kstep + 1;
	    kkw = *nb + kk - *n;

/*           Updated column KP is already stored in column KKW of 
W */

	    if (kp != kk) {

/*              Copy non-updated column KK to column KP */

		i__1 = kp + kp * a_dim1;
		i__2 = kk + kk * a_dim1;
		d__1 = A(kk,kk).r;
		A(kp,kp).r = d__1, A(kp,kp).i = 0.f;
		i__1 = kk - 1 - kp;
		ccopy_(&i__1, &A(kp+1,kk), &c__1, &A(kp,kp+1), lda);
		i__1 = kk - 1 - kp;
		clacgv_(&i__1, &A(kp,kp+1), lda);
		i__1 = kp - 1;
		ccopy_(&i__1, &A(1,kk), &c__1, &A(1,kp),
			 &c__1);

/*              Interchange rows KK and KP in last KK columns 
of A and W */

		if (kk < *n) {
		    i__1 = *n - kk;
		    cswap_(&i__1, &A(kk,kk+1), lda, &A(kp,kk+1), lda);
		}
		i__1 = *n - kk + 1;
		cswap_(&i__1, &W(kk,kkw), ldw, &W(kp,kkw), ldw);
	    }

	    if (kstep == 1) {

/*              1-by-1 pivot block D(k): column KW of W now ho
lds   

                W(k) = U(k)*D(k)   

                where U(k) is the k-th column of U   

                Store U(k) in column k of A */

		ccopy_(&k, &W(1,kw), &c__1, &A(1,k), &
			c__1);
		i__1 = k + k * a_dim1;
		r1 = 1.f / A(k,k).r;
		i__1 = k - 1;
		csscal_(&i__1, &r1, &A(1,k), &c__1);

/*              Conjugate W(k) */

		i__1 = k - 1;
		clacgv_(&i__1, &W(1,kw), &c__1);
	    } else {

/*              2-by-2 pivot block D(k): columns KW and KW-1 o
f W now   
                hold   

                ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)   

                where U(k) and U(k-1) are the k-th and (k-1)-t
h columns   
                of U */

		if (k > 2) {

/*                 Store U(k) and U(k-1) in columns k and 
k-1 of A */

		    i__1 = k - 1 + kw * w_dim1;
		    d21.r = W(k-1,kw).r, d21.i = W(k-1,kw).i;
		    r_cnjg(&q__2, &d21);
		    c_div(&q__1, &W(k,kw), &q__2);
		    d11.r = q__1.r, d11.i = q__1.i;
		    c_div(&q__1, &W(k-1,kw-1), &d21);
		    d22.r = q__1.r, d22.i = q__1.i;
		    q__1.r = d11.r * d22.r - d11.i * d22.i, q__1.i = d11.r * 
			    d22.i + d11.i * d22.r;
		    t = 1.f / (q__1.r - 1.f);
		    q__2.r = t, q__2.i = 0.f;
		    c_div(&q__1, &q__2, &d21);
		    d21.r = q__1.r, d21.i = q__1.i;
		    i__1 = k - 2;
		    for (j = 1; j <= k-2; ++j) {
			i__2 = j + (k - 1) * a_dim1;
			i__3 = j + (kw - 1) * w_dim1;
			q__3.r = d11.r * W(j,kw-1).r - d11.i * W(j,kw-1).i, 
				q__3.i = d11.r * W(j,kw-1).i + d11.i * W(j,kw-1)
				.r;
			i__4 = j + kw * w_dim1;
			q__2.r = q__3.r - W(j,kw).r, q__2.i = q__3.i - W(j,kw)
				.i;
			q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i = 
				d21.r * q__2.i + d21.i * q__2.r;
			A(j,k-1).r = q__1.r, A(j,k-1).i = q__1.i;
			i__2 = j + k * a_dim1;
			r_cnjg(&q__2, &d21);
			i__3 = j + kw * w_dim1;
			q__4.r = d22.r * W(j,kw).r - d22.i * W(j,kw).i, 
				q__4.i = d22.r * W(j,kw).i + d22.i * W(j,kw)
				.r;
			i__4 = j + (kw - 1) * w_dim1;
			q__3.r = q__4.r - W(j,kw-1).r, q__3.i = q__4.i - W(j,kw-1)
				.i;
			q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = 
				q__2.r * q__3.i + q__2.i * q__3.r;
			A(j,k).r = q__1.r, A(j,k).i = q__1.i;
/* L20: */
		    }
		}

/*              Copy D(k) to A */

		i__1 = k - 1 + (k - 1) * a_dim1;
		i__2 = k - 1 + (kw - 1) * w_dim1;
		A(k-1,k-1).r = W(k-1,kw-1).r, A(k-1,k-1).i = W(k-1,kw-1).i;
		i__1 = k - 1 + k * a_dim1;
		i__2 = k - 1 + kw * w_dim1;
		A(k-1,k).r = W(k-1,kw).r, A(k-1,k).i = W(k-1,kw).i;
		i__1 = k + k * a_dim1;
		i__2 = k + kw * w_dim1;
		A(k,k).r = W(k,kw).r, A(k,k).i = W(k,kw).i;

/*              Conjugate W(k) and W(k-1) */

		i__1 = k - 1;
		clacgv_(&i__1, &W(1,kw), &c__1);
		i__1 = k - 2;
		clacgv_(&i__1, &W(1,kw-1), &c__1);
	    }
	}

/*        Store details of the interchanges in IPIV */

	if (kstep == 1) {
	    IPIV(k) = kp;
	} else {
	    IPIV(k) = -kp;
	    IPIV(k - 1) = -kp;
	}

/*        Decrease K and return to the start of the main loop */

	k -= kstep;
	goto L10;

L30:

/*        Update the upper triangle of A11 (= A(1:k,1:k)) as   

          A11 := A11 - U12*D*U12' = A11 - U12*W'   

          computing blocks of NB columns at a time (note that conjg(W)
 is   
          actually stored) */

	i__1 = -(*nb);
	for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += 
		i__1) {
/* Computing MIN */
	    i__2 = *nb, i__3 = k - j + 1;
	    jb = min(i__2,i__3);

/*           Update the upper triangle of the diagonal block */

	    i__2 = j + jb - 1;
	    for (jj = j; jj <= j+jb-1; ++jj) {
		i__3 = jj + jj * a_dim1;
		i__4 = jj + jj * a_dim1;
		d__1 = A(jj,jj).r;
		A(jj,jj).r = d__1, A(jj,jj).i = 0.f;
		i__3 = jj - j + 1;
		i__4 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("No transpose", &i__3, &i__4, &q__1, &A(j,k+1), lda, &W(jj,kw+1), ldw, &c_b1, 
			&A(j,jj), &c__1);
		i__3 = jj + jj * a_dim1;
		i__4 = jj + jj * a_dim1;
		d__1 = A(jj,jj).r;
		A(jj,jj).r = d__1, A(jj,jj).i = 0.f;
/* L40: */
	    }

/*           Update the rectangular superdiagonal block */

	    i__2 = j - 1;
	    i__3 = *n - k;
	    q__1.r = -1.f, q__1.i = 0.f;
	    cgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &q__1, &A(1,k+1), lda, &W(j,kw+1), ldw,
		     &c_b1, &A(1,j), lda);
/* L50: */
	}

/*        Put U12 in standard form by partially undoing the interchang
es   
          in columns k+1:n */

	j = k + 1;
L60:
	jj = j;
	jp = IPIV(j);
	if (jp < 0) {
	    jp = -jp;
	    ++j;
	}
	++j;
	if (jp != jj && j <= *n) {
	    i__1 = *n - j + 1;
	    cswap_(&i__1, &A(jp,j), lda, &A(jj,j), lda);
	}
	if (j <= *n) {
	    goto L60;
	}

/*        Set KB to the number of columns factorized */

	*kb = *n - k;

    } else {

/*        Factorize the leading columns of A using the lower triangle 
  
          of A and working forwards, and compute the matrix W = L21*D 
  
          for use in updating A22 (note that conjg(W) is actually stor
ed)   

          K is the main loop index, increasing from 1 in steps of 1 or
 2 */

	k = 1;
L70:

/*        Exit from loop */

	if ((k >= *nb && *nb < *n) || k > *n) {
	    goto L90;
	}

/*        Copy column K of A to column K of W and update it */

	i__1 = k + k * w_dim1;
	i__2 = k + k * a_dim1;
	d__1 = A(k,k).r;
	W(k,k).r = d__1, W(k,k).i = 0.f;
	if (k < *n) {
	    i__1 = *n - k;
	    ccopy_(&i__1, &A(k+1,k), &c__1, &W(k+1,k), &c__1);
	}
	i__1 = *n - k + 1;
	i__2 = k - 1;
	q__1.r = -1.f, q__1.i = 0.f;
	cgemv_("No transpose", &i__1, &i__2, &q__1, &A(k,1), lda, &W(k,1), ldw, &c_b1, &W(k,k), &c__1);
	i__1 = k + k * w_dim1;
	i__2 = k + k * w_dim1;
	d__1 = W(k,k).r;
	W(k,k).r = d__1, W(k,k).i = 0.f;

	kstep = 1;

/*        Determine rows and columns to be interchanged and whether   
          a 1-by-1 or 2-by-2 pivot block will be used */

	i__1 = k + k * w_dim1;
	absakk = (r__1 = W(k,k).r, fabs(r__1));

/*        IMAX is the row-index of the largest off-diagonal element in
   
          column K, and COLMAX is its absolute value */

	if (k < *n) {
	    i__1 = *n - k;
	    imax = k + icamax_(&i__1, &W(k+1,k), &c__1);
	    i__1 = imax + k * w_dim1;
	    colmax = (r__1 = W(imax,k).r, fabs(r__1)) + (r__2 = r_imag(&W(imax,k)), fabs(r__2));
	} else {
	    colmax = 0.f;
	}

	if (dmax(absakk,colmax) == 0.f) {

/*           Column K is zero: set INFO and continue */

	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	    i__1 = k + k * a_dim1;
	    i__2 = k + k * a_dim1;
	    d__1 = A(k,k).r;
	    A(k,k).r = d__1, A(k,k).i = 0.f;
	} else {
	    if (absakk >= alpha * colmax) {

/*              no interchange, use 1-by-1 pivot block */

		kp = k;
	    } else {

/*              Copy column IMAX to column K+1 of W and update
 it */

		i__1 = imax - k;
		ccopy_(&i__1, &A(imax,k), lda, &W(k,k+1), &c__1);
		i__1 = imax - k;
		clacgv_(&i__1, &W(k,k+1), &c__1);
		i__1 = imax + (k + 1) * w_dim1;
		i__2 = imax + imax * a_dim1;
		d__1 = A(imax,imax).r;
		W(imax,k+1).r = d__1, W(imax,k+1).i = 0.f;
		if (imax < *n) {
		    i__1 = *n - imax;
		    ccopy_(&i__1, &A(imax+1,imax), &c__1, &W(imax+1,k+1), &c__1);
		}
		i__1 = *n - k + 1;
		i__2 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("No transpose", &i__1, &i__2, &q__1, &A(k,1), 
			lda, &W(imax,1), ldw, &c_b1, &W(k,k+1), &c__1);
		i__1 = imax + (k + 1) * w_dim1;
		i__2 = imax + (k + 1) * w_dim1;
		d__1 = W(imax,k+1).r;
		W(imax,k+1).r = d__1, W(imax,k+1).i = 0.f;

/*              JMAX is the column-index of the largest off-di
agonal   
                element in row IMAX, and ROWMAX is its absolut
e value */

		i__1 = imax - k;
		jmax = k - 1 + icamax_(&i__1, &W(k,k+1), &c__1)
			;
		i__1 = jmax + (k + 1) * w_dim1;
		rowmax = (r__1 = W(jmax,k+1).r, fabs(r__1)) + (r__2 = r_imag(&W(jmax,k+1)), fabs(r__2));
		if (imax < *n) {
		    i__1 = *n - imax;
		    jmax = imax + icamax_(&i__1, &W(imax+1,k+1), &c__1);
/* Computing MAX */
		    i__1 = jmax + (k + 1) * w_dim1;
		    r__3 = rowmax, r__4 = (r__1 = W(jmax,k+1).r, fabs(r__1)) + (
			    r__2 = r_imag(&W(jmax,k+1)), fabs(
			    r__2));
		    rowmax = dmax(r__3,r__4);
		}

		if (absakk >= alpha * colmax * (colmax / rowmax)) {

/*                 no interchange, use 1-by-1 pivot block 
*/

		    kp = k;
		} else /* if(complicated condition) */ {
		    i__1 = imax + (k + 1) * w_dim1;
		    if ((r__1 = W(imax,k+1).r, fabs(r__1)) >= alpha * rowmax) {

/*                 interchange rows and columns K and 
IMAX, use 1-by-1   
                   pivot block */

			kp = imax;

/*                 copy column K+1 of W to column K */

			i__1 = *n - k + 1;
			ccopy_(&i__1, &W(k,k+1), &c__1, &W(k,k), &c__1);
		    } else {

/*                 interchange rows and columns K+1 an
d IMAX, use 2-by-2   
                   pivot block */

			kp = imax;
			kstep = 2;
		    }
		}
	    }

	    kk = k + kstep - 1;

/*           Updated column KP is already stored in column KK of W
 */

	    if (kp != kk) {

/*              Copy non-updated column KK to column KP */

		i__1 = kp + kp * a_dim1;
		i__2 = kk + kk * a_dim1;
		d__1 = A(kk,kk).r;
		A(kp,kp).r = d__1, A(kp,kp).i = 0.f;
		i__1 = kp - kk - 1;
		ccopy_(&i__1, &A(kk+1,kk), &c__1, &A(kp,kk+1), lda);
		i__1 = kp - kk - 1;
		clacgv_(&i__1, &A(kp,kk+1), lda);
		if (kp < *n) {
		    i__1 = *n - kp;
		    ccopy_(&i__1, &A(kp+1,kk), &c__1, &A(kp+1,kp), &c__1);
		}

/*              Interchange rows KK and KP in first KK columns
 of A and W */

		i__1 = kk - 1;
		cswap_(&i__1, &A(kk,1), lda, &A(kp,1), lda);
		cswap_(&kk, &W(kk,1), ldw, &W(kp,1), ldw);
	    }

	    if (kstep == 1) {

/*              1-by-1 pivot block D(k): column k of W now hol
ds   

                W(k) = L(k)*D(k)   

                where L(k) is the k-th column of L   

                Store L(k) in column k of A */

		i__1 = *n - k + 1;
		ccopy_(&i__1, &W(k,k), &c__1, &A(k,k), &
			c__1);
		if (k < *n) {
		    i__1 = k + k * a_dim1;
		    r1 = 1.f / A(k,k).r;
		    i__1 = *n - k;
		    csscal_(&i__1, &r1, &A(k+1,k), &c__1);

/*                 Conjugate W(k) */

		    i__1 = *n - k;
		    clacgv_(&i__1, &W(k+1,k), &c__1);
		}
	    } else {

/*              2-by-2 pivot block D(k): columns k and k+1 of 
W now hold   

                ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)   

                where L(k) and L(k+1) are the k-th and (k+1)-t
h columns   
                of L */

		if (k < *n - 1) {

/*                 Store L(k) and L(k+1) in columns k and 
k+1 of A */

		    i__1 = k + 1 + k * w_dim1;
		    d21.r = W(k+1,k).r, d21.i = W(k+1,k).i;
		    c_div(&q__1, &W(k+1,k+1), &d21);
		    d11.r = q__1.r, d11.i = q__1.i;
		    r_cnjg(&q__2, &d21);
		    c_div(&q__1, &W(k,k), &q__2);
		    d22.r = q__1.r, d22.i = q__1.i;
		    q__1.r = d11.r * d22.r - d11.i * d22.i, q__1.i = d11.r * 
			    d22.i + d11.i * d22.r;
		    t = 1.f / (q__1.r - 1.f);
		    q__2.r = t, q__2.i = 0.f;
		    c_div(&q__1, &q__2, &d21);
		    d21.r = q__1.r, d21.i = q__1.i;
		    i__1 = *n;
		    for (j = k + 2; j <= *n; ++j) {
			i__2 = j + k * a_dim1;
			r_cnjg(&q__2, &d21);
			i__3 = j + k * w_dim1;
			q__4.r = d11.r * W(j,k).r - d11.i * W(j,k).i, 
				q__4.i = d11.r * W(j,k).i + d11.i * W(j,k)
				.r;
			i__4 = j + (k + 1) * w_dim1;
			q__3.r = q__4.r - W(j,k+1).r, q__3.i = q__4.i - W(j,k+1)
				.i;
			q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = 
				q__2.r * q__3.i + q__2.i * q__3.r;
			A(j,k).r = q__1.r, A(j,k).i = q__1.i;
			i__2 = j + (k + 1) * a_dim1;
			i__3 = j + (k + 1) * w_dim1;
			q__3.r = d22.r * W(j,k+1).r - d22.i * W(j,k+1).i, 
				q__3.i = d22.r * W(j,k+1).i + d22.i * W(j,k+1)
				.r;
			i__4 = j + k * w_dim1;
			q__2.r = q__3.r - W(j,k).r, q__2.i = q__3.i - W(j,k)
				.i;
			q__1.r = d21.r * q__2.r - d21.i * q__2.i, q__1.i = 
				d21.r * q__2.i + d21.i * q__2.r;
			A(j,k+1).r = q__1.r, A(j,k+1).i = q__1.i;
/* L80: */
		    }
		}

/*              Copy D(k) to A */

		i__1 = k + k * a_dim1;
		i__2 = k + k * w_dim1;
		A(k,k).r = W(k,k).r, A(k,k).i = W(k,k).i;
		i__1 = k + 1 + k * a_dim1;
		i__2 = k + 1 + k * w_dim1;
		A(k+1,k).r = W(k+1,k).r, A(k+1,k).i = W(k+1,k).i;
		i__1 = k + 1 + (k + 1) * a_dim1;
		i__2 = k + 1 + (k + 1) * w_dim1;
		A(k+1,k+1).r = W(k+1,k+1).r, A(k+1,k+1).i = W(k+1,k+1).i;

/*              Conjugate W(k) and W(k+1) */

		i__1 = *n - k;
		clacgv_(&i__1, &W(k+1,k), &c__1);
		i__1 = *n - k - 1;
		clacgv_(&i__1, &W(k+2,k+1), &c__1);
	    }
	}

/*        Store details of the interchanges in IPIV */

	if (kstep == 1) {
	    IPIV(k) = kp;
	} else {
	    IPIV(k) = -kp;
	    IPIV(k + 1) = -kp;
	}

/*        Increase K and return to the start of the main loop */

	k += kstep;
	goto L70;

L90:

/*        Update the lower triangle of A22 (= A(k:n,k:n)) as   

          A22 := A22 - L21*D*L21' = A22 - L21*W'   

          computing blocks of NB columns at a time (note that conjg(W)
 is   
          actually stored) */

	i__1 = *n;
	i__2 = *nb;
	for (j = k; *nb < 0 ? j >= *n : j <= *n; j += *nb) {
/* Computing MIN */
	    i__3 = *nb, i__4 = *n - j + 1;
	    jb = min(i__3,i__4);

/*           Update the lower triangle of the diagonal block */

	    i__3 = j + jb - 1;
	    for (jj = j; jj <= j+jb-1; ++jj) {
		i__4 = jj + jj * a_dim1;
		i__5 = jj + jj * a_dim1;
		d__1 = A(jj,jj).r;
		A(jj,jj).r = d__1, A(jj,jj).i = 0.f;
		i__4 = j + jb - jj;
		i__5 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemv_("No transpose", &i__4, &i__5, &q__1, &A(jj,1), 
			lda, &W(jj,1), ldw, &c_b1, &A(jj,jj)
			, &c__1);
		i__4 = jj + jj * a_dim1;
		i__5 = jj + jj * a_dim1;
		d__1 = A(jj,jj).r;
		A(jj,jj).r = d__1, A(jj,jj).i = 0.f;
/* L100: */
	    }

/*           Update the rectangular subdiagonal block */

	    if (j + jb <= *n) {
		i__3 = *n - j - jb + 1;
		i__4 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &q__1, 
			&A(j+jb,1), lda, &W(j,1), ldw, &c_b1, 
			&A(j+jb,j), lda);
	    }
/* L110: */
	}

/*        Put L21 in standard form by partially undoing the interchang
es   
          in columns 1:k-1 */

	j = k - 1;
L120:
	jj = j;
	jp = IPIV(j);
	if (jp < 0) {
	    jp = -jp;
	    --j;
	}
	--j;
	if (jp != jj && j >= 1) {
	    cswap_(&j, &A(jp,1), lda, &A(jj,1), lda);
	}
	if (j >= 1) {
	    goto L120;
	}

/*        Set KB to the number of columns factorized */

	*kb = k - 1;

    }
    return 0;

/*     End of CLAHEF */

} /* clahef_ */

int crot_(integer *n, complx *cx, integer *incx, complx *
	cy, integer *incy, sreal *c, complx *s)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1992   


    Purpose   
    =======   

    CROT   applies a plane rotation, where the cos (C) is real and the   
    sin (S) is complex, and the vectors CX and CY are complex.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The number of elements in the vectors CX and CY.   

    CX      (input/output) COMPLEX array, dimension (N)   
            On input, the vector X.   
            On output, CX is overwritten with C*X + S*Y.   

    INCX    (input) INTEGER   
            The increment between successive values of CY.  INCX <> 0.   

    CY      (input/output) COMPLEX array, dimension (N)   
            On input, the vector Y.   
            On output, CY is overwritten with -CONJG(S)*X + C*Y.   

    INCY    (input) INTEGER   
            The increment between successive values of CY.  INCX <> 0.   

    C       (input) REAL   
    S       (input) COMPLEX   
            C and S define a rotation   
               [  C          S  ]   
               [ -conjg(S)   C  ]   
            where C*C + S*CONJG(S) = 1.0.   

   ===================================================================== 
  


    
   Parameter adjustments   
       Function Body */
    /* System generated locals */
    //integer i__1, i__2, i__3, i__4;
    complx q__1, q__2, q__3, q__4;
    /* Local variables */
    static integer i;
    static complx stemp;
    static integer ix, iy;


#define CY(I) cy[(I)-1]
#define CX(I) cx[(I)-1]


    if (*n <= 0) {
	return 0;
    }
    if (*incx == 1 && *incy == 1) {
	goto L20;
    }

/*     Code for unequal increments or equal increments not equal to 1 */

    ix = 1;
    iy = 1;
    if (*incx < 0) {
	ix = (-(*n) + 1) * *incx + 1;
    }
    if (*incy < 0) {
	iy = (-(*n) + 1) * *incy + 1;
    }
    //i__1 = *n;
    for (i = 1; i <= *n; ++i) {
        //i__2 = ix;
	q__2.r = *c * CX(ix).r, q__2.i = *c * CX(ix).i;
	//i__3 = iy;
	q__3.r = s->r * CY(iy).r - s->i * CY(iy).i, q__3.i = s->r * CY(
		iy).i + s->i * CY(iy).r;
	q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
	stemp.r = q__1.r, stemp.i = q__1.i;
	//i__2 = iy;
	//i__3 = iy;
	q__2.r = *c * CY(iy).r, q__2.i = *c * CY(iy).i;
	r_cnjg(&q__4, s);
	//i__4 = ix;
	q__3.r = q__4.r * CX(ix).r - q__4.i * CX(ix).i, q__3.i = q__4.r * 
		CX(ix).i + q__4.i * CX(ix).r;
	q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i;
	CY(iy).r = q__1.r, CY(iy).i = q__1.i;
	//i__2 = ix;
	CX(ix).r = stemp.r, CX(ix).i = stemp.i;
	ix += *incx;
	iy += *incy;
/* L10: */
    }
    return 0;

/*     Code for both increments equal to 1 */

L20:
    //i__1 = *n;
    for (i = 1; i <= *n; ++i) {
        //i__2 = i;
	q__2.r = *c * CX(i).r, q__2.i = *c * CX(i).i;
	//i__3 = i;
	q__3.r = s->r * CY(i).r - s->i * CY(i).i, q__3.i = s->r * CY(
		i).i + s->i * CY(i).r;
	q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
	stemp.r = q__1.r, stemp.i = q__1.i;
	//i__2 = i;
	//i__3 = i;
	q__2.r = *c * CY(i).r, q__2.i = *c * CY(i).i;
	r_cnjg(&q__4, s);
	//i__4 = i;
	q__3.r = q__4.r * CX(i).r - q__4.i * CX(i).i, q__3.i = q__4.r * 
		CX(i).i + q__4.i * CX(i).r;
	q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i;
	CY(i).r = q__1.r, CY(i).i = q__1.i;
	//i__2 = i;
	CX(i).r = stemp.r, CX(i).i = stemp.i;
/* L30: */
    }
    return 0;
} /* crot_ */


integer ilaenv_(integer *ispec, const char *name, const char *opts, 
		integer *n1, integer *n2, integer *n3, integer *n4, 
		ftnlen name_len, ftnlen opts_len)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    ILAENV is called from the LAPACK routines to choose problem-dependent 
  
    parameters for the local environment.  See ISPEC for a description of 
  
    the parameters.   

    This version provides a set of parameters which should give good,   
    but not optimal, performance on many of the currently available   
    computers.  Users are encouraged to modify this subroutine to set   
    the tuning parameters for their particular machine using the option   
    and problem size information in the arguments.   

    This routine will not function correctly if it is converted to all   
    lower case.  Converting it to all upper case is allowed.   

    Arguments   
    =========   

    ISPEC   (input) INTEGER   
            Specifies the parameter to be returned as the value of   
            ILAENV.   
            = 1: the optimal blocksize; if this value is 1, an unblocked 
  
                 algorithm will give the best performance.   
            = 2: the minimum block size for which the block routine   
                 should be used; if the usable block size is less than   
                 this value, an unblocked routine should be used.   
            = 3: the crossover point (in a block routine, for N less   
                 than this value, an unblocked routine should be used)   
            = 4: the number of shifts, used in the nonsymmetric   
                 eigenvalue routines   
            = 5: the minimum column dimension for blocking to be used;   
                 rectangular blocks must have dimension at least k by m, 
  
                 where k is given by ILAENV(2,...) and m by ILAENV(5,...) 
  
            = 6: the crossover point for the SVD (when reducing an m by n 
  
                 matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds 
  
                 this value, a QR factorization is used first to reduce   
                 the matrix to a triangular form.)   
            = 7: the number of processors   
            = 8: the crossover point for the multishift QR and QZ methods 
  
                 for nonsymmetric eigenvalue problems.   

    NAME    (input) CHARACTER*(*)   
            The name of the calling subroutine, in either upper case or   
            lower case.   

    OPTS    (input) CHARACTER*(*)   
            The character options to the subroutine NAME, concatenated   
            into a single character string.  For example, UPLO = 'U',   
            TRANS = 'T', and DIAG = 'N' for a triangular routine would   
            be specified as OPTS = 'UTN'.   

    N1      (input) INTEGER   
    N2      (input) INTEGER   
    N3      (input) INTEGER   
    N4      (input) INTEGER   
            Problem dimensions for the subroutine NAME; these may not all 
  
            be required.   

   (ILAENV) (output) INTEGER   
            >= 0: the value of the parameter specified by ISPEC   
            < 0:  if ILAENV = -k, the k-th argument had an illegal value. 
  

    Further Details   
    ===============   

    The following conventions have been used when calling ILAENV from the 
  
    LAPACK routines:   
    1)  OPTS is a concatenation of all of the character options to   
        subroutine NAME, in the same order that they appear in the   
        argument list for NAME, even if they are not used in determining 
  
        the value of the parameter specified by ISPEC.   
    2)  The problem dimensions N1, N2, N3, N4 are specified in the order 
  
        that they appear in the argument list for NAME.  N1 is used   
        first, N2 second, and so on, and unused problem dimensions are   
        passed a value of -1.   
    3)  The parameter value returned by ILAENV is checked for validity in 
  
        the calling subroutine.  For example, ILAENV is used to retrieve 
  
        the optimal blocksize for STRTRI as follows:   

        NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 )   
        IF( NB.LE.1 ) NB = MAX( 1, N )   

    ===================================================================== 
*/
/* >>Start of File<<   
       System generated locals */
    integer ret_val;
    /* Local variables */
    static integer i;
    static logical cname, sname;
    static integer nbmin;
    static char c1[1], c2[2], c3[3], c4[2];
    static integer ic, nb, iz, nx;
    static char subnam[6];

    switch (*ispec) {
	case 1:  goto L100;
	case 2:  goto L100;
	case 3:  goto L100;
	case 4:  goto L400;
	case 5:  goto L500;
	case 6:  goto L600;
	case 7:  goto L700;
	case 8:  goto L800;
    }

/*     Invalid value for ISPEC */

    ret_val = -1;
    return ret_val;

L100:

/*     Convert NAME to upper case if the first character is lower case. */

    ret_val = 1;
    s_copy(subnam, name, 6L, name_len);

    ic = *(unsigned char *)subnam;
    iz = 'Z';
    if (iz == 90 || iz == 122) {

/*        ASCII character set */

	if (ic >= 97 && ic <= 122) {
	    *(unsigned char *)subnam = (char) (ic - 32);
	    for (i = 2; i <= 6; ++i) {
		ic = *(unsigned char *)&subnam[i - 1];
		if (ic >= 97 && ic <= 122) {
		    *(unsigned char *)&subnam[i - 1] = (char) (ic - 32);
		}
/* L10: */
	    }
	}

    } else if (iz == 233 || iz == 169) {

/*        EBCDIC character set */

        if ((ic >= 129 && ic <= 137) || 
	    (ic >= 145 && ic <= 153) || 
	    (ic >= 162 && ic <= 169)) {
	    *(unsigned char *)subnam = (char) (ic + 64);
	    for (i = 2; i <= 6; ++i) {
		ic = *(unsigned char *)&subnam[i - 1];
		if ((ic >= 129 && ic <= 137) || 
		    (ic >= 145 && ic <= 153) || 
		    (ic >= 162 && ic <= 169)) {
		    *(unsigned char *)&subnam[i - 1] = (char) (ic + 64);
		}
/* L20: */
	    }
	}

    } else if (iz == 218 || iz == 250) {

/*        Prime machines:  ASCII+128 */

	if (ic >= 225 && ic <= 250) {
	    *(unsigned char *)subnam = (char) (ic - 32);
	    for (i = 2; i <= 6; ++i) {
		ic = *(unsigned char *)&subnam[i - 1];
		if (ic >= 225 && ic <= 250) {
		    *(unsigned char *)&subnam[i - 1] = (char) (ic - 32);
		}
/* L30: */
	    }
	}
    }

    *(unsigned char *)c1 = *(unsigned char *)subnam;
    sname = *(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D';
    cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z';
    if (! (cname || sname)) {
	return ret_val;
    }
    s_copy(c2, subnam + 1, 2L, 2L);
    s_copy(c3, subnam + 3, 3L, 3L);
    s_copy(c4, c3 + 1, 2L, 2L);

    switch (*ispec) {
	case 1:  goto L110;
	case 2:  goto L200;
	case 3:  goto L300;
    }

L110:

/*     ISPEC = 1:  block size   

       In these examples, separate code is provided for setting NB for   
       real and complex.  We assume that NB will take the same value in   
       single or double precision. */

    nb = 1;

    if (s_cmp(c2, "GE", 2L, 2L) == 0) {
	if (s_cmp(c3, "TRF", 3L, 3L) == 0) {
	    if (sname) {
		nb = 64;
	    } else {
		nb = 64;
	    }
	} else if (s_cmp(c3, "QRF", 3L, 3L) == 0 || s_cmp(c3, "RQF", 3L, 3L) 
		== 0 || s_cmp(c3, "LQF", 3L, 3L) == 0 || s_cmp(c3, "QLF", 3L, 
		3L) == 0) {
	    if (sname) {
		nb = 32;
	    } else {
		nb = 32;
	    }
	} else if (s_cmp(c3, "HRD", 3L, 3L) == 0) {
	    if (sname) {
		nb = 32;
	    } else {
		nb = 32;
	    }
	} else if (s_cmp(c3, "BRD", 3L, 3L) == 0) {
	    if (sname) {
		nb = 32;
	    } else {
		nb = 32;
	    }
	} else if (s_cmp(c3, "TRI", 3L, 3L) == 0) {
	    if (sname) {
		nb = 64;
	    } else {
		nb = 64;
	    }
	}
    } else if (s_cmp(c2, "PO", 2L, 2L) == 0) {
	if (s_cmp(c3, "TRF", 3L, 3L) == 0) {
	    if (sname) {
		nb = 64;
	    } else {
		nb = 64;
	    }
	}
    } else if (s_cmp(c2, "SY", 2L, 2L) == 0) {
	if (s_cmp(c3, "TRF", 3L, 3L) == 0) {
	    if (sname) {
		nb = 64;
	    } else {
		nb = 64;
	    }
	} else if (sname && s_cmp(c3, "TRD", 3L, 3L) == 0) {
	    nb = 1;
	} else if (sname && s_cmp(c3, "GST", 3L, 3L) == 0) {
	    nb = 64;
	}
    } else if (cname && s_cmp(c2, "HE", 2L, 2L) == 0) {
	if (s_cmp(c3, "TRF", 3L, 3L) == 0) {
	    nb = 64;
	} else if (s_cmp(c3, "TRD", 3L, 3L) == 0) {
	    nb = 1;
	} else if (s_cmp(c3, "GST", 3L, 3L) == 0) {
	    nb = 64;
	}
    } else if (sname && s_cmp(c2, "OR", 2L, 2L) == 0) {
	if (*(unsigned char *)c3 == 'G') {
	    if (s_cmp(c4, "QR", 2L, 2L) == 0 || s_cmp(c4, "RQ", 2L, 2L) == 0 
		    || s_cmp(c4, "LQ", 2L, 2L) == 0 || s_cmp(c4, "QL", 2L, 2L)
		     == 0 || s_cmp(c4, "HR", 2L, 2L) == 0 || s_cmp(c4, "TR", 
		    2L, 2L) == 0 || s_cmp(c4, "BR", 2L, 2L) == 0) {
		nb = 32;
	    }
	} else if (*(unsigned char *)c3 == 'M') {
	    if (s_cmp(c4, "QR", 2L, 2L) == 0 || s_cmp(c4, "RQ", 2L, 2L) == 0 
		    || s_cmp(c4, "LQ", 2L, 2L) == 0 || s_cmp(c4, "QL", 2L, 2L)
		     == 0 || s_cmp(c4, "HR", 2L, 2L) == 0 || s_cmp(c4, "TR", 
		    2L, 2L) == 0 || s_cmp(c4, "BR", 2L, 2L) == 0) {
		nb = 32;
	    }
	}
    } else if (cname && s_cmp(c2, "UN", 2L, 2L) == 0) {
	if (*(unsigned char *)c3 == 'G') {
	    if (s_cmp(c4, "QR", 2L, 2L) == 0 || s_cmp(c4, "RQ", 2L, 2L) == 0 
		    || s_cmp(c4, "LQ", 2L, 2L) == 0 || s_cmp(c4, "QL", 2L, 2L)
		     == 0 || s_cmp(c4, "HR", 2L, 2L) == 0 || s_cmp(c4, "TR", 
		    2L, 2L) == 0 || s_cmp(c4, "BR", 2L, 2L) == 0) {
		nb = 32;
	    }
	} else if (*(unsigned char *)c3 == 'M') {
	    if (s_cmp(c4, "QR", 2L, 2L) == 0 || s_cmp(c4, "RQ", 2L, 2L) == 0 
		    || s_cmp(c4, "LQ", 2L, 2L) == 0 || s_cmp(c4, "QL", 2L, 2L)
		     == 0 || s_cmp(c4, "HR", 2L, 2L) == 0 || s_cmp(c4, "TR", 
		    2L, 2L) == 0 || s_cmp(c4, "BR", 2L, 2L) == 0) {
		nb = 32;
	    }
	}
    } else if (s_cmp(c2, "GB", 2L, 2L) == 0) {
	if (s_cmp(c3, "TRF", 3L, 3L) == 0) {
	    if (sname) {
		if (*n4 <= 64) {
		    nb = 1;
		} else {
		    nb = 32;
		}
	    } else {
		if (*n4 <= 64) {
		    nb = 1;
		} else {
		    nb = 32;
		}
	    }
	}
    } else if (s_cmp(c2, "PB", 2L, 2L) == 0) {
	if (s_cmp(c3, "TRF", 3L, 3L) == 0) {
	    if (sname) {
		if (*n2 <= 64) {
		    nb = 1;
		} else {
		    nb = 32;
		}
	    } else {
		if (*n2 <= 64) {
		    nb = 1;
		} else {
		    nb = 32;
		}
	    }
	}
    } else if (s_cmp(c2, "TR", 2L, 2L) == 0) {
	if (s_cmp(c3, "TRI", 3L, 3L) == 0) {
	    if (sname) {
		nb = 64;
	    } else {
		nb = 64;
	    }
	}
    } else if (s_cmp(c2, "LA", 2L, 2L) == 0) {
	if (s_cmp(c3, "UUM", 3L, 3L) == 0) {
	    if (sname) {
		nb = 64;
	    } else {
		nb = 64;
	    }
	}
    } else if (sname && s_cmp(c2, "ST", 2L, 2L) == 0) {
	if (s_cmp(c3, "EBZ", 3L, 3L) == 0) {
	    nb = 1;
	}
    }
    ret_val = nb;
    return ret_val;

L200:

/*     ISPEC = 2:  minimum block size */

    nbmin = 2;
    if (s_cmp(c2, "GE", 2L, 2L) == 0) {
	if (s_cmp(c3, "QRF", 3L, 3L) == 0 || s_cmp(c3, "RQF", 3L, 3L) == 0 || 
		s_cmp(c3, "LQF", 3L, 3L) == 0 || s_cmp(c3, "QLF", 3L, 3L) == 
		0) {
	    if (sname) {
		nbmin = 2;
	    } else {
		nbmin = 2;
	    }
	} else if (s_cmp(c3, "HRD", 3L, 3L) == 0) {
	    if (sname) {
		nbmin = 2;
	    } else {
		nbmin = 2;
	    }
	} else if (s_cmp(c3, "BRD", 3L, 3L) == 0) {
	    if (sname) {
		nbmin = 2;
	    } else {
		nbmin = 2;
	    }
	} else if (s_cmp(c3, "TRI", 3L, 3L) == 0) {
	    if (sname) {
		nbmin = 2;
	    } else {
		nbmin = 2;
	    }
	}
    } else if (s_cmp(c2, "SY", 2L, 2L) == 0) {
	if (s_cmp(c3, "TRF", 3L, 3L) == 0) {
	    if (sname) {
		nbmin = 8;
	    } else {
		nbmin = 8;
	    }
	} else if (sname && s_cmp(c3, "TRD", 3L, 3L) == 0) {
	    nbmin = 2;
	}
    } else if (cname && s_cmp(c2, "HE", 2L, 2L) == 0) {
	if (s_cmp(c3, "TRD", 3L, 3L) == 0) {
	    nbmin = 2;
	}
    } else if (sname && s_cmp(c2, "OR", 2L, 2L) == 0) {
	if (*(unsigned char *)c3 == 'G') {
	    if (s_cmp(c4, "QR", 2L, 2L) == 0 || s_cmp(c4, "RQ", 2L, 2L) == 0 
		    || s_cmp(c4, "LQ", 2L, 2L) == 0 || s_cmp(c4, "QL", 2L, 2L)
		     == 0 || s_cmp(c4, "HR", 2L, 2L) == 0 || s_cmp(c4, "TR", 
		    2L, 2L) == 0 || s_cmp(c4, "BR", 2L, 2L) == 0) {
		nbmin = 2;
	    }
	} else if (*(unsigned char *)c3 == 'M') {
	    if (s_cmp(c4, "QR", 2L, 2L) == 0 || s_cmp(c4, "RQ", 2L, 2L) == 0 
		    || s_cmp(c4, "LQ", 2L, 2L) == 0 || s_cmp(c4, "QL", 2L, 2L)
		     == 0 || s_cmp(c4, "HR", 2L, 2L) == 0 || s_cmp(c4, "TR", 
		    2L, 2L) == 0 || s_cmp(c4, "BR", 2L, 2L) == 0) {
		nbmin = 2;
	    }
	}
    } else if (cname && s_cmp(c2, "UN", 2L, 2L) == 0) {
	if (*(unsigned char *)c3 == 'G') {
	    if (s_cmp(c4, "QR", 2L, 2L) == 0 || s_cmp(c4, "RQ", 2L, 2L) == 0 
		    || s_cmp(c4, "LQ", 2L, 2L) == 0 || s_cmp(c4, "QL", 2L, 2L)
		     == 0 || s_cmp(c4, "HR", 2L, 2L) == 0 || s_cmp(c4, "TR", 
		    2L, 2L) == 0 || s_cmp(c4, "BR", 2L, 2L) == 0) {
		nbmin = 2;
	    }
	} else if (*(unsigned char *)c3 == 'M') {
	    if (s_cmp(c4, "QR", 2L, 2L) == 0 || s_cmp(c4, "RQ", 2L, 2L) == 0 
		    || s_cmp(c4, "LQ", 2L, 2L) == 0 || s_cmp(c4, "QL", 2L, 2L)
		     == 0 || s_cmp(c4, "HR", 2L, 2L) == 0 || s_cmp(c4, "TR", 
		    2L, 2L) == 0 || s_cmp(c4, "BR", 2L, 2L) == 0) {
		nbmin = 2;
	    }
	}
    }
    ret_val = nbmin;
    return ret_val;

L300:

/*     ISPEC = 3:  crossover point */

    nx = 0;
    if (s_cmp(c2, "GE", 2L, 2L) == 0) {
	if (s_cmp(c3, "QRF", 3L, 3L) == 0 || s_cmp(c3, "RQF", 3L, 3L) == 0 || 
		s_cmp(c3, "LQF", 3L, 3L) == 0 || s_cmp(c3, "QLF", 3L, 3L) == 
		0) {
	    if (sname) {
		nx = 128;
	    } else {
		nx = 128;
	    }
	} else if (s_cmp(c3, "HRD", 3L, 3L) == 0) {
	    if (sname) {
		nx = 128;
	    } else {
		nx = 128;
	    }
	} else if (s_cmp(c3, "BRD", 3L, 3L) == 0) {
	    if (sname) {
		nx = 128;
	    } else {
		nx = 128;
	    }
	}
    } else if (s_cmp(c2, "SY", 2L, 2L) == 0) {
	if (sname && s_cmp(c3, "TRD", 3L, 3L) == 0) {
	    nx = 1;
	}
    } else if (cname && s_cmp(c2, "HE", 2L, 2L) == 0) {
	if (s_cmp(c3, "TRD", 3L, 3L) == 0) {
	    nx = 1;
	}
    } else if (sname && s_cmp(c2, "OR", 2L, 2L) == 0) {
	if (*(unsigned char *)c3 == 'G') {
	    if (s_cmp(c4, "QR", 2L, 2L) == 0 || s_cmp(c4, "RQ", 2L, 2L) == 0 
		    || s_cmp(c4, "LQ", 2L, 2L) == 0 || s_cmp(c4, "QL", 2L, 2L)
		     == 0 || s_cmp(c4, "HR", 2L, 2L) == 0 || s_cmp(c4, "TR", 
		    2L, 2L) == 0 || s_cmp(c4, "BR", 2L, 2L) == 0) {
		nx = 128;
	    }
	}
    } else if (cname && s_cmp(c2, "UN", 2L, 2L) == 0) {
	if (*(unsigned char *)c3 == 'G') {
	    if (s_cmp(c4, "QR", 2L, 2L) == 0 || s_cmp(c4, "RQ", 2L, 2L) == 0 
		    || s_cmp(c4, "LQ", 2L, 2L) == 0 || s_cmp(c4, "QL", 2L, 2L)
		     == 0 || s_cmp(c4, "HR", 2L, 2L) == 0 || s_cmp(c4, "TR", 
		    2L, 2L) == 0 || s_cmp(c4, "BR", 2L, 2L) == 0) {
		nx = 128;
	    }
	}
    }
    ret_val = nx;
    return ret_val;

L400:

/*     ISPEC = 4:  number of shifts (used by xHSEQR) */

    ret_val = 6;
    return ret_val;

L500:

/*     ISPEC = 5:  minimum column dimension (not used) */

    ret_val = 2;
    return ret_val;

L600:

/*     ISPEC = 6:  crossover point for SVD (used by xGELSS and xGESVD) */

    ret_val = (integer) ((sreal) min(*n1,*n2) * 1.6f);
    return ret_val;

L700:

/*     ISPEC = 7:  number of processors (not used) */

    ret_val = 1;
    return ret_val;

L800:

/*     ISPEC = 8:  crossover point for multishift (used by xHSEQR) */

    ret_val = 50;
    return ret_val;

/*     End of ILAENV */

} /* ilaenv_ */


int slaev2_(sreal *a, sreal *b, sreal *c, sreal *rt1, sreal *rt2, 
	sreal *cs1, sreal *sn1)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1992   


    Purpose   
    =======   

    SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix   
       [  A   B  ]   
       [  B   C  ].   
    On return, RT1 is the eigenvalue of larger absolute value, RT2 is the 
  
    eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right 
  
    eigenvector for RT1, giving the decomposition   

       [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]   
       [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].   

    Arguments   
    =========   

    A       (input) REAL   
            The (1,1) element of the 2-by-2 matrix.   

    B       (input) REAL   
            The (1,2) element and the conjugate of the (2,1) element of   
            the 2-by-2 matrix.   

    C       (input) REAL   
            The (2,2) element of the 2-by-2 matrix.   

    RT1     (output) REAL   
            The eigenvalue of larger absolute value.   

    RT2     (output) REAL   
            The eigenvalue of smaller absolute value.   

    CS1     (output) REAL   
    SN1     (output) REAL   
            The vector (CS1, SN1) is a unit right eigenvector for RT1.   

    Further Details   
    ===============   

    RT1 is accurate to a few ulps barring over/underflow.   

    RT2 may be inaccurate if there is massive cancellation in the   
    determinant A*C-B*B; higher precision or correctly rounded or   
    correctly truncated arithmetic would be needed to compute RT2   
    accurately in all cases.   

    CS1 and SN1 are accurate to a few ulps barring over/underflow.   

    Overflow is possible only if RT1 is within a factor of 5 of overflow. 
  
    Underflow is harmless if the input data is 0 or exceeds   
       underflow_threshold / macheps.   

   ===================================================================== 
  


       Compute the eigenvalues */
    /* System generated locals */
    sreal r__1;
    /* Local variables */
    static sreal acmn, acmx, ab, df, cs, ct, tb, sm, tn, rt, adf, acs;
    static integer sgn1, sgn2;


    sm = *a + *c;
    df = *a - *c;
    adf = fabs(df);
    tb = *b + *b;
    ab = fabs(tb);
    if (fabs(*a) > fabs(*c)) {
	acmx = *a;
	acmn = *c;
    } else {
	acmx = *c;
	acmn = *a;
    }
    if (adf > ab) {
/* Computing 2nd power */
	r__1 = ab / adf;
	rt = adf * sqrt(r__1 * r__1 + 1.f);
    } else if (adf < ab) {
/* Computing 2nd power */
	r__1 = adf / ab;
	rt = ab * sqrt(r__1 * r__1 + 1.f);
    } else {

/*        Includes case AB=ADF=0 */

	rt = ab * sqrt(2.f);
    }
    if (sm < 0.f) {
	*rt1 = (sm - rt) * .5f;
	sgn1 = -1;

/*        Order of execution important.   
          To get fully accurate smaller eigenvalue,   
          next line needs to be executed in higher precision. */

	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
    } else if (sm > 0.f) {
	*rt1 = (sm + rt) * .5f;
	sgn1 = 1;

/*        Order of execution important.   
          To get fully accurate smaller eigenvalue,   
          next line needs to be executed in higher precision. */

	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
    } else {

/*        Includes case RT1 = RT2 = 0 */

	*rt1 = rt * .5f;
	*rt2 = rt * -.5f;
	sgn1 = 1;
    }

/*     Compute the eigenvector */

    if (df >= 0.f) {
	cs = df + rt;
	sgn2 = 1;
    } else {
	cs = df - rt;
	sgn2 = -1;
    }
    acs = fabs(cs);
    if (acs > ab) {
	ct = -(doublereal)tb / cs;
	*sn1 = 1.f / sqrt(ct * ct + 1.f);
	*cs1 = ct * *sn1;
    } else {
	if (ab == 0.f) {
	    *cs1 = 1.f;
	    *sn1 = 0.f;
	} else {
	    tn = -(doublereal)cs / tb;
	    *cs1 = 1.f / sqrt(tn * tn + 1.f);
	    *sn1 = tn * *cs1;
	}
    }
    if (sgn1 == sgn2) {
	tn = *cs1;
	*cs1 = -(doublereal)(*sn1);
	*sn1 = tn;
    }
    return 0;

/*     End of SLAEV2 */

} /* slaev2_ */

void r_cnjg(complx *r, complx *z)
{
r->r = z->r;
r->i = - z->i;
}

double r_imag(complx *z)
{
return(z->i);
}


double c_abs(complx *z)
{
return( f__cabs( z->r, z->i ) );
}


double f__cabs(double real, double imag)
{
double temp;

if(real < 0)
	real = -real;
if(imag < 0)
	imag = -imag;
if(imag > real){
	temp = real;
	real = imag;
	imag = temp;
}
if((real+imag) == real)
	return(real);

temp = imag/real;
temp = real*sqrt(1.0 + temp*temp);  /*overflow!!*/
return(temp);
}


void c_div(complx *c, complx *a, complx *b)
{
double ratio, den;
double abr, abi;

if( (abr = b->r) < 0.)
	abr = - abr;
if( (abi = b->i) < 0.)
	abi = - abi;
if( abr <= abi )
	{
	  if(abi == 0) {
	    cerr << "ACO: " << __FILE__ << " line " << __LINE__ << "complex division by zero" << endl;
	    abort();
	  }
	ratio = (double)b->r / b->i ;
	den = b->i * (1 + ratio*ratio);
	c->r = (a->r*ratio + a->i) / den;
	c->i = (a->i*ratio - a->r) / den;
	}

else
	{
	ratio = (double)b->i / b->r ;
	den = b->r * (1 + ratio*ratio);
	c->r = (a->r + a->i*ratio) / den;
	c->i = (a->i - a->r*ratio) / den;
	}
}


integer s_cmp(const char *a0, const char *b0, ftnlen la, ftnlen lb)
{
register const unsigned char *a, *aend, *b, *bend;
a = (const unsigned char *)a0;
b = (const unsigned char *)b0;
aend = a + la;
bend = b + lb;

if(la <= lb)
	{
	while(a < aend)
		if(*a != *b)
			return( *a - *b );
		else
			{ ++a; ++b; }

	while(b < bend)
		if(*b != ' ')
			return( ' ' - *b );
		else	++b;
	}

else
	{
	while(b < bend)
		if(*a == *b)
			{ ++a; ++b; }
		else
			return( *a - *b );
	while(a < aend)
		if(*a != ' ')
			return(*a - ' ');
		else	++a;
	}
return(0);
}


//void s_copy(register char *a, register char *b, ftnlen la, ftnlen lb)
void s_copy(char *a, const char *b, ftnlen la, ftnlen lb)
{
//register char *aend, *bend;
  char *aend;
  const char *bend;
aend = a + la;

if(la <= lb)
	while(a < aend)
		*a++ = *b++;

else
	{
	bend = b + lb;
	while(b < bend)
		*a++ = *b++;
	while(a < aend)
		*a++ = ' ';
	}
}


int cgemm_(const char *transa, const char *transb, integer *m, integer *
	n, integer *k, complx *alpha, complx *a, integer *lda, complx *b, 
	integer *ldb, complx *beta, complx *c, integer *ldc)
{


    /* System generated locals */
    //integer a_dim1=0, b_dim1=0, c_dim1=0;
    //integer i__1, i__2, i__3, i__4, i__5, i__6;
    /* integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, 
       i__1, i__2, i__3, i__4, i__5, i__6; */
    complx q__1, q__2, q__3, q__4;

    /* Local variables */
    static integer info;
    static logical nota, notb;
    static complx temp;
    static integer i, j, l;
    static logical conja, conjb;
    //static integer ncola;
    static integer nrowa, nrowb;


/*  Purpose   
    =======   

    CGEMM  performs one of the matrix-matrix operations   

       C := alpha*op( A )*op( B ) + beta*C,   

    where  op( X ) is one of   

       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),   

    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
  
    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
  

    Parameters   
    ==========   

    TRANSA - CHARACTER*1.   
             On entry, TRANSA specifies the form of op( A ) to be used in 
  
             the matrix multiplication as follows:   

                TRANSA = 'N' or 'n',  op( A ) = A.   

                TRANSA = 'T' or 't',  op( A ) = A'.   

                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   

             Unchanged on exit.   

    TRANSB - CHARACTER*1.   
             On entry, TRANSB specifies the form of op( B ) to be used in 
  
             the matrix multiplication as follows:   

                TRANSB = 'N' or 'n',  op( B ) = B.   

                TRANSB = 'T' or 't',  op( B ) = B'.   

                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   

             Unchanged on exit.   

    M      - INTEGER.   
             On entry,  M  specifies  the number  of rows  of the  matrix 
  
             op( A )  and of the  matrix  C.  M  must  be at least  zero. 
  
             Unchanged on exit.   

    N      - INTEGER.   
             On entry,  N  specifies the number  of columns of the matrix 
  
             op( B ) and the number of columns of the matrix C. N must be 
  
             at least zero.   
             Unchanged on exit.   

    K      - INTEGER.   
             On entry,  K  specifies  the number of columns of the matrix 
  
             op( A ) and the number of rows of the matrix op( B ). K must 
  
             be at least  zero.   
             Unchanged on exit.   

    ALPHA  - COMPLEX         .   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   

    A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is 
  
             k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.   
             Before entry with  TRANSA = 'N' or 'n',  the leading  m by k 
  
             part of the array  A  must contain the matrix  A,  otherwise 
  
             the leading  k by m  part of the array  A  must contain  the 
  
             matrix A.   
             Unchanged on exit.   

    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared 
  
             in the calling (sub) program. When  TRANSA = 'N' or 'n' then 
  
             LDA must be at least  max( 1, m ), otherwise  LDA must be at 
  
             least  max( 1, k ).   
             Unchanged on exit.   

    B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is 
  
             n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.   
             Before entry with  TRANSB = 'N' or 'n',  the leading  k by n 
  
             part of the array  B  must contain the matrix  B,  otherwise 
  
             the leading  n by k  part of the array  B  must contain  the 
  
             matrix B.   
             Unchanged on exit.   

    LDB    - INTEGER.   
             On entry, LDB specifies the first dimension of B as declared 
  
             in the calling (sub) program. When  TRANSB = 'N' or 'n' then 
  
             LDB must be at least  max( 1, k ), otherwise  LDB must be at 
  
             least  max( 1, n ).   
             Unchanged on exit.   

    BETA   - COMPLEX         .   
             On entry,  BETA  specifies the scalar  beta.  When  BETA  is 
  
             supplied as zero then C need not be set on input.   
             Unchanged on exit.   

    C      - COMPLEX          array of DIMENSION ( LDC, n ).   
             Before entry, the leading  m by n  part of the array  C must 
  
             contain the matrix  C,  except when  beta  is zero, in which 
  
             case C need not be set on entry.   
             On exit, the array  C  is overwritten by the  m by n  matrix 
  
             ( alpha*op( A )*op( B ) + beta*C ).   

    LDC    - INTEGER.   
             On entry, LDC specifies the first dimension of C as declared 
  
             in  the  calling  (sub)  program.   LDC  must  be  at  least 
  
             max( 1, m ).   
             Unchanged on exit.   


    Level 3 Blas routine.   

    -- Written on 8-February-1989.   
       Jack Dongarra, Argonne National Laboratory.   
       Iain Duff, AERE Harwell.   
       Jeremy Du Croz, Numerical Algorithms Group Ltd.   
       Sven Hammarling, Numerical Algorithms Group Ltd.   



       Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not 
  
       conjugated or transposed, set  CONJA and CONJB  as true if  A  and 
  
       B  respectively are to be  transposed but  not conjugated  and set 
  
       NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A 
  
       and the number of rows of  B  respectively.   

    
   Parameter adjustments   
       Function Body */

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]
#define C(I,J) c[(I)-1 + ((J)-1)* ( *ldc)]

    nota = lsame_(transa, "N");
    notb = lsame_(transb, "N");
    conja = lsame_(transa, "C");
    conjb = lsame_(transb, "C");
    if (nota) {
	nrowa = *m;
	//ncola = *k;
    } else {
	nrowa = *k;
	//ncola = *m;
    }
    if (notb) {
	nrowb = *k;
    } else {
	nrowb = *n;
    }

/*     Test the input parameters. */

    info = 0;
    if (! nota && ! conja && ! lsame_(transa, "T")) {
	info = 1;
    } else if (! notb && ! conjb && ! lsame_(transb, "T")) {
	info = 2;
    } else if (*m < 0) {
	info = 3;
    } else if (*n < 0) {
	info = 4;
    } else if (*k < 0) {
	info = 5;
    } else if (*lda < max(1,nrowa)) {
	info = 8;
    } else if (*ldb < max(1,nrowb)) {
	info = 10;
    } else if (*ldc < max(1,*m)) {
	info = 13;
    }
    if (info != 0) {
	xerbla_("CGEMM ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*m == 0 || *n == 0) return 0;
    if (((alpha->r == 0.f && alpha->i == 0.f) || *k == 0) 
	&& beta->r == 1.f && beta->i == 0.f) {
	return 0;
    }

/*     And when  alpha.eq.zero. */

    if (alpha->r == 0.f && alpha->i == 0.f) {
	if (beta->r == 0.f && beta->i == 0.f) {
	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    //i__3 = i + j * c_dim1;
		    C(i,j).r = 0.f, C(i,j).i = 0.f;
/* L10: */
		}
/* L20: */
	    }
	} else {
	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    //i__3 = i + j * c_dim1;
		    //i__4 = i + j * c_dim1;
		    q__1.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
			    q__1.i = beta->r * C(i,j).i + beta->i * C(i,j)
			    .r;
		    C(i,j).r = q__1.r, C(i,j).i = q__1.i;
/* L30: */
		}
/* L40: */
	    }
	}
	return 0;
    }

/*     Start the operations. */

    if (notb) {
	if (nota) {

/*           Form  C := alpha*A*B + beta*C. */

	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		if (beta->r == 0.f && beta->i == 0.f) {
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			//i__3 = i + j * c_dim1;
			C(i,j).r = 0.f, C(i,j).i = 0.f;
/* L50: */
		    }
		} else if (beta->r != 1.f || beta->i != 0.f) {
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			//i__3 = i + j * c_dim1;
			//i__4 = i + j * c_dim1;
			q__1.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				q__1.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
/* L60: */
		    }
		}
		//i__2 = *k;
		for (l = 1; l <= *k; ++l) {
		    //i__3 = l + j * b_dim1;
		    if (B(l,j).r != 0.f || B(l,j).i != 0.f) {
			//i__3 = l + j * b_dim1;
			q__1.r = alpha->r * B(l,j).r - alpha->i * B(l,j).i, 
				q__1.i = alpha->r * B(l,j).i + alpha->i * B(l,j).r;
			temp.r = q__1.r, temp.i = q__1.i;
			//i__3 = *m;
			for (i = 1; i <= *m; ++i) {
			    //i__4 = i + j * c_dim1;
			    //i__5 = i + j * c_dim1;
			    //i__6 = i + l * a_dim1;
			    q__2.r = temp.r * A(i,l).r - temp.i * A(i,l).i, 
				    q__2.i = temp.r * A(i,l).i + temp.i * A(i,l).r;
			    q__1.r = C(i,j).r + q__2.r, q__1.i = C(i,j).i + 
				    q__2.i;
			    C(i,j).r = q__1.r, C(i,j).i = q__1.i;
/* L70: */
			}
		    }
/* L80: */
		}
/* L90: */
	    }
	} else if (conja) {

/*           Form  C := alpha*conjg( A' )*B + beta*C. */

	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0.f, temp.i = 0.f;
		    //i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			r_cnjg(&q__3, &A(l,i));
			//i__4 = l + j * b_dim1;
			q__2.r = q__3.r * B(l,j).r - q__3.i * B(l,j).i, 
				q__2.i = q__3.r * B(l,j).i + q__3.i * B(l,j)
				.r;
			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
			temp.r = q__1.r, temp.i = q__1.i;
/* L100: */
		    }
		    if (beta->r == 0.f && beta->i == 0.f) {
			//i__3 = i + j * c_dim1;
			q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    } else {
			//i__3 = i + j * c_dim1;
			q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			//i__4 = i + j * c_dim1;
			q__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				q__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    }
/* L110: */
		}
/* L120: */
	    }
	} else {

/*           Form  C := alpha*A'*B + beta*C */

	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0.f, temp.i = 0.f;
		    //i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			//i__4 = l + i * a_dim1;
			//i__5 = l + j * b_dim1;
			q__2.r = A(l,i).r * B(l,j).r - A(l,i).i * B(l,j)
				.i, q__2.i = A(l,i).r * B(l,j).i + A(l,i)
				.i * B(l,j).r;
			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
			temp.r = q__1.r, temp.i = q__1.i;
/* L130: */
		    }
		    if (beta->r == 0.f && beta->i == 0.f) {
			//i__3 = i + j * c_dim1;
			q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    } else {
			//i__3 = i + j * c_dim1;
			q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			//i__4 = i + j * c_dim1;
			q__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				q__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    }
/* L140: */
		}
/* L150: */
	    }
	}
    } else if (nota) {
	if (conjb) {

/*           Form  C := alpha*A*conjg( B' ) + beta*C. */

	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		if (beta->r == 0.f && beta->i == 0.f) {
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			//i__3 = i + j * c_dim1;
			C(i,j).r = 0.f, C(i,j).i = 0.f;
/* L160: */
		    }
		} else if (beta->r != 1.f || beta->i != 0.f) {
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			//i__3 = i + j * c_dim1;
			//i__4 = i + j * c_dim1;
			q__1.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				q__1.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
/* L170: */
		    }
		}
		//i__2 = *k;
		for (l = 1; l <= *k; ++l) {
		    //i__3 = j + l * b_dim1;
		    if (B(j,l).r != 0.f || B(j,l).i != 0.f) {
			r_cnjg(&q__2, &B(j,l));
			q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, 
				q__1.i = alpha->r * q__2.i + alpha->i * 
				q__2.r;
			temp.r = q__1.r, temp.i = q__1.i;
			//i__3 = *m;
			for (i = 1; i <= *m; ++i) {
			    //i__4 = i + j * c_dim1;
			    //i__5 = i + j * c_dim1;
			    //i__6 = i + l * a_dim1;
			    q__2.r = temp.r * A(i,l).r - temp.i * A(i,l).i, 
				    q__2.i = temp.r * A(i,l).i + temp.i * A(i,l).r;
			    q__1.r = C(i,j).r + q__2.r, q__1.i = C(i,j).i + 
				    q__2.i;
			    C(i,j).r = q__1.r, C(i,j).i = q__1.i;
/* L180: */
			}
		    }
/* L190: */
		}
/* L200: */
	    }
	} else {

/*           Form  C := alpha*A*B'          + beta*C */

	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		if (beta->r == 0.f && beta->i == 0.f) {
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			//i__3 = i + j * c_dim1;
			C(i,j).r = 0.f, C(i,j).i = 0.f;
/* L210: */
		    }
		} else if (beta->r != 1.f || beta->i != 0.f) {
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			//i__3 = i + j * c_dim1;
			//i__4 = i + j * c_dim1;
			q__1.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				q__1.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
/* L220: */
		    }
		}
		//i__2 = *k;
		for (l = 1; l <= *k; ++l) {
		    //i__3 = j + l * b_dim1;
		    if (B(j,l).r != 0.f || B(j,l).i != 0.f) {
			//i__3 = j + l * b_dim1;
			q__1.r = alpha->r * B(j,l).r - alpha->i * B(j,l).i, 
				q__1.i = alpha->r * B(j,l).i + alpha->i * B(j,l).r;
			temp.r = q__1.r, temp.i = q__1.i;
			//i__3 = *m;
			for (i = 1; i <= *m; ++i) {
			    //i__4 = i + j * c_dim1;
			    //i__5 = i + j * c_dim1;
			    //i__6 = i + l * a_dim1;
			    q__2.r = temp.r * A(i,l).r - temp.i * A(i,l).i, 
				    q__2.i = temp.r * A(i,l).i + temp.i * A(i,l).r;
			    q__1.r = C(i,j).r + q__2.r, q__1.i = C(i,j).i + 
				    q__2.i;
			    C(i,j).r = q__1.r, C(i,j).i = q__1.i;
/* L230: */
			}
		    }
/* L240: */
		}
/* L250: */
	    }
	}
    } else if (conja) {
	if (conjb) {

/*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C. */

	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0.f, temp.i = 0.f;
		    //i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			r_cnjg(&q__3, &A(l,i));
			r_cnjg(&q__4, &B(j,l));
			q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = 
				q__3.r * q__4.i + q__3.i * q__4.r;
			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
			temp.r = q__1.r, temp.i = q__1.i;
/* L260: */
		    }
		    if (beta->r == 0.f && beta->i == 0.f) {
			//i__3 = i + j * c_dim1;
			q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    } else {
			//i__3 = i + j * c_dim1;
			q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			//i__4 = i + j * c_dim1;
			q__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				q__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    }
/* L270: */
		}
/* L280: */
	    }
	} else {

/*           Form  C := alpha*conjg( A' )*B' + beta*C */

	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0.f, temp.i = 0.f;
		    //i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			r_cnjg(&q__3, &A(l,i));
			//i__4 = j + l * b_dim1;
			q__2.r = q__3.r * B(j,l).r - q__3.i * B(j,l).i, 
				q__2.i = q__3.r * B(j,l).i + q__3.i * B(j,l)
				.r;
			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
			temp.r = q__1.r, temp.i = q__1.i;
/* L290: */
		    }
		    if (beta->r == 0.f && beta->i == 0.f) {
			//i__3 = i + j * c_dim1;
			q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    } else {
			//i__3 = i + j * c_dim1;
			q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			//i__4 = i + j * c_dim1;
			q__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				q__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    }
/* L300: */
		}
/* L310: */
	    }
	}
    } else {
	if (conjb) {

/*           Form  C := alpha*A'*conjg( B' ) + beta*C */

	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0.f, temp.i = 0.f;
		    //i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			//i__4 = l + i * a_dim1;
			r_cnjg(&q__3, &B(j,l));
			q__2.r = A(l,i).r * q__3.r - A(l,i).i * q__3.i, 
				q__2.i = A(l,i).r * q__3.i + A(l,i).i * 
				q__3.r;
			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
			temp.r = q__1.r, temp.i = q__1.i;
/* L320: */
		    }
		    if (beta->r == 0.f && beta->i == 0.f) {
			//i__3 = i + j * c_dim1;
			q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    } else {
			//i__3 = i + j * c_dim1;
			q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			//i__4 = i + j * c_dim1;
			q__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				q__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    }
/* L330: */
		}
/* L340: */
	    }
	} else {

/*           Form  C := alpha*A'*B' + beta*C */

	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    temp.r = 0.f, temp.i = 0.f;
		    //i__3 = *k;
		    for (l = 1; l <= *k; ++l) {
			//i__4 = l + i * a_dim1;
			//i__5 = j + l * b_dim1;
			q__2.r = A(l,i).r * B(j,l).r - A(l,i).i * B(j,l)
				.i, q__2.i = A(l,i).r * B(j,l).i + A(l,i)
				.i * B(j,l).r;
			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
			temp.r = q__1.r, temp.i = q__1.i;
/* L350: */
		    }
		    if (beta->r == 0.f && beta->i == 0.f) {
			//i__3 = i + j * c_dim1;
			q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__1.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    } else {
			//i__3 = i + j * c_dim1;
			q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
				q__2.i = alpha->r * temp.i + alpha->i * 
				temp.r;
			//i__4 = i + j * c_dim1;
			q__3.r = beta->r * C(i,j).r - beta->i * C(i,j).i, 
				q__3.i = beta->r * C(i,j).i + beta->i * C(i,j).r;
			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
			C(i,j).r = q__1.r, C(i,j).i = q__1.i;
		    }
/* L360: */
		}
/* L370: */
	    }
	}
    }

    return 0;

/*     End of CGEMM . */

} /* cgemm_ */

logical lsame_(const char *ca, const char *cb)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    LSAME returns .TRUE. if CA is the same letter as CB regardless of   
    case.   

    Arguments   
    =========   

    CA      (input) CHARACTER*1   
    CB      (input) CHARACTER*1   
            CA and CB specify the single characters to be compared.   

   ===================================================================== 
  


       Test if the characters are equal */
    /* System generated locals */
    logical ret_val;
    /* Local variables */
    static integer inta, intb, zcode;


    ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
    if (ret_val) {
	return ret_val;
    }

/*     Now test for equivalence if both characters are alphabetic. */

    zcode = 'Z';

/*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime   
       machines, on which ICHAR returns a value with bit 8 set.   
       ICHAR('A') on Prime machines returns 193 which is the same as   
       ICHAR('A') on an EBCDIC machine. */

    inta = *(unsigned char *)ca;
    intb = *(unsigned char *)cb;

    if (zcode == 90 || zcode == 122) {

/*        ASCII is assumed - ZCODE is the ASCII code of either lower o
r   
          upper case 'Z'. */

	if (inta >= 97 && inta <= 122) {
	    inta += -32;
	}
	if (intb >= 97 && intb <= 122) {
	    intb += -32;
	}

    } else if (zcode == 233 || zcode == 169) {

/*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower
 or   
          upper case 'Z'. */

        if ((inta >= 129 && inta <= 137) || 
	    (inta >= 145 && inta <= 153) || 
	    (inta >= 162 && inta <= 169)) {
	    inta += 64;
	}
	if ((intb >= 129 && intb <= 137) || 
	    (intb >= 145 && intb <= 153) || 
	    (intb >= 162 && intb <= 169)) {
	    intb += 64;
	}

    } else if (zcode == 218 || zcode == 250) {

/*        ASCII is assumed, on Prime machines - ZCODE is the ASCII cod
e   
          plus 128 of either lower or upper case 'Z'. */

	if (inta >= 225 && inta <= 250) {
	    inta += -32;
	}
	if (intb >= 225 && intb <= 250) {
	    intb += -32;
	}
    }
    ret_val = inta == intb;

/*     RETURN   

       End of LSAME */

    return ret_val;
} /* lsame_ */




int cgemv_(const char *trans, integer *m, integer *n, complx *alpha, 
	   complx *a, integer *lda, complx *x, integer *incx, 
	   complx *beta, complx *y, integer *incy)
{


    /* System generated locals */
    //integer a_dim1=0;
    //integer i__1, i__2, i__3, i__4, i__5;
    /* integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; */
    complx q__1, q__2, q__3;

    /* Local variables */
    static integer info;
    static complx temp;
    static integer lenx, leny, i, j;
    static integer ix, iy, jx, jy, kx, ky;
    static logical noconj;


/*  Purpose   
    =======   

    CGEMV  performs one of the matrix-vector operations   

       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or   

       y := alpha*conjg( A' )*x + beta*y,   

    where alpha and beta are scalars, x and y are vectors and A is an   
    m by n matrix.   

    Parameters   
    ==========   

    TRANS  - CHARACTER*1.   
             On entry, TRANS specifies the operation to be performed as   
             follows:   

                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   

                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   

                TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.   

             Unchanged on exit.   

    M      - INTEGER.   
             On entry, M specifies the number of rows of the matrix A.   
             M must be at least zero.   
             Unchanged on exit.   

    N      - INTEGER.   
             On entry, N specifies the number of columns of the matrix A. 
  
             N must be at least zero.   
             Unchanged on exit.   

    ALPHA  - COMPLEX         .   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   

    A      - COMPLEX          array of DIMENSION ( LDA, n ).   
             Before entry, the leading m by n part of the array A must   
             contain the matrix of coefficients.   
             Unchanged on exit.   

    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared 
  
             in the calling (sub) program. LDA must be at least   
             max( 1, m ).   
             Unchanged on exit.   

    X      - COMPLEX          array of DIMENSION at least   
             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
             Before entry, the incremented array X must contain the   
             vector x.   
             Unchanged on exit.   

    INCX   - INTEGER.   
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   
             Unchanged on exit.   

    BETA   - COMPLEX         .   
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then Y need not be set on input.   
             Unchanged on exit.   

    Y      - COMPLEX          array of DIMENSION at least   
             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
             Before entry with BETA non-zero, the incremented array Y   
             must contain the vector y. On exit, Y is overwritten by the 
  
             updated vector y.   

    INCY   - INTEGER.   
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   
             Unchanged on exit.   


    Level 2 Blas routine.   

    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   



       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    info = 0;
    if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! 
	    lsame_(trans, "C")) {
	info = 1;
    } else if (*m < 0) {
	info = 2;
    } else if (*n < 0) {
	info = 3;
    } else if (*lda < max(1,*m)) {
	info = 6;
    } else if (*incx == 0) {
	info = 8;
    } else if (*incy == 0) {
	info = 11;
    }
    if (info != 0) {
	xerbla_("CGEMV ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*m == 0 || *n == 0) return 0;
    if (alpha->r == 0.f && alpha->i == 0.f && beta->r == 1.f && beta->i == 0.f){
	return 0;
    }

    noconj = lsame_(trans, "T");

/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
  
       up the start points in  X  and  Y. */

    if (lsame_(trans, "N")) {
	lenx = *n;
	leny = *m;
    } else {
	lenx = *m;
	leny = *n;
    }
    if (*incx > 0) {
	kx = 1;
    } else {
	kx = 1 - (lenx - 1) * *incx;
    }
    if (*incy > 0) {
	ky = 1;
    } else {
	ky = 1 - (leny - 1) * *incy;
    }

/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A.   

       First form  y := beta*y. */

    if (beta->r != 1.f || beta->i != 0.f) {
	if (*incy == 1) {
	    if (beta->r == 0.f && beta->i == 0.f) {
		//i__1 = leny;
		for (i = 1; i <= leny; ++i) {
		    //i__2 = i;
		    Y(i).r = 0.f, Y(i).i = 0.f;
/* L10: */
		}
	    } else {
		//i__1 = leny;
		for (i = 1; i <= leny; ++i) {
		    //i__2 = i;
		    //i__3 = i;
		    q__1.r = beta->r * Y(i).r - beta->i * Y(i).i, 
			    q__1.i = beta->r * Y(i).i + beta->i * Y(i)
			    .r;
		    Y(i).r = q__1.r, Y(i).i = q__1.i;
/* L20: */
		}
	    }
	} else {
	    iy = ky;
	    if (beta->r == 0.f && beta->i == 0.f) {
		//i__1 = leny;
		for (i = 1; i <= leny; ++i) {
		    //i__2 = iy;
		    Y(iy).r = 0.f, Y(iy).i = 0.f;
		    iy += *incy;
/* L30: */
		}
	    } else {
		//i__1 = leny;
		for (i = 1; i <= leny; ++i) {
		    //i__2 = iy;
		    //i__3 = iy;
		    q__1.r = beta->r * Y(iy).r - beta->i * Y(iy).i, 
			    q__1.i = beta->r * Y(iy).i + beta->i * Y(iy)
			    .r;
		    Y(iy).r = q__1.r, Y(iy).i = q__1.i;
		    iy += *incy;
/* L40: */
		}
	    }
	}
    }
    if (alpha->r == 0.f && alpha->i == 0.f) {
	return 0;
    }
    if (lsame_(trans, "N")) {

/*        Form  y := alpha*A*x + y. */

	jx = kx;
	if (*incy == 1) {
	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = jx;
		if (X(jx).r != 0.f || X(jx).i != 0.f) {
		    //i__2 = jx;
		    q__1.r = alpha->r * X(jx).r - alpha->i * X(jx).i, 
			    q__1.i = alpha->r * X(jx).i + alpha->i * X(jx)
			    .r;
		    temp.r = q__1.r, temp.i = q__1.i;
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			//i__3 = i;
			//i__4 = i;
			//i__5 = i + j * a_dim1;
			q__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, 
				q__2.i = temp.r * A(i,j).i + temp.i * A(i,j)
				.r;
			q__1.r = Y(i).r + q__2.r, q__1.i = Y(i).i + 
				q__2.i;
			Y(i).r = q__1.r, Y(i).i = q__1.i;
/* L50: */
		    }
		}
		jx += *incx;
/* L60: */
	    }
	} else {
	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = jx;
		if (X(jx).r != 0.f || X(jx).i != 0.f) {
		    //i__2 = jx;
		    q__1.r = alpha->r * X(jx).r - alpha->i * X(jx).i, 
			    q__1.i = alpha->r * X(jx).i + alpha->i * X(jx)
			    .r;
		    temp.r = q__1.r, temp.i = q__1.i;
		    iy = ky;
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			//i__3 = iy;
			//i__4 = iy;
			//i__5 = i + j * a_dim1;
			q__2.r = temp.r * A(i,j).r - temp.i * A(i,j).i, 
				q__2.i = temp.r * A(i,j).i + temp.i * A(i,j)
				.r;
			q__1.r = Y(iy).r + q__2.r, q__1.i = Y(iy).i + 
				q__2.i;
			Y(iy).r = q__1.r, Y(iy).i = q__1.i;
			iy += *incy;
/* L70: */
		    }
		}
		jx += *incx;
/* L80: */
	    }
	}
    } else {

/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y.
 */

	jy = ky;
	if (*incx == 1) {
	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		temp.r = 0.f, temp.i = 0.f;
		if (noconj) {
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			//i__3 = i + j * a_dim1;
			//i__4 = i;
			q__2.r = A(i,j).r * X(i).r - A(i,j).i * X(i)
				.i, q__2.i = A(i,j).r * X(i).i + A(i,j)
				.i * X(i).r;
			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
			temp.r = q__1.r, temp.i = q__1.i;
/* L90: */
		    }
		} else {
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			r_cnjg(&q__3, &A(i,j));
			//i__3 = i;
			q__2.r = q__3.r * X(i).r - q__3.i * X(i).i, 
				q__2.i = q__3.r * X(i).i + q__3.i * X(i)
				.r;
			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
			temp.r = q__1.r, temp.i = q__1.i;
/* L100: */
		    }
		}
		//i__2 = jy;
		//i__3 = jy;
		q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = 
			alpha->r * temp.i + alpha->i * temp.r;
		q__1.r = Y(jy).r + q__2.r, q__1.i = Y(jy).i + q__2.i;
		Y(jy).r = q__1.r, Y(jy).i = q__1.i;
		jy += *incy;
/* L110: */
	    }
	} else {
	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		temp.r = 0.f, temp.i = 0.f;
		ix = kx;
		if (noconj) {
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			//i__3 = i + j * a_dim1;
			//i__4 = ix;
			q__2.r = A(i,j).r * X(ix).r - A(i,j).i * X(ix)
				.i, q__2.i = A(i,j).r * X(ix).i + A(i,j)
				.i * X(ix).r;
			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
			temp.r = q__1.r, temp.i = q__1.i;
			ix += *incx;
/* L120: */
		    }
		} else {
		    //i__2 = *m;
		    for (i = 1; i <= *m; ++i) {
			r_cnjg(&q__3, &A(i,j));
			//i__3 = ix;
			q__2.r = q__3.r * X(ix).r - q__3.i * X(ix).i, 
				q__2.i = q__3.r * X(ix).i + q__3.i * X(ix)
				.r;
			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
			temp.r = q__1.r, temp.i = q__1.i;
			ix += *incx;
/* L130: */
		    }
		}
		//i__2 = jy;
		//i__3 = jy;
		q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = 
			alpha->r * temp.i + alpha->i * temp.r;
		q__1.r = Y(jy).r + q__2.r, q__1.i = Y(jy).i + q__2.i;
		Y(jy).r = q__1.r, Y(jy).i = q__1.i;
		jy += *incy;
/* L140: */
	    }
	}
    }

    return 0;

/*     End of CGEMV . */

} /* cgemv_ */


int ccopy_(integer *n, complx *cx, integer *incx, complx *
	cy, integer *incy)
{


    /* System generated locals */
    //integer i__1, i__2, i__3;

    /* Local variables */
    static integer i, ix, iy;


/*     copies a vector, x, to a vector, y.   
       jack dongarra, linpack, 3/11/78.   
       modified 12/3/93, array(1) declarations changed to array(*)   


    
   Parameter adjustments   
       Function Body */
#define CY(I) cy[(I)-1]
#define CX(I) cx[(I)-1]


    if (*n <= 0) {
	return 0;
    }
    if (*incx == 1 && *incy == 1) {
	goto L20;
    }

/*        code for unequal increments or equal increments   
            not equal to 1 */

    ix = 1;
    iy = 1;
    if (*incx < 0) {
	ix = (-(*n) + 1) * *incx + 1;
    }
    if (*incy < 0) {
	iy = (-(*n) + 1) * *incy + 1;
    }
    //i__1 = *n;
    for (i = 1; i <= *n; ++i) {
	//i__2 = iy;
	//i__3 = ix;
	CY(iy).r = CX(ix).r, CY(iy).i = CX(ix).i;
	ix += *incx;
	iy += *incy;
/* L10: */
    }
    return 0;

/*        code for both increments equal to 1 */

L20:
    //i__1 = *n;
    for (i = 1; i <= *n; ++i) {
	//i__2 = i;
	//i__3 = i;
	CY(i).r = CX(i).r, CY(i).i = CX(i).i;
/* L30: */
    }
    return 0;
} /* ccopy_ */


integer icamax_(integer *n, complx *cx, integer *incx)
{
    /* System generated locals */
   integer ret_val;
   //integer i__1, i__2;
    sreal r__1, r__2;

    /* Local variables */
    static sreal smax;
    static integer i, ix;
/*     finds the index of element having max. absolute value.   
       jack dongarra, linpack, 3/11/78.   
       modified 3/93 to return if incx .le. 0.   
       modified 12/3/93, array(1) declarations changed to array(*)   
    
   Parameter adjustments   
       Function Body */
#define CX(I) cx[(I)-1]
    ret_val = 0;
    if (*n < 1 || *incx <= 0) {
	return ret_val;
    }
    ret_val = 1;
    if (*n == 1) {
	return ret_val;
    }
    if (*incx == 1) {
	goto L20;
    }
/*        code for increment not equal to 1 */
    ix = 1;
    smax = (r__1 = CX(1).r, fabs(r__1)) + (r__2 = r_imag(&CX(1)), fabs(r__2));
    ix += *incx;
    //i__1 = *n;
    for (i = 2; i <= *n; ++i) {
	//i__2 = ix;
	if ((r__1 = CX(ix).r, fabs(r__1)) + (r__2 = r_imag(&CX(ix)), fabs(
		r__2)) <= smax) {
	    goto L5;
	}
	ret_val = i;
	//i__2 = ix;
	smax = (r__1 = CX(ix).r, fabs(r__1)) + (r__2 = r_imag(&CX(ix)), 
		fabs(r__2));
L5:
	ix += *incx;
/* L10: */
    }
    return ret_val;
/*        code for increment equal to 1 */
L20:
    smax = (r__1 = CX(1).r, fabs(r__1)) + (r__2 = r_imag(&CX(1)), fabs(r__2));
    //i__1 = *n;
    for (i = 2; i <= *n; ++i) {
	//i__2 = i;
	if ((r__1 = CX(i).r, fabs(r__1)) + (r__2 = r_imag(&CX(i)), fabs(
		r__2)) <= smax) {
	    goto L30;
	}
	ret_val = i;
	//i__2 = i;
	smax = (r__1 = CX(i).r, fabs(r__1)) + (r__2 = r_imag(&CX(i)), fabs(
		r__2));
L30:
	;
    }
    return ret_val;
} /* icamax_ */

int cgeru_(integer *m, integer *n, complx *alpha, complx *
	x, integer *incx, complx *y, integer *incy, complx *a, integer *lda)
{


    /* System generated locals */
    //integer a_dim1=0;
    //integer i__1, i__2, i__3, i__4, i__5;
    /* integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; */
    complx q__1, q__2;

    /* Local variables */
    static integer info;
    static complx temp;
    static integer i, j, ix, jy, kx;


/*  Purpose   
    =======   

    CGERU  performs the rank 1 operation   

       A := alpha*x*y' + A,   

    where alpha is a scalar, x is an m element vector, y is an n element 
  
    vector and A is an m by n matrix.   

    Parameters   
    ==========   

    M      - INTEGER.   
             On entry, M specifies the number of rows of the matrix A.   
             M must be at least zero.   
             Unchanged on exit.   

    N      - INTEGER.   
             On entry, N specifies the number of columns of the matrix A. 
  
             N must be at least zero.   
             Unchanged on exit.   

    ALPHA  - COMPLEX         .   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   

    X      - COMPLEX          array of dimension at least   
             ( 1 + ( m - 1 )*abs( INCX ) ).   
             Before entry, the incremented array X must contain the m   
             element vector x.   
             Unchanged on exit.   

    INCX   - INTEGER.   
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   
             Unchanged on exit.   

    Y      - COMPLEX          array of dimension at least   
             ( 1 + ( n - 1 )*abs( INCY ) ).   
             Before entry, the incremented array Y must contain the n   
             element vector y.   
             Unchanged on exit.   

    INCY   - INTEGER.   
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   
             Unchanged on exit.   

    A      - COMPLEX          array of DIMENSION ( LDA, n ).   
             Before entry, the leading m by n part of the array A must   
             contain the matrix of coefficients. On exit, A is   
             overwritten by the updated matrix.   

    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared 
  
             in the calling (sub) program. LDA must be at least   
             max( 1, m ).   
             Unchanged on exit.   


    Level 2 Blas routine.   

    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   



       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
#define X(I) x[(I)-1]
#define Y(I) y[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    info = 0;
    if (*m < 0) {
	info = 1;
    } else if (*n < 0) {
	info = 2;
    } else if (*incx == 0) {
	info = 5;
    } else if (*incy == 0) {
	info = 7;
    } else if (*lda < max(1,*m)) {
	info = 9;
    }
    if (info != 0) {
	xerbla_("CGERU ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*m == 0 || *n == 0 || (alpha->r == 0.f && alpha->i == 0.f)) {
	return 0;
    }

/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A. */

    if (*incy > 0) {
	jy = 1;
    } else {
	jy = 1 - (*n - 1) * *incy;
    }
    if (*incx == 1) {
	//i__1 = *n;
	for (j = 1; j <= *n; ++j) {
	    //i__2 = jy;
	    if (Y(jy).r != 0.f || Y(jy).i != 0.f) {
		//i__2 = jy;
		q__1.r = alpha->r * Y(jy).r - alpha->i * Y(jy).i, q__1.i =
			 alpha->r * Y(jy).i + alpha->i * Y(jy).r;
		temp.r = q__1.r, temp.i = q__1.i;
		//i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    //i__3 = i + j * a_dim1;
		    //i__4 = i + j * a_dim1;
		    //i__5 = i;
		    q__2.r = X(i).r * temp.r - X(i).i * temp.i, q__2.i =
			     X(i).r * temp.i + X(i).i * temp.r;
		    q__1.r = A(i,j).r + q__2.r, q__1.i = A(i,j).i + q__2.i;
		    A(i,j).r = q__1.r, A(i,j).i = q__1.i;
/* L10: */
		}
	    }
	    jy += *incy;
/* L20: */
	}
    } else {
	if (*incx > 0) {
	    kx = 1;
	} else {
	    kx = 1 - (*m - 1) * *incx;
	}
	//i__1 = *n;
	for (j = 1; j <= *n; ++j) {
	    //i__2 = jy;
	    if (Y(jy).r != 0.f || Y(jy).i != 0.f) {
		//i__2 = jy;
		q__1.r = alpha->r * Y(jy).r - alpha->i * Y(jy).i, q__1.i =
			 alpha->r * Y(jy).i + alpha->i * Y(jy).r;
		temp.r = q__1.r, temp.i = q__1.i;
		ix = kx;
		//i__2 = *m;
		for (i = 1; i <= *m; ++i) {
		    //i__3 = i + j * a_dim1;
		    //i__4 = i + j * a_dim1;
		    //i__5 = ix;
		    q__2.r = X(ix).r * temp.r - X(ix).i * temp.i, q__2.i =
			     X(ix).r * temp.i + X(ix).i * temp.r;
		    q__1.r = A(i,j).r + q__2.r, q__1.i = A(i,j).i + q__2.i;
		    A(i,j).r = q__1.r, A(i,j).i = q__1.i;
		    ix += *incx;
/* L30: */
		}
	    }
	    jy += *incy;
/* L40: */
	}
    }

    return 0;

/*     End of CGERU . */

} /* cgeru_ */


int cswap_(integer *n, complx *cx, integer *incx, complx *
	cy, integer *incy)
{


    /* System generated locals */
    //integer i__1, i__2, i__3;

    /* Local variables */
    static integer i;
    static complx ctemp;
    static integer ix, iy;


/*     interchanges two vectors.   
       jack dongarra, linpack, 3/11/78.   
       modified 12/3/93, array(1) declarations changed to array(*)   


    
   Parameter adjustments   
       Function Body */
#define CY(I) cy[(I)-1]
#define CX(I) cx[(I)-1]


    if (*n <= 0) {
	return 0;
    }
    if (*incx == 1 && *incy == 1) {
	goto L20;
    }

/*       code for unequal increments or equal increments not equal   
           to 1 */

    ix = 1;
    iy = 1;
    if (*incx < 0) {
	ix = (-(*n) + 1) * *incx + 1;
    }
    if (*incy < 0) {
	iy = (-(*n) + 1) * *incy + 1;
    }
    //i__1 = *n;
    for (i = 1; i <= *n; ++i) {
	//i__2 = ix;
	ctemp.r = CX(ix).r, ctemp.i = CX(ix).i;
	//i__2 = ix;
	//i__3 = iy;
	CX(ix).r = CY(iy).r, CX(ix).i = CY(iy).i;
	//i__2 = iy;
	CY(iy).r = ctemp.r, CY(iy).i = ctemp.i;
	ix += *incx;
	iy += *incy;
/* L10: */
    }
    return 0;

/*       code for both increments equal to 1 */
L20:
    //i__1 = *n;
    for (i = 1; i <= *n; ++i) {
	//i__2 = i;
	ctemp.r = CX(i).r, ctemp.i = CX(i).i;
	//i__2 = i;
	//i__3 = i;
	CX(i).r = CY(i).r, CX(i).i = CY(i).i;
	//i__2 = i;
	CY(i).r = ctemp.r, CY(i).i = ctemp.i;
/* L30: */
    }
    return 0;
} /* cswap_ */

int xerbla_(const char *srname, integer *info)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    XERBLA  is an error handler for the LAPACK routines.   
    It is called by an LAPACK routine if an input parameter has an   
    invalid value.  A message is printed and execution stops.   

    Installers may consider modifying the STOP statement in order to   
    call system-specific exception-handling facilities.   

    Arguments   
    =========   

    SRNAME  (input) CHARACTER*6   
            The name of the routine which called XERBLA.   

    INFO    (input) INTEGER   
            The position of the invalid parameter in the parameter list   

            of the calling routine.   

   ===================================================================== 
*/

    printf("** On entry to %6s, parameter number %2i had an illegal value\n",
		srname, int(*info));

/*     End of XERBLA */

    return 0;
} /* xerbla_ */


int cher_(char *uplo, integer *n, sreal *alpha, complx *x, 
	integer *incx, complx *a, integer *lda)
{


    /* System generated locals */
    //integer a_dim1=0;
    //integer i__1, i__2, i__3, i__4, i__5;
    /* integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; */
    doublereal d__1;
    complx q__1, q__2;

    /* Local variables */
    static integer info;
    static complx temp;
    static integer i, j;
    static integer ix, jx, kx;

/*  Purpose   
    =======   

    CHER   performs the hermitian rank 1 operation   

       A := alpha*x*conjg( x' ) + A,   

    where alpha is a real scalar, x is an n element vector and A is an   
    n by n hermitian matrix.   

    Parameters   
    ==========   

    UPLO   - CHARACTER*1.   
             On entry, UPLO specifies whether the upper or lower   
             triangular part of the array A is to be referenced as   
             follows:   

                UPLO = 'U' or 'u'   Only the upper triangular part of A   
                                    is to be referenced.   

                UPLO = 'L' or 'l'   Only the lower triangular part of A   
                                    is to be referenced.   

             Unchanged on exit.   

    N      - INTEGER.   
             On entry, N specifies the order of the matrix A.   
             N must be at least zero.   
             Unchanged on exit.   

    ALPHA  - REAL            .   
             On entry, ALPHA specifies the scalar alpha.   
             Unchanged on exit.   

    X      - COMPLEX          array of dimension at least   
             ( 1 + ( n - 1 )*abs( INCX ) ).   
             Before entry, the incremented array X must contain the n   
             element vector x.   
             Unchanged on exit.   

    INCX   - INTEGER.   
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   
             Unchanged on exit.   

    A      - COMPLEX          array of DIMENSION ( LDA, n ).   
             Before entry with  UPLO = 'U' or 'u', the leading n by n   
             upper triangular part of the array A must contain the upper 
  
             triangular part of the hermitian matrix and the strictly   
             lower triangular part of A is not referenced. On exit, the   
             upper triangular part of the array A is overwritten by the   
             upper triangular part of the updated matrix.   
             Before entry with UPLO = 'L' or 'l', the leading n by n   
             lower triangular part of the array A must contain the lower 
  
             triangular part of the hermitian matrix and the strictly   
             upper triangular part of A is not referenced. On exit, the   
             lower triangular part of the array A is overwritten by the   
             lower triangular part of the updated matrix.   
             Note that the imaginary parts of the diagonal elements need 
  
             not be set, they are assumed to be zero, and on exit they   
             are set to zero.   

    LDA    - INTEGER.   
             On entry, LDA specifies the first dimension of A as declared 
  
             in the calling (sub) program. LDA must be at least   
             max( 1, n ).   
             Unchanged on exit.   


    Level 2 Blas routine.   

    -- Written on 22-October-1986.   
       Jack Dongarra, Argonne National Lab.   
       Jeremy Du Croz, Nag Central Office.   
       Sven Hammarling, Nag Central Office.   
       Richard Hanson, Sandia National Labs.   



       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
#define X(I) x[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
	info = 1;
    } else if (*n < 0) {
	info = 2;
    } else if (*incx == 0) {
	info = 5;
    } else if (*lda < max(1,*n)) {
	info = 7;
    }
    if (info != 0) {
	xerbla_("CHER  ", &info);
	return 0;
    }

/*     Quick return if possible. */

    if (*n == 0 || *alpha == 0.f) {
	return 0;
    }

/*     Set the start point in X if the increment is not unity. */

    if (*incx <= 0) {
	kx = 1 - (*n - 1) * *incx;
    } else if (*incx != 1) {
	kx = 1;
    }

/*     Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through the triangular part   
       of A. */

    if (lsame_(uplo, "U")) {

/*        Form  A  when A is stored in upper triangle. */

	if (*incx == 1) {
	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = j;
		if (X(j).r != 0.f || X(j).i != 0.f) {
		    r_cnjg(&q__2, &X(j));
		    q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
		    temp.r = q__1.r, temp.i = q__1.i;
		    //i__2 = j - 1;
		    for (i = 1; i <= j-1; ++i) {
			//i__3 = i + j * a_dim1;
			//i__4 = i + j * a_dim1;
			//i__5 = i;
			q__2.r = X(i).r * temp.r - X(i).i * temp.i, 
				q__2.i = X(i).r * temp.i + X(i).i * 
				temp.r;
			q__1.r = A(i,j).r + q__2.r, q__1.i = A(i,j).i + 
				q__2.i;
			A(i,j).r = q__1.r, A(i,j).i = q__1.i;
/* L10: */
		    }
		    //i__2 = j + j * a_dim1;
		    //i__3 = j + j * a_dim1;
		    //i__4 = j;
		    q__1.r = X(j).r * temp.r - X(j).i * temp.i, q__1.i =
			     X(j).r * temp.i + X(j).i * temp.r;
		    d__1 = A(j,j).r + q__1.r;
		    A(j,j).r = d__1, A(j,j).i = 0.f;
		} else {
		    //i__2 = j + j * a_dim1;
		    //i__3 = j + j * a_dim1;
		    d__1 = A(j,j).r;
		    A(j,j).r = d__1, A(j,j).i = 0.f;
		}
/* L20: */
	    }
	} else {
	    jx = kx;
	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = jx;
		if (X(jx).r != 0.f || X(jx).i != 0.f) {
		    r_cnjg(&q__2, &X(jx));
		    q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
		    temp.r = q__1.r, temp.i = q__1.i;
		    ix = kx;
		    //i__2 = j - 1;
		    for (i = 1; i <= j-1; ++i) {
			//i__3 = i + j * a_dim1;
			//i__4 = i + j * a_dim1;
			//i__5 = ix;
			q__2.r = X(ix).r * temp.r - X(ix).i * temp.i, 
				q__2.i = X(ix).r * temp.i + X(ix).i * 
				temp.r;
			q__1.r = A(i,j).r + q__2.r, q__1.i = A(i,j).i + 
				q__2.i;
			A(i,j).r = q__1.r, A(i,j).i = q__1.i;
			ix += *incx;
/* L30: */
		    }
		    //i__2 = j + j * a_dim1;
		    //i__3 = j + j * a_dim1;
		    //i__4 = jx;
		    q__1.r = X(jx).r * temp.r - X(jx).i * temp.i, q__1.i =
			     X(jx).r * temp.i + X(jx).i * temp.r;
		    d__1 = A(j,j).r + q__1.r;
		    A(j,j).r = d__1, A(j,j).i = 0.f;
		} else {
		    //i__2 = j + j * a_dim1;
		    //i__3 = j + j * a_dim1;
		    d__1 = A(j,j).r;
		    A(j,j).r = d__1, A(j,j).i = 0.f;
		}
		jx += *incx;
/* L40: */
	    }
	}
    } else {

/*        Form  A  when A is stored in lower triangle. */

	if (*incx == 1) {
	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = j;
		if (X(j).r != 0.f || X(j).i != 0.f) {
		    r_cnjg(&q__2, &X(j));
		    q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
		    temp.r = q__1.r, temp.i = q__1.i;
		    //i__2 = j + j * a_dim1;
		    //i__3 = j + j * a_dim1;
		    //i__4 = j;
		    q__1.r = temp.r * X(j).r - temp.i * X(j).i, q__1.i =
			     temp.r * X(j).i + temp.i * X(j).r;
		    d__1 = A(j,j).r + q__1.r;
		    A(j,j).r = d__1, A(j,j).i = 0.f;
		    //i__2 = *n;
		    for (i = j + 1; i <= *n; ++i) {
			//i__3 = i + j * a_dim1;
			//i__4 = i + j * a_dim1;
			//i__5 = i;
			q__2.r = X(i).r * temp.r - X(i).i * temp.i, 
				q__2.i = X(i).r * temp.i + X(i).i * 
				temp.r;
			q__1.r = A(i,j).r + q__2.r, q__1.i = A(i,j).i + 
				q__2.i;
			A(i,j).r = q__1.r, A(i,j).i = q__1.i;
/* L50: */
		    }
		} else {
		    //i__2 = j + j * a_dim1;
		    //i__3 = j + j * a_dim1;
		    d__1 = A(j,j).r;
		    A(j,j).r = d__1, A(j,j).i = 0.f;
		}
/* L60: */
	    }
	} else {
	    jx = kx;
	    //i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		//i__2 = jx;
		if (X(jx).r != 0.f || X(jx).i != 0.f) {
		    r_cnjg(&q__2, &X(jx));
		    q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
		    temp.r = q__1.r, temp.i = q__1.i;
		    //i__2 = j + j * a_dim1;
		    //i__3 = j + j * a_dim1;
		    //i__4 = jx;
		    q__1.r = temp.r * X(jx).r - temp.i * X(jx).i, q__1.i =
			     temp.r * X(jx).i + temp.i * X(jx).r;
		    d__1 = A(j,j).r + q__1.r;
		    A(j,j).r = d__1, A(j,j).i = 0.f;
		    ix = jx;
		    //i__2 = *n;
		    for (i = j + 1; i <= *n; ++i) {
			ix += *incx;
			//i__3 = i + j * a_dim1;
			//i__4 = i + j * a_dim1;
			//i__5 = ix;
			q__2.r = X(ix).r * temp.r - X(ix).i * temp.i, 
				q__2.i = X(ix).r * temp.i + X(ix).i * 
				temp.r;
			q__1.r = A(i,j).r + q__2.r, q__1.i = A(i,j).i + 
				q__2.i;
			A(i,j).r = q__1.r, A(i,j).i = q__1.i;
/* L70: */
		    }
		} else {
		    //i__2 = j + j * a_dim1;
		    //i__3 = j + j * a_dim1;
		    d__1 = A(j,j).r;
		    A(j,j).r = d__1, A(j,j).i = 0.f;
		}
		jx += *incx;
/* L80: */
	    }
	}
    }

    return 0;

/*     End of CHER  . */

} /* cher_ */


int csscal_(integer *n, sreal *sa, complx *cx, integer *incx)
{


    /* System generated locals */
    //integer i__1, i__2, i__3, i__4;
    doublereal d__1, d__2;
    complx q__1;

    /* Local variables */
    static integer i, nincx;


/*     scales a complex vector by a real constant.   
       jack dongarra, linpack, 3/11/78.   
       modified 3/93 to return if incx .le. 0.   
       modified 12/3/93, array(1) declarations changed to array(*)   


    
   Parameter adjustments   
       Function Body */
#define CX(I) cx[(I)-1]


    if (*n <= 0 || *incx <= 0) {
	return 0;
    }
    if (*incx == 1) {
	goto L20;
    }

/*        code for increment not equal to 1 */

    nincx = (*n) * (*incx);
    //i__1 = nincx;
    //i__2 = *incx;
    for (i = 1; *incx < 0 ? i >= nincx : i <= nincx; i += *incx) {
	//i__3 = i;
	//i__4 = i;
	d__1 = *sa * CX(i).r;
	d__2 = *sa * r_imag(&CX(i));
	q__1.r = d__1, q__1.i = d__2;
	CX(i).r = q__1.r, CX(i).i = q__1.i;
/* L10: */
    }
    return 0;

/*        code for increment equal to 1 */

L20:
    //i__2 = *n;
    for (i = 1; i <= *n; ++i) {
	//i__1 = i;
	//i__3 = i;
	d__1 = *sa * CX(i).r;
	d__2 = *sa * r_imag(&CX(i));
	q__1.r = d__1, q__1.i = d__2;
	CX(i).r = q__1.r, CX(i).i = q__1.i;
/* L30: */
    }
    return 0;
} /* csscal_ */
