// This file implements numerical linear algebra */

/*
Copyright (C) 1996 Free Software Foundation
    written by R.D. Pierce (pierce@math.psu.edu)

This file is part of the GNUSSL software package.  This package is free
software; you can redistribute it and/or modify it under the terms of
the GNU Library General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your
option) any later version.  This software is distributed in the hope
that it will be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE.  See the GNU Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this distribution; if not, write to the Free Software
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef _lin_alg_h_
#define _lin_alg_h_

#include<utilities.h>

template<class T,class S,class Real> 
void 
gauss_jordan(mMatrix<T,S> &opm,Real tol,bool full = 1,size_t r = 1,size_t c = 1)
{
  mMatrix<T,allocator<T> > null;
  gauss_jordan(opm,null,tol,full,r,c);
}

template<class T,class S,class T2,class S2,class Real> 
void 
gauss_jordan(mMatrix<T,S> &opm,mMatrix<T2,S2>& a,Real tol,bool full = 1,size_t r = 1,size_t c = 1)
{
//  Performs Gauss-Jordan elimination with partial pivoting (searches only
// the rows below the diagonal for pivots). If full=1 (default), then the
// operator matrix is fully reduced to the identity matrix.  If full!=1, then
// only the elements on the triangle below element(r,c) (default (1,1)) are
// set to zero. This algorithm accepts both a partitioned operator matrix
// (default) and a separate matrix of vector (column) inhomogeneities,
// which is the argument a.
  if(tol<=(Real)0.0) tol=GJ_TINY;
  mVector<size_t,allocator<size_t> > swp(opm.rsize());
  for(size_t j=1;j<=opm.rsize();j++) swp(j)=j;  // init row swap label array
  size_t lim=min(opm.rsize()-r+1,opm.csize()-c+1);
  for(size_t j=1;j<=lim;j++,r++,c++) {          // loop over necessary rows
    part_piv(opm,r,c,swp,tol,1);             // bring the pivot to row r
    if(a.rsize()>0) a.row(swp(r))/=opm(swp(r),c);      // normalize pivot and
    opm.row(swp(r))/=opm(swp(r),c);                    // the row containing it
    if(full==1) {         // zero the elements above the diagonal if required
      for(size_t k=1;k<r;k++)
        if(norm(opm(swp(k),c))!=(Real)0.0) {
          if(a.rsize()>0) a.row(swp(k))-=opm(swp(k),c)*a.row(swp(r));
          opm.row(swp(k))-=opm(swp(k),c)*opm.row(swp(r));
        }
    }
    for(size_t k=r+1;k<=opm.rsize();k++)    // always zero the subdiagonal elements
      if(norm(opm(swp(k),c))!=(Real)0.0) {
        if(a.rsize()>0) a.row(swp(k))-=opm(swp(k),c)*a.row(swp(r));
        opm.row(swp(k))-=opm(swp(k),c)*opm.row(swp(r));
      }
  }
  permute(opm,swp);                      // reorder the matrices 
  if(a.rsize()>0) permute(a,swp);
}

template<class T,class S,class S2,class Real> 
int 
part_piv(mMatrix<T,S> &opm,size_t r,size_t c,mVector<size_t,S2>& swp,Real tol,bool impl=1)
{
//   Implements partial pivoting (i.e., looks for pivot element only in rows
// below the current element, (swp(r),c), then swaps that row with the 
// current one in the index map).
// The default is for implicit pivoting (i.e., the pivot is chosen as if the
// max coefficient in each row is set to 1). The map of swapped indeces is
// recorded in swp. The return value is +1 or -1 depending on whether the
// number of row swaps was even or odd respectively. 
  if(tol<=(Real)0.0) tol=GJ_TINY;
  int swapNum=1;
      // find the max element in each row
  mVector<Real,allocator<Real> > max_elem(opm.rsize());
  for(size_t j=r;j<=opm.rsize();j++) {        // calc. max_elem, current to bottom
    if(!impl) max_elem(j)=(Real)1.0;      // if not implicit
    else max_elem(j)=sup(opm.row(swp(j))); // else implicit pivoting
  }
  size_t pivot=r;       // default pivot is the current position, [r,c]
  Real piv_elem=abs(opm(swp(r),c))/max_elem(r);
  for(size_t j=r+1;j<=opm.rsize();j++) {// loop over possible pivots below current
    Real tmp=abs(opm(swp(j),c))/max_elem(j);  // if this elem is larger, 
    if(tmp>piv_elem) {                   // then it becomes the pivot
      pivot=j;
      piv_elem=tmp;
    }
  }
  if(piv_elem<tol) {
    cerr << "part_piv(): Zero pivot encountered.\n";
    exit(1);
  }
  if(pivot>r) {         // bring the pivot to the diagonal
    size_t j=swp(r);                    // reorder swap array
    swp(r)=swp(pivot);
    swp(pivot)=j;
    swapNum*=-1;       // keeping track of odd or even swap
  }
  return swapNum;
}

// SQUARE TENSOR OPERATIONS

template<class T,class S,class S2,class Real> 
int 
lud(mMatrix<T,S> &opm,mVector<size_t,S2>& swp,Real tol)
{
// Performs the LU decomposition in place.  This is Crout's algorithm (cf.,
// Num. Rec. in C, Section 2.3).  The map of swapped indeces is
// recorded in swp. The return value is +1 or -1 depending on whether the
// number of row swaps was even or odd respectively.  swp must be 
// preinitialized to a valid set of indices (e.g., {1,2, ... ,opm.rsize()}).
  if(tol<=(Real)0.0) tol=GJ_TINY;
  if(opm.rsize()!=opm.csize()) {
    cerr << "lud(): Argument mMatrix is not square.\n";
    exit(1);
  }
  int swapNum=1;
  for(size_t c=1;c<=opm.csize();c++) {             // loop over columns
    swapNum*=part_piv(opm,c,c,swp,tol,1);       // bring pivot to diagonal
    for(size_t r=1;r<=opm.rsize();r++) {           //  loop over rows
      size_t lim=min(r,c);
      for(size_t j=1;j<lim;j++)
        opm(swp(r),c)-=opm(swp(r),j)*opm(swp(j),c);
      if(r>c) opm(swp(r),c)/=opm(swp(c),c);
    }
  }
  permute(opm,swp);
  return swapNum;
}

template<class T,class S,class S2>
void 
permute(mMatrix<T,S>& opm,mVector<size_t,S2>& swp) 
{
  // The opposite of sort(), permute() orders the rows of (*this) to
  // match the size_tegers in the index array.  The argument swp[] is unchanged.
  mVector<size_t,allocator<size_t> > ind(swp.size());
  for(size_t j=1;j<=opm.rsize();j++) ind(j)=j;
  for(size_t j=1;j<opm.rsize();j++)        // loop over permuted indeces
    if(ind(j)!=swp(j))
      for(size_t k=j+1;k<=opm.rsize();k++) // search only the remaining indeces
        if(ind(k)==swp(j)) {
          swap(opm,opm,j,k);            // swap the rows
          ind(k)=ind(j);
          ind(j)=swp(j);                // and the elements of the ordered index
          break;                        // next j
        }
}

template<class T,class S,class T2,class S2,class Real> 
void 
lu_solve(mMatrix<T,S> &opm,mMatrix<T2,S2>& a,Real tol) 
{
// Solves the inhomogeneous mMatrix problem with lu-decomposition. Note that
// inversion may be accomplished by setting a to the identity_matrix.
  mVector<size_t,allocator<size_t> > swp(opm.rsize());
  for(size_t j=1;j<=opm.rsize();j++) swp(j)=j;  // init row swap label array
  lud(opm,swp,tol);           // get the lu-decomp.
  permute(a,swp);             // sort the inhomogeneity to match the lu-decomp
  fsub(opm,a);                // solve the forward problem
  bsub(opm,a);                // solve the backward problem
}


template<class T,class S,class T2,class S2> 
void 
bsub(mMatrix<T,S> &opm,mMatrix<T2,S2>& a,bool diag = 0) 
{
// Solves the system of equations opm*b=a, ASSUMING that opm is
// upper triangular. If diag==1, then the diagonal elements are additionally
// assumed to be 1.  Note that the lower triangular elements are never checked,
// so this function is valid to use after a LU-decomposition in place.  opm
// is not modified, and the solution, b, is returned in a. 
  for(size_t r=opm.rsize();r>=1;r--) {
    for(size_t c=opm.csize();c>r;c--) a.row(r)-=opm(r,c)*a.row(c);
    if(!diag) a.row(r)/=opm(r,r);
  }
}

template<class T,class S,class T2,class S2> 
void 
fsub(mMatrix<T,S> &opm,mMatrix<T2,S2>& a,bool diag = 1) 
{
// Solves the system of equations opm*b=a, ASSUMING that opm is
// lower triangular. If diag==1, then the diagonal elements are additionally
// assumed to be 1.  Note that the upper triangular elements are never checked,
// so this function is valid to use after a LU-decomposition in place.  opm
// is not modified, and the solution, b, is returned in a.
  for(size_t r=1;r<=opm.rsize();r++) {
    for(size_t c=1;c<r;c++) a.row(r)-=opm(r,c)*a.row(c);
    if(!diag) a.row(r)/=opm(r,r);
  }
}

template<class T,class S,class Real> 
T 
determinant(mMatrix<T,S> &opm,Real tol) 
{
//  Returns the determinant using LU-decomposition. 
  mVector<size_t,allocator<size_t> > swp(opm.rsize());
  for(size_t j=1;j<=opm.rsize();j++) swp(j)=j;  // init row swap label array
  Real sgn=(Real)lud(opm,swp,tol);   // get lud
  return sgn*product(opm.diagonal());        // return determinant
}



/* These are the definitions of singular value and eigenvalue routines
based on Householder and Givens orthogonal transformations.  */


template<class T,class S> 
mVector<T,S>& 
lhouse(mVector<T,S> &v) 
{
// Returns in place the Householder vector which will generate the reflection
// operator from the left to zero all elements other than the first.

  T tmp=v(1)*conj(v(1));
  for(size_t j=2;j<=v.size();j++) tmp+=v(j)*conj(v(j));
  tmp=sqrt(tmp);                  // ||x||
  T tmp2(abs(v(1)));              // |x(1)|
  if(tmp==tmp2) v-=v;             // nothing to be done, return zero vector
  else {                          // x(1)+sign(x(1))*||x||
    v(1)=(tmp2==(T)0.0)?tmp:v(1) + v(1)*(tmp/tmp2);
    v/=v(1);                      // renormalize v(1) to 1
  }
  return v;
}

template<class T,class S> 
mVector<T,S>& 
rhouse(mVector<T,S> &v) 
{
// Returns in place the Householder vector which will generate the reflection
// operator from the right to zero all elements other than the first.

  lhouse(v);
  for(size_t j=1;j<=v.size();j++) v(j)=conj(v(j));
}

template<class T,class S,class T2,class S2> 
void 
left_house(mMatrix<T,S> &opm,mVector<T2,S2> &v,size_t r,size_t c) 
{
// Returns in place the left Householder transformation of opm by the
// Householder reflection vector, v.  The reflection operator is defined
// by (Id - 2*P(v)) where P(v) is the projection onto the Householder vector.
// P(v)=(outer product of v and Hermitian adjoint of v)/||v||**2.
// The vector v is treated as if it were a vector of length opm.rsize(),
// and it is effectively padded with (r-1) leading zeroes, and 
// (opm.rsize() - r + 1 - v.size()) trailing zeroes.  Thus all rows of opm
// < r are not affected.  By convention, all columns of opm < c are also not 
// effected (i.e., are assumed to have 0 entries).

  mVector<T,allocator<T> > vadj(v.size()), tv(opm.csize()-c+1);
  T vm=v(1)*conj(v(1));
  for(size_t j=2;j<=v.size();j++) vm+=v(j)*conj(v(j));  // ||v||^2
  for(size_t j=1;j<=v.size();j++) vadj(j)=conj(v(j)+v(j))/vm;
  for(size_t k=1;k<=tv.size();k++) {  // tv=(2/vm)*(v^H.opm)
    tv(k)=vadj(1)*opm(r,c+k-1);
    for(size_t j=2;j<=v.size();j++) tv(k)+=vadj(j)*opm(r+j-1,c+k-1);
  }
  for(size_t j=1;j<=v.size();j++)   // finish operation
    for(size_t k=1;k<=tv.size();k++) opm(r+j-1,c+k-1)-=v(j)*tv(k);
}

template<class T,class S,class T2,class S2> 
void 
right_house(mMatrix<T,S> &opm,mVector<T2,S2> &v,size_t r,size_t c) 
{
// Returns in place the right Householder transformation of opm by the
// right Householder reflection vector, v.  Equivalent to applying the left
// Householder transform to the adjoint of opm, then taking the adjoint
// a second time.

  mVector<T,allocator<T> > vadj(v.size()), tv(opm.rsize()-r+1);
  T vm=v(1)*conj(v(1));
  for(size_t j=2;j<=v.size();j++) vm+=v(j)*conj(v(j));  // ||v||^2
  for(size_t j=1;j<=v.size();j++) vadj(j)=(v(j)+v(j))/vm;
  for(size_t k=1;k<=tv.size();k++) {  // tv=(2/vm)*(v^H.opm)
    tv(k)=vadj(1)*opm(r+k-1,c);
    for(size_t j=2;j<=v.size();j++) tv(k)+=vadj(j)*opm(r+k-1,c+j-1);
  }
  for(size_t j=1;j<=v.size();j++)   // finish operation
  for(size_t k=1;k<=tv.size();k++) opm(r+k-1,c+j-1)-=conj(v(j))*tv(k);
}

template<class T,class S> 
void 
col_red(mMatrix<T,S> &opm,size_t r,size_t c,bool record=1) 
{
// Reduces the column of the matrix opm under the element (r,c) via 
// Householder reflection.  If record==1, the non-trivial 
// elements of the Householder vector are stored in the zeroed
// elements of the column. Elements outside of the submatrix are not affected.

  if(r==opm.rsize()) return;            // nothing to be done
  mVector<T,allocator<T> > v(opm.rsize()-r+1);
  for(size_t j=1;j<=v.size();j++) v(j)=opm(r+j-1,c);  // init the House vector
  lhouse(v);          // and compute the transformation
  left_house(opm,v,r,c);       // apply it
  if(record) for(size_t j=2;j<=v.size();j++) opm(r+j-1,c)=v(j);   // store it
}

template<class T,class S> 
void 
row_red(mMatrix<T,S> &opm,size_t r,size_t c,bool record=1) 
{
// Reduces the row of the matrix opm to the right of the element 
// (r,c) via Householder reflection.  If record==1, the non-trivial 
// elements of the Householder vector are stored in the zeroed
// elements of the row. Elements outside of the submatrix are not affected.

  if(c==opm.csize()) return;            // nothing to be done
  mVector<T,allocator<T> > v(opm.csize()-c+1);
  for(size_t j=1;j<=v.size();j++) v(j)=opm(r,c+j-1);  // init the House vector
  rhouse(v);         // and compute the transformation 
  right_house(opm,v,r,c);       // apply it
  if(record) for(size_t j=2;j<=v.size();j++) opm(r,c+j-1)=v(j);   // store it
}

template<class T,class S,class T2,class S2> 
void
col_accum(const mMatrix<T,S> &opm,mMatrix<T2,S2> &q,size_t r,size_t c)
{
// Accumulates c Householder reflections onto q from the left.  The Householder
// vectors are assumed to be stored in the first c columns of the lower
// triangular portion of opm, with the second element of the first Householder
// vector being at (r+1,c), the second element of the second vector being at
// (r,c-1), etc.  The first element of each Householder vector is assumed 
// normalized to one.  The vector is treated as if it were a vector of 
// length opm.rsize(), and it is effectively padded with (r-1) leading zeroes.

  if(&opm==&q) {
    cerr << "col_accum(): Source and destination arrays the same.";
    exit(1);
  }
  for(c=c;c>=1;c--,r--) {     // loop over cols, deepest first
    mVector<T,allocator<T> > v(opm.rsize()-r+1);
    if(v.size()==1) continue;
    v(1)=(T)1.0;
    for(size_t j=2;j<=v.size();j++) v(j)=opm(j+r-1,c);
    left_house(q,v,r,1);
  }
}

template<class T,class S,class T2,class S2> 
void
row_accum(const mMatrix<T,S> &opm,mMatrix<T2,S2> &q,size_t r,size_t c)
{
// Accumulates r Householder reflections onto q from the left.  The Householder
// vectors are assumed to be stored in the first r rows of the upper
// triangular portion of opm, with the second element of the first Householder
// vector being at (r,c+1), the second element of the second vector being at
// (r-1,c), etc.  The first element of each Householder vector is assumed 
// normalized to one.  The vector is treated as if it were a vector of 
// length opm.csize(), and it is effectively padded with (c-1) leading zeroes.

  if(&opm==&q) {
    cerr << "row_accum(): Source and destination arrays the same.\n";
    exit(1);
  }
  for(r=r;r>=1;c--,r--) {     // loop over rows, deepest first
    mVector<T,allocator<T> > v(opm.csize()-c+1);
    if(v.size()==1) continue;
    v(1)=(T)1.0;
    for(size_t j=2;j<=v.size();j++) v(j)=opm(r,j+c-1);
    left_house(q,v,c,1);
  }
}

template<class T,class S,class S2> 
void
qrd(mMatrix<T,S> &opm,mMatrix<T,S2> &q)
{
// Constructs the QR decomposition of opm, which becomes upper right
// triangular. q must be the identity to start and becomes the orthogonal
// transformation matrix.  The original matrix may be reconstructed as
// dot(q,opm,opm).
  if(q.rsize()!=opm.rsize()) {
    cerr << "qrd(): Mismatched matrices.\n";
    exit(1);
  }
  if(opm.rsize()<opm.csize()) {
    cerr << "qrd(): Underdetermined matrix.";
    exit(1);
  }
  for(size_t c=1;c<=opm.csize();c++) col_red(opm,c,c,1);  // find the H. vecs
  col_accum(opm,q,opm.csize(),opm.csize()); // accumulate orthog matrix
  for(size_t c=1;c<=opm.csize();c++)        // erase H. vecs
    for(size_t r=c+1;r<=opm.rsize();r++) opm(r,c)-=opm(r,c);
}

template<class T,class S> 
mMatrix<T,S>& 
hessenberg(mMatrix<T,S> &opm,bool record = 1)
{
// Returns in place the Hessenberg form of opm via Householder similarity
// transformations. If record=0 then the lower triangle remains zero, otherwise
// it is overwritten with the nontrivial parts of the Householder vectors.
  if(opm.rsize()!=opm.csize()) {
    cerr << "hessenberg(): Non-square matrix.\n";
    exit(1);
  }
  for(size_t i=1;i<=opm.rsize()-2;i++) {
    col_red(opm,i+1,i,1);  // Reduce elements below subdiagonal and store H. vec
    mVector<T,allocator<T> > v(opm.rsize()-i);
    v(1)=(T)1.0;                // extract the House. vector from 
    for(size_t j=2;j<=v.size();j++) v(j)=opm(j+i,i);  // the reduced column
    right_house(opm,v,1,i+1);      // and apply the similarity transformation
    if(!record)        // clean up stored vector if not desired
      for(size_t j=i+2;j<=opm.rsize();j++) opm(j,i)=(T)0.0;
  }
  return opm;
}

template<class T,class S> 
mMatrix<T,S>& 
bidiag(mMatrix<T,S> &opm,bool record = 1)
{
// Returns in place the bidiagonal form of opm via left and right
// Householder transformations.  Note that the bidiagonal matrix is NOT
// similar to the original matrix.
  if(opm.csize()>opm.rsize()) {
    cerr << "bidiag(): Underdetermined matrix.";
    exit(1);
  }
  for(size_t i=1;i<=opm.csize()-2;i++) {
    col_red(opm,i,i,record);
    row_red(opm,i,i+1,record);
  }
  col_red(opm,opm.csize()-1,opm.csize()-1,record);
  if(opm.rsize()>opm.csize()) col_red(opm,opm.csize(),opm.csize(),record);
  return opm;
}

// WARNING: Givens rotations on complex matrices work, but the transformation
// is NOT unitary!!

template<class T> 
void
givens1(T &c,T &s)
{
// Gives c'=cos(t) and s'=sin(t) such that {{c',-s'},{s',c'}}.{c,s}={0,r}
// i.e., the rotation matrix which zeroes the first element.
  if(c==(T)0.0) {
    c=(T)1.0;
    s=(T)0.0;
  } else if(norm(c)>norm(s)) {
    T t=s/c;
    s=sqrt(c*c/(c*c+s*s));
    c=s*t;
  } else {
    T t=c/s;
    c=sqrt(s*s/(c*c+s*s));
    s=c*t;
  }
}

template<class T> 
void
givens2(T &c,T &s)
{
// Gives c'=cos(t) and s'=sin(t) such that {{c',-s'},{s',c'}}.{c,s}={r,0}
// i.e., the rotation matrix which zeroes the second element.
  if(s==(T)0.0) {
    c=(T)1.0;
    s=(T)0.0;
  } else if(norm(s)>norm(c)) {
    T t=-c/s;
    s=sqrt(s*s/(c*c+s*s));
    c=s*t;
  } else {
    T t=-s/c;
    c=sqrt(c*c/(c*c+s*s));
    s=c*t;
  }
}

template<class T,class T2,class S> 
void
row_rot(mMatrix<T,S> &opm,size_t i,size_t j,T2 &c,T2 &s)
{
// Rotates the row vectors opm.row(i) and opm.row(j) about their defined axis.
  for(size_t k=1;k<=opm.csize();k++) {
    T tmp=opm(i,k);
    opm(i,k)=c*tmp - s*opm(j,k);
    opm(j,k)=c*opm(j,k) + s*tmp;
  }
}

template<class T,class T2,class S> 
void
col_rot(mMatrix<T,S> &opm,size_t i,size_t j,T2 &c,T2 &s)
{
// Rotates the column vectors opm.col(i) and opm.col(j) about their defined axis
  for(size_t r=1;r<=opm.rsize();r++) {
    T tmp=opm(r,i);
    opm(r,i)=c*tmp - s*opm(r,j);
    opm(r,j)=c*opm(r,j) + s*tmp;
  }
}

template<class T,class S> 
void
row_givens(mMatrix<T,S> &opm,size_t r,size_t c,size_t r2)
{
// Zeroes opm(r,c) by rotating rows r and r2 (default c).
  if(r==r2) {
    cerr << "row_givens(): Bad row request.\n";
    exit(1);
  }
  T si,co;
  if(r>r2) {
    co=opm(r2,c);
    si=opm(r,c);
    givens2(co,si);      // zeroing the lower element
    row_rot(opm,r2,r,co,si);
  } else {
    co=opm(r,c);
    si=opm(r2,c);
    givens1(co,si);          // or the upper element
    row_rot(opm,r,r2,co,si);
  }
}

template<class T,class S> 
void
col_givens(mMatrix<T,S> &opm,size_t r,size_t c,size_t c2)
{
// Zeroes opm(r,c) by rotating columns c and c2 (default r).
  if(c==c2) {
    cerr << "col_givens(): Bad column request.\n";
    exit(1);
  }
  T si,co;
  if(c>c2) {
    co=opm(r,c2);
    si=opm(r,c);
    givens2(co,si);      // zeroing the second element
    col_rot(opm,c2,c,co,si);
  } else {
    co=opm(r,c);
    si=opm(r,c2);
    givens1(co,si);          // or the first element
    col_rot(opm,c,c2,co,si);
  }
}

template<class Complex,class S,class Real> 
void
eigenvalues(mMatrix<Complex,S> &opm,Real tol)
{
// Calculates the eigenvalues of a square (nonsymmetric) matrix via QR
// iteration with single shifts. Note that this algorithm will fail to
// converge for a real matrix unless the matrix is Hermitian (real and
// symmetric, since the eigenvalues are then real). opm is left in it's
// complex Schur form.  The orthogonal transformations are not preserved.
// The eigenvalues are left on the diagonal of opm.
  if(tol<=(Real)0.0) tol=SVD_TINY;
  if(opm.rsize()!=opm.csize()) {
    cerr << "eigenvalues(): Non-square matrix.\n";
    exit(1);
  }
  hessenberg(opm,0); // reduce to hessenberg form, do not record House. vectors
  size_t bottom=opm.rsize(),top;
  mVector<Complex,allocator<Complex> > v(2);        // the Householder vector
  v(1)=(Complex)1.0;
  for(size_t k=0;k<=SVD_IT_MAX;k++) {  // QR iteration, first deflate the matrix
    Real min=sup(opm);
    min=tol*tol*min*min;  // square of tol*largest element
    for(size_t i=bottom;i>=2;i--) {  // zero subdiagonals below tolerance
      if(norm(opm(i,i-1))<=min) opm(i,i-1)-=opm(i,i-1);
    }
    while(bottom>=2) {           // find bottom of the last nondiagonal block
      if(norm(opm(bottom,bottom-1))>min) break;
      bottom--;
    }
    if(bottom==1) {              // if subdiagonal has been eliminated
      return;                    // we are done
    }
    top=bottom-1;
    while(top>=2) {  // find bottom of the last nondiagonal block
      if(norm(opm(top,top-1))<=min) break;
      top--;
    }
    mVector<Complex,allocator<Complex> > v_store(bottom-top);
    Complex shift=opm(bottom,bottom);    // explicit shift
    for(size_t j=1;j<=opm.rsize();j++) opm(j,j)-=shift;
    for(size_t j=top;j<bottom;j++) {  // Hessenberg QR decomposition
      Real tmp2=abs(opm(j,j));    // |x(1)|
      Real tmp=sqrt(norm(opm(j,j))+norm(opm(j+1,j)));//||x||
      v(1)=(Complex)1.0;
      v(2)=(tmp2==(Real)0.0)?(Complex)tmp:opm(j,j)+opm(j,j)*(tmp/tmp2);
      v(2)=opm(j+1,j)/v(2);    // done House. vector with v(1)=1
      v_store(j-top+1)=v(2);   // store nontrivial part of House. vector
      left_house(opm,v,j,j);   // zero m(2,1)=opm(j+1,j) with House. reflection
    }    // j-loop
    for(size_t j=top;j<bottom;j++) {  // apply the similarity tranformation
      v(1)=(Complex)1.0;
      v(2)=v_store(j-top+1);  // reset the House. vector
      right_house(opm,v,1,j);  // conj transf., subdiagonal opm(j+1,j) is not 0
    }    // j-loop
    for(size_t j=1;j<=opm.rsize();j++) opm(j,j)+=shift;
  }
  cerr << "eigenvalues(): No convergence.\n";
  exit(1);
}

template<class T,class S,class S2,class Real,class S3,class S4> 
mVector<Real,S3>& 
svd(mMatrix<T,S>& opm,mMatrix<T,S2>& u,mVector<Real,S3> &d,mMatrix<T,S4>& v,Real tol)
{
// Returns the two orthogonal matrices and a vector of singular values. Thus
// the COLUMNS of u and v are the left and right singular vectors of opm.
// The original matrix may be reconstructed by u^diag(sv)^adjoint(v).
// u and v must be identity matrices to start. Here class Real should be of
// some variant of a real number.

  if(tol<=(Real)0.0) tol=(Real)SVD_TINY;
  if(opm.rsize()<opm.csize()) {
    cerr << "svd(): Underdetermined matrix.";
    exit(1);
  }
  size_t ortho=u.rsize();   // flag for accum. the orthog matrices
    // ortho=0 means u and v are null matrices and accum. is turned off
  bidiag(opm,1);          // reduce to bidiagonal form;
  if(ortho) {
    col_accum(opm,u,opm.csize(),opm.csize());    // accum the left orthog matrix
    row_accum(opm,v,opm.csize()-2,opm.csize()-1); // accum right orthog matrix
  }
     // the original matrix may now be reconstructed as u^bidiag^adjoint(v)
  Real tmp;
  T tmpc;      // change to real bidiag
  for(size_t i=1;i<opm.csize();i++) {          // with one element changed
    tmp=abs(opm(i,i));
    if(tmp!=(Real)0.0) {
      tmpc=conj(opm(i,i))/tmp;
      opm(i,i)*=tmpc;          // reduce (i,i) to real
      opm(i,i+1)*=tmpc;
      if(ortho) u.col(i)*=conj(tmpc);     // same oper on entire col of u
    }
    tmp=abs(opm(i,i+1));
    if(tmp!=(Real)0.0) {
      tmpc=conj(opm(i,i+1))/tmp;    // reduce (i,i+1) to real
      opm(i,i+1)*=tmpc;
      opm(i+1,i+1)*=tmpc;
      if(ortho) v.col(i+1)*=tmpc; // same oper on entire col of v
    }
  }
  tmp=abs(opm(opm.csize(),opm.csize())); // reduce (col,col) to real
  if(tmp!=(Real)0.0) {
    tmpc=conj(opm(opm.csize(),opm.csize()))/tmp;
    opm(opm.csize(),opm.csize())*=tmpc;
    if(ortho) u.col(opm.csize())*=conj(tmpc);    // same oper on entire col of u
  }
        // bidiag is now REAL and positive
     // the original matrix may still be reconstructed as u^bidiag^adjoint(v)
  mVector<Real,allocator<Real> > f(opm.csize()-1);   // d=diagonal, f=upper band
  for(size_t i=1;i<opm.csize();i++) {
    d(i)=real(opm(i,i));
    f(i)=real(opm(i,i+1));
  }
  d(opm.csize())=real(opm(opm.csize(),opm.csize()));
  Real aa,bb,cc,c,s,mu;
  size_t bottom=opm.csize(), top=1;   // initially look at the bidiag matrix
  for(size_t k=0;k<SVD_IT_MAX*opm.csize();k++) {
    if(k==SVD_IT_MAX*opm.csize()) {
      cerr << "svd(): No convergence.\n";
      exit(1);
    }
    for(size_t i=1;i<=bottom-1;i++)  // search for zeroes on the super-diagonal
      if(abs(f(i))<=tol*(abs(d(i))+abs(d(i+1)))) f(i)-=f(i);
           // deflate first: the bottom is the first diag. element which
    while(bottom>1) {     // bottom diag. elem. with nonzero superdiag. above
      if(f(bottom-1)!=(Real)0.0) break;
      bottom--;     // move bottom up one
    }
    if(bottom==1) break;       // if at the top then we're done
           // set the top of the block
    top=bottom-1;
    while(top>=1) {   // search for zero diagonal elements
      if(d(top)!=(Real)0.0) top--;
      else {
        if(f(top)!=(Real)0.0) { // if row is not zero then zero it
          tmp=f(top);         // element to be zeroed
          f(top)-=f(top);        // zero it
          for(size_t j=top+1;j<=bottom;j++) {  // chase the zero out the row
            c=tmp;            // element to zero
            s=d(j);          // diag element below
            givens1(c,s);     // calc. the rotation to zero the first arg.
    // note that  d(top)*=c  and f(top)=c*f(top)-s*d(top+1) are identically 0
            d(j)=s*tmp+c*d(j);    // a Givens row rotation
            if(j<bottom) {
              tmp=-s*f(j);          // same as row_rot(top,j,c,s)
              f(j)*=c;
            }
            if(ortho) col_rot(u,top,j,c,s); // left orthog. matrix
          }
        }
        break;  // move on
      }
    }   // top-loop
    top++;  // drop back down one row
    size_t j=bottom;
    while(j>top) {       // search the block for zero super-diagonal elements
      if(f(j-1)==(Real)0.0) break;
      j--;
    }  // j-loop
    top=j;    // the diagonal block bounded by top and bottom is now unreduced
    if(j==bottom) continue;     // if the block is trivial start again
    aa=d(bottom-1)*d(bottom-1);     // the shift (mu) is the eigenvalue of the
    bb=d(bottom)*d(bottom);       // trailing 2X2 matrix of the block which is
    cc=aa*f(bottom-1)*f(bottom-1);  // closer to abs_sq(d(bottom))
    cc=sqrt((aa-bb)*(aa-bb)+4.0*cc);
    mu=(aa>bb) ? aa+bb-cc : aa+bb+cc;
    mu/=(Real)2.0;
    c=d(top)*d(top)-mu;        // shift
    s=f(top)*d(top);  // element of B.adjoint()*B to be implicitly zeroed
    for(size_t i=top;i<bottom;i++) {
      givens2(c,s);      // calc. the column rotation rotation
        // if(i!=top) then tmp=(i-1,i+1) is to be zeroed by col_rot(i,i+1,c,s)
      if(i!=top) f(i-1)=c*f(i-1)-s*tmp;    // (i-1,i)
      tmp=d(i);              // now tmp=(i,i)
      d(i)=c*tmp-s*f(i);     // (i,i)         tmp=(i,i)
      f(i)=s*tmp+c*f(i);     // (i,i+1)       tmp=(i,i)
      tmp=-s*d(i+1);         // (i+1,i),  now tmp=(i+1,i)
      d(i+1)*=c;             // (i+1,i+1)
      if(ortho) col_rot(v,i,i+1,c,s); // also rotate the cols of v
      c=d(i);
      s=tmp;                 // tmp=(i+1,i),  which is to be zeroed
      givens2(c,s);      // calc. rotation,  row_rot(i,i+1,c,s)
                        // opm.row(i)=c*opm.row(i)-s*opm.row(j);
                        // opm.row(j)= s*opm.row(i)+c*opm.row(j);
                        // note that (i+1,i) is zeroed by the rotation
      d(i)=c*d(i)-s*tmp;           // (i,i)    tmp=(i+1,i)
      tmp=f(i);          // now tmp=(i,i+1)
      f(i)=c*tmp-s*d(i+1);         // (i,i+1)
      d(i+1)=s*tmp+c*d(i+1);       // (i+1,i+1)
      if(ortho) col_rot(u,i,i+1,c,s); // also rotate the columns of u
      if(i<(bottom-1)) {      // if not at the bottom prepare for next round
        tmp=-s*f(i+1);               // (i,i+2),  f(bottom)=0
        f(i+1)*=c;                  // (i+1,i+2)
        c=f(i);                     // (i,i+1)
        s=tmp;                  // tmp=(i,i+2),  which is to be zeroed
      }
    }  // i-loop
  }  // k-loop
  for(size_t i=1;i<=opm.csize();i++) {
    if(d(i)<(Real)0.0) {      // change sing. val. to positive
      d(i)=-d(i);
      if(ortho)   // apply the same oper. to the col of u
        for(size_t j=1;j<=opm.rsize();j++) u(j,i)=-u(j,i);
    }
  }
  for(size_t i=1;i<opm.csize();i++) { // sort the singular values, greatest first
    for(size_t j=i+1;j<=opm.csize();j++) {
      if(d(j)>d(i)) {
        tmp=d(i);         // swap the sing. vals.
        d(i)=d(j);
        d(j)=tmp;
        if(ortho) {
          for(size_t k=1;k<=u.rsize();k++) {    // and the columns of u and v
            tmpc=u(k,i);
            u(k,i)=u(k,j);
            u(k,j)=tmpc;
            tmpc=v(k,i);
            v(k,i)=v(k,j);
            v(k,j)=tmpc;
          }  // k-loop
        }
      }
    }  // j-loop
  }  // i-loop
     // Finally, set all sing. vals less than specified precision to zero
  for(size_t j=1;j<=d.size();j++) if(d(j)<tol*d(1)) d(j)-=d(j);
  return d;   // can still reconstruct original matrix as u^diag(d)^adjoint(v)
}

template<class T,class S,class Real,class S2> 
mVector<Real,S2>& 
singular_values(mMatrix<T,S> &opm,mVector<Real,S2> &sv,Real tol)
{
// Same as svd(), but does not accumulate the left and right singular matrices.
  mMatrix<T,allocator<T> > u,v;
  return svd(opm,u,sv,v,tol);
}        // end singular_values()

template<class T,class S,class S2,class Real,class S3,class S4>
size_t
pseudo_inverse(mMatrix<T,S> &opm,mMatrix<T,S2> &u,mVector<Real,S3> &sv,mMatrix<T,S4> &v,Real tol)
{
// Finds the inverse restricted to the range in place.  u and v must be
// identity matrices to start.  Left and right singular vectors are
// the last columns of u and v respectively.
  if(tol<=(Real)0.0) tol=SVD_TINY;
  svd(opm,u,sv,v,tol);     // take the svd of opm
  if(sv(1)==(Real)0.0) {
    cerr << "pseudo_inverse(): Zero matrix";
    exit(1);
  }
  u.transpose();           // the hermitian adjoint of u
  T (*cp)(const T&)=conj;
  transform(u,u,cp);
    
  size_t urank;        // test for the numerical rank of the matrix, urank
  for(urank=opm.csize();urank>=1;urank--) if(sv(urank)/sv(1) > tol) break;
  for(size_t j=1;j<=urank;j++) u.row(j)/=(T)sv(j);  // diag(sv)^(-1).u^H
        // construct inverse in place, using only numerically significant rows
  opm.resize(opm.csize(),opm.rsize());  // transpose w/out assignment
  for(size_t r=1;r<=opm.rsize();r++)
    for(size_t c=1;c<=opm.csize();c++) {
      opm(r,c)=v(r,1)*u(1,c);                     // zero
      for(size_t j=2;j<=urank;j++) opm(r,c)+=v(r,j)*u(j,c);  // v.diag(sv)^(-1).u^H
    }
  for(size_t j=1;j<=urank;j++) u.row(j)*=(T)sv(j); // reset u to column orthogonal
  u.transpose();
  transform(u,u,cp);
  return urank;
}

#endif

