/*
    Theseus - maximum likelihood superpositioning of macromolecular structures

    Copyright (C) 2004-2007 Douglas L. Theobald

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the:

    Free Software Foundation, Inc.,
    59 Temple Place, Suite 330,
    Boston, MA  02111-1307  USA

    -/_|:|_|_\-
*/

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <math.h>
#include <float.h>
#include <ctype.h>
#include "DLTutils.h"
#include "pdbIO.h"
#include "pdbMalloc.h"
#include "Coords.h"
#include "PDBCoords.h"
#include "pdbStats.h"
#include "pdbUtils.h"
#include "CovMat.h"
#include "LedoitWolf.h"
#include "DLTmath.h"
#include "distfit.h"

/* Olivier Ledoit and Michael Wolf (Feb 2004)
   "A well-conditioned estimator for large-dimensional covariance matrices."
   Journal of Multivariate Analysis, 88(2):365-411.
   http://www.btinternet.com/~olivier.ledoit/ole1_abstract.htm
   http://www.sciencedirect.com/science?_ob=MImg&_imagekey=B6WK9-491RP2P-7-JS&_cdi=6901&_orig=browse&_coverDate=07%2F10%2F2003&_sk=999999999&view=c&wchp=dGLbVtz-zSkWA&_acct=C000047944&_version=1&_userid=918210&md5=373d91196189b4618887d5571d6e6&ie=f.pdf
  
   Abstract:
   Many economic problems require a covariance matrix estimator
   that is not only invertible, but also well-conditioned (that is,
   inverting it does not amplify estimation error). For
   large-dimensional covariance matrices, the usual estimator - the
   sample covariance matrix - is typically not well-conditioned and
   may not even be invertible. This paper introduces an estimator
   that is both well-conditioned and more accurate than the sample
   covariance matrix asymptotically. This estimator is
   distribution-free and has a simple explicit formula that is easy
   to compute and interpret. It is the asymptotically optimal
   convex combination of the sample covariance matrix with the
   identity matrix. Optimality is meant with respect to a quadratic
   loss function, asymptotically as the number of observations and
   the number of variables go to infinity together. Extensive
   Monte-Carlo confirm that the asymptotic results tend to hold
   well in finite sample.
 */
/* Non-parametric Ledoit-Wolf shrinkage of the vlen x vlen covariance matrix.

   Reads cdsA->CovMat, the cnum coordinate sets in cdsA->coords and their
   average in cdsA->avecoords, and overwrites cdsA->CovMat in place with

       S* = (b^2/d^2) (m/3) I  +  (a^2/d^2) S

   where m is the mean diagonal element of S, d^2 is the dispersion of S
   about m*I, b^2 (clamped to at most d^2) is the estimated sampling error
   of S, and a^2 = d^2 - b^2.  The weights b^2/d^2 and a^2/d^2 are stored in
   cdsA->stats->ledoit1 and cdsA->stats->ledoit2.

   If d^2 is exactly zero the matrix is replaced by the identity; b^2 and
   a^2 are then both zero, so the stored weights are the result of 0.0/0.0
   (NaN under IEEE 754 arithmetic).

   NOTE(review): m is computed from CovMat itself, yet the shrinkage target
   is m/3, as in CalcLedoitCovMatPar where m is computed from 3*CovMat --
   confirm the intended scale of the target. */
void
CalcLedoitCovMat(CoordsArray *cdsA)
{
    double          m, a2, b2, d2;
    double          dxi, dyi, dzi, dxj, dyj, dzj, resid, tmp;
    double          factor1, factor2, term1;
    const double   *xk, *yk, *zk;
    int             i, j, k;
    const int       vlen = cdsA->vlen, cnum = cdsA->cnum;
    const double   *avex = (const double *) cdsA->avecoords->x,
                   *avey = (const double *) cdsA->avecoords->y,
                   *avez = (const double *) cdsA->avecoords->z;
    const Coords  **coords = (const Coords **) cdsA->coords;
    double        **CovMat = cdsA->CovMat;

    /* (1) m, the mean of the diagonal of S */
    m = 0.0;
    for (i = 0; i < vlen; ++i)
        m += CovMat[i][i];
    m /= vlen;

    /* (2) d^2, the squared distance of S from m*I, normalized by vlen */
    d2 = 0.0;
    for (i = 0; i < vlen; ++i)
    {
        for (j = 0; j < vlen; ++j)
        {
            tmp = CovMat[i][j];
            if (i == j)
                tmp -= m;
            d2 += tmp * tmp;
        }
    }
    d2 /= vlen;

    /* (3) b^2, from the squared residuals between each structure's own
       (x,y,z-averaged) outer product of deviations and S */
    b2 = 0.0;
    for (k = 0; k < cnum; ++k)
    {
        /* the coordinate arrays depend only on k */
        xk = coords[k]->x;
        yk = coords[k]->y;
        zk = coords[k]->z;

        for (i = 0; i < vlen; ++i)
        {
            /* the deviations of atom i depend only on k and i */
            dxi = xk[i] - avex[i];
            dyi = yk[i] - avey[i];
            dzi = zk[i] - avez[i];

            for (j = 0; j < vlen; ++j)
            {
                dxj = xk[j] - avex[j];
                dyj = yk[j] - avey[j];
                dzj = zk[j] - avez[j];

                resid = ((dxi*dxj) + (dyi*dyj) + (dzi*dzj))/3.0 - CovMat[i][j];
                b2 += resid * resid;
            }
        }
    }

    /* ML (1/n) normalization, as Ledoit uses.  The divisor is formed in
       double precision: as an int product, vlen * cnum * cnum overflows
       for large ensembles. */
    b2 /= ((double) vlen * (double) cnum * (double) cnum);

    /* b^2 = min(b^2, d^2) */
    if (d2 < b2)
        b2 = d2;

    /* (4) a^2 */
    a2 = d2 - b2;

    /* (5) the new estimator, S* = (b^2/d^2) (m/3) I + (a^2/d^2) S */
    factor1 = b2/d2;
    factor2 = a2/d2;
    term1 = factor1 * (m / 3.0);

    if (d2 == 0)
    {
        /* degenerate case: fall back to the identity matrix */
        for (i = 0; i < vlen; ++i)
            for (j = 0; j < vlen; ++j)
                CovMat[i][j] = 0.0;

        for (i = 0; i < vlen; ++i)
            CovMat[i][i] = 1.0;
    }
    else
    {
        for (i = 0; i < vlen; ++i)
        {
            for (j = 0; j < vlen; ++j)
            {
                if (i == j)
                    CovMat[i][j] = term1 + (factor2 * CovMat[i][j]);
                else
                    CovMat[i][j] *= factor2;
            }
        }
    }

    cdsA->stats->ledoit1 = factor1;
    cdsA->stats->ledoit2 = factor2;
}


/* Parametric Ledoit-Wolf shrinkage of the vlen x vlen covariance matrix;
   per the original author this assumes a normal distribution, with
   Var(Var(x)) = (2/N) Var(x)^2 and Var(Cov(x)) = (1/N) Cov(x)^2.

   Works on a copy of cdsA->CovMat scaled by 3 (kept in cdsA->tmpmat1, which
   is allocated here at vlen x vlen if it is still NULL) and overwrites
   cdsA->CovMat in place with

       S* = (b^2/d^2) (m/3) I  +  (a^2/d^2) S

   where m and d^2 are the mean diagonal and dispersion of the scaled copy,
   b^2 (clamped to at most d^2) is computed from the squared elements of its
   lower triangle, and a^2 = d^2 - b^2.  The weights b^2/d^2 and a^2/d^2 are
   stored in cdsA->stats->ledoit1 and cdsA->stats->ledoit2.

   If d^2 is exactly zero the matrix is replaced by the identity; the stored
   weights are then the result of 0.0/0.0 (NaN under IEEE 754 arithmetic). */
void
CalcLedoitCovMatPar(CoordsArray *cdsA)
{
    double          m, a2, b2, d2;
    double          factor1, factor2, term1;
    double        **tmp_covmat;
    int             i, j;
    const int       vlen = cdsA->vlen, cnum = cdsA->cnum;
    double        **CovMat = cdsA->CovMat;

    if (cdsA->tmpmat1 == NULL)
        cdsA->tmpmat1 = MatInit(vlen, vlen);

    tmp_covmat = cdsA->tmpmat1;

    /* scaled working copy of S */
    for (i = 0; i < vlen; ++i)
        for (j = 0; j < vlen; ++j)
            tmp_covmat[i][j] = 3.0 * CovMat[i][j];

    /* (1) m, the mean of the diagonal of the scaled copy */
    m = 0.0;
    for (i = 0; i < vlen; ++i)
        m += tmp_covmat[i][i];
    m /= (double) vlen;

    /* (2) d^2, the squared distance of the scaled copy from m*I,
       normalized by vlen (ML, biased) */
    d2 = 0.0;
    for (i = 0; i < vlen; ++i)
    {
        for (j = 0; j < vlen; ++j)
        {
            if (i != j)
                d2 += mysquare(tmp_covmat[i][j]);
            else
                d2 += mysquare(tmp_covmat[i][j] - m);
        }
    }
    d2 /= (double) vlen;

    /* (3) b^2, from the squared elements of the lower triangle
       (diagonal included) */
    b2 = 0.0;
    for (i = 0; i < vlen; ++i)
        for (j = 0; j <= i; ++j)
            b2 += mysquare(tmp_covmat[i][j]);

    /* The divisor is formed in double precision: as an int product,
       cnum * cnum * vlen overflows for large ensembles. */
    b2 = 2.0 * b2 / ((double) cnum * (double) cnum * (double) vlen);

    /* b^2 = min(b^2, d^2) */
    if (d2 < b2)
        b2 = d2;

    /* (4) a^2 */
    a2 = d2 - b2;

    /* (5) the new estimator, S* = (b^2/d^2) (m/3) I + (a^2/d^2) S;
       m/3 undoes the factor of 3 applied to the working copy */
    factor1 = b2/d2;
    factor2 = a2/d2;
    term1 = factor1 * (m / 3.0);

    if (d2 == 0)
    {
        /* degenerate case: fall back to the identity matrix */
        for (i = 0; i < vlen; ++i)
            for (j = 0; j < vlen; ++j)
                CovMat[i][j] = 0.0;

        for (i = 0; i < vlen; ++i)
            CovMat[i][i] = 1.0;
    }
    else
    {
        for (i = 0; i < vlen; ++i)
        {
            for (j = 0; j < vlen; ++j)
            {
                if (i == j)
                    CovMat[i][j] = term1 + (factor2 * CovMat[i][j]);
                else
                    CovMat[i][j] *= factor2;
            }
        }
    }

    cdsA->stats->ledoit1 = factor1;
    cdsA->stats->ledoit2 = factor2;
}


/* Non-parametric Ledoit-Wolf shrinkage of the full 3*vlen x 3*vlen
   covariance matrix.

   Reads cdsA->FullCovMat, the cnum coordinate sets in cdsA->coords and
   their average in cdsA->avecoords, and overwrites cdsA->FullCovMat in
   place with

       S* = (b^2/d^2) m I  +  (a^2/d^2) S

   where m is the mean diagonal element of S, d^2 is the dispersion of S
   about m*I, b^2 (clamped to at most d^2) is the estimated sampling error
   of S, and a^2 = d^2 - b^2.  The weights b^2/d^2 and a^2/d^2 are stored in
   cdsA->stats->ledoit1 and cdsA->stats->ledoit2.

   cdsA->tmpmat1 and cdsA->tmpmat2 are used as scratch space and are
   allocated here (3*vlen x 3*vlen) if they are still NULL.

   If d^2 is exactly zero the matrix is replaced by the identity; the stored
   weights are then the result of 0.0/0.0 (NaN under IEEE 754 arithmetic).

   NOTE(review): the 3x3 block for atoms (i, j) is written at rows
   3i..3i+2 and columns 3j..3j+2, i.e. this assumes FullCovMat interleaves
   x, y, z per atom -- confirm against the routine that fills FullCovMat.

   NOTE(review): tmpmat1 is only allocated when NULL, and
   CalcLedoitCovMatPar allocates it at vlen x vlen -- confirm it cannot
   arrive here already sized by that routine. */
void
CalcLedoitFullCovMat(CoordsArray *cdsA)
{
    double          m, a2, b2, d2;
    double          dx1, dx2, dy1, dy2, dz1, dz2;
    double          factor1, factor2, term1;
    double        **ind_covmat, **tmp_covmat;
    int             i, j, k, row, col;
    const int       cnum = cdsA->cnum, natoms = cdsA->vlen;
    const int       vlen = 3 * cdsA->vlen;
    const Coords  **coords = (const Coords **) cdsA->coords;
    const Coords   *avecoords = (const Coords *) cdsA->avecoords;
    const Coords   *coordsk;
    double        **CovMat = cdsA->FullCovMat;

    if (cdsA->tmpmat1 == NULL)
        cdsA->tmpmat1 = MatInit(vlen, vlen);

    if (cdsA->tmpmat2 == NULL)
        cdsA->tmpmat2 = MatInit(vlen, vlen);

    ind_covmat = cdsA->tmpmat1;
    tmp_covmat = cdsA->tmpmat2;

    /* working copy of S */
    for (i = 0; i < vlen; ++i)
        for (j = 0; j < vlen; ++j)
            tmp_covmat[i][j] = CovMat[i][j];

    /* m, the mean of the diagonal of S */
    m = 0.0;
    for (i = 0; i < vlen; ++i)
        m += tmp_covmat[i][i];
    m /= (double) vlen;

    /* d^2, the squared distance of S from m*I, normalized by its
       dimension (ML, biased) */
    d2 = 0.0;
    for (i = 0; i < vlen; ++i)
    {
        for (j = 0; j < vlen; ++j)
        {
            if (i != j)
                d2 += mysquare(tmp_covmat[i][j]);
            else
                d2 += mysquare(tmp_covmat[i][j] - m);
        }
    }
    d2 /= (double)(vlen);

    /* b^2, accumulated from the residual between each structure's own
       outer product of deviations and S */
    b2 = 0.0;
    for (k = 0; k < cnum; ++k)
    {
        coordsk = coords[k];

        /* Build the individual covariance residual matrix.  Each atom pair
           owns a distinct 3x3 block, so the block origin advances by 3 per
           atom; stepping it by 1 would make neighbouring blocks overwrite
           each other and leave most of the matrix unwritten. */
        for (i = 0; i < natoms; ++i)
        {
            row = 3 * i;

            dx1 = coordsk->x[i] - avecoords->x[i];
            dy1 = coordsk->y[i] - avecoords->y[i];
            dz1 = coordsk->z[i] - avecoords->z[i];

            for (j = 0; j < natoms; ++j)
            {
                col = 3 * j;

                dx2 = coordsk->x[j] - avecoords->x[j];
                dy2 = coordsk->y[j] - avecoords->y[j];
                dz2 = coordsk->z[j] - avecoords->z[j];

                ind_covmat[row+0][col+0] = (dx1*dx2) - tmp_covmat[row+0][col+0];
                ind_covmat[row+0][col+1] = (dx1*dy2) - tmp_covmat[row+0][col+1];
                ind_covmat[row+0][col+2] = (dx1*dz2) - tmp_covmat[row+0][col+2];

                ind_covmat[row+1][col+0] = (dy1*dx2) - tmp_covmat[row+1][col+0];
                ind_covmat[row+1][col+1] = (dy1*dy2) - tmp_covmat[row+1][col+1];
                ind_covmat[row+1][col+2] = (dy1*dz2) - tmp_covmat[row+1][col+2];

                ind_covmat[row+2][col+0] = (dz1*dx2) - tmp_covmat[row+2][col+0];
                ind_covmat[row+2][col+1] = (dz1*dy2) - tmp_covmat[row+2][col+1];
                ind_covmat[row+2][col+2] = (dz1*dz2) - tmp_covmat[row+2][col+2];
            }
        }

        /* squared Frobenius norm of the residual matrix */
        b2 += SqrFrobeniusNormSym((const double **) ind_covmat, vlen);
    }

    b2 /= (double) mysquare(cnum); /* ML (1/n) normalization, as Ledoit uses */

    /* b^2 = min(b^2, d^2) */
    if (d2 < b2)
        b2 = d2;

    /* a^2 */
    a2 = d2 - b2;

    /* the new estimator, S* = (b^2/d^2) m I + (a^2/d^2) S */
    factor1 = b2/d2;
    factor2 = a2/d2;
    term1 = factor1 * m;

    if (d2 == 0)
    {
        /* degenerate case: fall back to the identity matrix */
        for (i = 0; i < vlen; ++i)
            for (j = 0; j < vlen; ++j)
                CovMat[i][j] = 0.0;

        for (i = 0; i < vlen; ++i)
            CovMat[i][i] = 1.0;
    }
    else
    {
        for (i = 0; i < vlen; ++i)
        {
            for (j = 0; j < vlen; ++j)
            {
                if (i == j)
                    CovMat[i][j] = term1 + (factor2 * CovMat[i][j]);
                else
                    CovMat[i][j] = factor2 * CovMat[i][j];
            }
        }
    }

    cdsA->stats->ledoit1 = factor1;
    cdsA->stats->ledoit2 = factor2;
}


/* Ledoit-Wolf shrinkage of the per-atom variances in cdsA->var.

   This variant treats each x, y and z variance as a separate observation:
   the 3*vlen per-axis variances are recomputed here from cdsA->coords and
   cdsA->avecoords, and m (their mean) and d^2 (their variance) are taken
   over all of them.  b^2 is accumulated from the residual between each
   structure's squared per-axis deviation and the atom's x/y/z-averaged
   variance; it is clamped to at most d^2, and a^2 = d^2 - b^2.

   cdsA->var is overwritten in place with (b^2/d^2) m + (a^2/d^2) var[i];
   the two weights are stored in cdsA->stats->ledoit1 and
   cdsA->stats->ledoit2.  If d^2 is exactly zero every variance is set to
   1.0, and the stored weights are the result of 0.0/0.0 (NaN under
   IEEE 754 arithmetic).

   Exits the program if the scratch arrays cannot be allocated. */
void
CalcLedoitVar(CoordsArray *cdsA)
{
    double          m, a2, b2, d2;
    double          factor1, factor2, term1;
    double         *ind_var, *variances;
    int             i, j, k, vari;
    double          sqrdistx, sqrdisty, sqrdistz;
    double          idf = 1.0 / (double) (cdsA->cnum); /* 1/N, i.e. the ML (biased) normalization */
    int             varlen = (3 * cdsA->vlen);
    double          tmpvar, tmpval;
    const Coords   *coordsk, *coordsj;
    const double   *avex = (const double *) cdsA->avecoords->x,
                   *avey = (const double *) cdsA->avecoords->y,
                   *avez = (const double *) cdsA->avecoords->z;
    const Coords  **coords = (const Coords **) cdsA->coords;

    ind_var   = (double *) calloc(varlen, sizeof(double));
    variances = (double *) calloc(varlen, sizeof(double));

    /* calloc may legitimately return NULL for a zero-length request */
    if (varlen > 0 && (ind_var == NULL || variances == NULL))
    {
        free(ind_var);
        free(variances);
        fprintf(stderr, "\n ERROR: could not allocate memory in CalcLedoitVar(). \n");
        exit(EXIT_FAILURE);
    }

    /* per-axis variances, stored as x, y, z triples per atom */
    for (i = vari = 0; i < varlen; i += 3, ++vari)
    {
        sqrdistx = sqrdisty = sqrdistz = 0.0;
        for (j = 0; j < cdsA->cnum; ++j)
        {
            coordsj = coords[j];
            tmpval = coordsj->x[vari] - avex[vari];
            sqrdistx += tmpval * tmpval;
            tmpval = coordsj->y[vari] - avey[vari];
            sqrdisty += tmpval * tmpval;
            tmpval = coordsj->z[vari] - avez[vari];
            sqrdistz += tmpval * tmpval;
        }

        variances[i+0] = sqrdistx * idf;
        variances[i+1] = sqrdisty * idf;
        variances[i+2] = sqrdistz * idf;
    }

    /* m, the average variance */
    m = 0.0;
    for (i = 0; i < varlen; ++i)
        m += variances[i];
    m /= (double) varlen;

    /* d^2, the variance of the variances (ML estimate; the unbiased one
       would divide by varlen - 1) */
    d2 = 0.0;
    for (i = 0; i < varlen; ++i)
        d2 += mysquare(variances[i] - m);
    d2 /= (double)(varlen);

    /* b^2, the error of the variance estimates */
    b2 = 0.0;
    for (k = 0; k < cdsA->cnum; ++k)
    {
        coordsk = coords[k];

        /* residual of this structure's squared deviations from the
           atom's x/y/z-averaged variance */
        for (i = vari = 0; i < varlen; i += 3, ++vari)
        {
            tmpvar = (variances[i+0] + variances[i+1] + variances[i+2]) / 3.0;
            tmpval = coordsk->x[vari] - avex[vari];
            ind_var[i+0] = tmpval * tmpval - tmpvar;
            tmpval = coordsk->y[vari] - avey[vari];
            ind_var[i+1] = tmpval * tmpval - tmpvar;
            tmpval = coordsk->z[vari] - avez[vari];
            ind_var[i+2] = tmpval * tmpval - tmpvar;
        }

        b2 += SqrFrobeniusNormVec(ind_var, varlen);
    }

    b2 /= (double) mysquare(cdsA->cnum); /* ML (1/n) normalization, as Ledoit uses */

    /* b^2 = min(b^2, d^2) */
    if (d2 < b2)
        b2 = d2;

    /* a^2 */
    a2 = d2 - b2;

    /* the new estimator, S* = (b^2/d^2) m I + (a^2/d^2) S */
    cdsA->stats->ledoit1 = factor1 = b2/d2;
    cdsA->stats->ledoit2 = factor2 = a2/d2;

    term1 = (factor1 * m);

    if (d2 == 0)
    {
        /* degenerate case: fall back to unit variances */
        for (i = 0; i < cdsA->vlen; ++i)
            cdsA->var[i] = 1.0;
    }
    else
    {
        for (i = 0; i < cdsA->vlen; ++i)
            cdsA->var[i] = term1 + (factor2 * cdsA->var[i]);
    }

    free(ind_var);
    free(variances);
}


/* Ledoit-Wolf shrinkage of the per-atom variances in cdsA->var.

   This variant treats each x, y and z variance as a separate observation
   throughout: the 3*vlen per-axis variances are recomputed here from
   cdsA->coords and cdsA->avecoords, m (their mean) and d^2 (their variance)
   are taken over all of them, and b^2 is accumulated from the residual
   between each structure's squared per-axis deviation and that same axis'
   variance.  b^2 is clamped to at most d^2, and a^2 = d^2 - b^2.

   cdsA->var is overwritten in place with (b^2/d^2) m + (a^2/d^2) var[i];
   the two weights are stored in cdsA->stats->ledoit1 and
   cdsA->stats->ledoit2.  If d^2 is exactly zero every variance is set to
   1.0, and the stored weights are the result of 0.0/0.0 (NaN under
   IEEE 754 arithmetic).

   Exits the program if the scratch arrays cannot be allocated. */
void
CalcLedoitVar1(CoordsArray *cdsA)
{
    double          m, a2, b2, d2;
    double          factor1, factor2, term1;
    double         *ind_var, *variances;
    int             i, j, k, vari;
    double          sqrdistx, sqrdisty, sqrdistz;
    double          idf = 1.0 / (double) (cdsA->cnum); /* 1/N, i.e. the ML (biased) normalization */
    int             varlen = (3 * cdsA->vlen);

    ind_var   = (double *) calloc(varlen, sizeof(double));
    variances = (double *) calloc(varlen, sizeof(double));

    /* calloc may legitimately return NULL for a zero-length request */
    if (varlen > 0 && (ind_var == NULL || variances == NULL))
    {
        free(ind_var);
        free(variances);
        fprintf(stderr, "\n ERROR: could not allocate memory in CalcLedoitVar1(). \n");
        exit(EXIT_FAILURE);
    }

    /* per-axis variances, stored as x, y, z triples per atom */
    for (i = vari = 0; i < varlen; i += 3, ++vari)
    {
        sqrdistx = sqrdisty = sqrdistz = 0.0;
        for (j = 0; j < cdsA->cnum; ++j)
        {
            sqrdistx += mysquare(cdsA->coords[j]->x[vari] - cdsA->avecoords->x[vari]);
            sqrdisty += mysquare(cdsA->coords[j]->y[vari] - cdsA->avecoords->y[vari]);
            sqrdistz += mysquare(cdsA->coords[j]->z[vari] - cdsA->avecoords->z[vari]);
        }

        variances[i+0] = sqrdistx * idf;
        variances[i+1] = sqrdisty * idf;
        variances[i+2] = sqrdistz * idf;
    }

    /* m, the average variance */
    m = 0.0;
    for (i = 0; i < varlen; ++i)
        m += variances[i];
    m /= (double) varlen;

    /* d^2, the variance of the variances (ML estimate; the unbiased one
       would divide by varlen - 1) */
    d2 = 0.0;
    for (i = 0; i < varlen; ++i)
        d2 += mysquare(variances[i] - m);
    d2 /= (double)(varlen);

    /* b^2, the error of the variance estimates */
    b2 = 0.0;
    for (k = 0; k < cdsA->cnum; ++k)
    {
        /* residual of this structure's squared deviations from the
           corresponding per-axis variance */
        for (i = vari = 0; i < varlen; i += 3, ++vari)
        {
            ind_var[i+0] = mysquare(cdsA->coords[k]->x[vari] - cdsA->avecoords->x[vari]) - variances[i+0];
            ind_var[i+1] = mysquare(cdsA->coords[k]->y[vari] - cdsA->avecoords->y[vari]) - variances[i+1];
            ind_var[i+2] = mysquare(cdsA->coords[k]->z[vari] - cdsA->avecoords->z[vari]) - variances[i+2];
        }

        b2 += SqrFrobeniusNormVec(ind_var, varlen);
    }

    b2 /= (double) mysquare(cdsA->cnum); /* ML (1/n) normalization, as Ledoit uses */

    /* b^2 = min(b^2, d^2) */
    if (d2 < b2)
        b2 = d2;

    /* a^2 */
    a2 = d2 - b2;

    /* the new estimator, S* = (b^2/d^2) m I + (a^2/d^2) S */
    cdsA->stats->ledoit1 = factor1 = b2/d2;
    cdsA->stats->ledoit2 = factor2 = a2/d2;

    term1 = (factor1 * m);

    if (d2 == 0)
    {
        /* degenerate case: fall back to unit variances */
        for (i = 0; i < cdsA->vlen; ++i)
            cdsA->var[i] = 1.0;
    }
    else
    {
        for (i = 0; i < cdsA->vlen; ++i)
            cdsA->var[i] = term1 + (factor2 * cdsA->var[i]);
    }

    free(ind_var);
    free(variances);
}


/* Non-parametric Ledoit-Wolf shrinkage of the per-atom variances in
   cdsA->var, needing no scratch arrays.

   m and d^2 are the mean and (ML) variance of cdsA->var.  b^2 is the sum,
   over all structures and atoms, of the squared residual between the
   x/y/z-averaged squared deviation of the atom in that structure and
   var[i], normalized by vlen * cnum^2; it is clamped to at most d^2, and
   a^2 = d^2 - b^2.

   cdsA->var is overwritten in place with (b^2/d^2) m + (a^2/d^2) var[i];
   the two weights are stored in cdsA->stats->ledoit1 and
   cdsA->stats->ledoit2.  If d^2 is exactly zero every variance is set to
   1.0, and the stored weights are the result of 0.0/0.0 (NaN under
   IEEE 754 arithmetic). */
void
CalcLedoitVarNew(CoordsArray *cdsA)
{
    double          m, a2, b2, d2;
    double          factor1, factor2, term1;
    double         *var = cdsA->var;
    int             i, k;
    double          tmpx, tmpy, tmpz, tmpb2;
    int             plen = cdsA->vlen, nlen = cdsA->cnum;
    const Coords   *coordsk;
    const double   *avex = (const double *) cdsA->avecoords->x,
                   *avey = (const double *) cdsA->avecoords->y,
                   *avez = (const double *) cdsA->avecoords->z;
    const Coords  **coords = (const Coords **) cdsA->coords;

    /* m, the average variance */
    m = 0.0;
    for (i = 0; i < plen; ++i)
        m += var[i];
    m /= (double) plen;

    /* d^2, the variance of the variances */
    d2 = 0.0;
    for (i = 0; i < plen; ++i)
        d2 += mysquare(var[i] - m);
    d2 /= plen;

    /* b^2, the error of the variance estimates */
    b2 = 0.0;
    for (k = 0; k < nlen; ++k)
    {
        coordsk = coords[k];

        for (i = 0; i < plen; ++i)
        {
            tmpx = coordsk->x[i] - avex[i];
            tmpy = coordsk->y[i] - avey[i];
            tmpz = coordsk->z[i] - avez[i];

            tmpb2 = (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz) / 3.0 - var[i];
            b2 += tmpb2*tmpb2;
        }
    }

    /* The divisor is formed in double precision: as an int product,
       plen * nlen * nlen overflows for large ensembles. */
    b2 /= ((double) plen * (double) nlen * (double) nlen);

    /* b^2 = min(b^2, d^2) */
    if (d2 < b2)
        b2 = d2;

    /* a^2 */
    a2 = d2 - b2;

    /* the new estimator, S* = (b^2/d^2) m I + (a^2/d^2) S */
    cdsA->stats->ledoit1 = factor1 = b2/d2;
    cdsA->stats->ledoit2 = factor2 = a2/d2;
    term1 = (factor1 * m);

    if (d2 == 0)
    {
        /* degenerate case: fall back to unit variances */
        memsetd(cdsA->var, 1.0, plen);
    }
    else
    {
        /* the sample variance must be weighted by a^2/d^2; adding the
           target to the unweighted variance would only inflate it */
        for (i = 0; i < plen; ++i)
            var[i] = term1 + (factor2 * var[i]);
    }
}


/******************************************************************************/
/******************************************************************************/
/* Method of choice.  Parametric Ledoit-Wolf shrinkage of the per-atom
   variances in cdsA->var; per the original author this assumes a normal
   distribution, with Var(Var(x)) = (2/N) Var(x)^2.

   cdsA->var is overwritten in place with (b^2/d^2) m + (a^2/d^2) var[i],
   and the two weights are stored in cdsA->stats->ledoit1 and
   cdsA->stats->ledoit2.  If d^2 is exactly zero every variance is set to
   1.0 and the statistics are left untouched. */
void
CalcLedoitVarNewPar(CoordsArray *cdsA)
{
    double         *vars = cdsA->var;
    const int       plen = cdsA->vlen, nlen = cdsA->cnum;
    double          mean_var, spread, est_err, resid;
    double          shrink_wt, keep_wt, target;
    int             idx;

    /* m: the average variance */
    mean_var = average((const double *) vars, plen);

    /* d^2: the variance across the variances */
    spread = variance((const double *) vars, plen, mean_var);

    if (spread != 0)
    {
        /* b^2: the average error of the variance estimates, from
           varvariance(); the original author's note gives its model as
           Var(var) = 2 * var^2 * (N-1) / N^2 under a normal distribution */
        est_err = varvariance((const double *) vars, plen, nlen);

        /* b^2 may not exceed d^2 */
        if (est_err > spread)
            est_err = spread;

        /* a^2 */
        resid = spread - est_err;

        /* S* = m(b^2/d^2)I + (a^2/d^2)S */
        shrink_wt = est_err / spread;
        keep_wt = resid / spread;
        cdsA->stats->ledoit1 = shrink_wt;
        cdsA->stats->ledoit2 = keep_wt;
        target = (shrink_wt * mean_var);

        for (idx = 0; idx < plen; ++idx)
            vars[idx] = target + (keep_wt * vars[idx]);
    }
    else
    {
        /* degenerate case: fall back to unit variances */
        memsetd(vars, 1.0, plen);
    }
}
/******************************************************************************/
/******************************************************************************/


/* Ledoit-Wolf shrinkage of the per-atom variances in cdsA->var, working on
   one value per atom rather than one per axis.

   The working variances are 3 * cdsA->var[i]; m and d^2 are their mean and
   (ML) variance.  b^2 is accumulated from the residual between each
   structure's summed squared deviation (x^2 + y^2 + z^2) of an atom and
   its working variance; it is clamped to at most d^2, and
   a^2 = d^2 - b^2.

   cdsA->var is overwritten in place with (b^2/d^2) m + (a^2/d^2) var[i];
   the two weights are stored in cdsA->stats->ledoit1 and
   cdsA->stats->ledoit2.  If d^2 is exactly zero every variance is set to
   1.0, and the stored weights are the result of 0.0/0.0 (NaN under
   IEEE 754 arithmetic).

   NOTE(review): m is the mean of 3 * var but is added to the unscaled
   var[i] without dividing by 3 (CalcLedoitCovMatPar divides its target by
   3 in the analogous place) -- confirm the intended scale of the target.

   Exits the program if the scratch arrays cannot be allocated. */
void
CalcLedoitVar3(CoordsArray *cdsA)
{
    double          m, a2, b2, d2;
    double          factor1, factor2, term1;
    double         *ind_var, *variances;
    int             i, k;
    double          tmpx, tmpy, tmpz;
    const int       plen = cdsA->vlen;
    const int       nlen = cdsA->cnum;
    const double   *avex = (const double *) cdsA->avecoords->x,
                   *avey = (const double *) cdsA->avecoords->y,
                   *avez = (const double *) cdsA->avecoords->z;
    const Coords  **coords = (const Coords **) cdsA->coords;
    const Coords   *coordsk;

    ind_var   = (double *) calloc(cdsA->vlen, sizeof(double));
    variances = (double *) calloc(cdsA->vlen, sizeof(double));

    /* calloc may legitimately return NULL for a zero-length request */
    if (plen > 0 && (ind_var == NULL || variances == NULL))
    {
        free(ind_var);
        free(variances);
        fprintf(stderr, "\n ERROR: could not allocate memory in CalcLedoitVar3(). \n");
        exit(EXIT_FAILURE);
    }

    /* working variances: three times the stored per-atom variance */
    for (i = 0; i < plen; ++i)
        variances[i] = 3.0 * cdsA->var[i];

    /* m, the average working variance */
    m = 0.0;
    for (i = 0; i < plen; ++i)
        m += variances[i];
    m /= (double) plen;

    /* d^2, the variance of the working variances */
    d2 = 0.0;
    for (i = 0; i < plen; ++i)
        d2 += mysquare(variances[i] - m);

    d2 /= (double) plen;

    /* b^2, the error of the variance estimates */
    b2 = 0.0;
    for (k = 0; k < nlen; ++k)
    {
        coordsk = coords[k];

        /* residual of this structure's summed squared deviation from the
           working variance */
        for (i = 0; i < plen; ++i)
        {
            tmpx = coordsk->x[i] - avex[i];
            tmpy = coordsk->y[i] - avey[i];
            tmpz = coordsk->z[i] - avez[i];

            ind_var[i] = (tmpx*tmpx + tmpy*tmpy + tmpz*tmpz) - variances[i];
        }

        b2 += SqrFrobeniusNormVec(ind_var, plen);
    }

    b2 /= (double) nlen * nlen;

    /* b^2 = min(b^2, d^2) */
    if (d2 < b2)
        b2 = d2;

    /* a^2 */
    a2 = d2 - b2;

    /* the new estimator, S* = (b^2/d^2) m I + (a^2/d^2) S */
    cdsA->stats->ledoit1 = factor1 = b2/d2;
    cdsA->stats->ledoit2 = factor2 = a2/d2;
    term1 = (factor1 * m);

    if (d2 == 0)
    {
        /* degenerate case: fall back to unit variances */
        for (i = 0; i < cdsA->vlen; ++i)
            cdsA->var[i] = 1.0;
    }
    else
    {
        for (i = 0; i < cdsA->vlen; ++i)
            cdsA->var[i] = term1 + (factor2 * cdsA->var[i]);
    }

    free(ind_var);
    free(variances);
}


/* Parametric Ledoit-Wolf shrinkage of the per-atom variances in cdsA->var.

   Each x, y and z variance is treated as a separate observation: the
   3*vlen per-axis variances are recomputed here from cdsA->coords and
   cdsA->avecoords, and m (their mean) and d^2 (their variance) are taken
   over all of them.  The error of each variance is not measured from the
   data but taken from the Gaussian expression
   var(var) = (2 * var^2 * (n-1)) / n^2; b^2 is built from the mean of
   those values, clamped to at most d^2, and a^2 = d^2 - b^2.

   cdsA->var is overwritten in place with (b^2/d^2) m + (a^2/d^2) var[i];
   the two weights are stored in cdsA->stats->ledoit1 and
   cdsA->stats->ledoit2.  If d^2 is exactly zero every variance is set to
   1.0, and the stored weights are the result of 0.0/0.0 (NaN under
   IEEE 754 arithmetic).

   Exits the program if the scratch arrays cannot be allocated. */
void
CalcLedoitVar1Par(CoordsArray *cdsA)
{
    double          m, a2, b2, d2;
    double          factor1, factor2, term1;
    double         *ind_var, *variances;
    int             i, j, k, vari;
    double          sqrdistx, sqrdisty, sqrdistz;
    double          par_err;
    double          idf = 1.0 / (double) cdsA->cnum; /* 1/N, i.e. the ML (biased) normalization */
    double          nlen = (double) cdsA->cnum;
    int             varlen = (3 * cdsA->vlen);

    ind_var   = (double *) calloc(varlen, sizeof(double));
    variances = (double *) calloc(varlen, sizeof(double));

    /* calloc may legitimately return NULL for a zero-length request */
    if (varlen > 0 && (ind_var == NULL || variances == NULL))
    {
        free(ind_var);
        free(variances);
        fprintf(stderr, "\n ERROR: could not allocate memory in CalcLedoitVar1Par(). \n");
        exit(EXIT_FAILURE);
    }

    /* per-axis variances, stored as x, y, z triples per atom */
    for (i = vari = 0; i < varlen; i += 3, ++vari)
    {
        sqrdistx = sqrdisty = sqrdistz = 0.0;
        for (j = 0; j < cdsA->cnum; ++j)
        {
            sqrdistx += mysquare(cdsA->coords[j]->x[vari] - cdsA->avecoords->x[vari]);
            sqrdisty += mysquare(cdsA->coords[j]->y[vari] - cdsA->avecoords->y[vari]);
            sqrdistz += mysquare(cdsA->coords[j]->z[vari] - cdsA->avecoords->z[vari]);
        }

        variances[i+0] = sqrdistx * idf;
        variances[i+1] = sqrdisty * idf;
        variances[i+2] = sqrdistz * idf;
    }

    /* m, the average variance */
    m = 0.0;
    for (i = 0; i < varlen; ++i)
        m += variances[i];
    m /= (double) varlen;

    /* d^2, the variance of the variances (ML estimate; the unbiased one
       would divide by varlen - 1) */
    d2 = 0.0;
    for (i = 0; i < varlen; ++i)
        d2 += mysquare(variances[i] - m);
    d2 /= (double)(varlen);

    /* Parametric error of each variance.  These values do not depend on
       the structure index, so they are computed once rather than on every
       pass of the accumulation loop below. */
    for (i = 0; i < varlen; i += 3)
    {
        ind_var[i+0] = (2.0 * mysquare(variances[i+0]) * (nlen - 1)) / (nlen * nlen);
        ind_var[i+1] = (2.0 * mysquare(variances[i+1]) * (nlen - 1)) / (nlen * nlen);
        ind_var[i+2] = (2.0 * mysquare(variances[i+2]) * (nlen - 1)) / (nlen * nlen);
    }

    par_err = SqrFrobeniusNormVecSqrIn(ind_var, varlen);

    /* b^2: the same term is accumulated once per structure */
    b2 = 0.0;
    for (k = 0; k < cdsA->cnum; ++k)
        b2 += par_err;

    b2 /= (double) mysquare(cdsA->cnum); /* ML (1/n) normalization, as Ledoit uses */

    /* b^2 = min(b^2, d^2) */
    if (d2 < b2)
        b2 = d2;

    /* a^2 */
    a2 = d2 - b2;

    /* the new estimator, S* = (b^2/d^2) m I + (a^2/d^2) S */
    cdsA->stats->ledoit1 = factor1 = b2/d2;
    cdsA->stats->ledoit2 = factor2 = a2/d2;
    term1 = (factor1 * m);

    if (d2 == 0)
    {
        /* degenerate case: fall back to unit variances */
        for (i = 0; i < cdsA->vlen; ++i)
            cdsA->var[i] = 1.0;
    }
    else
    {
        for (i = 0; i < cdsA->vlen; ++i)
            cdsA->var[i] = term1 + (factor2 * cdsA->var[i]);
    }

    free(ind_var);
    free(variances);
}


/* Dimension-normalized squared Frobenius norm of a square matrix: the sum
   of the squares of all dim x dim elements of mat, divided by dim.
   Every element is visited, so the matrix need not be symmetric. */
double
SqrFrobeniusNormSym(const double **mat, const int dim)
{
    double          sumsq = 0.0;
    const double   *row;
    int             r, c;

    for (r = 0; r < dim; ++r)
    {
        row = mat[r];

        for (c = 0; c < dim; ++c)
            sumsq += row[c] * row[c];
    }

    return(sumsq / dim);
}


/* Dimension-normalized squared norm of a vector: the sum of the squares
   of the dim elements of vec, divided by dim. */
double
SqrFrobeniusNormVec(const double *vec, const int dim)
{
    double          sumsq = 0.0;
    int             idx = 0;

    while (idx < dim)
    {
        sumsq += mysquare(vec[idx]);
        ++idx;
    }

    return(sumsq / (double) dim);
}


/* Same normalization as SqrFrobeniusNormVec, but the elements of vec are
   summed as given rather than squared first (the caller passes values that
   are already squared quantities): returns the sum of the dim elements of
   vec divided by dim. */
double
SqrFrobeniusNormVecSqrIn(const double *vec, const int dim)
{
    double          total = 0.0;
    int             idx = 0;

    while (idx < dim)
    {
        total += vec[idx];
        ++idx;
    }

    return(total / (double) dim);
}


/* Dimension-normalized squared norm of the diagonal of a square matrix:
   the sum of the squares of mat[i][i] for i in [0, dim), divided by dim.
   Off-diagonal elements are not read. */
double
SqrFrobeniusNormDiag(const double **mat, const int dim)
{
    double          sumsq = 0.0;
    int             idx = 0;

    while (idx < dim)
    {
        sumsq += mysquare(mat[idx][idx]);
        ++idx;
    }

    return(sumsq / (double) dim);
}


/* Eigenvalue-rescaling ("Stein"-style) estimate of the per-atom variances.

   Recomputes the covariance matrix with CalcCovMat(), decomposes
   cdsA->CovMat with dsyev_opt_save(), multiplies eigenvalue i by
   nlen / ((nlen + plen + 1) - (plen - i)), i.e. nlen / (nlen + 1 + i),
   rebuilds cdsA->CovMat from the rescaled eigenvalues with EigenReconSym(),
   and copies its diagonal into cdsA->var.

   The eigenvalues are assumed to come back ranked small -> big (per the
   original author's note), so larger eigenvalues are shrunk more.

   Exits the program if the eigenvalue array cannot be allocated. */
void
CalcSteinVar(CoordsArray *cdsA)
{
    int             i;
    const int       nlen = cdsA->cnum, plen = cdsA->vlen;
    double         *variances = cdsA->var;
    double        **CovMat = cdsA->CovMat;
    double        **evecs = MatInit(plen, plen);
    double         *evals = malloc(plen * sizeof(double));

    /* malloc may legitimately return NULL for a zero-length request */
    if (plen > 0 && evals == NULL)
    {
        fprintf(stderr, "\n ERROR: could not allocate memory in CalcSteinVar(). \n");
        exit(EXIT_FAILURE);
    }

    /* evals ranked small->big */
    CalcCovMat(cdsA);
    dsyev_opt_save(CovMat, plen, evecs, evals);

    /* rescale each eigenvalue by nlen / (nlen + 1 + i) */
    for (i = 0; i < plen; ++i)
        evals[i] *= (double) nlen / (double) ((nlen + plen + 1) - 1 * (plen - i));

    /* rebuild the covariance matrix from the rescaled spectrum */
    EigenReconSym(CovMat, (const double **) evecs, evals, plen);

    /* the variances are the diagonal of the rebuilt matrix */
    for (i = 0; i < plen; ++i)
        variances[i] = CovMat[i][i];

    MatDestroy(evecs);
    free(evals);
}
