/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* rnascfgscan.c
 *
 * ER, Tue Dec 11 13:45:57 CST 2001 [St. Louis]
 * 
 * dynamic programming (cyk and inside) with the rnamodel
 *
 * calculates:
 *                       P(seqX,seqY \pi^* | rnamodel)  [CYK; \pi^* = best path ]
 *              \sum_\pi P(seqX,seqY \pi   | rnamodel)  [fInside algorithm ]
 * 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>

#include "funcs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"

/* Forward declarations of the file-local dynamic-programming routines
 * that are defined further down in this file.
 *
 *   best*scanfast   -- CYK recursions: keep the maximum over all terms.
 *   inside*scanfast -- Inside recursions: sum the terms in log2 space.
 *
 * The V, W and WB routines write cell [jmod][d] of vx, wx and wbx; the
 * vp routines maintain the helper vector vp used for internal loops.
 */
static void bestvpscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, double *vp, double **vx);
static void bestVscanfast (int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, struct rnamtxscanfast_s *mtx);
static void bestWscanfast (int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, struct rnamtxscanfast_s *mtx);
static void bestWBscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, struct rnamtxscanfast_s *mtx);

static void insidevpscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, double *vp, double **vx);
static void insideVscanfast (int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, 
			     struct rnamtxscanfast_s *mtx, double *vp, double *sc, int fastintloop);
static void insideWscanfast (int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, 
			     struct rnamtxscanfast_s *mtx, double *sc);
static void insideWBscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, 
			     struct rnamtxscanfast_s *mtx, double *sc);

/* Function: AllocRNAMtxScanFast()
 *
 * Date:     ER,  Wed Dec 12 12:42:54 CST 2001 [St. Louis]
 *
 * Purpose:  Allocate and initialize the dp workspace of the RNA model
 *           (scanning version).
 *
 * Args:     L           -- number of rows of each dp matrix; every row
 *                          holds L+1 entries
 *           fastintloop -- TRUE: the scratch vector sc gets 4L+2 entries,
 *                          otherwise it gets L*L + 3L + 2 entries
 *
 * Returns:  a newly allocated structure in which
 *             sc                 is set to zero,
 *             vp                 (L entries) is initialized by PatternVec(),
 *             rnaj, vx, wx, wbx  (L rows of L+1 entries, each matrix stored
 *                                 in one contiguous chunk) and othj are
 *                                 initialized by PatternRNAMtxScanFast().
 *           Release it with FreeRNAMtxScanFast().
 */
struct rnamtxscanfast_s *
AllocRNAMtxScanFast(int L, int fastintloop)
{
  struct rnamtxscanfast_s *mtx;       /* structure with dp matrices          */
  int                      nsc;       /* number of entries of the sc vector  */
  int                      row;
  int                      k;

  mtx = (struct rnamtxscanfast_s *) MallocOrDie (sizeof(struct rnamtxscanfast_s));

  nsc = (fastintloop == TRUE) ? 4*L+2 : L*L + 3*L + 2;

  /* vectors */
  mtx->sc = (double *) MallocOrDie(sizeof(double) * nsc);
  mtx->vp = (double *) MallocOrDie(sizeof(double) * L);

  /* row-pointer tables */
  mtx->rnaj = (double **) MallocOrDie(sizeof(double *) * L);
  mtx->vx   = (double **) MallocOrDie(sizeof(double *) * L);
  mtx->wx   = (double **) MallocOrDie(sizeof(double *) * L);
  mtx->wbx  = (double **) MallocOrDie(sizeof(double *) * L);

  /* one contiguous chunk per matrix, anchored at row 0 */
  mtx->rnaj[0] = (double *) MallocOrDie(sizeof(double) * L * (L+1));
  mtx->vx[0]   = (double *) MallocOrDie(sizeof(double) * L * (L+1));
  mtx->wx[0]   = (double *) MallocOrDie(sizeof(double) * L * (L+1));
  mtx->wbx[0]  = (double *) MallocOrDie(sizeof(double) * L * (L+1));

  mtx->othj = AllocDpDiagScanFastOTH(L);

  /* every following row starts L+1 entries after the previous one */
  for (row = 1; row < L; row++) {
    mtx->rnaj[row] = mtx->rnaj[row-1] + (L+1);
    mtx->vx[row]   = mtx->vx[row-1]   + (L+1);
    mtx->wx[row]   = mtx->wx[row-1]   + (L+1);
    mtx->wbx[row]  = mtx->wbx[row-1]  + (L+1);
  }

  /* Initialize (othj must already be allocated at this point) */
  PatternRNAMtxScanFast(L, mtx);
  PatternVec(L, mtx->vp);

  k = 0;
  while (k < nsc) {
    mtx->sc[k] = 0.;
    k++;
  }

  return mtx;
}

/* Function: AllocScfgScanFastRNA()
 *
 * Date:     ER,  Wed Dec 12 12:49:07 CST 2001[St. Louis]
 *
 * Purpose:  Allocate the pair of dp workspaces of the RNA model
 *           (scanning version). Both members, `in' and `inrv', are
 *           created by AllocRNAMtxScanFast() with identical arguments.
 *
 * Args:     L           -- length passed on to AllocRNAMtxScanFast()
 *           fastintloop -- passed on to AllocRNAMtxScanFast()
 *
 * Returns:  the newly allocated structure;
 *           release it with FreeScfgScanFastRNA().
 */
struct rnascfgscanfast_s *
AllocScfgScanFastRNA(int L, int fastintloop)
{
  struct rnascfgscanfast_s *pair;     /* holder of the two workspaces */

  pair = (struct rnascfgscanfast_s *) MallocOrDie (sizeof(struct rnascfgscanfast_s));

  pair->in   = AllocRNAMtxScanFast(L, fastintloop);
  pair->inrv = AllocRNAMtxScanFast(L, fastintloop);

  return pair;
}


/* Function: CYKRNAScanFast()
 * Date:     ER, Wed Dec 12 13:21:56 CST 2001 [St. Louis]
 *
 * Purpose:  CYK fill of one dp cell of the RNA model -- scanning version.
 *           For a span of length l ending at position j, the cell
 *           [jmod][l-1] is filled in V, then in WB, then in W
 *           (W and WB read the V cell that was just computed).
 *           Nothing is done when l == 0.
 *
 * Note:     ofp, sqinfoX, sqinfoY, win-independent arguments null,
 *           logodds and traceback are not referenced by this function.
 */
void
CYKRNAScanFast(FILE *ofp, SQINFO sqinfoX, int *sX, SQINFO sqinfoY, int *sY, 
	       int win, int j, int jmod, int l,
	       struct rnamodel_s *rna, struct nullmodel_s *null, 
	       struct rnamtxscanfast_s *mtx, int logodds, int traceback)
{
  struct pi2model_s *grammar;
  int                span;          /* l-1, the second matrix coordinate */

  if (l != 0) {
    grammar = rna->pi2;
    span    = l - 1;

    bestVscanfast (sX, sY, grammar, win, j, jmod, span, mtx);
    bestWBscanfast(sX, sY, grammar, win, j, jmod, span, mtx);
    bestWscanfast (sX, sY, grammar, win, j, jmod, span, mtx);
  }
}

/* Function: InsideRNAScanFast()
 * Date:     ER, Mon Oct 21 15:16:15 CDT 2002 [St. Louis]
 *
 * Purpose:  Inside fill of one dp cell of the RNA model -- scanning
 *           version (sums over the structures instead of keeping the
 *           best one). For a span of length l ending at position j, the
 *           cell [jmod][l-1] is filled in V, then in WB, then in W.
 *           Nothing is done when l == 0.
 *
 * Args:     sc          -- scratch vector handed to the inside* routines
 *           vp          -- handed to insideVscanfast()
 *           fastintloop -- handed to insideVscanfast() to select the
 *                          internal-loop algorithm
 *
 * Note:     ofp, sqinfoX, sqinfoY, null, logodds and traceback are not
 *           referenced by this function.
 */
void
InsideRNAScanFast(FILE *ofp, SQINFO sqinfoX, int *sX, SQINFO sqinfoY, int *sY, 
	       int win, int j, int jmod, int l,
	       struct rnamodel_s *rna, struct nullmodel_s *null, 
	       struct rnamtxscanfast_s *mtx, double *sc, double *vp, int fastintloop, int logodds, int traceback)
{
  struct pi2model_s *grammar;
  int                span;          /* l-1, the second matrix coordinate */

  if (l != 0) {
    grammar = rna->pi2;
    span    = l - 1;

    insideVscanfast (sX, sY, grammar, win, j, jmod, span, mtx, vp, sc, fastintloop);
    insideWBscanfast(sX, sY, grammar, win, j, jmod, span, mtx, sc);
    insideWscanfast (sX, sY, grammar, win, j, jmod, span, mtx, sc);
    /* debug: printf("%d %d %f %f %f\n", j,d,mtx->vx[jmod][d],mtx->wx[jmod][d],mtx->wbx[jmod][d]); */
  }
}

/* Function: FreeRNAMtxScanFast()
 *
 * Purpose:  Release everything allocated by AllocRNAMtxScanFast():
 *           the sc and vp vectors, the contiguous storage and the
 *           row-pointer tables of rnaj, vx, wx and wbx, the othj
 *           member and finally the structure itself.
 */
void
FreeRNAMtxScanFast(struct rnamtxscanfast_s *mtx)
{
  double **grid[4];
  int      g;

  grid[0] = mtx->rnaj;
  grid[1] = mtx->vx;
  grid[2] = mtx->wx;
  grid[3] = mtx->wbx;

  free(mtx->sc);
  free(mtx->vp);

  /* storage chunks first (anchored at row 0), then the row tables */
  for (g = 0; g < 4; g++) free(grid[g][0]);
  for (g = 0; g < 4; g++) free(grid[g]);

  FreeDpDiagScanFastOTH(mtx->othj);

  free(mtx);
}

/* Function: FreeScfgScanFastRNA()
 *
 * Purpose:  Release a structure created by AllocScfgScanFastRNA():
 *           both workspaces (in, inrv) and then the holder itself.
 */
void
FreeScfgScanFastRNA(struct rnascfgscanfast_s *mx)
{
  struct rnamtxscanfast_s *fwd = mx->in;
  struct rnamtxscanfast_s *rev = mx->inrv;

  FreeRNAMtxScanFast(fwd);
  FreeRNAMtxScanFast(rev);

  free(mx);
}

/* Function: PatternRNAMtxScanFast()
 *
 * Purpose:  Reset a dp workspace. The othj member is reset through
 *           PatternDpDiagScanFastOTH(); entries [0..L-1][0..L] of
 *           rnaj, vx, wx and wbx are set to -BIGFLOAT.
 *           The vectors sc and vp are not touched here.
 */
void
PatternRNAMtxScanFast(int L, struct rnamtxscanfast_s *mtx)
{
  int     row, col;
  double *rnarow;
  double *vrow;
  double *wrow;
  double *wbrow;

  PatternDpDiagScanFastOTH(L, mtx->othj);

  for (row = 0; row < L; row++) {
    rnarow = mtx->rnaj[row];
    vrow   = mtx->vx[row];
    wrow   = mtx->wx[row];
    wbrow  = mtx->wbx[row];

    for (col = 0; col <= L; col++) {
      rnarow[col] = -BIGFLOAT;
      vrow[col]   = -BIGFLOAT;
      wrow[col]   = -BIGFLOAT;
      wbrow[col]  = -BIGFLOAT;
    }
  }
}

/* Function: PatternScfgScanFastRNA()
 *
 * Purpose:  Reset both workspaces (in, inrv) of the structure with
 *           PatternRNAMtxScanFast().
 */
void
PatternScfgScanFastRNA(int L, struct rnascfgscanfast_s *mx)
{
  struct rnamtxscanfast_s *fwd = mx->in;
  struct rnamtxscanfast_s *rev = mx->inrv;

  PatternRNAMtxScanFast(L, fwd);
  PatternRNAMtxScanFast(L, rev);
}

/* Function: bestVscanfast()
 * Date:     ER, Sat Jan  1 10:06:54 CST 2000 [Panticosa]
 *
 * Purpose:  Fill cell [jmod][d] of matrix V (mtx->vx) using the CYK
 *           algorithm -- scanning version. The cell receives the best
 *           score over the hairpin loop, the stacked pair, left and
 *           right bulges, internal loops and multiloops for the span
 *           i = j-d .. j.
 *
 * Args:     sX, sY -- the two sequences, as arrays of symbol indices
 *           pi2    -- parameters of the RNA pair grammar
 *           win    -- added to a row index that falls below zero
 *                     (the dp rows are addressed circularly)
 *           j      -- sequence position of the right end of the span
 *           jmod   -- row of the dp matrices that corresponds to j
 *           d      -- distance between the ends of the span (i = j-d)
 *           mtx    -- dp matrices; vx and wbx are read, vx is written
 *
 * Returns:  void
 *           mtx->vx[jmod][d] is filled; it is -BIGFLOAT for d < 4.
 *           Unless L4CYKIL is defined, mtx->vp is updated as well
 *           (through bestvpscanfast()).
 *
 * Note:     Loop lengths are scored with the pi2->is1 / pi2->is2b /
 *           pi2->is2i length distributions (tn[]) plus per-position
 *           emissions (ps[]). An alternative formulation that scored
 *           loops through the OTH matrix mtx->rnaj was disabled in the
 *           original code and is not reproduced here.
 */
void
bestVscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, struct rnamtxscanfast_s *mtx)
{
  int     x,y;
  int     i;
  int     jmodmin;
  int     xk,xl,yk,yl;
  int     mid;
  int     nt;
  double  scnt;
  double  sc, bestsc;
#ifdef L4CYKIL
  int     k,l;
  int     lmod;
  int     mid1,mid2;
#endif
  
  i = j - d;

  jmodmin = (jmod-1 < 0)? jmod-1+win  : jmod-1;

  /* Initialize diagonal: no structure is allowed for d < 4
   */
  if (d < 4) { mtx->vx[jmod][d] = -BIGFLOAT; return; }
  
  bestsc = -BIGFLOAT;
  
  /* Main recursion
   */

  /* hairpin loop, scored with pi2->is1->tn[d].
   *
   * This term exists only for d < MAXRNALOOP. bestsc must not be
   * touched outside of this test: for longer spans sc has not been
   * assigned yet, and copying it would load an indeterminate value.
   */     
  if (d < MAXRNALOOP) {
    scnt = 0.;
    for (nt = 1; nt < d; nt++) {
      x = sX[i+nt];
      y = sY[i+nt];
      scnt += pi2->is1->ps[idx5(x,y)];
    }
    if ((sc = scnt + pi2->v->t1 + pi2->is1->tn[d]) > bestsc) bestsc = sc;
  }
  
  /* stem loops: pair (i+1, j-1) stacked inside, cell [j-1][d-2]
   */
  xk = sX[i+1];
  yk = sY[i+1];
  xl = sX[j-1];
  yl = sY[j-1];
  
  if (d > 1)  
    if ((sc = mtx->vx[jmodmin][d-2] 
	 + pi2->v->t2s 
	 + pi2->v->pp[idx5(xk,yk)][idx5(xl,yl)]) > bestsc) bestsc = sc;  
  
  /* bulges L: unpaired positions i+1..i+mid-1, inner pair (i+mid, j-1) */
  for (mid = 2; mid < d; mid++)    
    if ((mid+1) < MAXRNALOOP) {
      xk = sX[i+mid];
      yk = sY[i+mid];
      xl = sX[j-1];
      yl = sY[j-1];
      
      scnt = 0.;
      for (nt = 1; nt < mid; nt++) {
	x = sX[i+nt];
	y = sY[i+nt];
	scnt += pi2->is2b->ps[idx5(x,y)];
      }
      if ((sc = mtx->vx[jmodmin][d-mid-1] 
	   + scnt 
	   + pi2->v->t2b - 1.0
	   + pi2->is2b->tn[mid+1] 
	   + pi2->v->pp[idx5(xk,yk)][idx5(xl,yl)]) > bestsc) bestsc = sc;
    }
  
  /* bulges R: unpaired positions j-mid+1..j-1, inner pair (i+1, j-mid) */
  for (mid = 2; mid < d; mid++)    
    if ((mid+1) < MAXRNALOOP) {
      xk = sX[i+1];
      yk = sY[i+1];
      xl = sX[j-mid];
      yl = sY[j-mid];
      
      scnt = 0.;
      for (nt = 1; nt < mid; nt++) {
	x = sX[j-nt];
	y = sY[j-nt];
	scnt += pi2->is2b->ps[idx5(x,y)];
      }
      
      if ((sc = mtx->vx[(jmod-mid<0)?jmod-mid+win:jmod-mid][d-mid-1] 
	   + scnt 
	   + pi2->v->t2b - 1.0
	   + pi2->is2b->tn[mid+1] 
	   + pi2->v->pp[idx5(xk,yk)][idx5(xl,yl)]) > bestsc) bestsc = sc;
    }
  
  /* internal loops */

#ifdef L4CYKIL
  /* L^4  calculation of internal loops */
  for (k = i+2; k < j-1; k++)  
    for (l = k; l < j-1; l++) { 
      
      mid1 = k - i; 
      mid2 = j - l; 
      
      lmod = (jmod-mid2<0)? jmod-mid2+win : jmod-mid2;

      if ((mid1+mid2+d) < MAXRNALOOP) { 
	scnt = 0.; 
	for (nt = 1; nt < mid1; nt++) { 
	  x = sX[i+nt]; 
	  y = sY[i+nt]; 
	  scnt += pi2->is2i->ps[idx5(x,y)]; 
	} 
	for (nt = 1; nt < mid2; nt++) { 
	  x = sX[j-nt]; 
	  y = sY[j-nt]; 
	  scnt += pi2->is2i->ps[idx5(x,y)]; 
	} 
	
	if ((sc = mtx->vx[lmod][d-mid1-mid2] 
	     + pi2->v->t2i 
	     + scnt 
	     + pi2->is2i->tn[mid1+mid2] - LOG2(mid1+mid2-3)
	     + pi2->v->pp[idx5(sX[k],sY[k])][idx5(sX[l],sY[l])]) > bestsc) bestsc = sc; 
     } 
    }   
#else
  /* L^3 calculation of internal loops: vp[mid] is brought up to date
   * for this (j,d) and then combined with the loop-length term */
  bestvpscanfast(sX, sY, pi2, win, j, jmod, d, mtx->vp, mtx->vx);
  
  if (d > 3 && d < MAXRNALOOP) 
    for (mid = 0; mid <= (d-4); mid++) {
      if ((sc = mtx->vp[mid] + pi2->is2i->tn[d-mid] - LOG2(d-mid-3)) > bestsc) bestsc = sc;
      
    }
#endif

  /* multiloops: two WB segments inside the closing pair */ 
  for (mid = 3; mid < d-3; mid++) { 
    if ((sc = pi2->v->t3 
	 + mtx->wbx[(jmod-d+1+mid<0)?jmod-d+1+mid+win:jmod-d+1+mid][mid] 
	 + mtx->wbx[jmodmin][d-mid-3]) > bestsc) bestsc = sc;
  }  

   				/* keep the best */
  mtx->vx[jmod][d] = bestsc;
}


/* Function: bestWscanfast()
 * Date:     ER, Wed Dec 12 16:05:14 CST 2001 [St. Louis]
 *
 * Purpose:  Fill cell [jmod][d] of matrix W (mtx->wx) using the CYK
 *           algorithm -- scanning version.
 *
 * Args:     sX, sY -- the two sequences, as arrays of symbol indices
 *           pi2    -- parameters of the RNA pair grammar (pi2->w is used)
 *           win    -- added to a row index that falls below zero
 *           j      -- sequence position of the right end of the span
 *           jmod   -- row of the dp matrices that corresponds to j
 *           d      -- distance between the ends of the span (i = j-d)
 *           mtx    -- dp matrices; wx and vx are read, wx is written
 *
 * Returns:  void
 *           mtx->wx[jmod][d] is filled. For d == 0 it is the score of
 *           a single left emission (tl + pl); otherwise it is the best
 *           of: left emission, right emission, a V pair, a bifurcation.
 */
void
bestWscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, struct rnamtxscanfast_s *mtx)
{
  int      start   = j - d;                          /* left end i of the span */
  int      prevrow = (jmod >= 1) ? jmod-1 : jmod-1+win;
  int      lcode   = idx5(sX[start], sY[start]);     /* symbols at i */
  int      rcode   = idx5(sX[j],     sY[j]);         /* symbols at j */
  int      split;
  int      row;
  double   cand;
  double   top     = -BIGFLOAT;

  /* diagonal */
  if (d == 0) {
    mtx->wx[jmod][0] = pi2->w->tl + pi2->w->pl[lcode];
    return;
  }

  /* i left; connect to i+1, j */
  cand = mtx->wx[jmod][d-1] + pi2->w->tl + pi2->w->pl[lcode];
  if (cand > top) top = cand;

  /* j right; connect to i, j-1 */
  cand = mtx->wx[prevrow][d-1] + pi2->w->tr + pi2->w->pr[rcode];
  if (cand > top) top = cand;

  /* i,j pair; connect to V */
  cand = mtx->vx[jmod][d] + pi2->w->tv + pi2->w->pp[lcode][rcode];
  if (cand > top) top = cand;

  /* bifurcations: right piece of length split, left piece the rest */
  for (split = 0; split < d; split++) {
    row = jmod - split - 1;
    if (row < 0) row += win;

    cand = mtx->wx[jmod][split] 
      + mtx->wx[row][d-split-1] 
      + pi2->w->tw;
    if (cand > top) top = cand;
  }

  mtx->wx[jmod][d] = top;
}

/* Function: bestWBscanfast()
 * Date:     ER, Sat Jan  1 09:59:44 CST 2000 [Zaragoza]
 *
 * Purpose:  Fill cell [jmod][d] of matrix WB (mtx->wbx) using the CYK
 *           algorithm -- scanning version.
 *
 * Args:     sX, sY -- the two sequences, as arrays of symbol indices
 *           pi2    -- parameters of the RNA pair grammar (pi2->wb is used)
 *           win    -- added to a row index that falls below zero
 *           j      -- sequence position of the right end of the span
 *           jmod   -- row of the dp matrices that corresponds to j
 *           d      -- distance between the ends of the span (i = j-d)
 *           mtx    -- dp matrices; wbx and vx are read, wbx is written
 *
 * Returns:  void
 *           mtx->wbx[jmod][d] is filled. For d == 0 it is -BIGFLOAT
 *           (WB has to have structure inside); otherwise it is the best
 *           of: left emission, right emission, a V pair, a bifurcation.
 */
void
bestWBscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, struct rnamtxscanfast_s *mtx)
{
  int      start   = j - d;                          /* left end i of the span */
  int      prevrow = (jmod >= 1) ? jmod-1 : jmod-1+win;
  int      lcode   = idx5(sX[start], sY[start]);     /* symbols at i */
  int      rcode   = idx5(sX[j],     sY[j]);         /* symbols at j */
  int      split;
  int      row;
  double   cand;
  double   top     = -BIGFLOAT;

  /* diagonal: a single position cannot hold any structure */
  if (d == 0) {
    mtx->wbx[jmod][0] = -BIGFLOAT;
    return;
  }

  /* i left; connect to i+1, j */
  cand = mtx->wbx[jmod][d-1] + pi2->wb->tl + pi2->wb->pl[lcode];
  if (cand > top) top = cand;

  /* j right; connect to i, j-1 */
  cand = mtx->wbx[prevrow][d-1] + pi2->wb->tr + pi2->wb->pr[rcode];
  if (cand > top) top = cand;

  /* i,j pair; connect to V */
  cand = mtx->vx[jmod][d] 
    + pi2->wb->tv 
    + pi2->wb->pp[lcode][rcode];
  if (cand > top) top = cand;

  /* bifurcations: right piece of length split, left piece the rest */
  for (split = 0; split < d; split++) {
    row = jmod - split - 1;
    if (row < 0) row += win;

    cand = mtx->wbx[jmod][split] 
      + mtx->wbx[row][d-split-1] 
      + pi2->wb->tw;
    if (cand > top) top = cand;
  }

  mtx->wbx[jmod][d] = top;
}

/* Function: bestvpscanfast() 
 * 
 * Date:     ER,  Wed Dec 12 16:41:48 CST 2001  [STL]
 *
 * Purpose:  Update the helper array vp used by the L^3 internal-loop
 *           calculation of the CYK algorithm -- scanning version.
 *           
 * Arguments: sX    - sequence X, as array indices of symbols
 *            sY    - sequence Y, as array indices of symbols
 *            pi2   - parameters of the RNA pair grammar
 *            win   - added to a row index that falls below zero
 *            j     - sequence position of the right end of the span
 *            jmod  - row of the dp matrices that corresponds to j
 *            d     - distance between the ends of the span
 *            vp    - helper array, already alloc'ed 
 *            vx    - V matrix, already alloc'ed 
 *            
 * Return:    (void)           
 *            d < 4 : vp[d] is set to -BIGFLOAT, nothing else happens.
 *            d > 3 : vp[d-4] is started from vx[jmod-2][d-4]; every
 *                    vp[mid], mid < d-4, that is above -BIGFLOAT is
 *                    replaced by  k + best(vp[mid], sc)  where sc is the
 *                    newly possible term and k the emission score of
 *                    position j-d+1.
 */       
static void 
bestvpscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, double *vp, double **vx)
{
  int    pos1;          /* j-d+1 */
  int    pos2;          /* j-d+2 */
  int    row;
  int    slot;
  int    step;
  double emitsum;
  double cand;
  
  if (d < 4) 
    { 
      vp[d] = -BIGFLOAT; 
      return; 
    }

  pos1 = j - d + 1;
  pos2 = j - d + 2;

  /* start vp for mid = d-4 (first possible internal loop)
   */
  row = jmod - 2;
  if (row < 0) row += win;

  vp[d-4] = vx[row][d-4] 
    + pi2->v->t2i 
    + pi2->is2i->ps[idx5(sX[j-1], sY[j-1])]   
    + pi2->is2i->ps[idx5(sX[pos1],sY[pos1])] 
    + pi2->v   ->pp[idx5(sX[pos2],sY[pos2])][idx5(sX[j-2],sY[j-2])];
  
  for (slot = 0; slot < (d-4); slot++) {

    /* entries that are not above -BIGFLOAT are left alone */
    if (!(vp[slot] > -BIGFLOAT)) continue;

    emitsum = 0.;
    for (step = 1; step < (d-slot-2); step++) 
      emitsum += pi2->is2i->ps[idx5(sX[j-step],sY[j-step])];

    row = jmod - d + slot + 2;
    if (row < 0) row += win;

    /* the "new element" that became possible with this d */
    cand = vx[row][slot] 
      + emitsum 
      + pi2->v->t2i 
      + pi2->v->pp[idx5(sX[pos2],sY[pos2])][idx5(sX[pos2+slot],sY[pos2+slot])];

    if (cand > vp[slot]) vp[slot] = cand;

    /* both alternatives share the emission of position j-d+1 */
    vp[slot] +=  pi2->is2i->ps[idx5(sX[pos1],sY[pos1])];
  } 
}
/* Function: insideVscanfast()
 * Date:     ER, Mon Oct 21 15:38:16 CDT 2002 [St. Louis]
 *
 * Purpose:  fill matrix V using the Inside algorithm -- scanning version.
 *           Every term of the recursion (hairpin, stem, bulges L and R,
 *           internal loops, multiloops) is appended to the scratch
 *           vector sc, and the cell is set to DLog2Sum() of those terms.
 *
 * Args:     sX, sY      -- the two sequences, as arrays of symbol indices
 *           pi2         -- parameters of the RNA pair grammar
 *           win         -- added to a row index that falls below zero
 *           j           -- sequence position of the right end of the span
 *           jmod        -- row of the dp matrices that corresponds to j
 *           d           -- distance between the ends of the span (i = j-d)
 *           mtx         -- dp matrices; vx, wbx and vp are read,
 *                          vx[jmod][d] (and vp, if fastintloop) is written
 *           vp          -- not referenced in this function; the internal
 *                          loop code uses mtx->vp instead
 *           sc          -- scratch vector receiving the terms; it must be
 *                          large enough for all of them (presumably the
 *                          vector sized in AllocRNAMtxScanFast() -- verify
 *                          against the caller)
 *           fastintloop -- nonzero: L^3 internal loops through vp;
 *                          zero:    explicit L^4 internal loops
 *
 * Returns:  void
 *           matrix vx is filled at position [jmod][d].
 *
 * Note:     unlike the CYK version there is no early return for d < 4.
 */
void
insideVscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, 
		struct rnamtxscanfast_s *mtx, double *vp, double *sc, int fastintloop)
{
  int     idx = 0;   /* number of terms stored in sc so far */
  int     x,y;
  int     i, imod;
  int     jmodmin;
  int     xi,xj,yi,yj;
  int     xk,xl,yk,yl;
  int     mid;
  int     k,l;
  int     lmod;
  int     mid1,mid2;
  int     nt;
  double  scnt;
  
  i = j - d;

  /* rows of positions i and j-1, wrapped around by win */
  imod    = (jmod-d < 0)? jmod-d+win  : jmod-d;
  jmodmin = (jmod-1 < 0)? jmod-1+win  : jmod-1;

  /* (imod, xi, yi, xj, yj are assigned but not used below) */
  xi = sX[i];
  yi = sY[i];
  xj = sX[j];
  yj = sY[j];
  
  /* Main recursion
   */

  /* hairpin loop
   */
  /* this is the way to calculate IS1s using the pi2->is1->tn[d]
   */
  if (d < MAXRNALOOP) {
    scnt = 0.;
    for (nt = 1; nt < d; nt++) {
      x = sX[i+nt];
      y = sY[i+nt];
      scnt += pi2->is1->ps[idx5(x,y)];
    }
    sc[idx++] = scnt + pi2->v->t1 + pi2->is1->tn[d];
  }
  
  /* this is the way to calculate IS1s using an OTH Model
     if (d < MAXRNALOOP)  
    sc[idx++] = pi2->v->t1 
	 + mtx->rnaj[jmodmin][d-1];     */
  /* remember a difference: non-scan rnaj[j][d]
   *                            scan rnaj[j][l]   with l = d+1
   *                                                           */

  /* stem loops 
   */
  /* NOTE(review): these reads happen for every d, including d <= 1
   * where the values are not used; confirm that sX/sY are valid at
   * i+1 and j-1 for all calls. */
  xk = sX[i+1];
  yk = sY[i+1];
  xl = sX[j-1];
  yl = sY[j-1];
  
  /* this is the way to calculate IS2s using the pi2->is2->tn[d]
  */
  if (d > 1)  
    sc[idx++] = mtx->vx[jmodmin][d-2] 
      + pi2->v->t2s 
      + pi2->v->pp[idx5(xk,yk)][idx5(xl,yl)];  
  
  /* this is the way to calculate IS2s using an OTH Model
   *
   * if (d > 1) if ((sc = vx[j-1][d-2] + pi2->v->t2s +
   *               pi2->v->pp[idx5(xk,yk)][idx5(xl,yl)]) > bestsc) bestsc = sc;
   */
  
  /* bulges L */
  for (mid = 2; mid < d; mid++)    
    if ((mid+1) < MAXRNALOOP) {
      xk = sX[i+mid];
      yk = sY[i+mid];
      xl = sX[j-1];
      yl = sY[j-1];
      
      /* this is the way to calculate IS2s using the pi2->is2->tn[d]
       */
      scnt = 0.;
      for (nt = 1; nt < mid; nt++) {
	x = sX[i+nt];
	y = sY[i+nt];
	scnt += pi2->is2b->ps[idx5(x,y)];
      }
      sc[idx++] = mtx->vx[jmodmin][d-mid-1] 
	   + scnt 
	   + pi2->v->t2b - 1.0
	   + pi2->is2b->tn[mid+1] 
	   + pi2->v->pp[idx5(xk,yk)][idx5(xl,yl)];
      
    }
  
  /* bulges R */
  for (mid = 2; mid < d; mid++)    
    if ((mid+1) < MAXRNALOOP) {
      xk = sX[i+1];
      yk = sY[i+1];
      xl = sX[j-mid];
      yl = sY[j-mid];
      
      /* this is the way to calculate IS2s using the pi2->is2->tn[d]
       */
      scnt = 0.;
      for (nt = 1; nt < mid; nt++) {
	x = sX[j-nt];
	y = sY[j-nt];
	scnt += pi2->is2b->ps[idx5(x,y)];
      }
      
      sc[idx++] = mtx->vx[(jmod-mid<0)?jmod-mid+win:jmod-mid][d-mid-1] 
	   + scnt 
	   + pi2->v->t2b - 1.0
	   + pi2->is2b->tn[mid+1] 
	   + pi2->v->pp[idx5(xk,yk)][idx5(xl,yl)];
      
    }
  
  /* internal loops */


  if (fastintloop) {
    /* L^3 calculation of internal loops */
    /* bring mtx->vp up to date for this (j,d) before it is read */
    insidevpscanfast(sX, sY, pi2, win, j, jmod, d, mtx->vp, mtx->vx);
    
    if (d > 3 && d < MAXRNALOOP) 
      for (mid = 0; mid <= (d-4); mid++) {
	sc[idx++] = mtx->vp[mid] + pi2->is2i->tn[d-mid] - LOG2(d-mid-3);
	
      }
  }
  else {
    /* L^4  calculation of internal loops */
    for (k = i+2; k < j-1; k++)  
      for (l = k; l < j-1; l++) { 
	
	mid1 = k - i; 
	mid2 = j - l; 
	
	lmod = (jmod-mid2<0)? jmod-mid2+win : jmod-mid2;
	
	if ((mid1+mid2+d) < MAXRNALOOP) { 
	  scnt = 0.; 
	  for (nt = 1; nt < mid1; nt++) { 
	    x = sX[i+nt]; 
	    y = sY[i+nt]; 
	    scnt += pi2->is2i->ps[idx5(x,y)]; 
	  } 
	  for (nt = 1; nt < mid2; nt++) { 
	    x = sX[j-nt]; 
	    y = sY[j-nt]; 
	    scnt += pi2->is2i->ps[idx5(x,y)]; 
	  } 
	  
	  sc[idx++] = mtx->vx[lmod][d-mid1-mid2] 
	    + pi2->v->t2i 
	    + scnt 
	    + pi2->is2i->tn[mid1+mid2] - LOG2(mid1+mid2-3)
	    + pi2->v->pp[idx5(sX[k],sY[k])][idx5(sX[l],sY[l])]; 
	} 
      }   
  }
  
  /* multiloops */ 
  for (mid = 3; mid < d-3; mid++) { 
    sc[idx++] = pi2->v->t3 
	 + mtx->wbx[(jmod-d+1+mid<0)?jmod-d+1+mid+win:jmod-d+1+mid][mid] 
	 + mtx->wbx[jmodmin][d-mid-3];
  }  

   				/* summation */
  /* log2-sum of the idx terms collected in sc */
  mtx->vx[jmod][d] = DLog2Sum(sc, idx);
}


/* Function: insideWscanfast()
 * Date:     ER, Mon Oct 21 15:38:45 CDT 2002 [St. Louis]
 *
 * Purpose:  Fill cell [jmod][d] of matrix W (mtx->wx) using the Inside
 *           algorithm -- scanning version. The terms of the recursion
 *           are written to sc in a fixed order (left emission, right
 *           emission, V pair, bifurcations) and combined by DLog2Sum().
 *
 * Args:     sX, sY -- the two sequences, as arrays of symbol indices
 *           pi2    -- parameters of the RNA pair grammar (pi2->w is used)
 *           win    -- added to a row index that falls below zero
 *           j      -- sequence position of the right end of the span
 *           jmod   -- row of the dp matrices that corresponds to j
 *           d      -- distance between the ends of the span (i = j-d)
 *           mtx    -- dp matrices; wx and vx are read, wx is written
 *           sc     -- scratch vector; d+3 entries are used
 *
 * Returns:  void
 *           mtx->wx[jmod][d] is filled; it is -BIGFLOAT for d == 0.
 *
 * NOTE(review): the CYK counterpart bestWscanfast() initializes the
 *           d == 0 cell with tl + pl instead of -BIGFLOAT; confirm
 *           that this difference is intended.
 */
void
insideWscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, struct rnamtxscanfast_s *mtx, double *sc)
{
  int      start   = j - d;                          /* left end i of the span */
  int      prevrow = (jmod >= 1) ? jmod-1 : jmod-1+win;
  int      lcode   = idx5(sX[start], sY[start]);     /* symbols at i */
  int      rcode   = idx5(sX[j],     sY[j]);         /* symbols at j */
  int      nterm   = 0;                              /* terms stored in sc */
  int      split;
  int      row;

  /* diagonal */
  if (d == 0) {
    mtx->wx[jmod][d] = -BIGFLOAT;
    return;
  }

  /* i left; connect to i+1, j */
  sc[nterm] = mtx->wx[jmod][d-1] + pi2->w->tl + pi2->w->pl[lcode];
  nterm++;

  /* j right; connect to i, j-1 */
  sc[nterm] = mtx->wx[prevrow][d-1] + pi2->w->tr + pi2->w->pr[rcode];
  nterm++;

  /* i,j pair; connect to V */
  sc[nterm] = mtx->vx[jmod][d] + pi2->w->tv + pi2->w->pp[lcode][rcode];
  nterm++;

  /* bifurcations: right piece of length split, left piece the rest */
  for (split = 0; split < d; split++) {
    row = jmod - split - 1;
    if (row < 0) row += win;

    sc[nterm] = mtx->wx[jmod][split] 
      + mtx->wx[row][d-split-1] 
      + pi2->w->tw;
    nterm++;
  }

  /* sum everything */
  mtx->wx[jmod][d] = DLog2Sum(sc, nterm);
}

/* Function: insideWBscanfast()
 * Date:     ER, Mon Oct 21 15:39:19 CDT 2002 [St. Louis]
 *
 * Purpose:  Fill cell [jmod][d] of matrix WB (mtx->wbx) using the Inside
 *           algorithm -- scanning version. The terms of the recursion
 *           are written to sc in a fixed order (left emission, right
 *           emission, V pair, bifurcations) and combined by DLog2Sum().
 *
 * Args:     sX, sY -- the two sequences, as arrays of symbol indices
 *           pi2    -- parameters of the RNA pair grammar (pi2->wb is used)
 *           win    -- added to a row index that falls below zero
 *           j      -- sequence position of the right end of the span
 *           jmod   -- row of the dp matrices that corresponds to j
 *           d      -- distance between the ends of the span (i = j-d)
 *           mtx    -- dp matrices; wbx and vx are read, wbx is written
 *           sc     -- scratch vector; d+3 entries are used
 *
 * Returns:  void
 *           mtx->wbx[jmod][d] is filled; it is -BIGFLOAT for d == 0
 *           (WB has to have structure inside).
 */
void
insideWBscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, struct rnamtxscanfast_s *mtx, double *sc)
{
  int      start   = j - d;                          /* left end i of the span */
  int      prevrow = (jmod >= 1) ? jmod-1 : jmod-1+win;
  int      lcode   = idx5(sX[start], sY[start]);     /* symbols at i */
  int      rcode   = idx5(sX[j],     sY[j]);         /* symbols at j */
  int      nterm   = 0;                              /* terms stored in sc */
  int      split;
  int      row;

  /* diagonal: a single position cannot hold any structure */
  if (d == 0) {
    mtx->wbx[jmod][0] = -BIGFLOAT;
    return;
  }

  /* i left; connect to i+1, j */
  sc[nterm] = mtx->wbx[jmod][d-1] + pi2->wb->tl + pi2->wb->pl[lcode];
  nterm++;

  /* j right; connect to i, j-1 */
  sc[nterm] = mtx->wbx[prevrow][d-1] + pi2->wb->tr + pi2->wb->pr[rcode];
  nterm++;

  /* i,j pair; connect to V */
  sc[nterm] = mtx->vx[jmod][d] 
    + pi2->wb->tv 
    + pi2->wb->pp[lcode][rcode];
  nterm++;

  /* bifurcations: right piece of length split, left piece the rest */
  for (split = 0; split < d; split++) {
    row = jmod - split - 1;
    if (row < 0) row += win;

    sc[nterm] = mtx->wbx[jmod][split] 
      + mtx->wbx[row][d-split-1] 
      + pi2->wb->tw;
    nterm++;
  }

  /* sum everything */
  mtx->wbx[jmod][d] = DLog2Sum(sc, nterm);
}

/* Function: insidevpscanfast() 
 * 
 * Date:     ER, Mon Oct 21 15:36:40 CDT 2002  [STL]
 *
 * Purpose:  Update the helper array vp used by the L^3 internal-loop
 *           calculation of the Inside algorithm -- scanning version.
 *           
 * Arguments: sX    - sequence X, as array indices of symbols
 *            sY    - sequence Y, as array indices of symbols
 *            pi2   - parameters of the RNA pair grammar
 *            win   - added to a row index that falls below zero
 *            j     - sequence position of the right end of the span
 *            jmod  - row of the dp matrices that corresponds to j
 *            d     - distance between the ends of the span
 *            vp    - helper array, already alloc'ed 
 *            vx    - V matrix, already alloc'ed 
 *            
 * Return:    (void)           
 *            d < 4 : vp[d] is set to -BIGFLOAT, nothing else happens.
 *            d > 3 : vp[d-4] is started from vx[jmod-2][d-4]. For every
 *                    mid < d-4:
 *                      vp[mid] < -50  ->  vp[mid] = -BIGFLOAT
 *                      otherwise      ->  vp[mid] = k + log2sum(vp[mid], sc)
 *                    with sc the newly possible term and k the emission
 *                    score of position j-d+1; sc is ignored when it is
 *                    not more than 50 below... i.e. only added when
 *                    sc - vp[mid] > -50.
 */       
static void 
insidevpscanfast(int *sX, int *sY, struct pi2model_s *pi2, int win, int j, int jmod, int d, double *vp, double **vx)
{
  int    pos1;          /* j-d+1 */
  int    pos2;          /* j-d+2 */
  int    row;
  int    slot;
  int    step;
  double emitsum;
  double cand;
  
  if (d < 4) 
    { 
      vp[d] = -BIGFLOAT; 
      return; 
    }

  pos1 = j - d + 1;
  pos2 = j - d + 2;

  /* start vp for mid = d-4 (first possible internal loop)
   */
  row = jmod - 2;
  if (row < 0) row += win;

  vp[d-4] = vx[row][d-4] 
    + pi2->v->t2i 
    + pi2->is2i->ps[idx5(sX[j-1], sY[j-1])]   
    + pi2->is2i->ps[idx5(sX[pos1],sY[pos1])] 
    + pi2->v   ->pp[idx5(sX[pos2],sY[pos2])][idx5(sX[j-2],sY[j-2])];
  
  for (slot = 0; slot < (d-4); slot++) {

    /* very small entries are floored and skipped */
    if (vp[slot] < - 50.) {
      vp[slot] = -BIGFLOAT;
      continue;
    }

    emitsum = 0.;
    for (step = 1; step < (d-slot-2); step++) 
      emitsum += pi2->is2i->ps[idx5(sX[j-step],sY[j-step])];

    row = jmod - d + slot + 2;
    if (row < 0) row += win;

    /* the "new element" that became possible with this d */
    cand = vx[row][slot] 
      + emitsum 
      + pi2->v->t2i 
      + pi2->v->pp[idx5(sX[pos2],sY[pos2])][idx5(sX[pos2+slot],sY[pos2+slot])];

    /* vp = k + log2(2^vp + 2^cand); the second summand is dropped
     * when it is negligible next to the first one */
    if (cand-vp[slot] > -50.)
      vp[slot] += pi2->is2i->ps[idx5(sX[pos1],sY[pos1])] + LOG2(1. + EXP2(cand-vp[slot]));
    else 
      vp[slot] +=  pi2->is2i->ps[idx5(sX[pos1],sY[pos1])];
  }
}

