#include <time.h> 
#include <stdio.h> 
#include <stdlib.h> 
#include <math.h> 
#include <string.h> 
#include <ctype.h> 
 
#include "parameters.h" 
#include "struct.h" 
#include "io.h"
 
extern void error(char *message); 
extern void merror(char *msg1, char *msg2); 
 
extern struct algn_pos *find_eqc(struct algn_pos **ap, int seqnum, int pos); 
 
/** 
 * 
 * diag.c: Creation of diagonals & calculation of scores and weights  
 * 
 * 2003-10-08  A.R.Subramanian 
 *             (Initial) 
 */ 
 
 
/** 
 * factory method that creates a seq_part from the given parameters 
 * 
 * The returned pointer must be deallocated explicitly from memory. 
struct seq_part* create_seq_part(int num, struct seq* aSeq, unsigned int startpos) { 
  struct seq_part* sp = calloc(1, sizeof(struct seq_part)); 
  sp->sq = aSeq; 
  sp->num=num; 
  sp->startpos =startpos; 
  return sp; 
} 
 */ 
 
/**
 * Factory method that creates a diagonal from the given sequence parts.
 *
 * n1/sq1/sp1 are number, sequence and start position of the first part,
 * n2/sq2/sp2 those of the second part; dlength is the diagonal length.
 *
 * merror() is called when startpos+dlength exceeds the length of one of
 * the sequences, or when the diagonal cannot be allocated.  In the latter
 * case NULL is returned should merror() come back to the caller.
 *
 * The storage is zero-initialised, so every field that is not assigned
 * explicitly below (e.g. meetsThreshold, total_weight, pool_pos) starts
 * out as 0 instead of holding an indeterminate value.
 *
 * The pointer returned has to be deallocated explicitly from memory.
 */
struct diag* create_diag(int n1, struct seq* sq1, unsigned int sp1,
			 int n2, struct seq* sq2, unsigned int sp2,
			 int dlength) {
  struct diag* dg = calloc(1, sizeof *dg);
  if(dg==NULL) {
    merror("create_diag(): Out of memory when allocating diag for ", sq1->name);
    return NULL;
  }

  if(sq1->length < sp1+dlength) {
    merror("create_diag(): startpos+diaglength exceeds sequence length in diag ", sq1->name);
  }
  if(sq2->length < sp2+dlength) {
    merror("create_diag(): startpos+diaglength exceeds sequence length in diag ", sq2->name);
  }

  dg->seq_p1.num = n1;
  dg->seq_p1.sq = sq1;
  dg->seq_p1.startpos = sp1;

  dg->seq_p2.num = n2;
  dg->seq_p2.sq = sq2;
  dg->seq_p2.startpos = sp2;

  dg->pred_diag = NULL;
  dg->col_pred_diag = NULL;
  dg->length = dlength;
  dg->score= -1;        /* -1 marks "score not calculated yet" */
  dg->weight = 0.0;
  dg->weight_sum = 0.0;
  dg->ov_weight = 0.0;
  dg->weight_fac = 1.0;
  return dg;
}
 
/**
 * Releases the storage of the given diagonal.  The two seq_parts are
 * embedded in the diagonal and go away with it; the sequences they
 * point to are not touched.
 */
void free_diag(struct diag* dg) {
  void *storage = dg;

  free(storage);
}
 
/**
 * Calculates "m over n", i.e. the binomial coefficient C(m,n).
 *
 * The product is built as C(m,i+1) = C(m,i)*(m-i)/(i+1).  Every
 * intermediate value is therefore itself a binomial coefficient and
 * stays an exact integer as long as the products fit into the 53 bit
 * mantissa of a double, so the final truncation cannot lose one through
 * accumulated rounding of inexact ratios.
 *
 * Returns 1 for n<=0 and 0 for n>m.
 */
unsigned long binomial(long m, long n) {
  double result=1.0;
  long i;

  /* nothing to choose from; also keeps a negative product from being
     converted to an unsigned value */
  if(n>0 && m<n) return 0;

  for(i=0;i<n;i++) {
    result = result*(double)(m-i)/(double)(i+1);
  }
  return (unsigned long)result;
}
 
/** 
 * creates temporary probability distribution 
 */ 
long double **create_tmp_pdist(struct prob_dist *pdist) { 
  int length = pdist->max_dlen; 
  struct scr_matrix *smatrix = pdist->smatrix; 
 
  long double **dist = calloc(length+1, sizeof(long double *)); 
  if(dist==NULL) error("create_tmp_dist(): (1) Out of memory when allocating data !"); 
 
  int i; 
  long mxscr, sm_max_scr=smatrix->max_score; 
  for(i=0;i<=length;i++) { 
    mxscr = i *sm_max_scr; 
    dist[i] = calloc(mxscr+1, sizeof(long double )); 
    if(dist[i]==NULL) error("create_tmp_dist(): (3) Out of memory at iteration" ); 
  } 
  return dist; 
} 
 
/**
 * Frees a temporary probability distribution: the rows 0..length and
 * then the table itself.
 */
void free_tmp_pdist(long double **dist, int length) {
  int row;

  /* the order in which the rows are released does not matter */
  for(row=length;row>=0;row--) {
    free(dist[row]);
  }
  free(dist);
}
 
/**
 * Fills tmp_dist with the weights for a pair of sequences of the
 * lengths slen1 and slen2.
 *
 * For every diagonal length 1..pdist->max_dlen and every score
 * 0..length*max_score the probability stored in pdist->data is scaled
 * by slen1*slen2/(4*length^2); the negative logarithm of the result is
 * written to tmp_dist[length][score].  Row 0 is left untouched.
 */
void fill_tmp_pdist(struct prob_dist *pdist, long double **tmp_dist, int slen1, int slen2) {
  const unsigned int max_len = pdist->max_dlen;
  const long top_score = pdist->smatrix->max_score;
  const long double pair_size = (((long double)slen1)*(slen2));
  unsigned int dlen;

  for(dlen=1;dlen<=max_len;dlen++) {
    const long row_top = dlen *top_score;
    const long double factor = (long double)(pair_size)/(long double)(4.0*dlen*dlen);
    long s;

    for(s=0;s<=row_top;s++) {
      const long double prob = pdist->data[dlen][s];
      const long double scaled = prob * (factor);

      /* below the threshold the plain product is used directly,
         otherwise 1-(1-prob)^factor is evaluated */
      const long double np = (scaled>=para->DIAG_CALC_WEIGHT_THRESHOLD)
	? (long double)1.0- pow(1.0-prob,factor)
	: scaled;

      tmp_dist[dlen][s] = -log(np);
    }
  }
}
 
/**
 * Calculates score and weight of the given diag with the help of the
 * given score matrix and probability distribution and stores the
 * results within the diag.
 *
 * omitScore = -1: score calculation, weight interpolation with a fixed
 *                 sequence length product of 10000 (i.e. seqlen = 100)
 * omitScore = 0:  normal
 * omitScore = 1:  no score calculation, dg->score is used as it is
 *
 * tmp_dist, when not NULL, is used as lookup table [length][score] for
 * the weight instead of deriving it from pdist.  algn is not used here.
 *
 * A diag of length 0 gets score 0 and weight -1.  A diag whose score
 * does not exceed avg_sim_score*length gets all its weights reset.
 * For diags longer than pdist->max_dlen only meetsThreshold is cleared.
 */
inline void real_calc_weight(struct diag* dg, struct scr_matrix* smatrix, 
		 struct prob_dist *pdist, char omitScore, long double **tmp_dist, struct alignment *algn ) {

  if(dg->length==0) {
    dg->score = 0;
    dg->weight = -1.0;
    dg->meetsThreshold = 0;
    return;
  }

  const unsigned int dlen = dg->length;

  if(omitScore<=0) {
    /* sum up the similarity scores of the residue pairs on the diag */
    const unsigned int start1=dg->seq_p1.startpos;
    const unsigned int start2=dg->seq_p2.startpos;
    char *res1 = dg->seq_p1.sq->data;
    char *res2 = dg->seq_p2.sq->data;
    int *to_index = smatrix->char2num;
    int *cells = smatrix ->data;
    const int dim = smatrix->length;
    int off;

    dg->score = 0;
    for(off=0;off<dlen;off++) {
      const int row = to_index[(int) res1[start1+off]];
      const int col = to_index[(int) res2[start2+off]];
      dg->score+=(long)cells[dim*row+col];
    }
  }

  dg->meetsThreshold = 0;

  /* not better than the average similarity: weightless diag */
  if(dg->score <= pdist->smatrix->avg_sim_score*dg->length) {
    dg->weight = 0.0;
    dg->ov_weight = 0.0;
    dg->weight_fac = 1.0;
    dg->total_weight = 0.0;
    return;
  }

  /* the distribution offers no data for diags that long */
  if(dlen>pdist->max_dlen) return;

  if(tmp_dist!=NULL) {
    dg->weight = tmp_dist[dlen][dg->score];
  } else {
    const long double prob = pdist->data[dlen][dg->score];
    long double factor;

    if(omitScore>=0) {
      factor = (long double)((dg->seq_p1.sq->length )* (dg->seq_p2.sq->length ))/(long double)(4.0*dlen*dlen);
    } else {
      factor = (long double)(10000.0)/(long double)(4.0*dlen*dlen);
    }

    const long double scaled = prob * (factor);
    const long double np = (scaled>=para->DIAG_CALC_WEIGHT_THRESHOLD)
      ? (long double)1.0- pow(1.0-prob,factor)
      : scaled;

    dg->weight = -log(np);
  }

  dg->total_weight = (dg->weight+dg->ov_weight)  * dg->weight_fac;

  if( (dg->length >= para->DIAG_MIN_LENGTH) && (dg->weight > para->DIAG_THRESHOLD_WEIGHT)) {
    dg->meetsThreshold = 1;
  }
}
 
/**
 * Calculates score and weight of the given diag in normal mode: the
 * score is calculated, no temporary distribution and no alignment are
 * passed on to real_calc_weight().
 */
inline void calc_weight(struct diag* dg, struct scr_matrix* smatrix, 
		 struct prob_dist *pdist) {
  const char normal_mode = 0;
  long double **no_tmp_dist = NULL;
  struct alignment *no_alignment = NULL;

  real_calc_weight(dg, smatrix, pdist, normal_mode, no_tmp_dist, no_alignment);
}
 
 
 
/**
 * Calculates the overlap weight for the given diag and updates its
 * total weight.
 *
 * dg connects two sequences.  Two passes are made, one per sequence of
 * dg taken as the "shared" one.  In each pass every diag dg2 of dcol
 * that connects the shared sequence with a third sequence i is examined:
 * where dg and dg2 overlap on the shared sequence they induce a diag
 * between the other sequence of dg and sequence i.  The weight of the
 * induced diag is calculated with real_calc_weight() and, if it meets
 * the threshold, added to dg->ov_weight.
 *
 * The overlap window is determined separately for every dg2, and the
 * induced diag starts in the non-shared sequence of dg at the position
 * that corresponds to the begin of the overlap.
 *
 * Matrix cells that are NULL (pair of sequences without diags) are
 * skipped.  NOTE(review): as in the rest of the file the part of dg2
 * lying on the lower numbered sequence is assumed to be seq_p1.
 */
inline void calc_ov_weight(struct diag* dg, struct diag_col *dcol, struct scr_matrix* smatrix, 
		    struct prob_dist *pdist) {
  int sn1 = dg->seq_p1.num;
  int sn2 = dg->seq_p2.num;
  int slen = dcol->seq_amount;
  int side, i, j, dlen;
  int snt;                  /* number of the shared sequence */
  int ov_start, ov_end, t;  /* overlap window on the shared sequence */
  double w;
  struct seq_part *own_p, *shared_p, *d_seq_p1, *d_seq_p2;
  struct simple_diag_col *sdcol;
  struct diag *dg2;
  struct diag *tdg;         /* scratch diag that takes the induced diags */

  dg->ov_weight = 0.0;

  if(dg->length >0) {
    tdg = create_diag(sn1, dg->seq_p1.sq,0,
		      1, dg->seq_p2.sq,0,0);

    for(side=0;side<2;side++) {
      /* the induced diag starts in own_p's sequence, overlaps are
	 searched on shared_p's sequence */
      own_p    = (side==0 ? &dg->seq_p1 : &dg->seq_p2);
      shared_p = (side==0 ? &dg->seq_p2 : &dg->seq_p1);
      snt = shared_p->num;

      tdg->seq_p1.sq = own_p->sq;
      tdg->seq_p1.num = own_p->num;

      for(i=0; i<slen;i++) {
	if(i==sn1 || i==sn2) continue;

	sdcol = dcol->diag_matrix[snt*slen + i];
	if(sdcol==NULL) continue;

	tdg->seq_p2.num=i;
	dlen = sdcol->length;
	for(j=0;j<dlen;j++) {
	  dg2 = sdcol->data[j];
	  if(snt<i) {
	    d_seq_p1 = &dg2->seq_p1;
	    d_seq_p2 = &dg2->seq_p2;
	  } else {
	    d_seq_p1 = &dg2->seq_p2;
	    d_seq_p2 = &dg2->seq_p1;
	  }
	  tdg->seq_p2.sq = d_seq_p2->sq;

	  if(dg2->length >0) {
	    /* intersect the range of dg with the range of dg2 */
	    ov_start = shared_p->startpos;
	    ov_end = shared_p->startpos+dg->length-1;
	    if((int)d_seq_p1->startpos>ov_start) ov_start = d_seq_p1->startpos;
	    t=d_seq_p1->startpos+dg2->length-1;
	    if(t<ov_end) ov_end = t;

	    if(ov_start<=ov_end) {
	      tdg->seq_p1.startpos = own_p->startpos + (ov_start - (int)shared_p->startpos);
	      tdg->seq_p2.startpos = d_seq_p2->startpos + (ov_start - (int)d_seq_p1->startpos);
	      tdg->length = ov_end-ov_start+1;

	      real_calc_weight(tdg, smatrix, pdist,0,NULL,NULL);
	      if(tdg->meetsThreshold) {
		w = tdg->weight;
		dg->ov_weight += w;
	      }
	    }
	  }
	}
      }
    }
    free(tdg);
  }
  dg->total_weight = (dg->weight+dg->ov_weight)*dg->weight_fac;
}
 
 
/** 
 * creates the collection of all diags  
 * 
 * The pointer returned (and the ones included in the struct)  
 * has to be deallocted explicitely from memory. 
 */ 
struct diag_col* create_diag_col(int seq_amount) { 
  struct diag_col *dc = calloc(1, sizeof(struct diag_col)); 
  if(dc==NULL) error("create_diag_col(): (1) Out of memory !"); 
 
  //printf("go for k\n"); 
  dc->diag_matrix = malloc(seq_amount*seq_amount* 
				 sizeof(struct simple_diag_col *)); 
  //printf("2go for k\n"); 
  if(dc->diag_matrix==NULL) error("create_diag_col(): (2) Out of memory !"); 
  return dc; 
} 
 
/**
 * Frees a diag_col and all data included in it: the diags, the array
 * holding them, every simple_diag_col of the matrix together with its
 * data array, and the matrix itself.
 *
 * Each simple_diag_col is reached through the cells with s1<s2 only, so
 * it is released once although the matrix refers to it twice.  A NULL
 * dcol and matrix cells that are NULL are tolerated.
 */
void free_diag_col(struct diag_col* dcol) {
  int s1,s2,sl;

  if(dcol==NULL) return;
  sl=dcol->seq_amount;

  if(dcol->diags!=NULL) {
    for(s1 =0;s1<dcol->diag_amount;s1++) {
      free_diag(dcol->diags[s1]);
    }
  }
  free(dcol->diags);

  if(dcol->diag_matrix!=NULL) {
    for(s1=0;s1<sl;s1++) {
      for(s2=s1+1;s2<sl;s2++) {
	/* pairs that were never processed have no entry */
	if(dcol->diag_matrix[s1+sl*s2]==NULL) continue;
	free(dcol->diag_matrix[s1+sl*s2]->data);
	free(dcol->diag_matrix[s1+sl*s2]);
      }
    }
  }
  free(dcol->diag_matrix);
  free(dcol);
}
 
/** 
 * finds all relevant diags by the DIALIGN METHOD  
 * 
 * The pointer returned (and the ones included in the struct)  
 * has to be deallocted explicitely from memory. 
 */ 
inline struct simple_diag_col* find_diags_dialign(struct scr_matrix *smatrix,  
				struct prob_dist *pdist, struct seq* seq1,  
				struct seq* seq2, struct alignment *algn, 
				 long double **tmp_dist) { 
  struct simple_diag_col* dcol = calloc(1, sizeof(struct simple_diag_col)); 
  if(dcol==NULL) error("find_diags_dialign(): (1) Out of memory !"); 
   
  unsigned int size = 16; 
  int length = 0; 
  struct diag **data = calloc(size, sizeof(struct diag* )); 
  //   printf("go for k\n"); 
  if(data==NULL) error("find_diags_dialign(): (2) Out of memory !"); 
   
  long slen1 = seq1->length; 
  long slen2 = seq2->length; 
  unsigned int max_dlen = pdist->max_dlen; 
 
  struct diag *dg = create_diag(seq1->num, seq1,0, 
				seq2->num, seq2,0,0);   
  struct diag* tdg,*tdg2; 
 
  int  i,j,k,kpos,jmin=0,jmax=slen2; 
 
  int sn1 = seq1->num; 
  int sn2 = seq2->num; 
  //    printf("%i\n",slen2);  
  int *c2n = smatrix->char2num; 
  int *sdata = smatrix ->data; 
  char *data1 = seq1->data; 
  char *data2 = seq2->data; 
  int smatrixlen = smatrix->length; 
  int a1,a2; 
  int c_klen,c_kscore; 
 
  int maxslen = slen1; 
  if(slen2>maxslen) maxslen = slen2; 
 
  int max_pool = (slen1+slen2-1); 
  struct diag **diag_col = malloc(sizeof(struct diag*)*(slen1+1)); 
  struct diag **diag_row = malloc(sizeof(struct diag*)*(slen2+1)); 
  struct diag **pool_diags=malloc(sizeof(struct diag *)*max_pool); 
  int pooled = 0; 
  int thres_sim_score =para->PROT_SIM_SCORE_THRESHOLD; 
  char hasAli = (algn!=NULL); 
  struct algn_pos **ap,*tap; 
  double diag_thres = para->DIAG_THRESHOLD_WEIGHT; 
  double avg_sim_score = pdist->smatrix->avg_sim_score; 
   
  int maxd,maxd2,cons_maxd; 
  int score1=0,score2 = 0; 
  char prevail; 
 
  if(hasAli) { 
    ap = algn->algn; 
    //thres_sim_score =thres_sim_score/2; 
    //if(thres_sim_score<4) thres_sim_score = 4; 
    //diag_thres = 0.0; 
  } 
  // DIALIGN  
  for(k=0;k<=slen1;k++) { 
    diag_col[k]=create_diag(seq1->num, seq1,0, 
			    seq2->num, seq2,0,1);  
     
    //diag_col[k]->length = 1; 
    diag_col[k]->weight_sum = 0.0; 
    diag_col[k]->weight = 0.0; 
    pool_diags[pooled] = diag_col[k]; 
    pool_diags[pooled]->pool_pos = pooled; 
    pooled++; 
  } 
  for(k=0;k<=slen2;k++) { 
    diag_row[k] = diag_col[0]; 
  } 
   
 
  //  float tolerance, tol_avg= (float)para->PROT_DIAG_MIN_AVG_FACTOR*avslen;  
  double old_weight; 
 
 
  if(max_dlen> slen1/2.0) max_dlen = slen1/2.0; 
  if(max_dlen> slen2/2.0) max_dlen = slen2/2.0; 
 
  for(i=0;i<=slen1;i++) { 
 
 
    // merge row/col 
    //    printf("%i \n",i); 
    //printf("before %i\n",i); 
    if(i>0) { 
      tdg = diag_col[i]; 
      while(tdg!=NULL) { 
	kpos = tdg->seq_p2.startpos+tdg->length; 
	if(tdg->weight_sum > diag_row[kpos]->weight_sum) { 
	  //printf(" startpos: %i %i %i %i %i %.20f %.20f\n",i,tdg->seq_p1.startpos, tdg->seq_p2.startpos, tdg->length, kpos, tdg->weight_sum, diag_row[kpos]->weight_sum); 
	  diag_row[kpos] = tdg; 
	  prevail = 1; 
	} else { 
	  prevail = 0; 
	} 
	tdg2 = tdg; 
	tdg = tdg->col_pred_diag; 
	if(! prevail) { 
	  //if(tdg2->score/tdg2->length < 10) { 
	  pool_diags[tdg2->pool_pos]=NULL; 
	  //printf(" free: %i %i\n",tdg2->pool_pos, tdg2); 
	  free(tdg2); 
	    //} 
	} 
      } 
    } 
    for(j=1;j<=slen2;j++) { 
      if(diag_row[j-1]->weight_sum > diag_row[j]->weight_sum) { 
	//printf(" %.20f %.20f\n", diag_row[j]->weight_sum, diag_row[j-1]->weight_sum); 
	diag_row[j] = diag_row[j-1]; 
      } 
    } 
    if(i==slen1) break; 
    if(hasAli) { 
      //printf("pre %i\n", a1); 
      tap = find_eqc(ap, sn1, i); 
      //printf("after %i\n", a1); 
       
      if(tap->predF!=NULL) { 
	jmin = tap->predF[sn2]+1; 
      } else { 
	jmin = 0; 
      } 
       
      if(tap->succF!=NULL) { 
	jmax = tap->succF[sn2]; 
      }else { 
	jmax = slen2; 
      } 
       
      if(jmin<0) jmin = 0; 
    }  
 
     
     
    //printf(" after %i\n",i); 
    for(j=jmin;j<jmax;j++) { 
      //printf("%i %i\n",i,j); 
       
      if(i<slen1 && j<slen2) { 
	a1 = c2n[(int) data1[i]]; 
	a2 = c2n[(int) data2[j]]; 
	score1 = sdata[smatrixlen*a1+a2]; 
      } else { 
	score1 = 0; 
      } 
       
      if(score1>=thres_sim_score) { 
	maxd = slen1 - i; 
	maxd2 = slen2 - j; 
	if(maxd >maxd2) maxd = maxd2; 
	if(maxd > max_dlen) maxd = max_dlen; 
	 
	dg->seq_p1.startpos = i; 
	dg->seq_p2.startpos = j; 
	dg->score = score1; 
 
	//printf("%i %i %i\n",i,j,maxd); 
	cons_maxd = maxd+1; 
	old_weight = 0.0; 
	 
	c_klen = 0; 
	c_kscore = 0; 
	 
	for(k=1;k<=maxd;k++) { 
	  //if(k>slen1/2.0) break; 
	  //if(k>slen2/2.0) break; 
	  //if(hasAli) printf("%i %i %i\n", i,j,k); 
	  //if(i+k>slen1) error("ALARM 1"); 
	  //if(j+k>slen2) error("ALARM 2"); 
	  dg->length = k; 
	  kpos = i+k; 
	  if(hasAli) { 
	    a1 = i+k-1; 
	    a2 = j+k-1; 
	    //printf("pre %i\n", a1); 
	    tap = find_eqc(ap, sn1, a1); 
	    //printf("after %i\n", a1); 
	    if(tap->predF!=NULL) { 
	      if( (tap->predF[sn2] - a2)>0) break; 
	    }  
	    if(tap->succF!=NULL) { 
	      if((a2 - tap->succF[sn2])>0) break; 
	    } 
	  }  
 
	   
	  if(k>1) { 
	    a1 = c2n[(int) data1[kpos-1]]; 
	    a2 = c2n[(int) data2[j+k-1]]; 
	    score2 = sdata[smatrixlen*a1+a2]; 
	    dg->score += score2; 
	  } else { 
	    score2 = score1; 
	  } 
 
	  /* 
	  if(k>6) { 
	    a1 = c2n[data1[kpos-1-6]]; 
	    a2 = c2n[data2[j+k-1-6]]; 
	    oscore2 = sdata[smatrixlen*a1+a2]; 
	    c_klen = 6; 
	  } else { 
	    oscore2 = 0; 
	    c_klen++; 
	  } 
	  c_kscore += score2 - oscore2; 
	  */ 
	  //	  if(!hasAli )  
	   
	  if((dg->score < avg_sim_score*k) ) { 
	    //if((k>=0.1*avslen) ||(k==1) )break; 
	    break; 
	  } 
 
	  //	  if(k>8 && (c_kscore<2.0*c_klen)) break; 
	   
 
	  if(1) { //||!hasAli) { 
	     
 
	    c_klen++; 
	    c_kscore+=score2; 
 
	    //if(!hasAli) 
	    if( (c_klen>=para->PROT_DIAG_MAX_UNDER_THRESHOLD_POS) &&  
		(c_kscore< (para->PROT_DIAG_AVG_SCORE_THRESHOLD*c_klen))) { 
	      //	      double tolerance = 20.0+40.0*((double)c_kscore)/(3.5*c_klen); 
	      //(4.0*((double)dg->score-c_kscore)/(16.0*(k-c_klen))*c_klen)) ) { 
	      if ( (k>para->PROT_DIAG_MIN_LENGTH_THRESHOLD)) { 
		break; 
	      } else { 
		if(maxd>para->PROT_DIAG_MIN_LENGTH_THRESHOLD) maxd = para->PROT_DIAG_MIN_LENGTH_THRESHOLD; 
	      } 
	    }   
	  } 
	  /* 
	  //printf("%i\n",klen); 
	  if( 0 && (kscore < avg_sim_score*klen)) { // && (dg->score>1.2*k*avg_sim_score)) { 
 
	    if(k>=cons_maxd) { 
	      break; 
	    } 
 
	    tolerance = ((float)para->PROT_DIAG_MIN_LENGTH_THRESHOLD -  
	    		 ((float)(dg->score)/k-w_offset)/w_scal * para->PROT_DIAG_MIN_LENGTH_THRESHOLD_TOLERANCE); 
 
	    if( (klen>=para->PROT_DIAG_MAX_UNDER_THRESHOLD_POS) && (underavg>=para->PROT_DIAG_UNDER_THRESHOLD_CHANCES)  
		){ 
	      break;  
	    } else if(klen>=PROT_DIAG_MAX_UNDER_THRESHOLD_POS){ 
	      underavg++; 
		//} 
 
	    } 
	     
	  } 
	*/ 
 
 
	  //	  if(dg->weight < 1.0*old_weight) break; 
 
	  if(score2 >= thres_sim_score) { 
	    c_klen=0; 
	    c_kscore=0; 
	  //if( (kscore >= avg_sim_score*klen) && (score2>=thres_sim_score)) { 
	    // new: 
	    /* 
	    contin = 1; 
	    if(klen>1 && !hasAli) { 
	      if( (((double)kscore)/klen)<.65*((double)dg->score)/dg->length) contin=0; 
	    } 
	    */ 
	    if(1) { 
	      //underavg = 0; 
	      //	      klen = 0; 
	      //kscore = 0; 
	      //if(i==142 && j==490) printf(" s1=%i s2=%i i=%i j=%i k=%i   %i %.20f\n",sn1,sn2,i,j,k,dg->score,dg->weight); 
	       
	      //if(!hasAli && (dg->weight<0.7)) dg->meetsThreshold=0; 
	      if(!hasAli) { 
		dg->weight = tmp_dist[k][dg->score]; 
		dg->meetsThreshold = (dg->weight>diag_thres ? 1 : 0); 
	      } else { 
		real_calc_weight(dg, smatrix, pdist,1,tmp_dist,algn);	     
	      }	       
	      //	      if(hasAli  || (dg->score/dg->length>=7) || k>=4) 
	      if(dg->meetsThreshold && (dg->weight>=old_weight)) { 
		//		underavg=0; 
		old_weight = dg->weight; 
		if(max_pool<=pooled) { 
		  max_pool += maxslen; 
		  pool_diags = realloc(pool_diags, sizeof(struct diag*)*max_pool); 
		} 
		tdg = malloc(sizeof(struct diag)); 
		//if(tdg==diag_row[j]) printf("%i %i\n", tdg, diag_row[j]); 
		pool_diags[pooled] = tdg; 
		dg->pool_pos = pooled; 
		pooled++; 
		*tdg = *dg; 
		tdg->pred_diag = diag_row[j]; 
		/* 
		ttdg = tdg->pred_diag; 
		tdg->weight += -log(slen1)+log(tdg->seq_p1.startpos - (ttdg->seq_p1.startpos+ttdg->length-1)-1); 
		tdg->weight += -log(slen2)+log(tdg->seq_p2.startpos - (ttdg->seq_p2.startpos+ttdg->length-1)-1); 
		*/ 
		tdg->weight_sum =  diag_row[j]->weight_sum+tdg->weight; 
		//printf("%.20f\n", diag_row [j]->weight_sum); 
		//printf("%i %i %i %i %f %f\n", i,j,tdg->length, kpos+1, tdg->weight_sum, diag_col[kpos+1]->weight_sum); 
		tdg->col_pred_diag = diag_col[kpos]; 
 
		diag_col[kpos] = tdg; 
	      } /*else { 
		underavg++; 
		if(underavg>0.25*k && k>20) break; 
		}*/ 
	    } 
	  } 
	} 
	//printf("after k \n"); 
      } 
       
    } 
  } 
   
  tdg = diag_row[slen2]; 
  dcol->total_weight = 0; 
  double lencomp = (log(slen1)+log(slen2)); 
 
  //struct diag *ttdg = tdg; 
  /* 
  while((ttdg!=NULL)) { 
    if (ttdg->weight <=0.0) break;  
    //avscore += ((float)tdg->score)/(float)tdg->length-4.0; 
    num++;  
    //ttdg = ttdg->pred_diag; 
  } 
 
  double thres = 0.2*avscore/(float)num; 
  */ 
  //  if(thres<0.69) thres = 0.69; 
 
  while((tdg!=NULL)) { 
    //printf(" %.20f %.20f\n", tdg->weight_sum, tdg->weight); 
    if (tdg->weight <=0.0) break; 
    //    dcol->total_weight += lencomp; 
    //printf("%i\n",tdg); 
    //tdg->weight = smatrix->raw_dist[tdg->length][tdg->score]; 
    if(1) { 
      //if(hasAli || (((float)tdg->score)/(float)tdg->length >=(thres+4.0))) { 
      //tdg->weight += lencomp; 
      //dcol->total_weight += smatrix->raw_dist[tdg->length][tdg->score];//+lencomp; 
      //      if(hasAli) print_diag(tdg); 
      dcol->total_weight += tdg->weight+lencomp; 
 
      data[length] = tdg; 
      tdg->weight_sum = -1.0; 
      length++; 
      if(length >= size) { 
	size += 64; 
	data = realloc(data, sizeof(struct diag *)*size); 
	if(data==NULL) error("find_diags(): (3) Out of memory !"); 
      } 
    } 
    tdg = tdg->pred_diag; 
    //    if(tdg->weight>=5.0) dcol->total_weight += tdg->weight+lencomp; 
  } 
 
  //printf("before clear pool %i\n",pooled); 
  for(k=0;k<pooled;k++) { 
    if(pool_diags[k]!=NULL) 
      if(pool_diags[k]->weight_sum>-1.0) { 
	//printf(" last free %i %i\n", k, pool_diags[k]); 
	free(pool_diags[k]); 
      } 
  } 
   
  free(pool_diags); 
  free(diag_col); 
  free(diag_row); 
  free_diag(dg); 
  dcol->length = length; 
 
  data = realloc(data, sizeof(struct diag *)*length); 
  dcol->data = data; 
 
  if(para->DEBUG>5) { 
    for(i=0;i<length;i++) { 
      print_diag(data[i]); 
      printf("\n"); 
    } 
  } 
 
  return dcol; 
} 
 
 
/**
 * Finds all relevant diags by dynamic programming on diagonal stripes.
 *
 * Every stripe d (j-i = d) of the comparison matrix is processed on its
 * own.  First the similarity scores along the stripe, their prefix sums
 * and, for every position, the preceding position that reached the
 * similarity threshold are recorded.  Then for every position k that
 * reaches the threshold all diags ending at k with a length from
 * DIAG_MIN_LENGTH to max_dlen are examined; dyn_diags[k] holds the
 * heaviest chain of diags ending at or before k.  The best chain of
 * each stripe is appended to the result.
 *
 * If algn is given, a stripe is cut where the predF/succF bounds of the
 * alignment are violated, the similarity threshold is halved (but at
 * least 4), the weight threshold is 0 and the weights are calculated by
 * real_calc_weight(); otherwise tmp_dist[length][score] is used.
 *
 * error() is called when an allocation fails.
 *
 * The pointer returned (and the ones included in the struct) 
 * has to be deallocated explicitly from memory.
 */
inline struct simple_diag_col* find_diags_dyn(struct scr_matrix *smatrix, 
				struct prob_dist *pdist, struct seq* seq1, 
				struct seq* seq2, struct alignment *algn,
				 long double **tmp_dist) {

  struct simple_diag_col* dcol = calloc(1, sizeof(struct simple_diag_col));
  if(dcol==NULL) error("find_diags_dyn(): (1) Out of memory !");

  unsigned int size = 64;
  int  l, k,lastk, maxl;
  int length = 0;
  struct diag **data = calloc(size, sizeof(struct diag *));
  if(data==NULL) error("find_diags_dyn(): (2) Out of memory !");

  int slen1 = seq1->length;
  int slen2 = seq2->length;
  unsigned int max_dlen = pdist->max_dlen;

  /* scratch diag that is copied whenever a candidate is kept */
  struct diag* dg = create_diag(seq1->num, seq1,0,
				seq2->num, seq2,0,0);
  struct diag* tdg;

  int i,j,d;

  int sn1 = seq1->num;
  int sn2 = seq2->num;
  int *c2n = smatrix->char2num;
  int *sdata = smatrix ->data;
  char *data1 = seq1->data;
  char *data2 = seq2->data;
  int slen = smatrix->length;

  int a1,a2;
  int score1=0;

  int maxslen = slen1;
  if(slen2>maxslen) maxslen = slen2;

  /* per stripe work arrays; kept on the heap as their size depends on
     the sequence length */
  size_t cells = (maxslen>0 ? (size_t)maxslen : 1);
  int *sim_thr_pred_pos = calloc(cells, sizeof(int));
  int *scores = calloc(cells, sizeof(int));
  long *score_sum = calloc(cells, sizeof(long));
  struct diag **dyn_diags=malloc(sizeof(struct diag *)*cells);
  int max_pool = (int)cells;
  struct diag **pool_diags=malloc(sizeof(struct diag *)*cells);
  if(sim_thr_pred_pos==NULL || scores==NULL || score_sum==NULL ||
     dyn_diags==NULL || pool_diags==NULL)
    error("find_diags_dyn(): (4) Out of memory !");

  int delta;
  long s_sum;
  int old_thr_pos;
  double weight;
  int pooled = 0;

  int thres_sim_score = para->PROT_SIM_SCORE_THRESHOLD;

  char hasAli = (algn!=NULL);

  double diag_thres = para->DIAG_THRESHOLD_WEIGHT;
  double avg_sim_score = pdist->smatrix->avg_sim_score;

  struct algn_pos **ap = NULL,*tap;
  if(hasAli) {
    ap = algn->algn;
    thres_sim_score =thres_sim_score/2;
    if(thres_sim_score<4) thres_sim_score = 4;
    diag_thres = 0.0;
  }

  for(d=-slen1+1;d<slen2;d++) {

    /* start (i,j) and length of stripe d; one of i and j is always 0 */
    if(d<=0) {
      i = - d;
      j=0;
      maxl = slen1-i;
      if(slen2<maxl) maxl = slen2;
    } else {
      i=0;
      j=d;
      maxl = slen2 -j;
      if(slen1<maxl) maxl = slen1;
    }

    /* prepare scores, prefix sums and threshold predecessors */
    old_thr_pos=-1;
    s_sum=0;
    for(k=0;k<maxl;k++) {
      if(hasAli) {
	a1 = i+k;
	a2 = j+k;
	tap = find_eqc(ap, sn1, a1);
	if(tap->predF!=NULL) {
	  if( (tap->predF[sn2] - a2)>0) break;
	}
	if(tap->succF!=NULL) {
	  if((a2 - tap->succF[sn2])>0) break;
	}
      }

      a1 = c2n[(int) data1[i+k]];
      a2 = c2n[(int) data2[j+k]];
      score1 = sdata[slen*a1+a2];
      scores[k] = score1;
      s_sum+= score1;
      score_sum[k] = s_sum;
      /* recorded for every position, so that the entry is defined
	 wherever it is read below */
      sim_thr_pred_pos[k] = old_thr_pos;
      if(score1 >= thres_sim_score) {
	old_thr_pos = k;
      }
    }
    maxl = k;

    /* weightless dummy diag as start of all chains of this stripe */
    dyn_diags[0] = create_diag(seq1->num, seq1,0,
				seq2->num, seq2,0,0);
    dyn_diags[0]->weight_sum = 0.0;
    dyn_diags[0]->weight = 0.0;
    pool_diags[0] = dyn_diags[0];
    pooled = 1;
    lastk=0;

    for(k=0;k<maxl;k++) {

      /* default: best chain found so far */
      if(k>0) {
	dyn_diags[k] = dyn_diags[lastk];
      }
      if(hasAli) {
	a1 = i+k;
	a2 = j+k;
	tap = find_eqc(ap, sn1, a1);
	if(tap->predF!=NULL) {
	  if( (tap->predF[sn2] - a2)>0) break;
	}
	if(tap->succF!=NULL) {
	  if((a2 - tap->succF[sn2])>0) break;
	}
      }

      score1 = scores[k];
      if(score1>=thres_sim_score) {

	for(l=para->DIAG_MIN_LENGTH;l<=max_dlen; l++) {
	  /* first position of a diag of length l that ends at k */
	  delta = k-l+1;

	  /* the diag would start before the stripe; as i or j is 0
	     this is the same as a negative start position */
	  if(delta<0) break;

	  dg->seq_p1.startpos = i+delta;
	  dg->seq_p2.startpos = j+delta;
	  dg->length = l;

	  if(scores[delta] >= avg_sim_score ) {
	    dg->score = score_sum[k] - (delta > 0 ? score_sum[delta-1] : 0);
	    if(dg->score <= avg_sim_score*dg->length) break;
	    if(!hasAli) {
	      dg->weight = tmp_dist[dg->length][dg->score];
	      dg->meetsThreshold = (dg->weight>diag_thres ? 1 : 0);
	    } else {
	      real_calc_weight(dg, smatrix, pdist,1,tmp_dist,algn);
	    }

	    if(dg->meetsThreshold) {
	      if(max_pool<=pooled) {
		struct diag **grown;
		max_pool += (int)cells;
		grown = realloc(pool_diags, sizeof(struct diag*)*max_pool);
		if(grown==NULL) error("find_diags_dyn(): (5) Out of memory !");
		pool_diags = grown;
	      }
	      if(delta==0) {
		/* nothing can precede a diag starting at the stripe */
		if(dg->weight > dyn_diags[k]->weight_sum) {
		  dg->weight_sum = dg->weight;
		  dyn_diags[k] = malloc(sizeof(struct diag));
		  if(dyn_diags[k]==NULL) error("find_diags_dyn(): (6) Out of memory !");
		  pool_diags[pooled] = dyn_diags[k];
		  pooled++;
		  *dyn_diags[k] = *dg;
		  dyn_diags[k]->pred_diag = NULL;
		}
	      } else {
		weight = dg->weight + dyn_diags[delta-1]->weight_sum;
		if( (weight) >= dyn_diags[k]->weight_sum) {
		  dg->weight_sum = weight;
		  dyn_diags[k] = malloc(sizeof(struct diag));
		  if(dyn_diags[k]==NULL) error("find_diags_dyn(): (6) Out of memory !");
		  pool_diags[pooled] = dyn_diags[k];
		  pooled++;
		  *dyn_diags[k] = *dg;
		  /* the dummy diag is no predecessor */
		  if(dyn_diags[delta-1]->weight >0) {
		    dyn_diags[k]->pred_diag = dyn_diags[delta-1];
		  } else {
		    dyn_diags[k]->pred_diag = NULL;
		  }
		}
	      }

	      lastk = k;
	    }

	  } else {
	    /* weak start: continue with the diag that starts at the
	       preceding position reaching the similarity threshold */
	    l += (delta - sim_thr_pred_pos[delta])-1;
	  }
	}
      }
    }

    /* collect the best chain of the stripe; its members are marked
       with a weight_sum of -1 so that the clean-up spares them */
    tdg = dyn_diags[lastk];
    while((tdg!=NULL)) {
      if (tdg->weight <=0.0) break;

      data[length] = tdg;
      tdg->weight_sum = -1.0;
      length++;
      if(length >= size) {
	size += 64;
	data = realloc(data, sizeof(struct diag *)*size);
	if(data==NULL) error("find_diags(): (3) Out of memory !");
      }
      tdg = tdg->pred_diag;
    }
    for(k=0;k<pooled;k++) {
      if(pool_diags[k]->weight_sum>-1.0) free(pool_diags[k]);
    }
  }

  /* shrink the result to its real size; an empty result keeps the
     initial buffer since a request for zero bytes is not portable */
  if(length>0) {
    struct diag **fitted = realloc(data, sizeof(struct diag *)*length);
    if(fitted!=NULL) data = fitted;
  }
  free(pool_diags);
  free(dyn_diags);
  free(score_sum);
  free(scores);
  free(sim_thr_pred_pos);
  free_diag(dg);
  dcol->length = length;
  dcol->data = data;

  if(para->DEBUG>5) {
    for(i=0;i<length;i++) {
      print_diag(data[i]);
      printf("\n");
    }
  }

  return dcol;
}
 
 
/** 
 * Finds all diags of each pair of sequences in in_seq_col by using 
 * the function above 
 * 
 * The pointer returned (and the ones included in the struct)  
 * has to be deallocted explicitely from memory. 
 */ 
struct diag_col *find_all_diags(struct scr_matrix *smatrix,  
				struct prob_dist *pdist, 
				struct seq_col *in_seq_col, struct alignment *algn) { 
  unsigned int s1, s2, rs2, sl = in_seq_col->length, sp, ap; 
  struct diag_col *all_diags = create_diag_col(sl); 
  struct simple_diag_col *sdcol; 
 
  unsigned int diag_amount = 0; 
  struct diag *dg; 
 
  char hasAli = (algn!=NULL); 
 
  long double **tmp_dist = NULL; 
  if(!hasAli) tmp_dist = create_tmp_pdist(pdist); 
 
  int s2max = sl; 
  int s2width =(int) sqrt(sl); 
 
  double total=0.0; 
  //double imp[sl]; 
  //  for(s1=0;s1<sl;s1++) { 
  //  imp[s1] = 0.0; 
  //} 
  double totala[sl]; 
  memset(totala,0,sizeof(double)*sl); 
  for(s1=0;s1<sl;s1++) { 
    if(para->FAST_PAIRWISE_ALIGNMENT && s2width+1<sl) { 
      s2max = s1+s2width+1; 
      //printf("%i %i\n",s1, s2max); 
    } 
    //printf("before enter %i\n", s1); 
    for(s2=s1+1;s2<s2max;s2++) { 
      rs2 = s2 % sl; 
      if(!hasAli) { 
	fill_tmp_pdist(pdist,tmp_dist,in_seq_col->seqs[s1].length,in_seq_col->seqs[rs2].length ); 
      } 
      if(para->DEBUG>5) printf("%i %i\n", s1,s2); 
      //time1 = clock(); 
      //sdcol=find_diags_dyn(smatrix, pdist, &in_seq_col->seqs[s1], 
      //		 &in_seq_col->seqs[s2],algn,tmp_dist); 
       
      /* 
      doAlign = 1; 
      if(hasAli) { 
	if(algn->redo_seqs[s1*sl+s2]==0)  
	  doAlign = 0; 
      } 
       
      if(doAlign) { 
      */ 
      if(in_seq_col->seqs[s1].length > 0 && in_seq_col->seqs[s2].length > 0) { 
	//if(para->DEBUG>1) printf(" %i %i %i\n", s1, rs2,sl-1); 
	//	printf("find diags %i %i\n",s1,s2); 
        sdcol=find_diags_dialign(smatrix, pdist, &in_seq_col->seqs[s1], 
				 &in_seq_col->seqs[rs2],algn,tmp_dist); 
	//	imp[s1] += sdcol->total_weight; 
	//imp[s2] += sdcol->total_weight; 
	total += sdcol->total_weight; 
	totala[s1] +=sdcol->total_weight;  
	totala[s2] +=sdcol->total_weight;  
	//printf(" num of diags:%i\n ", sdcol->length); 
	/* 
	  } else { 
	  sdcol = calloc(1, sizeof(struct simple_diag_col)); 
	  sdcol->length = 0; 
	  } 
	*/ 
	//printf("%i %i %f\n", s1,s2, (clock()-time1)/CLOCKS_PER_SEC); 
	 
	all_diags->diag_matrix[s1+sl*rs2] = sdcol; 
	all_diags->diag_matrix[rs2+sl*s1] = sdcol; 
	diag_amount += sdcol->length; 
      } 
    } 
  } 
  if(!hasAli) free_tmp_pdist(tmp_dist, pdist->max_dlen); 
  all_diags->diags= calloc(diag_amount, sizeof(struct diag*)); 
  if(all_diags->diags==NULL) error("find_all_diags(): (1) Out of memory !"); 
 
  ap=0; 
  for(s1=0;s1<sl;s1++) { 
    for(s2=s1+1;s2<sl;s2++) { 
      sdcol=all_diags->diag_matrix[s1+sl*s2]; 
      if(sdcol!=NULL) { 
	for(sp=0;sp<sdcol->length;sp++) { 
	  //	  if(hasAli || (sdcol->data[sp]->weight >0.01*(sdcol->total_weight/sdcol->length))) { 
	    all_diags->diags[ap]=sdcol->data[sp]; 
	    ap++; 
	    //} else { 
	    //  free(sdcol->data[sp]); 
	    //diag_amount--; 
	    // } 
	} 
      } 
    } 
  } 
   
  all_diags->seq_amount = sl; 
   
  for(s1=0;s1<sl;s1++) { 
    for(s2=s1+1;s2<sl;s2++) { 
      sdcol=all_diags->diag_matrix[sl*s1+s2]; 
      if(sdcol!=NULL) { 
	for(sp=0;sp<sdcol->length;sp++) { 
	  dg = sdcol->data[sp]; 
	  //	  if(hasAli) print_diag(dg); 
	  dg->weight_fac = pow(sdcol->total_weight/total,2.0); 
	  //	  dg->ov_weight = sdcol->total_weight; 
	  /* 
	  if(1 || !hasAli) { 
	    dg->weight_fac = sdcol->total_weight*totala[s1]/(sl-1)*sdcol->total_weight*totala[s2]/(sl-1); 
	  } 
	*/ 
	  //	  dg->weight_fac =pow(sdcol->total_weight*(sl-1),2.0)/(totala[s1]*totala[s2]); 
 
	  if(!hasAli) {
	    if(para->DO_OVERLAP) {
	      dg->weight_fac = 1.0;
	      calc_ov_weight(dg,all_diags, smatrix,pdist); 
	    }
	    dg->total_weight = (dg->weight+dg->ov_weight) *dg->weight_fac; 
	  } else {
	    dg->weight_fac = 1.0;
	  }
	} 
      } 
    } 
  } 
   
  all_diags->diag_amount = diag_amount; 
  return all_diags; 
} 
