#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <gmp.h>
#include <mpfr.h>
#include "score.h"
#include <db.h> 
#include "store.h"

// Temporary scoring database; created and opened by score_initScoring,
// filled by score_storeToken and emptied by score_getProbability.
static DB *dbtemp=NULL;
// Number of "bad" and "good" pages in the permanent database; loaded by
// score_initScoring from the "~~nombre-pages~~" record.
static unsigned int BAD, GOOD;
// Parameters for scoring:
//  X   - probability given to a token that has no count in the permanent
//        database, and prior value in the per-token probability formula
//  S   - weight of the prior X in the per-token probability formula
//  DEV - a token only contributes to the score when its probability is
//        below DEV or above 1-DEV
static double X = 0.5;
static double S = 1;
static double DEV = 0.4;
// Result storage of score_getChi2, which returns the address of this variable.
mpfr_t sum;
// Buffer returned by score_getScore when a token is not found in the
// temporary database (both entries are then set to 0).
unsigned int valeur[2];
// Number of distinct tokens in the temporary database; maintained by
// score_storeToken and used to size the extra verbose (verbose = 2) report.
unsigned int tokens = 0;

/*
 * Initialize the scoring module.
 *
 * Creates and opens the temporary scoring database (a B-tree with no file
 * and no database name, truncated on open), then loads the number of "bad"
 * and "good" pages from the "~~nombre-pages~~" record of the permanent store
 * into BAD and GOOD.
 *
 * The process is terminated with EXIT_FAILURE when the database handle cannot
 * be created or opened: every other function of this module dereferences
 * dbtemp, and a handle whose open failed may only be closed.
 */
void score_initScoring(void)
{
  unsigned int *tmp = NULL;
  u_int32_t flags = DB_CREATE | DB_TRUNCATE;
  int ret;

  ret = db_create(&dbtemp, NULL, 0);
  if (ret != 0) {
    fprintf(stderr, "Error while creating database\n");
    exit(EXIT_FAILURE);
  }

  /* NOTE(review): the product below is evaluated in unsigned int arithmetic
   * and wraps around (270384 * 270384 does not fit in 32 bits). The
   * expression is kept unchanged so the effective cache size stays the same;
   * confirm which size was really intended. */
  ret = dbtemp->set_cachesize(dbtemp, 0, (unsigned int)270384*270384, 1);
  if (ret != 0) {
    /* Not fatal: the database then runs with its default cache size. */
    fprintf(stderr, "Warning: unable to set the cache size of basetemp\n");
  }

  ret = dbtemp->open(dbtemp,
                     NULL,      /* no transaction */
                     NULL,      /* no backing file */
                     0,         /* no database name */
                     DB_BTREE,
                     flags,
                     0);
  if (ret != 0) {
    fprintf(stderr, "Error while opening basetemp\n");
    /* A handle whose open failed must be discarded, not used. */
    dbtemp->close(dbtemp, 0);
    dbtemp = NULL;
    exit(EXIT_FAILURE);
  }

  tmp = store_getScore("~~nombre-pages~~");
  if (tmp == NULL) {
    /* Defensive: store_getScore is defined outside this file. */
    BAD = 0;
    GOOD = 0;
    return;
  }
  BAD = tmp[0];
  GOOD = tmp[1];
}

/*
* Display the score of a page and more (depending on verbose mode).
* @param verbose (1 = "simple verbose mode", 2 = "extra verbose mode")
*/
void score_getProbability(int verbose)
{
  unsigned int tabstats[10], nbtokens=0;
  char*  tabtokens[tokens];
  unsigned int tabnbap[tokens];
  unsigned int tabgood[tokens];
  unsigned int tabbad[tokens];
  double tabfw[tokens];
  char *tabsign[tokens];
  int    tabind[tokens];

  double graham=0, fw=0;
  mpfr_t *ret, h, s, res;
  unsigned int* tmp;
  unsigned int bad, good, frequency;
  double sommeLog = 0, sommemLog=0;
  unsigned int intervalle = 0;
  int retb;
  int i=0,j=0;
  for (i=0;i<10;i++) tabstats[i]=0;
  i=0;

  DBT key, data;
  DBC *cursorp;
  
  dbtemp->cursor(dbtemp, NULL, &cursorp, 0);  
  memset(&key, 0, sizeof(DBT));
  memset(&data, 0, sizeof(DBT));
  
  while ((retb = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
    tmp = store_getScore(key.data);
    bad = tmp[0];
    good = tmp[1];
    tmp = score_getScore(key.data);
    frequency = tmp[0];
    cursorp->c_del(cursorp, 0);
    if (bad+good != 0){
      graham = (1.0*bad/BAD)/((1.0*bad/BAD)+(1.0*good/GOOD));
      fw = (S*X + (bad+good)*graham)/(S+bad+good);
    }else{
      graham = 0.0;
      fw = X;
    }
    if (fw<DEV || fw>1-DEV){
      intervalle  = intervalle + frequency;
      sommeLog = sommeLog + (log(fw)*frequency);
      sommemLog = sommemLog + (log(1-fw)*frequency);
      tabsign[i] = "+";
    }else tabsign[i] = "-";
    if(verbose == 1){
      int indice = (int)floor(fw*10);
      tabstats[indice] += frequency;
      nbtokens += frequency;
    }
    if(verbose == 2){
      tabtokens[i] = NULL;
      tabtokens[i] = malloc(sizeof(char)*(strlen(key.data)+1)); 
      strcpy(tabtokens[i], key.data);
      tabnbap[i] = frequency;
      tabgood[i] = good;
      tabbad[i] = bad;
      tabfw[i] = fw;
      tabind[i] = i;
      i++;
      nbtokens += frequency;
    }
  }
  if(verbose == 2){
    triRapide(tabfw,0, tokens-1, tabind);
    printf("%-45s %7s %10s %7s %7s %9s \n", "tokens", "nbapp", "total", "good", "bad", "proba");
    for(i=0; i<tokens; i++){
      printf("%-45s %7u %10u %7u %7u %9f %s\n", tabtokens[tabind[i]], tabnbap[tabind[i]],tabgood[tabind[i]]+tabbad[tabind[i]], tabgood[tabind[i]], tabbad[tabind[i]], tabfw[i], tabsign[tabind[i]]);
    }
    printf("nbtokens : %d\n", nbtokens);
  }

  mpfr_init(h);
  mpfr_init(s);
  mpfr_init(res);
  
  ret = score_getChi2(-2*sommeLog, 2*intervalle);
  mpfr_set(h, *ret, GMP_RNDN);
  ret = score_getChi2(-2*sommemLog, 2*intervalle);
  mpfr_set(s, *ret, GMP_RNDN);

  mpfr_neg(s, s, GMP_RNDN);
  mpfr_add(res, h, s, GMP_RNDN);
  mpfr_add_ui(res, res, 1, GMP_RNDN);
  mpfr_div_ui(res, res, 2, GMP_RNDN);

  if (cursorp != NULL) 
    cursorp->c_close(cursorp); 

  if(verbose == 1){
    for (i=0; i<10; i++){
      printf("%-3d-%3d     %4d : ", i*10, (i+1)*10, tabstats[i]);
      for(j=0; j<(int)floor(((double)tabstats[i]/nbtokens)*100); j++){
        printf("#");
      }
      printf("\n");
    }
    printf("nbtokens : %d\n", nbtokens);
  }

  printf("%f\n", mpfr_get_d (res, GMP_RNDN));
  //TESTS
  //double p = mpfr_get_d(res, GMP_RNDN);
  //if(p<0.95) printf("i\n");
}

/*
 * Calculate the inverse chi-square value used to combine probabilities.
 *
 * For an even number of degrees of freedom df and m = chi / 2 the value is
 *   exp(-m) * (1 + m + m^2/2! + ... + m^(df/2 - 1)/(df/2 - 1)!)
 * i.e. the series has df/2 terms; the result is capped at 1. With df = 2 this
 * reduces to exp(-chi/2), so a single token of probability p combines to p.
 * The computation uses MPFR values so that exp(-m) does not underflow to 0
 * for large sums of logarithms.
 *
 * The result is stored in the file-level variable `sum`, which is
 * initialized on the first call and overwritten by every later call: copy
 * the value before calling again, and do not mpfr_clear it.
 *
 * @param chi the statistic (-2 times a sum of logarithms)
 * @param df the degrees of freedom (an odd value is rounded down to even)
 * @return the address of `sum`, holding the inverse chi-square value
 */
mpfr_t* score_getChi2(double chi, unsigned int df)
{
  static int sumInitialized = 0;
  mpfr_t term, ratio, m;
  unsigned int i;

  /* `sum` outlives the call: initializing it again would leak the previous
   * allocation. */
  if (!sumInitialized) {
    mpfr_init(sum);
    sumInitialized = 1;
  }
  mpfr_init(term);
  mpfr_init(ratio);
  mpfr_init(m);

  mpfr_set_d(m, chi / 2.0, GMP_RNDN);

  /* First term of the series: exp(-m). */
  mpfr_neg(term, m, GMP_RNDN);
  mpfr_exp(term, term, GMP_RNDN);
  mpfr_set(sum, term, GMP_RNDN);

  /* Remaining terms m^i / i! for i = 1 .. df/2 - 1, each derived from the
   * previous one. */
  for (i = 1; i < df / 2; i++) {
    mpfr_div_ui(ratio, m, i, GMP_RNDN);
    mpfr_mul(term, term, ratio, GMP_RNDN);
    mpfr_add(sum, sum, term, GMP_RNDN);
  }

  /* Rounding may push the value slightly above 1. */
  if (mpfr_cmp_ui(sum, 1) > 0)
    mpfr_set_d(sum, 1.0, GMP_RNDN);

  mpfr_clear(term);
  mpfr_clear(ratio);
  mpfr_clear(m);

  return (&sum);
}

/*
* Get the number of occurences of a token in the temporary scoring database.
* @param token the token to get the score from
* @return data.data the number of occurences of the token
*/
unsigned int* score_getScore(char *token)
{     
  int ret;
  DBT key, data;
  
  memset(&key, 0, sizeof(DBT));
  memset(&data, 0, sizeof(DBT));    
 
  key.data = token;
  key.size = strlen(token)+1;
  
  ret = dbtemp->get(dbtemp, NULL, &key, &data, 0);
  
  if(ret == DB_NOTFOUND){
    valeur[0] = 0;
    valeur[1] = 0;
    return (valeur);
  }else
    return (to_int((unsigned char*)data.data));
}


/*
* Store the given token in the the temporary scoring database
* @param token the token to store
* @param type the type of token (tag, word, biword or domain)
*/
void score_storeToken(char *token, int type)
{
 int ret; 
 DBT key, data;
 unsigned int *tmp; 
 unsigned char *val=NULL;     
 char *debut = NULL;
 char *chaine = NULL;

 tokens +=1 ;

 switch(type){
 case TAGS :    
   debut = malloc(sizeof(char)*(strlen("tag:")+1));
   strcpy(debut, "tag:");
   break;
 case WORDS :
   debut = malloc(sizeof(char)*(strlen("word:")+1));
   strcpy(debut, "word:");
   break;
 case BIWORDS :
   debut = malloc(sizeof(char)*(strlen("biword:")+1));
   strcpy(debut, "biword:");
   break;
 case DOMAINS :
   debut = malloc(sizeof(char)*(strlen("domain:")+1));
   strcpy(debut, "domain:");
   break;
 default :
   debut =  malloc(sizeof(char)*(strlen("")+1));
   strcpy(debut, "");
   break;
 }

 memset(&key, 0, sizeof(DBT));
 memset(&data, 0, sizeof(DBT));    

 chaine = malloc(sizeof(char)*(strlen(token)+strlen(debut)+1));
 strcpy(chaine, debut);
 strcat(chaine, token);
 
 key.data = chaine;
 key.size = strlen(chaine)+1;
 
 val = to_hex(1, 0);

 data.data = val;
 data.size = 4; 
  
 ret = dbtemp->put(dbtemp, NULL, &key, &data, DB_NOOVERWRITE);
 if (ret == DB_KEYEXIST) {
   dbtemp->get(dbtemp, NULL, &key, &data, 0);
   tmp = to_int(data.data);
   val = to_hex(tmp[0]+1, 0);
   data.data = val;
   data.size = 4;
   tokens -= 1;
   dbtemp->put(dbtemp, NULL, &key, &data, 0);
 }
 
 if (debut != NULL) {free(debut); debut=NULL;}
 else printf("var debut NULL in store_storeToken");
 if(chaine != NULL) {free(chaine); chaine=NULL;}
 else printf("var chaine NULL in store_storeToken\n");
}

/*
 * Close the temporary scoring database.
 *
 * The handle is reset to NULL whatever the result of the close, because a
 * closed handle may not be used again; calling this function a second time
 * is therefore harmless. A failed close is reported on stderr.
 */
void score_closedb(void)
{
  int ret;

  if (dbtemp == NULL)
    return;

  ret = dbtemp->close(dbtemp, 0);
  dbtemp = NULL;
  if (ret != 0)
    fprintf(stderr, "Error while closing basetemp\n");
}

/*
 * Write a label followed by an MPFR value and a newline on the standard
 * output.
 * @param string the label printed before the value
 * @param var the value to print, in base 10 and rounded to nearest
 */
void print_mpfr(char *string, mpfr_t var)
{
  const int base = 10;
  const size_t digits = 0;  /* 0 selects the number of digits automatically */

  printf("%s", string);
  mpfr_out_str(stdout, base, digits, var, GMP_RNDN);
  printf("\n");
}

/*
 * Sort an array in ascending order by quicksort (used for extra verbose
 * mode). The same permutation is applied to tind, so that tind keeps
 * following the elements of t.
 *
 * Only the smaller side of each partition is handled by a recursive call;
 * the larger side is handled by the loop. Both sides cover disjoint index
 * ranges, so the result is the same as with two recursive calls, but the
 * recursion depth stays logarithmic even when the partitions are completely
 * unbalanced (already sorted input, or many equal values).
 *
 * @param t the array to sort
 * @param debut index of the first element of the range to sort
 * @param fin index of the last element of the range to sort
 * @param tind array of the indexes of the array, permuted together with t
 */
void triRapide(double *t,int debut, int fin, int *tind) {

  while(debut<fin) {

    int placePivot;

    partitionner(t, debut, fin, &placePivot, tind);
    if(placePivot-debut < fin-placePivot) {
      triRapide(t, debut, placePivot-1, tind);
      debut = placePivot+1;
    } else {
      triRapide(t, placePivot+1, fin, tind);
      fin = placePivot-1;
    }
  }
}

/**
* Partition step of triRapide.
*
* Uses t[debut] as pivot and rearranges t[debut..fin] so that every element
* smaller than the pivot ends up before it and all the others after it.
* Every swap done on t is also done on tind.
*
* @param t the array being sorted
* @param debut index of the first element of the range (the pivot)
* @param fin index of the last element of the range
* @param pPosition receives the final index of the pivot
* @param tind array of indexes permuted together with t
*/
void partitionner(double *t, int debut, int fin, int *pPosition, int *tind) {

  const double pivot = t[debut];
  int boundary = debut;   /* last index holding a value smaller than pivot */
  int current = debut + 1;

  while (current <= fin) {
    if (t[current] < pivot) {
      ++boundary;
      echanger(&t[boundary], &t[current]);
      echangerInt(&tind[boundary], &tind[current]);
    }
    ++current;
  }

  /* Move the pivot between the two groups. */
  echanger(&t[debut], &t[boundary]);
  echangerInt(&tind[debut], &tind[boundary]);
  *pPosition = boundary;
}

/**
* Exchange the two double values pointed to by px and py
* (used by the partition step of triRapide).
*/
void echanger(double *px, double *py) {

  const double first = *px;
  const double second = *py;

  *px = second;
  *py = first;
}

/**
* Exchange the two int values pointed to by px and py
* (used by the partition step of triRapide).
*/
void echangerInt(int *px, int *py) {

  const int first = *px;
  const int second = *py;

  *px = second;
  *py = first;
}

