#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/types.h>

#include "indexl.h"

/* Bit-layout constants.  None of them is referenced in the visible part of
   this file.
   NOTE(review): MBITS+NBITS is 32 and NBITM equals (1<<NBITS)-1, so these
   presumably describe a 27-bit value / 5-bit length split of a 32-bit code
   word -- confirm against the Code type declared in indexl.h. */
#define MBITS 27
#define NBITS 5
#define NBITM 0x1f

/* Work-queue entry used by huff_build_from_freq() while merging subtrees. */
typedef struct {
  float f;      /* weight: count/total for a word, sum of both children for a merged node */
  unsigned v;   /* byte offset from w->h.m1 of the word record or Huff node this entry stands for */
  unsigned w;   /* g.w of the word for leaf entries; 0 for merged nodes */
} Q;

/*
 * Ordering callback for Q entries, keyed on the weight field f.
 *
 * Returns 1 when the first entry is strictly heavier than the second and
 * -1 in every other case; it never returns 0, so equal weights are
 * reported as "first sorts before second".
 */
static INLINE int
qcomp(const void *v1,const void *v2) {

  const Q *lhs=v1;
  const Q *rhs=v2;

  if (lhs->f>rhs->f)
    return 1;

  return -1;

}

/*
 * Build the Huffman tree in w->h from the per-word counts held in w->d.
 *
 * Layout written into w->h:
 *   - ht->nw-1 Huff nodes starting at w->h.m1 (the final merge lands in
 *     slot 0, so the root is at w->h.m1);
 *   - one record per word starting at w->he, each written as g.w followed
 *     by the word itself via map_write_gen();
 *   - a trailing unsigned holding the number of Huff nodes.
 *
 * A scratch map `h` collects one Q entry per word; it is sorted by weight
 * and then repeatedly reduced by merging its two lightest entries.
 *
 * Returns 1 on success, 0 on any failure.
 *
 * NOTE(review): the failure returns taken after entries were written to
 * the scratch map do not call map_close(&h) -- confirm whether that leaks.
 */
int
huff_build_from_freq(Words *w) {

  Huff *z;
  unsigned i,nt;
  Hash_totals *ht;
  Map h={0};
  Q qq,*q1,*q,*qe,*hw;
  Datum *n;
  Gen g;

  if (!(ht=hash_get_totals(&w->d))) {
    err("Can't get hash totals in huff_build_from_freq\n");
    return 0;
  }
  nt=ht->nt;

  /* Size the map for the node array, one unsigned per word, ht->nc further
     bytes (presumably the total word text -- TODO confirm) and the trailing
     node count. */
  if (!map_get(&w->h,0,sizeof(unsigned)+ht->nc+
               sizeof(unsigned)*ht->nw+sizeof(Huff)*(ht->nw-1)))
    return 0;
  z=w->h.m1;
  /* Word records start right after the node array; w->he marks that boundary. */
  w->he=w->h.m=z+ht->nw-1;

  for (;(n=hash_seq(&w->d,&g));) {

    qq.f=(float)n->code.u/nt;      /* relative frequency of this word */
    qq.v=w->h.m-w->h.m1;           /* offset of the record about to be written */
    qq.w=g.w;

    if (!map_write_element(&w->h,g.w))
      return 0;
    if (!map_write_gen(&w->h,g))
      return 0;
    if (!map_write_element(&h,qq))
      return 0;

  }

  /* Append the node count, then re-request the map sizeof(i) smaller than
     its current size (presumably to trim it -- TODO confirm map_get). */
  i=(Huff *)w->he-(Huff *)w->h.m1;
  if (!map_write_element(&w->h,i))
    return 0;
  if (!map_get(&w->h,0,w->h.size-sizeof(i)))
    return 0;

  q1=h.m1;
  qe=h.me;
  qsort(q1,qe-q1,sizeof(*q1),qcomp);

  qq.w=0;
  z=(Huff *)w->he-1;              /* nodes are filled from the last slot backwards */
  for (q=q1;q<qe-1;) {

    /* The two lightest entries become the children of the next node. */
    for (i=0;i<2;i++)
      z->v[i]=q[i].v;

    qq.f=q[0].f+q[1].f;
    qq.v=(void *)z - w->h.m1;
    z--;

    /* bsearch1 is a project helper; from its use here it appears to return
       the position in [q+2,qe) where qq belongs -- TODO confirm. */
    hw=bsearch1(&qq,q+2,qe-q-2,sizeof(*q),qcomp,&i);

    /* Drop the two consumed entries and insert the merged one: slide the
       entries between them and the insertion point down by one slot.
       Source and destination ranges overlap whenever more than one entry
       moves, so memmove is required (memcpy would be undefined here). */
    hw--;
    q++;
    memmove(q,q+1,sizeof(*q)*(hw-q));
    *hw=qq;

  }

  /* Every node slot must have been used exactly once. */
  if (++z!=w->h.m1) {
    err("z error\n");
    return 0;
  }

  if (!map_close(&h))
    return 0;

  return 1;

}

/*
 * Rebuild the Huffman tree in w->h from the codes already stored with each
 * word in w->d (n->code.c.value / n->code.c.bits).
 *
 * For every word the code is consumed low-order bit first.  Starting at the
 * root (w->h.m1) each bit selects a child slot; a slot holding 0 is treated
 * as "no child yet" and gets the next unused Huff node (this relies on the
 * node area starting out zeroed -- TODO confirm map_get guarantees that).
 * The slot selected by the last bit receives the offset of the word record,
 * which is then appended to the map.  A trailing node count is written at
 * the end.
 *
 * Side effect: n->code.c.bits is decremented in place while walking.
 *
 * Returns 1 on success, 0 on failure (including a code that collides with
 * an already placed word).
 */
int
huff_build_from_codes(Words *w) {

  unsigned i;
  Huff *lz,*z;
  Hash_totals *ht;
  Datum *n;
  Gen g;

  if (!(ht=hash_get_totals(&w->d))) {
    err("Can't get hash totals in huff_build_from_codes\n");
    return 0;
  }

  if (!map_get(&w->h,0,sizeof(unsigned)+
               sizeof(Huff)*(ht->nw-1)+sizeof(unsigned)*ht->nw+ht->nc))
    return 0;
  /* Word records start right after the node array; w->he marks that boundary. */
  w->he=w->h.m=(Huff *)w->h.m1+ht->nw-1;

  /* lz tracks the most recently allocated node. */
  for (lz=w->h.m1;(n=hash_seq(&w->d,&g));) {

    register unsigned t1=n->code.c.value;
    register unsigned *q;

    /* Follow all but the last code bit, creating interior nodes on demand. */
    for (z=w->h.m1;lz<(Huff *)w->he && --n->code.c.bits;t1=t1>>1) {

      q=z->v+ (t1 & 0x01);

      /* An offset at or beyond w->he refers to a word record, not a node. */
      if (w->h.m1+*q>=w->he) {
        W *hit=(W *)(w->h.m1+*q);
        /* Word text is length-prefixed (see ->u), so bound the print. */
        err("String %.*s found too early\n",(int)hit->u,hit->c);
        return 0;
      }
      if (!*q) {
        lz++;
        *q=(void *)lz-w->h.m1;
      }
      z=w->h.m1+*q;

    }

    /* The last bit selects the slot that will point at the word record. */
    q=z->v+ (t1 & 0x01);

    if (w->h.m1+*q>=w->he) {
      W *hit=(W *)(w->h.m1+*q);
      err("String %.*s already entered\n",(int)hit->u,hit->c);
      return 0;
    }
    *q=w->h.m-w->h.m1;

    if (!map_write_element(&w->h,g.w) ||
        !map_write_gen(&w->h,g))
      return 0;

  }

  /* Append the node count, then re-request the map sizeof(i) smaller than
     its current size (presumably to trim it -- TODO confirm map_get). */
  i=(Huff *)w->he-(Huff *)w->h.m1;
  if (!map_write_element(&w->h,i))
    return 0;
  if (!map_get(&w->h,0,w->h.size-sizeof(i)))
    return 0;

  return 1;

}

/* Limit compared against the running bit count in huff_load_codes1();
   assigned by huff_load_codes() before the walk starts. */
static unsigned Max_bits;

/*
 * Recursive worker for huff_load_codes(): walk the subtree rooted at h and
 * store the code of every word reached into w->d via hash_put().
 *
 * The code under construction lives in a function-static Datum shared by
 * all recursion levels.  On entry the current bit count selects the bit
 * that this level controls; child 0 leaves it clear, child 1 sets it, so
 * codes are assembled low-order bit first.  A child offset below w->he is
 * another Huff node, anything else is a word record.
 *
 * Returns 1 on success, 0 on failure.
 *
 * NOTE(review): the failure returns leave the static bit count (and
 * possibly a set value bit) unrestored, so a later call would start from
 * that state -- confirm callers never retry after a failure.
 */
static INLINE int
huff_load_codes1(Words *w,Huff *h) {

  static Datum cur;
  unsigned bit,side;
  Gen key;
  W *leaf;

  bit=1<<cur.code.c.bits;
  if (++cur.code.c.bits>=Max_bits) {
    err("Maximum bit number exceeded: %u\n",Max_bits);
    return 0;
  }

  for (side=0;side<2;side++) {

    if (side)
      cur.code.c.value|=bit;

    if (w->h.m1+h->v[side]>=w->he) {
      /* Word record: register it under the code built so far. */
      leaf=w->h.m1+h->v[side];
      key.v=leaf->c;
      key.w=leaf->u;
      if (!hash_put(&w->d,&key,&cur))
        return 0;
    } else if (!huff_load_codes1(w,w->h.m1+h->v[side])) {
      return 0;
    }

    if (side)
      cur.code.c.value&=~bit;

  }

  /* Back out this level before returning to the parent. */
  cur.code.c.bits--;

  return 1;

}

/*
 * Derive a code for every word from the tree stored in w->h and record it
 * in w->d, by walking the tree from its root at w->h.m1.
 *
 * Max_bits is set to one more than the value obtained by storing -1 into
 * Code.value.
 * NOTE(review): huff_load_codes1() compares this against a bit *count*;
 * if Code.value is a bit-field this is 2^width rather than width -- confirm
 * that is the intended limit.
 *
 * Returns the result of huff_load_codes1(): 1 on success, 0 on failure.
 */
int
huff_load_codes(Words *w) {

  Code probe;

  probe.value=-1;
  Max_bits=probe.value+1;

  return huff_load_codes1(w,w->h.m1);

}


/*
 * Decode one symbol from a bit stream by walking the Huffman tree.
 *
 *   h1  base of the tree; also the root node and the origin for the byte
 *       offsets stored in Huff.v[]
 *   he  end of the node area; a child at or beyond it is a word record
 *   y   in/out: address of the cursor into the input words
 *   ye  one past the last input word
 *   b   in/out: mask of the next bit to read in **y (bits are consumed
 *       from the low-order end upwards)
 *   bs  when the cursor is on the last word and *b is greater than bs,
 *       nothing is decoded and NULL is returned
 *
 * Returns the word record reached, or NULL if the input ran out first.
 * On return *y and *b identify the bit following the decoded symbol.
 */
W *
huff_decode(Huff *h1,Huff *he,unsigned **y,unsigned *ye,unsigned *b,unsigned bs) {

  Huff *node;      /* last interior node reached */
  Huff *next;      /* most recently selected child */
  unsigned mask;
  unsigned word;

  if (*y==ye-1 && *b > bs)
    return NULL;

  mask=*b;
  node=h1;
  next=h1;

  while (*y<ye) {

    word=**y;

    while (mask) {

      /* The previous step landed outside the node area: that is the result. */
      if (next!=node) {
        *b=mask;
        return (W *)next;
      }

      next=(void *)h1+node->v[(word & mask) ? 1 : 0];

      if (next<he)
        node=next;

      mask+=mask;   /* next bit; becomes 0 once the word is exhausted */

    }

    (*y)++;
    mask=1;

  }

  *b=mask;

  if (next==node)
    return NULL;

  return (W *)next;

}

/*
 * Consistency check between the word records in w->h and the hash in w->d.
 *
 * Pass 1 walks the word records stored between w->he and w->h.m, looks each
 * word up in the hash and verifies that decoding its stored code leads back
 * to that same record.  Pass 2 iterates the hash with hash_seq() and
 * verifies that decoding each code yields a record with identical length
 * and text.
 *
 * Returns 1 if everything matches, 0 after reporting the first mismatch.
 */
int
huff_test(Words *w) {

  W *ww,*ww1;
  Gen g;
  Datum *n;
  unsigned u,ns,*up;

  /* Records are packed back to back: the next one starts right after the
     ww->u bytes of text of the current one. */
  for (ww=w->he;ww<(W *)w->h.m;ww=(W *)(ww->c+ww->u)) {

    g.w=ww->u;
    g.v=ww->c;

    /* hash_get presumably returns NULL for an unknown word; report that
       instead of dereferencing it. */
    if (!(n=hash_get(&w->d,&g))) {
      err("Huff test word missing from hash: %-*.*s\n",
          (int)ww->u,(int)ww->u,ww->c);
      return 0;
    }

    /* Decode from a single word starting at bit 0; bs of -1 (all ones)
       never triggers the early stop in huff_decode(). */
    u=n->code.c.value;
    up=&u;
    ns=1;

    ww1=huff_decode(w->h.m1,w->he,&up,up+1,&ns,-1);

    if (ww1!=ww) {
      /* huff_decode() may have returned NULL; print an empty field then. */
      int got_len=ww1 ? (int)ww1->u : 0;
      const char *got=ww1 ? (const char *)ww1->c : "";

      err("Huff test wordlist mismatch: %-*.*s %-*.*s\n",
          got_len,got_len,got,(int)ww->u,(int)ww->u,ww->c);
      return 0;
    }

  }

  for (;(n=hash_seq(&w->d,&g));) {

    u=n->code.c.value;
    up=&u;
    ns=1;

    ww=huff_decode(w->h.m1,w->he,&up,up+1,&ns,-1);

    if (!ww || g.w!=ww->u || memcmp(g.v,ww->c,g.w)) {
      /* ww is NULL when nothing could be decoded; do not dereference it. */
      int got_len=ww ? (int)ww->u : 0;
      const char *got=ww ? (const char *)ww->c : "";

      err("Huff test database mismatch: %-*.*s %-*.*s\n",
          got_len,got_len,got,(int)g.w,(int)g.w,g.v);
      return 0;
    }

  }


  return 1;

}

