/* {{{1 GNU General Public License

Program Tops - a stack-based computing environment
Copyright (C) 1999-2005  Dale R. Williamson

Author: Dale R. Williamson <dale.williamson@prodigy.net>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
1}}} */

/* hash.c  May 1999

Copyright (c) 1999  D. R. Williamson
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "main.h"
#include "stk.h"

#include "exe.h"
#include "hash.h"
#include "inpo.h"
#include "mat.h"
#include "math1.h"
#include "mem.h"
#include "tex.h"

unsigned int binh(char *S, int bins)
/* Returns zero-based hash bin number for string S.

   Warning: for speed, does not check for bins=0, which produces 
   division by 0.

   Reference:
      WWW:
         File hashtab.c for XPM program, an X pixmap storing/retrieval
         program (1998):
            //www.inria.fr/koala/lehors/xpm.html
            Boston, USA: ftp://ftp.x.org/contrib
            Sophia Antipolis, France: ftp://koala.inria.fr/pub/xpm

      On machine gutter:
         File hashtab.c in
            /home/dale/proj/sage/sou/pix/xpm-3.4k/lib */
{
   unsigned int hash=0;

   while (*S) { /* computing hash function:

      Trying to approximate a pseudo-random number:
         << 5 makes it bigger, -hash makes it smaller, +*S adds some: */

      hash = (hash << 5) - hash + *S++; /* Mock Lisp function in Ref */
   }
   hash=hash % bins;
   return hash;
}

int hash() /* _hash (hT bins --- hB) */
/* For rows of text in T, returns column B containing an index for each.
   Indices returned in B are 0-based. */ 
{
   int bins,chars,rows;
   register int k=0;
   register double *B;
   char *S,*T;

   if((tos-1)->typ!=VOL && (tos-1)->typ!=STR && (tos-1)->typ!=MAT) {
      stkerr(" hash: ","expect STR, VOL, or MAT");
      return 0;
   }
   if(!popint(&bins)) return 0;

   if(tos->typ!=MAT) {
      T=tos->tex;
      chars=tos->col;
   }
   else {
      T=(char *)tos->mat;
      chars=sizeof(double)*tos->col;
   }
   if(!matstk((rows=tos->row),1,"_hash")) return 0;
   B=tos->mat;

   if((S=(char *)malloc(1+chars))==NULL) return 0;
   *(S+chars)='\0';

   for(;k<rows;k++) {
      memcpy(S,T,chars); 
      strchop1(S); /* removing leading and trailing blanks */
      *B=(double)binh(S,bins);
      B++;
      T+=chars;
   }
   mallfree((void *)&S);
   return(lop());
}

int hash_ActiveBins() /* hash_ActiveBins (hTab --- hActiveBins) */
/* Building the ActiveBins matrix for word hash_make.

   Matrix Tab is as long as all the Keys, and column 1 holds the bin
   number of each, in ascending order and possibly with duplicates;
   column 2 holds the corresponding 0-based row numbers of Keys (which 
   are also row numbers of Vals).

   Matrix Active bins created has two columns:
      col 1, row k: sorted bin number (unique--no duplicates)
      col 2, row k: memptr to list of Keys (and Vals) rows (0-based)

   These phrases in word hash_make are replaced by this function:

       \ Initializing matrix ActiveBins (column 1 holds one of each
       \ different bin number from column 1 of Tab, and column 2 is
       \ null):
         (hTab) this 1st catch again sling1 rake lop (bins)
         these rows one null (bins 0) park "ActiveBins" book

       \ Building column 2 memptr list of matrix ActiveBins:
         (hTab) ActiveBins rows 1st
         DO (hTabRem) this 1st catch (hBins)
            ActiveBins I pry, (binI) those rows one fill
            (hBins hBinI) = (hR)
            (hTab hR) rake (hTabRem hTabI)
            (hTabI) 2nd catch (hRowsKeys)
            (hRowsKeys) memput (memptr)
            (memptr) ActiveBins I 2nd store
         LOOP (hTabEmpty) drop

   This C function is much faster than the high level one above; the 
   first indication this might be true is the high level DO LOOP.

   C functions like this one are essential for a fast system.  But writ-
   ing everything in C while an approach is being designed takes a very 
   long time as functions are compiled, debugged, and then discarded as
   better avenues are found.

   Writing high level functions like the one above is much quicker be-
   cause they can be written interactively, step by step.  In a very 
   short time a working solution can be designed and running.

   Then writing low level C functions like this one, speeding the flow 
   at bottlenecks, is very easy and is done once and for all. */
{
   register double *A,*P,*T;
   register int count=0,k=1;
   register int *C;
   int *C0;
   int rowA=1,rowT,rows;

   T=tos->mat;
   rowT=tos->row;

   if(!matstk(rowT,2,"_ActiveBins")) return 0;
   A=tos->mat;

   if((C0=(int *)malloc(1+rowT*sizeof(int)))==NULL) return 0;
   C=C0;

/* Building column 1 of ActiveBins, the sorted list of bins with
   no duplicates: */
   *A=*T;
   A++;
   T++;
   *C=1;
   
   for(;k<rowT;k++) {
      if(*T!=*(T-1)) {
         *A=*T;
         A++;

         *C+=count;
         C++;
         *C=1;

         rowA++;
         count=0;
      }
      else count++;
      T++;
   }
   *C+=count;
   tos->row=rowA;

/* Building column 2 of ActiveBins, a memptr list to arrays of Keys 
   rows (which are the same as Vals rows): */
   A=rowA+tos->mat;     /* top of A column 2 */
   T=rowT+(tos-1)->mat; /* top of T column 2 */
   C=C0;                /* counts in each row of A */

   for(k=0;k<rowA;k++) {
      matstk(rows=*C,1,"_mem");
      P=tos->mat;
      memcpy((double *)P,(double *)T,rows*sizeof(double));
      memput();

      popd(A); /* memptr into A */

      A++;
      T+=rows;
      C++;
   }
   mallfree((void *)&C0);
   return(lop());
}

int hash_bin_fetch1() /* hash_bin_fetch1 (hBinsActive b --- hA) */
/* Returned A is a list of the hash Keys rows from hash_make that
   occupy bin b.

   Note that incoming bin b is a 0-based index, always less than nbins
   returned by word hash_bins.

   This function replaces the following in word hash_bin_fetch:

      (hBinNums b) bsearch (hB r f)
      IF (hB r) reach 2nd pry (memptr) mempeek (hA)
      ELSE 2drop purged \ b is empty: not an active bin
      THEN */
{
   double *B,b;
   int r,rows;

   popd(&b);
   B=tos->mat;
   rows=tos->row;

   if(bsearchd(b,B,rows,&r)) {
   /* Fetch the memptr NUM at B(r,2) and turn it into an array: */
      pushd(0);
      memcpy(&tos->real,(B+r+rows),sizeof(long));
      if(!tos->real) return(
         drop() && matstk(0,0,"_purged")
      );

   /* Getting column vector of memptr numbers: */
      mempeek();
      return(lop());
   }
/* b is empty; not an active bin: */
   else return(drop() && matstk(0,0,"_purged"));
}

int hash_lookup1() /* 
   hash_lookup1 (hKeys hBins ptrKEYS ptrVALS ptrBinsActive --- hV1 hV2
      ... hVn n)

   Looking up Vk matching Keys(k).

   This function replaces the following in word hash_lookup:

      depth push, Keys rows 1st
      DO Hash Bins I pry (hHash b) 2dup
         (hHash b) hash_bin_vals (hVals) rev
         (hHash b) hash_bin_keys (hKeys)
         Keys I quote (hKeys Key.i) grepe (hR)
         (hVals hR) reach notrailing (hVal)
         "_V" I suffix naming (hV.i)
      LOOP
      depth pull less

   Using this function also required some rearrangements in the orig-
   inal word hash_lookup.  Of note is the use of catalog item ptrs sent
   to this function, instead of handles, and the use below of function
   exe1() to fire a ptr to place its catalog item handle on the stack.

   Function exe1() works like word exe (and function exe()), but does
   no checking to see if a ptr is valid.  It is meant for speed where
   there is little chance of receiving an invalid ptr. */
{
   double ptrVALS,ptrBinsActive;

   char *KEYS,*Keys,*s,*S;
   register double *Bins,*R,*Rows;
   register int j,k=0,nrow=0;

   char *name;
   int chars,rows,width;

   popd(&ptrBinsActive);
   popd(&ptrVALS);

/* ptrKEYS: */
   exe(); /* hKEYS */
   KEYS=tos->tex;
   width=tos->col;

   if((S=(char *)malloc(1+width))==NULL) return 0;
   *(S+width)='\0';

/* hKeys hBins hKEYS */
   rows=(tos-1)->row; /* size of Bins and Keys */
   Bins=(tos-1)->mat;
   Keys=(tos-2)->tex;
   chars=(tos-2)->col;

   if((s=(char *)malloc(1+chars))==NULL) {
      mallfree((void *)&S);
      return 0;
   }
   *(s+chars)='\0';

   for(;k<rows;k++) {
      exe1(ptrVALS); /* hVALS */

      memcpy(s,Keys,chars);
      Keys+=chars;
      strchop1(s);

      exe1(ptrBinsActive); /* hBinsActive */
      pushd(*Bins); /* b */
      Bins++;

      hash_bin_fetch1(); /* hA */
      R=tos->mat;

      matstk(tos->row,1,""); /* hRows */
      Rows=tos->mat;

      nrow=0;
      for(j=0;j<tos->row;j++) {
         memcpy(S,KEYS+loclin((int)*R,width),width);
         strchop1(S);

         if(strmatch(S,s)) {
            *Rows=XBASE+*R;
            Rows++;
            nrow++;
         }
         R++;
      }
      tos->row=nrow;
   /* hVals hA hRows */
      lop();
      
   /* hVals hRows */
      reach();
      notrailing();

      name=mprintf("_V%d",1+k);
      pushq2(name,strlen(name));
      naming();
      mallfree((void *)&name);
   }
   for(k=0;k<3;k++) {
      pushint(rows);
      roll();
      drop();
   }
   mallfree((void *)&s);
   mallfree((void *)&S);
   return(pushint(rows));
}
