/*            Copyright (C) 1999, 2000, 2001, 2002 Stijn van Dongen
 *
 * You can redistribute and/or modify this file under the terms of the GNU
 * General Public License; either version 2 of the License or (at your option)
 * any later version.  You should have received a copy of the GPL along with
 * this file, in the file COPYING.
*/

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "util/types.h"
#include "util/link.h"
#include "util/hash.h"
#include "util/ting.h"
#include "util/types.h"
#include "util/io.h"


int main
(  int   argc
,  char* argv[]
)
   {  int            a
   ;  int            ct          =  0
   ;  int            found       =  0
   ;  int            hashoptions =  0
   ;  int            n_buckets   =  1024
   ;  float          load        =  1.0
   ;  mcxbool        consthash   =  FALSE
   ;  mcxbool        buildonly   =  FALSE
   ;  mcxbool        walkhash    =  FALSE
   ;  mcxbool        searchegg   =  FALSE
   ;  mcxbool        show        =  FALSE
   ;  const char*    pattern     =  "egg"
   ;  mcxTing*        txts       =  mcxTingEmpty(NULL, 30)
   ;  mcxmode        rlmode      =  MCX_READLINE_CHOMP | MCX_READLINE_SKIP_EMPTY
   ;  u32            (*strhash)(const void* str)   =  mcxTingDPhash
   ;  mcxTing        *txt
   ;  mcxKV          *kv

   ;  mcxIO          *xf
   ;  mcxHash*       hash
   ;  mcxHashSettings   settings

   ;  if (argc < 2 || !strcmp(argv[1], "-h"))
      goto help

   ;  xf =  mcxIOnew(argv[1], "r")
   ;  mcxIOopen(xf, EXIT_ON_FAIL)

   ;  a = 2
   ;  while (a < argc)
      {  if (!strcmp(argv[a], "-b"))
         {  if (a++ + 1 <argc)
            n_buckets = atoi(argv[a]) 
         ;  else
            goto arg_missing
      ;  }
         else if (!strcmp(argv[a], "-lb"))
         {  if (a++ + 1 <argc)
            {  int   l_buckets = atoi(argv[a]) 
            ;  n_buckets   =  2
            ;  while (--l_buckets)
               n_buckets <<=  1
         ;  }
            else
            goto arg_missing
      ;  }
         else if (!strcmp(argv[a], "--const"))
         {  consthash = TRUE
      ;  }
         else if (!strcmp(argv[a], "--build"))
         {  buildonly = TRUE
      ;  }
         else if (!strcmp(argv[a], "--walk"))
         {  walkhash = TRUE
      ;  }
         else if (!strcmp(argv[a], "--show"))
         {  show = TRUE
         ;  walkhash = TRUE 
      ;  }
         else if (!strcmp(argv[a], "-egg"))
         {  if (a++ + 1 <argc)
            pattern = argv[a] 
         ;  else
            goto arg_missing
         ;  searchegg = TRUE
      ;  }
         else if (!strcmp(argv[a], "-load"))
         {  if (a++ + 1 <argc)
            load = atof(argv[a]) 
         ;  else
            goto arg_missing
      ;  }
         else if (!strcmp(argv[a], "-lload"))
         {  if (a++ + 1 <argc)
            {  int iload = 2
            ;  int x = atoi(argv[a]) 
            ;  while (--x)
               iload <<= 1
            ;  load = iload
         ;  }
            else
            goto arg_missing
      ;  }
         else if (!strcmp(argv[a], "-hf"))
         {  if (a++ + 1 <argc)
            {  if (!(strhash = mcxTingHFieByName(argv[a])))
               fprintf
               (  stdout
               ,  "[hashfile] hash option <%s> not supported\n", argv[a]
               )
               ,  exit(1)
         ;  }
            else
            goto arg_missing
      ;  }
         else if (!strcmp(argv[a], "-h"))
         {  help:
fprintf
(  stdout
,
"Usage: hashfile <file fname> [options]* [search-strings]*\n"
"   Options:\n"
"   -b <#buckets>, -lb <2log-of-#buckets>\n"
"        Number of buckets initially created.\n"
"   -f <dp|bj|ct|bd|ge|djb|elf|svd1|svd2|svd>\n"
"        Hash function to use:\n"
"        o  Daniel Philips (default)\n"
"        o  Bob Jenkins\n"
"        o  Chris Torek\n"
"        o  Berkely Databse\n"
"        o  GNU Emacs\n"
"        o  Dan Bernstein\n"
"        o  UNIX ELF\n"
"        o  Some random and less random attempts of mine\n"
"   -load <load>, -lload <2log-of-load>\n"
"        Hash doubles when *average* bucket size exceeds load.\n"
"   --build    Exit after building hash.\n"
"   --const    Disable hash growing (how un-Dutch).\n"
"   --walk     Walk entire hash after creation.\n"
"   --show     Walk entire hash after creation, print all buckets.\n"
"   -egg <string> Look for an egg dressed in string.\n"
)
         ,  exit(0)
      ;  }
         else if (0)
         {  arg_missing:
         ;  fprintf
            (  stdout
            ,  "[hashfile] Flag %s needs argument; see help (-h)\n" 
            ,  argv[argc-1]
            )
         ;  exit(1)
      ;  }
         else
         break

      ;  a++
   ;  }

      hash                 =  mcxHashNew(n_buckets, strhash, mcxTingCmp)

   ;  if (consthash)
      hashoptions |= MCX_HASH_OPT_CONSTANT

   ;  mcxHashSetOpts(hash, load, hashoptions)

   ;  mcxHashGetSettings(hash, &settings)
   ;  n_buckets = settings.n_buckets

   ;  fprintf(stdout, "\n---> building hash ..\n")
   ;  while((txt = mcxIOreadLine(xf, NULL, rlmode)))
      {  mcxKV*   kv =  mcxHashSearch(txt, hash, MCX_DATUM_INSERT)
      ;  if (!kv)
         fprintf(stdout, ">>> >>> void kv!\n")
      ;  else if ((mcxTing*)kv->key != txt)
         /* todo / fixme: should free sth!? */
         fprintf(stdout, ">>> >>> [%.50s] overwrite!\n", txt->str)
      ;  else if (strcmp(((mcxTing*)kv->key)->str, txt->str))
         fprintf(stdout, ">>> >>> string diff!\n")
      ;  ct++
   ;  }

      fprintf
      (stdout, "done building hash (%d insertions, %d lines).\n", ct, xf->lc)
   ;  mcxHashGetSettings(hash, &settings)
   ;  fprintf
      (  stdout
      ,  "hash stats %d entries, %d buckets initial, %d buckets final\n"
      ,  settings.n_entries
      ,  n_buckets
      ,  settings.n_buckets
      )
   ;  fprintf
      (  stdout
      ,  "hash settings: [load %.3f] [mask %d] [bits %d]\n"
      ,  settings.load
      ,  settings.mask
      ,  settings.n_bits
      )

   ;  if (buildonly)
      exit(0)

   ;  if (walkhash)
      {  mcxHashWalk*   hashwalk =  mcxHashWalkNew(hash)
      ;  int bucketidx = -1

      ;  fprintf(stdout, "---> walking hash ..\n")
      ;  ct =  0
      ;  found =  0

      ;  while((kv = mcxHashWalkStep(hashwalk)))
         {  ct++
         ;  if (mcxHashSearch((mcxTing*) kv->key, hash, MCX_DATUM_FIND))
            found++
         ;  if (searchegg && strstr(((mcxTing*)kv->key)->str, pattern))
            fprintf(stdout, "() %s\n", ((mcxTing*)kv->key)->str)
         ;  else if (show)
            {  if (hashwalk->bucket > bucketidx)
               {  bucketidx =  hashwalk->bucket
               ;  fprintf(stdout, "%d @ %s\n",bucketidx,((mcxTing*)kv->key)->str)
            ;  }
               else
               fprintf(stdout, "* %s\n", ((mcxTing*)kv->key)->str)
         ;  }
      ;  }
         fprintf(stdout, "done walking hash (%d walked, %d found).\n", ct, found)
      ;  mcxHashWalkFree(&hashwalk)
   ;  }

      while (a < argc)
      {  mcxTingWrite(txts, argv[a])
      ;  kv                =  mcxHashSearch(txts, hash, MCX_DATUM_FIND)

      ;  if (kv)
         fprintf(stdout, "---> present: <%s>\n", ((mcxTing*)kv->key)->str)
      ;  else
         fprintf(stdout, "--->  absent:  <%s>\n", txts->str)

      ;  a++
   ;  }

      mcxHashStats(stdout, hash)

   ;  fprintf(stdout, "---> deleting hash ..\n")
   ;  mcxHashFree(&hash, mcxTingFree_v, NULL)
   ;  fprintf(stdout, "done deleting hash.\n")
   ;  fprintf(stdout, "\n")

   ;  return 0
;  }



