/* Copyright (C) 2000-2009 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#include "udm_uniconv.h"
#include "udm_utils.h"

#define MCONV_BUF_SIZE 4096


/*
  Return the human-readable group label for the family of a charset.

  cs      charset descriptor; only cs->family is read.
  return  pointer to a string literal naming the family, or "Unknown"
          when the family value is not one of those listed below.
*/
static const char*
csgroup(const UDM_CHARSET *cs)
{
  static const struct
  {
    int family;
    const char *label;
  } groups[]=
  {
    { UDM_CHARSET_ARABIC,              "Arabic" },
    { UDM_CHARSET_ARMENIAN,            "Armenian" },
    { UDM_CHARSET_BALTIC,              "Baltic" },
    { UDM_CHARSET_CELTIC,              "Celtic" },
    { UDM_CHARSET_CENTRAL,             "Central Eur" },
    { UDM_CHARSET_CHINESE_SIMPLIFIED,  "Chinese Simplified" },
    { UDM_CHARSET_CHINESE_TRADITIONAL, "Chinese Traditional" },
    { UDM_CHARSET_CYRILLIC,            "Cyrillic" },
    { UDM_CHARSET_GREEK,               "Greek" },
    { UDM_CHARSET_HEBREW,              "Hebrew" },
    { UDM_CHARSET_ICELANDIC,           "Icelandic" },
    { UDM_CHARSET_JAPANESE,            "Japanese" },
    { UDM_CHARSET_KOREAN,              "Korean" },
    { UDM_CHARSET_NORDIC,              "Nordic" },
    { UDM_CHARSET_SOUTHERN,            "South Eur" },
    { UDM_CHARSET_THAI,                "Thai" },
    { UDM_CHARSET_TURKISH,             "Turkish" },
    { UDM_CHARSET_UNICODE,             "Unicode" },
    { UDM_CHARSET_VIETNAMESE,          "Vietnamese" },
    { UDM_CHARSET_WESTERN,             "Western" },
    { UDM_CHARSET_GEORGIAN,            "Georgian" },
    { UDM_CHARSET_INDIAN,              "Indian" }
  };
  size_t idx;

  for (idx= 0; idx < sizeof(groups) / sizeof(groups[0]); idx++)
  {
    if (groups[idx].family == cs->family)
      return groups[idx].label;
  }
  return "Unknown";
}


/*
  Sort comparator for UDM_CHARSET elements.

  Orders primarily by the group label returned by csgroup() and, within
  the same group, by charset name. Both comparisons are case-insensitive.

  v1, v2  pointers to the two UDM_CHARSET elements being compared.
  return  negative, zero or positive, as returned by strcasecmp().
*/
static int
cmpgrp(const void *v1, const void *v2)
{
  const UDM_CHARSET *left= (const UDM_CHARSET *) v1;
  const UDM_CHARSET *right= (const UDM_CHARSET *) v2;
  int by_group= strcasecmp(csgroup(left), csgroup(right));

  if (by_group != 0)
    return by_group;
  return strcasecmp(left->name, right->name);
}


/*
  Print the list of available charsets to stderr, grouped by family.

  The charset table is walked starting at the entry returned by
  UdmGetCharSetByID(0) and advancing element by element until an entry
  with a NULL name is reached.
  NOTE(review): this presumes the entry with ID 0 is the first element of
  a contiguous table terminated by a NULL-name entry - confirm against
  the UdmGetCharSetByID() implementation.

  Entries whose family is UDM_CHARSET_UNKNOWN are skipped. The remaining
  entries are copied into a local array, sorted with cmpgrp() (group
  label, then name) and printed with one header per family.
*/
static void
display_charsets(void)
{
  UDM_CHARSET *cs;
  UDM_CHARSET c[100];
  const size_t max_charsets= sizeof(c) / sizeof(c[0]);
  size_t i;
  size_t n= 0;
  int family= -1;   /* no family header printed yet */

  /*
    Stop collecting once the local array is full: writing past c[] would
    corrupt the stack if the table ever holds more entries than fit here.
  */
  for (cs= UdmGetCharSetByID(0); cs && cs->name && n < max_charsets; cs++)
  {
    /* Skip not compiled charsets */
    if (cs->family != UDM_CHARSET_UNKNOWN)
      c[n++]= *cs;
  }

  /* n is bounded by the array size, so the cast to int cannot truncate. */
  fprintf(stderr, "\n%d charsets available:\n", (int) n);

  UdmSort(c, n, sizeof(UDM_CHARSET), &cmpgrp);
  for (i= 0; i < n; i++)
  {
    /* Start a new line with the group label whenever the family changes. */
    if (family != c[i].family)
    {
      fprintf(stderr, "\n%19s : ", csgroup(&c[i]));
      family= c[i].family;
    }
    fprintf(stderr, "%s ", c[i].name);
  }
  fprintf(stderr, "\n");
}


/*
  Print the mconv help page to stderr.

  level   help verbosity; any value greater than 1 additionally prints
          the list of available charsets via display_charsets().
  return  always 0.
*/
static int
usage(int level)
{
  fprintf(stderr,
          "\n"
          "mconv from %s-%s-%s\n"
          "http://www.mnogosearch.org/ (C)1998-2009, LavTech Corp.\n"
          "\n"
          "Usage: mconv [OPTIONS] -f charset_from -t charset_to  < infile > outfile\n"
          "\n"
          "Converting options:\n"
          "  -v            verbose output\n"
          "  -e            use HTML escape entities for input\n"
          "  -E            use HTML escape entities for output\n"
          "  -h,-?         print help page and exit\n"
          "  -hh,-??       print more help and exit\n"
          "\n"
          "\n"
          "Please post bug reports and suggestions at http://www.mnogosearch.org/bugs/\n",
          PACKAGE, VERSION, UDM_DBTYPE);

  if (level >= 2)
    display_charsets();

  return 0;
}


/*
  mconv entry point: read stdin, convert it from one charset to another
  with UdmConv(), and write the result to stdout.

  Options (parsed with getopt):
    -f charset   source charset (required)
    -t charset   destination charset (required)
    -e           treat input as containing HTML escape entities
    -E           requested HTML escape entities for output (see note below)
    -v           print diagnostics to stderr on failure
    -h, -?       print help; given twice, also list available charsets

  Returns 0 on success. Exits with status 1 on a usage error, an unknown
  charset, or an input/output error. Diagnostics other than the help page
  are printed only when -v was given.
*/
int main(int argc, char **argv)
{
  char *charset_from = NULL, *charset_to = NULL;
  UDM_CHARSET *CH_F, *CH_T;
  UDM_CONV conv;
  /*
    NOTE(review): html_to is set by -E but is never passed to
    UdmConvInit() below, so -E currently has no visible effect. The flag
    needed for output escaping is not visible from this file - confirm
    against udm_uniconv.h and wire it in.
  */
  int html_from= 0, html_to= 0;
  int ch, help= 0, verbose= 0;
  char from_buf[1024];
  char to_buf[1024*8];
  size_t nbytes;

  while ((ch = getopt(argc, argv, "Eehv?t:f:")) != -1)
  {
    switch (ch)
    {
      case 'E': html_to = 1; break;
      case 'e': html_from = 1; break;
      case 'v': verbose = 1; break;
      case 't': charset_to =  optarg; break;
      case 'f': charset_from = optarg; break;
      case '?':
      case 'h':
      default: help++;   /* counted: a repeated -h raises the help level */
    }
  }

  argc -= optind;argv += optind;

  if((argc>1) || (help) || (!charset_from) || (!charset_to))
  {
    usage(help);
    return(1);
  }

  if (!(CH_F = UdmGetCharSet(charset_from)))
  {
    if (verbose)
    {
      fprintf(stderr, "Charset: %s not found or not supported", charset_from);
      display_charsets();
    }
    exit(1);
  }
  
  
  if (!(CH_T = UdmGetCharSet(charset_to)))
  {
    if (verbose)
    {
      fprintf(stderr, "Charset: %s not found or not supported", charset_to);
      display_charsets();
    }
    exit(1);
  }

  UdmConvInit(&conv, CH_F, CH_T, (html_from) ? UDM_RECODE_HTML : 0);

  /*
    NOTE(review): input is converted in independent chunks of
    sizeof(from_buf) bytes. Whether UdmConv() copes with a multi-byte
    sequence split across two reads is not visible here - confirm.
  */
  while((nbytes= fread(from_buf, 1, sizeof(from_buf), stdin)))
  {
    size_t converted= UdmConv(&conv, to_buf, sizeof(to_buf), from_buf, nbytes);

    /* A short write means the output is incomplete: fail instead of
       silently producing a truncated result with exit status 0. */
    if (fwrite(to_buf, 1, converted, stdout) != converted)
    {
      if (verbose) fprintf(stderr, "An output error occurred\n");
      exit(1);
    }
    if (feof(stdin))
      break;
  }

  if (ferror(stdin))
  {
    if (verbose) fprintf(stderr, "An input error occurred\n");
    exit(1);
  }

  /* Buffered output may only fail when it is actually flushed. */
  if (fflush(NULL) != 0)
  {
    if (verbose) fprintf(stderr, "An output error occurred\n");
    exit(1);
  }
  return 0;
}
