/*  GNU Ocrad - Optical Character Recognition program
    Copyright (C) 2003, 2004 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
    Return values: 0 for a normal exit, 1 for environmental problems
    (file not found, invalid flags, I/O errors, etc.), 2 to indicate a
    corrupt or invalid input file, 3 for an internal consistency error
    (e.g., a bug) which caused Ocrad to panic.
*/

#include <cctype>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
#include <getopt.h>
#include "common.h"
#include "rectangle.h"
#include "bitmap.h"
#include "block.h"
#include "blockmap.h"
#include "character.h"
#include "textline.h"
#include "textblock.h"


namespace {

// Date of this version: 2004-02-09

// Version string printed by show_version and written by main into the
// first line of the export file.
const char *const program_version = "0.7";
// Year printed by show_version in the copyright line.
const char *const program_year    = "2004";


// Write the version banner (program version, copyright year and a short
// licence notice) to standard output.
void show_version() throw()
  {
  std::printf( "GNU Ocrad version %s\n"
               "Copyright (C) %s Antonio Diaz.\n"
               "This program is free software; you may redistribute it under the terms of\n"
               "the GNU General Public License.  This program has absolutely no warranty.\n",
               program_version, program_year );
  }


// Report an error on standard error.
//   program_name: if non-null and non-empty, a hint pointing at
//                 `<program_name> --help' is printed.
//   msg:          if non-null and non-empty, printed prefixed by "ocrad: ".
// Either argument may be 0 to suppress the corresponding line.
void show_error( const char * program_name, const char * msg ) throw()
  {
  const bool have_msg = ( msg != 0 && *msg != '\0' );
  const bool have_name = ( program_name != 0 && *program_name != '\0' );

  if( have_msg )
    std::fprintf( stderr,"ocrad: %s\n", msg );
  if( have_name )
    std::fprintf( stderr,"Try `%s --help' for more information.\n", program_name );
  }


// Write the usage summary to standard output.
//   program_name: name shown in the "Usage:" line.
//   verbose:      when true, the debugging options are listed as well.
void show_help( const char * program_name, bool verbose ) throw()
  {
  // Only the usage line needs formatting; everything else is constant text.
  std::printf( "GNU Ocrad, Optical Character Recognition program\n"
               "Reads pbm file(s), or standard input, and sends text to standard output\n"
               "\nUsage: %s [options] [files]\n", program_name );

  std::fputs( "Options:\n"
              "  -h, --help               display this help and exit\n"
              "  -V, --version            output version information and exit\n"
              "  -a, --append             append text to output file\n"
              "  -b, --block=<n>          process only the specified text block\n"
              "  -c, --charset=<name>     try `--charset=help' for a list of names\n"
              "  -f, --force              force overwrite of output file\n"
              "  -F, --format=<fmt>       output format (byte, utf8)\n"
              "  -i, --invert             invert image levels (white on black)\n"
              "  -l, --layout=<n>         layout analysis, 0=none, 1=column, 2=full\n"
              "  -o <file>                place the output into <file>\n"
              "  -v, --verbose            be verbose\n"
              "  -x <file>                export OCR Results File to <file>\n",
              stdout );

  if( verbose )
    std::fputs( "  -1, -4                   pbm output file type (debug)\n"
                "  -C, --copy               'copy' input to output (debug)\n"
                "  -D, --debug=<level>      (0-100) output intermediate data (debug)\n"
//              "  -L, --level              level input image before doing OCR\n"
                "  -R <scale>               reduce input image by <scale> (debug)\n"
                "  -S <type>                make a 'special file' (debug)\n"
                "  -T <threshold>           set threshold for -R option (debug)\n",
                stdout );

  std::fputs( "\nReport bugs to bug-ocrad@gnu.org\n", stdout );
  }


// Run the recognizer on text block number 'c' of 'page_image' and emit the
// result as selected by 'control'.
//
// control.debug_level chooses what is written:
//   >= 90  only the block map is printed to control.outfile;
//   >= 86  the text block is dumped with dprint (graph from level 88 up,
//          recursive when the level is odd);
//   == 0   the recognized text is printed normally.
// Independently of the debug level (below 90), the text block is also
// exported when control.exportfile is set.
void ocr( const Bitmap & page_image, const Charset & charset,
                                     const Control & control, int c ) throw()
  {
  Blockmap blockmap( page_image, c, control.debug_level );
  if( !blockmap.blocks() ) return;		// block map reports no blocks

  if( control.debug_level >= 90 )
    {
    blockmap.print( control.outfile, control.debug_level );
    return;
    }

  Textblock textblock( charset, blockmap.block_vector() );

  if( control.debug_level >= 86 )
    {
    bool with_graph = ( control.debug_level >= 88 );
    bool odd_level = ( ( control.debug_level & 1 ) != 0 );
    textblock.dprint( control, with_graph, odd_level );
    }

  if( !control.debug_level )
    textblock.print( control );
  if( control.exportfile != 0 )
    textblock.xprint( control );
  }


// Read one page image from 'infile' and process it as selected by 'control'.
//
// After layout analysis the page is handled by the first mode that applies:
//   - control.scale != 0:       save a reduced copy of the page;
//   - control.specialtype != 0: save a 'special file' built from the page
//                               ('v', 'h' or 'g');
//   - control.copy:             save the page or the selected text block(s);
//   - otherwise:                run the character recognizer on every
//                               selected text block.
// A negative control.rindex selects all text blocks; otherwise only the
// block with that (0-based) index is processed.
//
// Returns 0 on success, 1 if the requested block does not exist or the
// special type is unknown, and 2 if a Bitmap::Error is thrown.
int process_file( FILE *infile, const Charset & charset,
                                const Control & control ) throw()
  {
  try
    {
    Bitmap page_image( infile, control.invert );

    page_image.analyse_layout( control.layout_level );
    if( control.verbose ) std::fprintf( stderr, "number of text blocks %d\n",
                                page_image.rectangles() );

    // The requested block must exist in this page.
    if( control.rindex >= page_image.rectangles() )
      {
      std::fprintf( stderr,"This page has only %d text block(s)\n",
                    page_image.rectangles() );
      return 1;
      }

    if( control.level )
      std::fprintf( stderr, "\nslope_best = %d\n",
                    page_image.horizontalify( control.verbose ) );

    if( control.scale != 0 )
      {
      Bitmap reduced( page_image, control.scale, control.threshold );
      reduced.save( control.outfile, control.filetype ); return 0;
      }
    else if( control.specialtype != 0 )
      {
      Bitmap::type t;
      if( control.specialtype == 'v' ) t = Bitmap::vertical_histogram ;
      else if( control.specialtype == 'h' ) t = Bitmap::horizontal_histogram ;
      else if( control.specialtype == 'g' ) t = Bitmap::connected_ground ;
      else { show_error( 0, "bad special type" ); return 1; }
      Bitmap sb( page_image, t );
      sb.save( control.outfile, control.filetype ); return 0;
      }
    else if( control.copy )
      {
      // With a single block and no block selected, the whole page is saved.
      if( control.rindex < 0 && page_image.rectangles() == 1 )
        { page_image.save( control.outfile, control.filetype ); return 0; }
      for( int c = 0; c < page_image.rectangles(); ++c )
        if( control.rindex < 0 || control.rindex == c )
          {
          Bitmap bitmap( page_image, page_image.rectangle_vector()[c] );
          bitmap.save( control.outfile, control.filetype );
          }
      return 0;
      }

    if( control.exportfile )
      std::fprintf( control.exportfile, "total blocks %d\n",
                    ( control.rindex < 0 ) ? page_image.rectangles() : 1 );

    // call the character recognizer for every rectangle of text
    for( int c = 0; c < page_image.rectangles(); ++c )
      if( control.rindex < 0 || control.rindex == c )
        {
        if( control.exportfile )
          {
          // Blocks are numbered from 1 in the export file.
          const Rectangle & r = page_image.rectangle_vector()[c];
          std::fprintf( control.exportfile, "block %d %d %d %d %d\n", c + 1,
                        r.left(), r.top(), r.width(), r.height() );
          }
        ocr( page_image, charset, control, c );
        }
    }
  // Catch by const reference: no copy of the exception object is made and
  // an error type derived from Bitmap::Error would not be sliced.
  catch( const Bitmap::Error & e ) { std::fputs( e.s, stderr ); return 2; }
  return 0;
  }


// Return a pointer to the part of 'filename' that follows its last '/'.
// If 'filename' contains no '/', 'filename' itself is returned; if it ends
// in '/', the result is the empty string at its end. The returned pointer
// points into 'filename'; nothing is copied.
const char * my_basename( const char * filename ) throw()
  {
  const char * const last_slash = std::strrchr( filename, '/' );
  return last_slash ? last_slash + 1 : filename;
  }

} // end namespace


// 'infile' contains the scanned image (in pbm format) to be converted
// to text.
// 'outfile' is the destination for the text version of the scanned
// image. (or for a pbm file if debugging).
// 'exportfile' is the Ocr Results File.
int main( int argc, char *argv[] ) throw()
  {
  Charset charset;
  Control control;
  char *outfile_name = 0, *exportfile_name = 0;

  // scan arguments

  while( true )			// process options
    {
    static struct option long_options[] =
      {
      {"append", no_argument, 0, 'a'},
      {"block", required_argument, 0, 'b'},
      {"charset", required_argument, 0, 'c'},
      {"copy", no_argument, 0, 'C'},
      {"debug", required_argument, 0, 'D'},
      {"force", no_argument, 0, 'f'},
      {"format", required_argument, 0, 'F'},
      {"help", no_argument, 0, 'h'},
      {"invert", no_argument, 0, 'i'},
      {"layout", required_argument, 0, 'l'},
      {"level", no_argument, 0, 'L'},
      {"verbose", no_argument, 0, 'v'},
      {"version", no_argument, 0, 'V'},
      {0, 0, 0, 0}
      };

    int c = getopt_long( argc, argv, "14ab:c:CD:fF:hiLl:o:R:S:T:Vvx:",
                         long_options, 0 );
    if( c == -1 ) break;		// all options processed

    switch( c )
      {
      case 0: break;
      case '?': show_error( argv[0], 0 ); return 1;	// bad option
      case '1':
      case '4': control.filetype = c; break;
      case 'a': control.append = true; break;
      case 'b': control.rindex = std::strtol( optarg, 0, 0 ) - 1; break;
      case 'c': if( !charset.enable( optarg ) )
                  { charset.show_error( optarg ); return 1; }
                break;
      case 'C': control.copy = true; break;
      case 'D': control.debug_level = std::strtol( optarg, 0, 0 ); break;
      case 'f': control.force = true; break;
      case 'F': if( !control.set_format( optarg ) )
                  { show_error( argv[0], "bad output format" ); return 1; }
                break;
      case 'h': show_help( argv[0], control.verbose ); return 0;
      case 'i': control.invert = true; break;
      case 'l': control.layout_level = std::strtol( optarg, 0, 0 ); break;
      case 'L': control.level = true; break;
      case 'o':	outfile_name = optarg; break;
      case 'R': control.scale = std::strtol( optarg, 0, 0 ); break;
      case 'S': control.specialtype = optarg[0]; break;
      case 'T': control.threshold = std::strtol( optarg, 0, 0 ); break;
      case 'v': control.verbose = true; break;
      case 'V':	show_version(); return 0;
      case 'x':	exportfile_name = optarg; break;
      default: return 1;
      }
    } // end process options

  if( outfile_name )
    {
    if( control.append ) control.outfile = std::fopen( outfile_name, "a" );
    else if( control.force ) control.outfile = std::fopen( outfile_name, "w" );
    else if( ( control.outfile = std::fopen( outfile_name, "wx" ) ) == 0 )
      {
      std::fprintf( stderr, "Output file %s already exists.\n", outfile_name );
      return 1;
      }
    if( !control.outfile )
      { std::fprintf( stderr, "Cannot open %s\n", outfile_name ); return 1; }
    }

  if( exportfile_name )
    {
    control.exportfile = std::fopen( exportfile_name, "w" );
    if( !control.exportfile )
      { std::fprintf( stderr, "Cannot open %s\n", exportfile_name ); return 1; }
    std::fprintf( control.exportfile,
                  "# Ocr Results File. Created by GNU Ocrad version %s\n",
                  program_version );
    }

// process any remaining command line arguments (input files)
  FILE *infile = (optind < argc) ? 0 : stdin;
  char *infile_name = "-";
  int retval = 0;
  while( true )
    {
    if( infile == stdin )
      {
      std::ungetc( std::getc( infile ), infile );
      if( std::feof( infile ) || std::ferror( infile ) ) infile = 0;
      }
    while( infile != stdin )
      {
      if( infile ) std::fclose( infile );
      if( optind >= argc ) { infile = 0; break; }
      infile_name = argv[optind++];
      if( std::strcmp( infile_name, "-" ) == 0 ) infile = stdin;
      else infile = std::fopen( infile_name, "r" );
      if( infile ) break;
      std::fprintf( stderr, "Cannot open %s\n", infile_name );
      if( retval == 0 ) retval = 1;
      }
    if( !infile ) break;

    if( control.exportfile )
      std::fprintf( control.exportfile, "source file %s\n",
                    my_basename( infile_name ) );

    int tmp = process_file( infile, charset, control );
    if( tmp > 0 && infile == stdin ) infile = 0;
    if( tmp > retval ) retval = tmp;
    std::fflush( control.outfile );
    if( control.exportfile ) std::fflush( control.exportfile );
    }
  std::fclose( control.outfile );
  if( control.exportfile ) std::fclose( control.exportfile );
  return retval;
  }
