/*  ocrad - Optical Character Recognition program
    Copyright (C) 2003 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <cstdio>
#include <list>
#include <map>
#include <vector>
#include "common.h"
#include "rectangle.h"
#include "block.h"
#include "profile.h"
#include "features.h"
#include "iso_8859_1.h"
#include "character.h"


// First attempt at recognition without relying on context.
// Dispatches on the number of blocks that form the character. Characters
// made of 1, 2 or 3 blocks are handed to the matching routine; any other
// block count is ignored here.
void Character::recognize1( int charbox_vcenter ) throw()
  {
  switch( _block_list.size() )
    {
    case 1: recognize11( charbox_vcenter ); break;
    case 2: recognize12(); break;
    case 3: recognize13(); break;
    default: break;
    }
  }


// Recognizes 1 block characters.
// 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghklmnopqrstuvwxyz
// #$&'()*+,-./<>@[\]^_`{|}~
// Dispatches on the number of holes (inner blocks) of the single block:
// 0, 1 or 2 holes are handled; any other count is ignored here.
void Character::recognize11( int charbox_vcenter ) throw()
  {
  const Block & b = _block_list.front();
  switch( b.block_list().size() )
    {
    case 0: recognize110( charbox_vcenter ); break;
    case 1: recognize111(); break;
    case 2: recognize112(); break;
    default: break;
    }
  }


// Recognizes 1 block characters without holes.
// 12357CEFGHIJKLMNSTUVWXYZcfhklmnrstuvwxyz
// '()*+,-./<>@[\]^_`{|}~
//
// The tests below are tried in a fixed order and the first one that yields
// a character wins; if none of them matches, no guess is added.
// `charbox_vcenter' is the row used to tell high marks (apostrophe, grave
// accent) from low ones (comma).
// Besides single characters, three merged pairs are detected and reported
// as two guesses each: "rr", "fl" and "rm".
void Character::recognize110( int charbox_vcenter ) throw()
  {
  const Block & b = _block_list.front();
  Features f( b );
  unsigned char ch = f.test_solid();

  // FIXME all this mess of commas et al.
  if( !ch && f.test_comma() ) ch = ',';
  // Tall narrow block ending at or above charbox_vcenter, with a single
  // minimum in its bottom profile: apostrophe or grave accent, chosen by
  // comparing the positions of the top and bottom profile minima.
  if( !ch && b.bottom() <= charbox_vcenter &&
      2 * b.height() > 3 * b.width() && f.bp.minima() == 1 )
    { if( f.tp.iminimum() >= f.bp.iminimum() ) ch = '\''; else ch = '`'; }
  // Same shape, but starting at or below charbox_vcenter: comma.
  if( !ch && 2 * b.height() > 3 * b.width() && b.top() >= charbox_vcenter &&
      f.bp.minima() == 1 ) ch = ',';
  // The vertical position overrides the shape based guess:
  // a ',' ending at or above the center becomes an apostrophe; a '\''
  // starting at or below the center becomes a comma; a '\'' that extends
  // at least one block width to both sides of the center becomes 'I'.
  if( ch == ',' && b.bottom() <= charbox_vcenter ) ch = '\'';
  else if( ch == '\'' )
    {
    if( b.top() >= charbox_vcenter ) ch = ',';
    else if( b.top() + b.width() <= charbox_vcenter &&
             b.bottom() - b.width() >= charbox_vcenter ) ch = 'I';
    }
  if( ch ) { add_guess( ch, 0 ); return; }

  // Feature tests for groups of similar characters; first match wins.
  ch = f.test_CEFIJLlT();   if( ch ) { add_guess( ch, 0 ); return; }
  ch = f.test_frt();        if( ch ) { add_guess( ch, 0 ); return; }
  ch = f.test_cG();         if( ch ) { add_guess( ch, 0 ); return; }
  ch = f.test_235Esz();     if( ch ) { add_guess( ch, 0 ); return; }
  ch = f.test_HKMNUuvwYy(); if( ch ) { add_guess( ch, 0 ); return; }
  // Two minima in the bottom profile (the argument to minima() is
  // presumably a threshold, here 1/8 of the height -- TODO confirm).
  if( f.bp.minima( b.height() / 8 ) == 2 )
    {
    if( f.tp.minima( b.height() / 8 ) == 2 && f.lp.istip() && f.rp.istip() )
      { add_guess( 'x', 0 ); return; }
    ch = f.test_hnw(); if( ch == 'n' )	// Looks for merged 'rr' or 'fl'
      {
      // An 'n' wider than tall may be two merged 'r'.
      if( b.width() > b.height() )
        {
        int col = b.seek_left( b.vcenter(), b.right() );
        if( col < b.hpos( 90 ) && !b.escape_top( b.vcenter(), col ) )
          // Replace the guesses by the pair "rr"; the second argument of
          // each guess looks like the right limit (column) of that part.
          { only_guess( 0, b.left() ); add_guess( 'r', b.hcenter() );
          add_guess( 'r', b.right() ); return; }
        }
      // Scan upwards from the vertical center looking for a bar: the
      // distance `d' from the horizontal center to the column found by
      // seek_left first drops below half of its maximum (`bar' is set) and
      // later becomes similar to the maximum again.
      int dmax = 0; bool bar = false;
      for( int row = b.vcenter(); row > b.vpos( 25 ); --row )
        {
        int d = b.hcenter() - b.seek_left( row, b.hcenter() );
        if( d > dmax ) dmax = d;
        else if( 2 * d < dmax && dmax > 2 ) bar = true;
        if( bar && similar( d, dmax, 25 ) )
          {
          // Look, between the horizontal center and `limit', for the first
          // column where seek_bottom stops before the bottom of the block;
          // that column is taken as the split point between 'f' and 'l'.
          int col, limit = b.seek_right( b.vcenter(), b.hcenter() );
          for( col = b.hcenter(); col <= limit; ++col )
            if( b.seek_bottom( b.vcenter(), col ) < b.bottom() ) break;
          if( b.left() < col && col < b.right() )
            { only_guess( 0, b.left() ); add_guess( 'f', col - 1 );
            add_guess( 'l', b.right() ); return; }
          }
        }
      }
    // No merged pair found: keep whatever test_hnw() returned, if anything.
    if( ch ) { add_guess( ch, 0 ); return; }
    }
  if( f.bp.minima() == 3 ) { add_guess( 'm', 0 ); return; }
  // Four bottom minima: try to split the block into the pair "rm" at a
  // column found by three successive seek_right calls along the row just
  // above the bottom.
  if( f.bp.minima() == 4 )
    {
    int col = b.seek_right( b.bottom() - 1, b.left() );
    col = b.seek_right( b.bottom() - 1, col + 1, false );
    col = b.seek_right( b.bottom() - 1, col + 1 );
    if( col > b.left() && col < b.right() )
      { only_guess( 0, b.left() ); add_guess( 'r', col );
      add_guess( 'm', b.right() ); return; }
    }

  if( f.tp.minima() == 3 ) { add_guess( 'w', 0 ); return; }
  // Block more than twice as wide as tall, with two minima on top and two
  // on the bottom profile: tilde.
  if( b.width() > 2 * b.height() && f.tp.minima() == 2 && f.bp.minima() == 2 )
    { add_guess( '~', 0 ); return; }
  // Block more than twice as tall as wide, with a convex right profile.
  if( f.rp.isconvex() && b.height() > 2 * b.width() )
    { add_guess( ')', 0 ); return; }

  ch = f.test_line();
  if( ch )
    {
    // An apostrophe starting at or below the center is really a comma.
    if( ch == '\'' && b.top() >= charbox_vcenter ) ch = ',';
    add_guess( ch, 0 ); return;
    }

  // Last resort; if this fails too the character gets no guess.
  ch = f.test_misc();
  if( ch ) { add_guess( ch, 0 ); return; }
  }


// Recognizes 1 block characters with 1 hole.
// 0469ADOPQRabdegopq#
void Character::recognize111() throw()
  {
  Block & b = _block_list.front();
  const Block & h = b.block_list().front();
  Features f( b );
//  Features fh( h );

  if( similar( h.top() - b.top(), b.bottom() - h.bottom(), 40 ) )
    {
    // hole is vertically centered
    if( f.lp.isflats() && !f.rp.isflats() ) { add_guess( 'D', 0 ); return; }
    if( !f.lp.isconvex() && !f.rp.isconvex() )
      {
      if( f.tp.minima() == 2 ) { add_guess( '#', 0 ); return; }
      if( f.tp.minima() == 1 && f.bp.minima() == 1 )
        {
        if( f.bp.isconvex() )
          { add_guess( ISO_8859_1::SEACUTE, 0 ); return; }
        else if( !f.lp.isflats() ) { add_guess( '4', 0 ); return; }
        }
      if( f.bp.minima() == 2 ) { add_guess( 'A', 0 ); return; }
      }
    if( similar( h.left() - b.left(), b.right() - h.right(), 40 ) )
      {
      if( b.width() > 3 * h.width() && f.vscan()[b.vcenter() - b.top()] == 4 )
        { add_guess( '@', 0 ); return; }
//      if( h.block_list().size() ) { add_guess( '0', 0 ); return; }
//      if( f.lp.ispit() )
        {
        if( b.width() >= b.height() || similar( b.height(), b.width(), 21 ) )
          { add_guess( 'o', 0 ); return; }
        add_guess( 'O', 0 ); return;
        }
      }
    if( h.left() - b.left() > b.right() - h.right() && f.rp.ispit() )
      { add_guess( 'D', 0 ); return; }
    if( f.bp.minima() > 1 || f.rp.minima() > 1 )
      { add_guess( 'a', 0 ); return; }
    return;
    }
  if( h.top() - b.top() < b.bottom() - h.bottom() )
    {
    // hole is high
    if( f.bp.minima( b.height() / 10 ) == 2 && f.bp.isctip() )
      {
      if( f.tp.isvpit() ) { add_guess( 'A', 0 ); return; }
      add_guess( 'R', 0 ); return;
      }
    unsigned char ch = f.test_49egpq();
    if( ch ) { add_guess( ch, 0 ); return; }
    }
  if( h.top() - b.top() > b.bottom() - h.bottom() )
    {
    // hole is low
    unsigned char ch = f.test_6abd();
    if( ch )
      {
      add_guess( ch, 0 );
      if( ch == ISO_8859_1::SOACUTE )
        {
        int row = h.top() - ( b.bottom() - h.bottom() ) - 1;
        if( row <= b.top() || row + 1 >= h.top() ) return;
        Block b1( b, *b.blockmap(), b.id() );
        b.top( row + 1 ); b1.bottom( row );
        _block_list.push_back( b1 );
        }
      return;
      }
    }
  }


// Recognizes 1 block characters with 2 holes.
// 8BQg$&
void Character::recognize112() throw()
  {
  const Block & b = _block_list.front();
  const Block & h1 = b.block_list().front();		// upper hole
  const Block & h2 = b.block_list().back();		// lower hole
  int a1 = h1.area();
  int a2 = h2.area();
//  Features f( b );

  if( h1.includes_vcenter( h2 ) && h2.includes_vcenter( h1 ) )
    { add_guess( 'm', 0 ); return; }

  Profile lp( b, Profile::left );
  Profile bp( b, Profile::bottom );
  if( similar( a1, a2, 50 ) )		// I don't like this
    {
    if( lp.isflats() ) { add_guess( 'B', 0 ); return; }

    int col1 = h1.seek_left( h1.bottom(), h1.right() + 1 ) - 1;
    int col2 = h2.seek_right( h2.top(), h2.left() - 1 ) + 1;
    if( col1 <= col2 ) { add_guess( '$', 0 ); return; }

    if( b.hcenter() > h1.hcenter() && b.hcenter() > h2.hcenter() &&
        ( b.hcenter() >= h1.right() || b.hcenter() >= h2.right() ) )
      { add_guess( '&', 0 ); return; }
    for( int row = h1.bottom() + 1; row < h2.top(); ++row )
      if( b.id( row, hcenter() ) == 0 ) { add_guess( 'g', 0 ); return; }
    if( bp.isconvex() ) { add_guess( '8', 0 ); return; }
    add_guess( 'B', 0 ); return;
    }
  if( a1 > a2 ) { add_guess( 'Q', 0 ); return; }
  add_guess( '&', 0 );
  }


// Recognizes 2 block characters.
// ij!%:;=?| plus letters with an accent on top and these ISO 8859-1
// characters: inverted exclamation mark (0xA1), feminine ordinal (0xAA),
// plus-minus sign (0xB1), masculine ordinal (0xBA) and inverted question
// mark (0xBF).
//
// The first block in the list is treated as the lower one and the last
// block as the upper one. The relation between their areas selects the
// family of tests. If no test matches, no guess is added.
//
// NOTE: the ISO 8859-1 characters are written as hex escapes so that this
// file compiles whatever encoding it is stored in. The literals had been
// lost (empty character constants) and were reconstructed from the
// geometry each test checks -- verify against the upstream sources.
void Character::recognize12() throw()
  {
  const Block & b1 = _block_list.front();		// lower block
  const Block & b2 = _block_list.back();		// upper block
  int a1 = b1.area();
  int a2 = b2.area();
  Features f1( b1 );
  Features f2( b2 );

  // Both blocks have almost the same area
  if( similar( a1, a2, 10 ) )
    {
    if( width() > height() || similar( width(), height(), 50 ) )
      { add_guess( '=', 0 ); return; }
    if( b2.height() >= 2 * b2.width() ) { add_guess( '|', 0 ); return; }
    add_guess( ':', 0 ); return;
    }
  // Areas are roughly comparable
  if( similar( a1, a2, 60 ) )
    {
    if( f2.test_solid() == '.' && b1.height() > b2.height() )
      { add_guess( ';', 0 ); return; }
    unsigned char ch = f1.test_solid();
    // A horizontal bar below a bigger block: plus-minus sign
    if( ch == '-' || ch == '_' ) { add_guess( '\xB1', 0 ); return; }
    add_guess( '%', 0 ); return;
    }
  // Lower block is clearly the bigger one: body plus a dot or an accent
  if( a1 > a2 )
    {
    unsigned char ch = f2.test_solid();
    if( ch == '.' || ch == '\'' )
      {
      // Upper block looks like a dot. First look for a merged "ri": the
      // lower block alone reads as 'n' and the dot lies to the right of
      // its horizontal center.
      if( f1.bp.minima( b1.height() / 4 ) == 2 &&
          b1.top() > b2.bottom() && b1.hcenter() < b2.left() )
        {
        Character c1( b1 ); c1.recognize1( vcenter() );
        Character c2( b2 ); c2.recognize1( vcenter() );
        if( c1.guesses() == 1 && c1.guess( 0 ).ch == 'n' &&
            c2.guesses() == 1 && c2.guess( 0 ).ch == '.' )
          {
          // Split column: first column, from the horizontal center on,
          // where seek_bottom stops before the bottom of the block.
          int col, limit = b1.seek_right( b1.vcenter(), b1.hcenter() );
          for( col = b1.hcenter(); col <= limit; ++col )
            if( b1.seek_bottom( b1.vcenter(), col ) < b1.bottom() ) break;
          if( b1.left() < col && col < b1.right() )
            { only_guess( 0, b1.left() ); add_guess( 'r', col - 1 );
            add_guess( 'i', b1.right() ); return; }
          }
        }
      // The remaining candidates all have a single bottom minimum
      if( f1.bp.minima( b1.height() / 4 ) != 1 ) return;
      int hdiff = 0;			// set by bottom_hook()
      if( b1.bottom_hook( &hdiff ) )
        {
        if( abs( hdiff ) < b1.height() / 2 ) return;
        if( hdiff > 0 && f1.rp.increasing( f1.rp.pos( 75 ) ) )
          { add_guess( 'j', 0 ); return; }
        if( hdiff < 0 )
          {
          // Hooked body much wider than the dot: inverted question mark
          if( f1.wp.max() > 2 * f2.wp.max() && f1.lp.minima() == 1 )
            { add_guess( '\xBF', 0 ); return; }
          add_guess( 'i', 0 ); return;
          }
        }
      // Body about as wide as the dot and narrower than it near the top:
      // inverted exclamation mark
      if( similar( f1.wp.max(), f2.wp.max(), 20 ) && f1.wp[1] < f2.wp.max() )
        { add_guess( '\xA1', 0 ); return; }
      if( b1.seek_bottom( b1.vcenter(), b1.hpos( 25 ) ) < b1.bottom() &&
          f1.rp.increasing( f1.rp.pos( 75 ) ) )
        { add_guess( 'j', 0 ); return; }
      add_guess( 'i', 0 ); return;
      }
    // Upper block is an accent. Classify it (acute by default), recognize
    // the lower block alone and try to compose both into one character.
    int slope = 0;			// set by straight()
    unsigned char atype = '\'';
    if( f2.tp.ispit() ) atype = '^';
    else if( f2.rp.straight( &slope ) && slope < 0 ) atype = '`';
    Character c( b1 );
    c.recognize1( c.vcenter() );
    if( c.guesses() ) ch = ISO_8859_1::compose( c.guess( 0 ).ch, atype );
    else ch = 0;
    // If nothing could be composed, report the accent itself
    if( ch ) add_guess( ch, 0 ); else add_guess( atype , 0 );
    return;
    }
  // Upper block is clearly the bigger one
  unsigned char ch = f1.test_solid();
  if( ch == '.' )
    {
    if( similar( b1.width(), b2.width(), 50 ) ) { add_guess( '!', 0 ); return; }
    add_guess( '?', 0 ); return;
    }
  if( ch == '-' || ch == '_' )
    {
    // Underlined block with one hole: ordinal indicators
    if( b2.block_list().size() == 1 )
      {
      const Block & h = b2.block_list().front();
      // Horizontally centered hole: masculine ordinal (underlined 'o')
      if( similar( h.left() - b2.left(), b2.right() - h.right(), 40 ) )
        { add_guess( '\xBA', 0 ); return; }
      // Otherwise: feminine ordinal (underlined 'a')
      add_guess( '\xAA', 0 ); return;
      }
    }
  }


// Recognizes 3 block characters.
// ISO 8859-1 division sign (0xF7) and vowels with diaeresis:
// 0xC4 0xCB 0xCF 0xD6 0xDC (capitals A E I O U) and
// 0xE4 0xEB 0xEF 0xF6 0xFC (small a e i o u).
// ('%' was listed here as a 3 block character, but no code in this
// function produces it.)
//
// The first block in the list is recognized on its own as the base letter;
// the other two blocks are not examined and are assumed to be the two dots
// of the diaeresis. If the base letter is not one of the cases below, no
// further guess is added.
//
// NOTE: the ISO 8859-1 characters are written as hex escapes so that this
// file compiles whatever encoding it is stored in. The literals had been
// lost (empty character constants) and were reconstructed from the base
// letter of each case -- verify against the upstream sources.
void Character::recognize13() throw()
  {
  const Block & b1 = _block_list.front();
  Features f1( b1 );
  // A solid dot plus two more blocks: division sign. There is no return
  // here; the lookup below still runs, but presumably a lone dot yields
  // '.', which matches none of the cases.
  if( f1.test_solid() == '.' ) add_guess( '\xF7', 0 );
  Character c( b1 );
  c.recognize1( c.vcenter() );
  if( c.guesses() )
    switch( c.guess( 0 ).ch )
      {
      case 'A': add_guess( '\xC4', 0 ); return;	// A with diaeresis
      case 'E': add_guess( '\xCB', 0 ); return;	// E with diaeresis
      case 'I': add_guess( '\xCF', 0 ); return;	// I with diaeresis
      case 'O': add_guess( '\xD6', 0 ); return;	// O with diaeresis
      case 'V':					// 'V' is taken as a 'U'
      case 'U': add_guess( '\xDC', 0 ); return;	// U with diaeresis
      case 'a': add_guess( '\xE4', 0 ); return;	// a with diaeresis
      case 'e': add_guess( '\xEB', 0 ); return;	// e with diaeresis
      case 'l':					// 'l' is taken as an 'i'
      case 'i': add_guess( '\xEF', 0 ); return;	// i with diaeresis
      case 'o': add_guess( '\xF6', 0 ); return;	// o with diaeresis
      case 'v':					// 'v' is taken as a 'u'
      case 'u': add_guess( '\xFC', 0 ); return;	// u with diaeresis
      }
  }
