/*  ocrad - Optical Character Recognition program
    Copyright (C) 2003 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include <cctype>
#include <cstdio>
#include <list>
#include <map>
#include <vector>
#include "common.h"
#include "rectangle.h"
#include "block.h"
#include "blockmap.h"
#include "character.h"
#include "textline.h"


// Appends a copy of character 'c' after the last character of the line.
void Textline::add_character( const Character & c ) throw()
  {
  data.insert( data.end(), c );
  }


// Inserts a copy of character 'c' before position 'i'.
// 'i' may be equal to characters(), in which case 'c' is appended.
// Throws Internal_error if 'i' is negative or larger than characters().
void Textline::insert_character( const int i, const Character & c )
						throw( Internal_error )
  {
  // a negative offset from begin() would be undefined behavior
  if( i < 0 ) throw Internal_error( "negative character index (insert)" );
  if( i > characters() ) throw Internal_error( "character index too big (insert)" );
  data.insert( data.begin() + i, c );
  }


// Removes the character at position 'i' from the line.
// Throws Internal_error unless 0 <= i < characters().
void Textline::delete_character( const int i ) throw( Internal_error )
  {
  if( i < 0 ) throw Internal_error( "negative character index (delete)" );
  // 'i == characters()' must be rejected too: erasing end() is undefined
  if( i >= characters() ) throw Internal_error( "character index too big (delete)" );
  data.erase( data.begin() + i );
  }


// Exchanges the characters at positions 'i' and 'j'.
// Throws Internal_error unless both indexes are in [0, characters()).
void Textline::swap_characters( const int i, const int j )
						throw( Internal_error )
  {
  if( i < 0 || j < 0 )
    throw Internal_error( "negative character index (swap_characters)" );
  // an index equal to characters() is already one past the last element
  if( i >= characters() || j >= characters() )
    throw Internal_error( "character index too big (swap_characters)" );
  if( i == j ) return;			// nothing to exchange
  Character tmp = data[i]; data[i] = data[j]; data[j] = tmp;
  }


// Returns a reference to the character at position 'i'.
// Throws Internal_error unless 0 <= i < characters().
Character & Textline::character( const int i ) const throw( Internal_error )
  {
  // a negative index would read before the start of the storage
  if( i < 0 ) throw Internal_error( "negative character index" );
  if( i >= characters() ) throw Internal_error( "character index too big" );
  return data[i];
  }


// Returns the sum of the heights of all the characters in the line
// divided by the number of characters, or 0 if the line is empty.
int Textline::mean_height() const throw()
  {
  const int count = characters();
  if( count == 0 ) return 0;

  int total = 0;
  for( int k = 0; k < count; ++k )
    total += data[k].height();
  return total / count;
  }


// Returns the sum of the widths of all the characters in the line
// divided by the number of characters, or 0 if the line is empty.
int Textline::mean_width() const throw()
  {
  const int count = characters();
  if( count == 0 ) return 0;

  int total = 0;
  for( int k = 0; k < count; ++k )
    total += data[k].width();
  return total / count;
  }


// Returns the mean width of the gaps between consecutive characters.
// Only pairs where the second character starts to the right of the right
// edge of the first one are counted. Returns 0 if the line has fewer than
// two characters or no such pair exists.
int Textline::mean_gap_width() const throw()
  {
  const int count = characters();
  if( count < 2 ) return 0;

  int total = 0;
  int pairs = 0;
  for( int k = 1; k < count; ++k )
    {
    const int cur_left = data[k].left();
    const int prev_right = data[k-1].right();
    if( cur_left > prev_right )
      {
      total += cur_left - prev_right - 1;
      ++pairs;
      }
    }
  return ( pairs != 0 ) ? total / pairs : total;
  }


// Returns the sum of the hcenter() values of all the characters in the
// line divided by the number of characters, or 0 if the line is empty.
int Textline::mean_hcenter() const throw()
  {
  const int count = characters();
  if( count == 0 ) return 0;

  int total = 0;
  for( int k = 0; k < count; ++k )
    total += data[k].hcenter();
  return total / count;
  }


// Returns the sum of the vcenter() values of all the characters in the
// line divided by the number of characters, or 0 if the line is empty.
int Textline::mean_vcenter() const throw()
  {
  const int count = characters();
  if( count == 0 ) return 0;

  int total = 0;
  for( int k = 0; k < count; ++k )
    total += data[k].vcenter();
  return total / count;
  }


// Appends copies of all the characters of line 'l' to this line, in
// order, and then empties 'l'.
// Joining a line with itself is a no-op.
void Textline::join( Textline & l ) throw()
  {
  // Appending a line to itself element by element would never finish,
  // because the source would grow with every character appended.
  if( &l == this ) return;
  data.insert( data.end(), l.data.begin(), l.data.end() );
  l.data.clear();
  }


// Writes the line to 'outfile'.
// If 'graph' or 'recursive' is set, a "mean_height = N" header line is
// written first. Then every character is printed in order by forwarding
// 'graph' and 'recursive' to Character::print, and a newline is written.
void Textline::print( FILE * outfile, const bool graph, const bool recursive )
								const throw()
  {
  const bool with_header = ( graph || recursive );
  if( with_header )
    fprintf( outfile, "mean_height = %d\n", mean_height() );

  int k = 0;
  while( k < characters() )
    {
    character( k ).print( outfile, graph, recursive );
    ++k;
    }
  fputs( "\n", outfile );
  }


void Textline::recognize1() const throw()
  {
  if( characters() == 0 ) return;
  int mh = mean_height(), l, r;

  for( l = 0; l < characters() / 2; ++l )
    if( similar( character( l ).height(), mh, 20 ) ) break;
  for( r = characters() - 1; r > characters() / 2; --r )
    if( similar( character( r ).height(), mh, 20 ) ) break;
  if( r - l < characters() / 2 )
    {
    for( l = 0; l < characters() / 2; ++l )
      if( character( l ).height() > mh ) break;
    for( r = characters() - 1; r > characters() / 2; --r )
      if( character( r ).height() > mh ) break;
    if( r - l < characters() / 2 ) { l = 0; r = characters() - 1; }
    }

  int xl = character( l ).hcenter(), yl = character( l ).vcenter();
  int xr = character( r ).hcenter(), yr = character( r ).vcenter();
  int dx = xr - xl, dy = yr - yl;
  for( int i = 0; i < characters(); ++i )
    {
    int charbox_vcenter = yl;
    if( dx )
      charbox_vcenter += ( dy * ( character( i ).hcenter() - xl ) / dx );
    character( i ).recognize1( charbox_vcenter );
    }
  }


// Second recognition pass over the line.
// Uses the neighbours of each character to refine the guesses already
// stored in its guess map. The passes run in this order:
//   1. some small letters become capitals,
//   2. a small 'p' becomes a capital 'P',
//   3. a small o with accent or diaeresis becomes its capital,
//   4. two adjacent single quotes are joined into a double quote,
//   5. 'O' or 'l' get an additional guess of '0' or '1'.
// Guessed characters may be ISO-8859-1 codes above 127, so every value
// handed to a <cctype> function is first converted to unsigned char;
// passing a negative char to those functions is undefined behavior.
void Textline::recognize2() throw()
  {
  if( characters() == 0 ) return;

  // transform some small letters to capitals
  // 'begin' is the index following the last character whose only guess
  // is whitespace; the inner scan starts there and stops at the next
  // character whose first guess is whitespace.
  for( int i = 0, begin = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.guess_map().size() == 1 )
      {
      char ch = c1.guess_map().begin()->first;
      if( isspace( static_cast< unsigned char >( ch ) ) )
        { begin = i + 1 ; continue; }
      // only letters whose small and capital shapes are alike
      if( ch != 'c' && ch != 'o' && ch != 's' && ch != 'v' && ch != 'w' &&
          ch != 'x' && ch != 'z' ) continue;
      // taller than 5/4 of the mean height: capital without further checks
      if( 4 * c1.height() > 5 * mean_height() )
        { c1.only_guess( toupper( ch ), 0 ); continue; }
      // shorter than 4/5 of the mean height: leave it as a small letter
      if( 5 * c1.height() < 4 * mean_height() ) continue;
      // in between: add the capital as a guess if another character of
      // the same run is a capital of similar height
      for( int j = begin; j < characters(); ++j ) if( j != i )
        {
        Character & c2 = character( j );
        if( c2.guess_map().size() >= 1 )
          {
          char ch2 = c2.guess_map().begin()->first;
          if( isspace( static_cast< unsigned char >( ch2 ) ) ) break;
          if( isupper( static_cast< unsigned char >( ch2 ) ) &&
              similar( c1.height(), c2.height(), 10 ) )
            { c1.add_guess( toupper( ch ), 1 ); break; }
          }
        }
      }
    }

  // transform a small p to a capital p
  // c1 is the candidate 'p' and c2 is the character following it; every
  // 'continue' inside the switch skips to the next iteration of the loop.
  for( int i = characters() - 1; i > 0; --i )
    {
    Character & c1 = character( i - 1 );
    if( c1.guess_map().size() == 1 && c1.guess_map().begin()->first == 'p' )
      {
      Character & c2 = character( i );
      if( c2.guess_map().size() == 0 ) continue;
      switch( c2.guess_map().begin()->first )
        {
        case 'g': case 'j': case 'p': case 'q': case 'y':
                  if( c1.bottom() + 2 > c2.bottom() ) continue; break;
        case 'Q': if( abs( c1.top() - c2.top() ) > 2 ) continue; break;
        default : if( abs( c1.bottom() - c2.bottom() ) > 2 ) continue; break;
        }
      c1.only_guess( 'P', 0 );
      }
    }

  // transform small o with accent or diaeresis to capital
  // NOTE(review): the eight character literals of this section were empty
  // quotes in the reviewed copy (apparently lost in an encoding
  // conversion) and did not compile. They are reconstructed here as the
  // ISO-8859-1 codes of small o with grave, acute, circumflex and
  // diaeresis (0xF2, 0xF3, 0xF4, 0xF6) and of their capitals (0xD2, 0xD3,
  // 0xD4, 0xD6), as suggested by the comment above. Confirm against a
  // pristine copy of the file.
  for( int i = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.block_list().size() >= 2 && c1.guess_map().size() == 1 )
      {
      char ch = c1.guess_map().begin()->first;
      if( ch != '\xF2' && ch != '\xF3' && ch != '\xF4' && ch != '\xF6' )
        continue;
      // the height tested is that of the first block in the block list
      const Block & b = c1.block_list().front();
      if( 4 * b.height() > 5 * mean_height() )
        switch( ch )
          {
          case '\xF2': c1.only_guess( '\xD2', 0 ); break;
          case '\xF3': c1.only_guess( '\xD3', 0 ); break;
          case '\xF4': c1.only_guess( '\xD4', 0 ); break;
          case '\xF6': c1.only_guess( '\xD6', 0 ); break;
          }
      }
    }

  // join two adjacent single quotes into a double quote
  // The loop bound is evaluated again on every iteration because the line
  // gets shorter each time a pair is joined.
  for( int i = 0; i < characters() - 1; ++i )
    {
    Character & c1 = character( i );
    Character & c2 = character( i + 1 );
    if( c1.guess_map().size() == 1 && c2.guess_map().size() == 1 )
      {
      char ch1 = c1.guess_map().begin()->first;
      char ch2 = c2.guess_map().begin()->first;
      if( ( ch1 == '\'' || ch1 == '`' ) && ch1 == ch2 &&
          similar( c1.width(), c2.left() - c1.right(), 60 ) )
        { c1.join( c2 ); c1.only_guess( '"', 0 ); delete_character( i + 1 ); }
      }
    }

  // transform 'O' or 'l' into '0' or '1'
  // Same scan as in the first pass: a digit of similar height in the same
  // run adds the digit as a guess.
  for( int i = 0, begin = 0; i < characters(); ++i )
    {
    Character & c1 = character( i );
    if( c1.guess_map().size() == 1 )
      {
      char ch = c1.guess_map().begin()->first;
      if( isspace( static_cast< unsigned char >( ch ) ) )
        { begin = i + 1 ; continue; }
      if( ch != 'O' && ch != 'l' ) continue;
      for( int j = begin; j < characters(); ++j ) if( j != i )
        {
        Character & c2 = character( j );
        if( c2.guess_map().size() >= 1 )
          {
          char ch2 = c2.guess_map().begin()->first;
          if( isspace( static_cast< unsigned char >( ch2 ) ) ) break;
          if( isdigit( static_cast< unsigned char >( ch2 ) ) &&
              similar( c1.height(), c2.height(), 10 ) )
            { c1.add_guess( (ch == 'l') ? '1' : '0', 1 ); break; }
          }
        }
      }
    }
  }
