#!/usr/bin/perl

# This file is part of The New Aspell
# Copyright (C) 2004 by Kevin Atkinson under the GNU LGPL
# license version 2.0 or 2.1.  You should have received a copy of the
# LGPL license along with this library if you did not you can find it
# at http://www.gnu.org/.

use strict;
use warnings;

use constant { 
  CHAR => 0, TYPE => 1, UPPER => 2, LOWER => 3, TITLE => 4, PLAIN => 5
};

# Forward declaration: makes the ($$$) prototype known before the calls
# in the main loop are compiled.
sub uni_char($$$); # uni def what

# Consume the optional --no-ascii flag first, so that the argument count
# check below only looks at the positional arguments.  (Checking the
# count first let "--no-ascii <data file>" through with no tables.)
my $no_ascii = 0;
if (@ARGV && $ARGV[0] eq '--no-ascii')
{
  $no_ascii = 1;
  shift @ARGV;
}

# At least the Unicode data file and one reference table are required.
if (@ARGV < 2)
  {die "Usage: $0 [--no-ascii] <unicode data file> <textual reference table(s)>\n"}

# Unicode character data, keyed by the first field of each line.  Each
# value is a reference to the list of space-separated fields of that
# line, indexed by the CHAR .. PLAIN constants.
my %unidata;

# The data file is the first remaining argument.  It is removed from
# @ARGV so that only the reference tables are left afterwards.
my $unidata_file = shift @ARGV;

open my $unidata_fh, '<', $unidata_file
  or die "Can't open \"$unidata_file\": $!\n";

while (my $line = <$unidata_fh>) {
  # chomp, not chop: a final line without a newline must keep its last
  # character.  A trailing CR is dropped as well so that it cannot end
  # up inside the last field.
  chomp $line;
  $line =~ s/\r\z//;
  $line =~ s/\s+#.+//;      # strip a trailing "# ..." comment
  next unless $line;
  my @data = split / /, $line;
  $unidata{$data[0]} = \@data;
}

close $unidata_fh;

# Mappings for the table currently being processed.  Keys and values are
# upper-case hex strings: two digits for the 8-bit character code, four
# digits for the Unicode code point.  They live at file level because
# uni_char() reads them.
my %uni_char;   # Unicode code point => character code
my %char_uni;   # character code     => Unicode code point

foreach my $file (@ARGV) {
  # Derive the output name from the table name: "Foo.txt" -> "foo.dat".
  # The pattern is anchored at the end so that the name really has to
  # end in ".txt", as the error message states.
  my ($base) = $file =~ /^(.+)\.txt\z/i
    or die "$file does not end in \".txt\"\n";
  $base = lc $base;
  my $out_file = "$base.dat";

  # Report the file that actually failed, not whatever is in $ARGV[0].
  open my $in,  '<', $file     or die "Can't open \"$file\": $!\n";
  open my $out, '>', $out_file or die "Can't create \"$out_file\": $!\n";

  # Character codes that get an identity mapping when the table does not
  # mention them.  With --no-ascii the ASCII letters (65..90 and 97..122)
  # are left out.
  my @ascii = $no_ascii ? (0..64,91..96,123..127) : (0..127);
  my @chardata;
  %char_uni = ();
  %uni_char = ();

  # Collect the explicit mappings: lines of the form
  # "<=|0x>HH <U+|0x>HHHH"; everything else is ignored.
  while (my $line = <$in>) {
    my ($char_hex, $uni_hex) =
      $line =~ /^\s*(?:=|0x)([A-F0-9]{2})\s+(?:U\+|0x)([A-F0-9]{4})/
        or next;
    my $char = hex($char_hex);
    my $uni  = hex($uni_hex);
    # Diagnostics go to STDERR, like the warning issued by uni_char().
    printf STDERR "Warning remapping '%c' (0x%X) may cause problems with Aspell.\n",
           $char, $char
        if $char != $uni && ($char < 2
                             || (32 <= $char && $char <= 64)
                             || (91 <= $char && $char <= 96)
                             || (123 <= $char && $char <= 127));
    $char_uni{$char_hex} = $uni_hex;
    $uni_char{$uni_hex}  = $char_hex;
  }
  close $in;

  # Fill in identity mappings for the codes in @ascii that the table
  # left undefined.
  foreach my $code (@ascii) {
    my $char = sprintf("%02X", $code);
    next if defined $char_uni{$char};
    my $unichar = "00".$char;
    $char_uni{$char} = $unichar;
    $uni_char{$unichar} = $char;
  }

  # Build one output record per mapped character that has an entry in
  # the Unicode data.  Case and plain forms are translated back into
  # this table's character codes by uni_char().
  foreach my $char (sort keys %char_uni) {
    my $unichar = $char_uni{$char};
    my $info = $unidata{$unichar};
    next unless defined $info;
    $chardata[hex $char] =
      [$char,
       $unichar,
       uc($info->[TYPE]),
       uni_char($info->[UPPER], $char, "upper"),
       uni_char($info->[LOWER], $char, "lower"),
       uni_char($info->[TITLE], $char, "title"),
       uni_char($info->[PLAIN], $char, "plain")];
  }

  print {$out} "# Aspell Character Data File.  Do Not Edit!\n";
  print {$out} "# <char> <uni> <type> <upper> <lower> <title> <plain>\n";
  foreach my $code (0..255) {
    my $d = $chardata[$code];
    if (defined $d) {
      print {$out} join(" ", @$d), "\n";
    } else {
      # No record: emit a placeholder that maps the code to itself, with
      # type "-" and a code point of U+E000 + code.
      my $c = sprintf "%02X", $code;
      printf {$out} "%s %04X - %s %s %s %s\n", $c, 0xE000 + $code, ($c) x 4;
    }
  }

  # Buffered write errors only show up when the handle is closed.
  close $out or die "Can't write \"$out_file\": $!\n";
}

# uni_char UNI, DEF, WHAT
#
# Translate a Unicode code point into the character code of the table
# currently being processed.
#
#   UNI   code point to look up in %uni_char (hex string)
#   DEF   character code (hex string) returned when UNI has no mapping
#   WHAT  name of the mapping being resolved ("upper", "lower", ...);
#         only used in the warning text
#
# Returns the mapped character code, or DEF when there is none.  In the
# latter case a warning is printed to STDERR if both UNI and the code
# point that DEF maps to have type 'L' or 'V' in %unidata.
sub uni_char($$$) {
  my ($uni, $def, $what) = @_;
  my $chr = $uni_char{$uni};
  return $chr if defined $chr;

  # %unidata values are array references, so the type has to be read
  # from the TYPE field; comparing the reference itself with 'L' can
  # never match.  The type is upper-cased because the table writer
  # upper-cases it as well before emitting it.
  my $has_lv_type = sub {
    my ($code) = @_;
    return 0 unless defined $code;
    my $info = $unidata{$code};
    return 0 unless defined $info && defined $info->[TYPE];
    my $type = uc $info->[TYPE];
    return $type eq 'L' || $type eq 'V';
  };

  print STDERR "Warning U+$uni not mapped.  It is needed for the \"$what\" mapping of 0x$def.\n"
    if $has_lv_type->($uni) && $has_lv_type->($char_uni{$def});
  return $def;
}
