#!/usr/bin/perl

# Usage: <script> INPUT_FILE OUTPUT_FILE
#
# INPUT_FILE is opened for reading on the FILE handle and OUTPUT_FILE is
# created (or truncated) for writing on the FILE_OUT handle; the rest of the
# script reads and writes through these two handles.
@ARGV >= 2 or die "Usage: $0 input_file output_file\n";

# Three-argument open: the names are taken literally, so mode characters or a
# pipe symbol inside a file name are never interpreted.  $! says why an open
# failed.
open(FILE, '<', $ARGV[0]) or die "Cannot open input file $ARGV[0]: $!\n";
open(FILE_OUT, '>', $ARGV[1]) or die "Cannot open output file $ARGV[1]: $!\n";

sub lex_order (\@\@) { # lexicographic ordering of two numeric sequences

    # Compare two arrays element by element, numerically.
    #
    # Arguments: two arrays.  Because of the (\@\@) prototype a caller writes
    #            lex_order @x, @y and the sub receives two array references.
    # Returns:   -1, 0 or 1 in the manner of <=>.  The first position where
    #            the elements differ decides; when one sequence is a prefix
    #            of the other, the shorter one sorts first.
    my ($seq1, $seq2) = @_;

    my $last1 = $#{$seq1};
    my $last2 = $#{$seq2};

    my $i = 0;

    # The bounds are tested before the elements so that the loop never looks
    # at an element beyond the end of either sequence.
    while ( $i <= $last1 && $i <= $last2 && $seq1->[$i] == $seq2->[$i] ) { $i++ }

    # Ran off the end of at least one sequence: everything they share is
    # equal, so the lengths decide.
    if ( $i > $last1 || $i > $last2 ) {

        return $last1 <=> $last2;

    }

    return ( $seq1->[$i] <=> $seq2->[$i] );

}

sub numerically { # sort comparator: numeric, largest value first
    return $b <=> $a;
}

sub alphabetically { # sort comparator: order $a and $b by the character ranks stored in %main_order

    # Each operand is cut into single characters and every character is
    # replaced by its entry in %main_order; the two rank sequences are then
    # compared by lex_order.  A character without an entry contributes undef,
    # which the numeric comparisons in lex_order treat as 0.
    my @ranks_a = @main_order{ ( split //, $a ) };
    my @ranks_b = @main_order{ ( split //, $b ) };

    return lex_order( @ranks_a, @ranks_b );

}

sub lexicographically { # sort comparator for strings of numbers separated by single spaces

    # Break both operands at every single space and let lex_order compare
    # the resulting fields numerically, position by position.
    my @fields_a = split / /, $a;
    my @fields_b = split / /, $b;

    return lex_order( @fields_a, @fields_b );
}

# Collation alphabet: the position of a character in this string is its rank
# when index keys are sorted (punctuation first, then digits, then letters
# with each upper-case letter directly before its lower-case form).
$alphabet = "\#\$\%^&*<>[]{}()+-=_|\\,:;~`.?!\'\"\@0123456789AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";
@main_set = split //, $alphabet;

# %main_order maps every character of the alphabet to its position in it.
# The positions are walked from last to first so that, should a character
# ever be listed twice, its first position is the one that remains stored.
$main_order{ $main_set[$_] } = $_ for reverse 0 .. $#main_set;

# Text placed before (@ldelim) and after (@rdelim) a reference, selected by
# the numeric rank that accompanies the reference.  Ranks 0, 1, 3, 4 and 5
# have entries; any other rank has none, so such a reference is written
# without delimiters.
$ldelim[0] = "\\("; $rdelim[0] = ")";
$ldelim[1] = "\\["; $rdelim[1] = "]";
$ldelim[3] = "\\g"; $rdelim[3] = "g";
$ldelim[4] = "\\f"; $rdelim[4] = "f";
$ldelim[5] = "\\e"; $rdelim[5] = "e";

# Read the index records from FILE and collect them in %index.
#
# A record is a line ending in
#     \<d>TI <rank>{\<type>{<key>}}{<nspace>}{<pageno>}{<ref>}
# where <d> is a single character naming the domain and <rank> is a number.
# Lines that do not have this shape are ignored.  For each domain/key pair
# %index stores
#     refs   - list of "<ref> <rank>" strings, one per record
#     nspace - <nspace> of the most recent record
#     type   - <type> of the most recent record
# <pageno> is matched but not stored.
while ( my $line = <FILE> ) {

    # The record must end the line; (?:\n|\z) also accepts a last line that
    # has no terminating newline.
    next unless $line =~ /\\(.)TI\s*([0-9]+)\s*\{(\\[a-z]+)\s*\{(.+)\}\}\{(.+)\}\{(.+)\}\{(.+)\}(?:\n|\z)/;

    my ($domain, $rank, $type, $key, $nspace, undef, $ref) = ($1, $2, $3, $4, $5, $6, $7);

    # Every {NN} group in the key is a character code; replace it by the
    # character itself so that keys can be compared character by character.
    $key =~ s/\{([0-9]+)\}/pack "c1", $1/eg;

    push @{ $index{$domain}{$key}{refs} }, "$ref $rank";
    $index{$domain}{$key}{nspace} = $nspace;
    $index{$domain}{$key}{type}   = $type;

}

# Write the collected index to FILE_OUT.
#
# Domains are written in the default (string) sort order of their names.
# Every domain after the first is preceded by a line
#     \indexseparator{<domain>}{<number of domains already written>}
# Within a domain there is one \GI line per key, keys ordered by
# "alphabetically", each followed by its distinct references ordered by
# "lexicographically".
my $domain_count = 0;

foreach my $domain ( sort keys %index ) {

    if ( $domain_count > 0 ) {

        print FILE_OUT "\\indexseparator{$domain}{$domain_count}\n";

    }

    $domain_count++;

    foreach my $key ( sort alphabetically keys %{ $index{$domain} } ) {

        my $entry = $index{$domain}{$key};

        # Keep one copy of each "ref rank" string, then sort them.
        my %seen;
        my @unique   = grep { !$seen{$_}++ } @{ $entry->{refs} };
        my @ordered  = sort lexicographically @unique;

        # The rank is whatever follows the last space, so a reference that
        # itself contains spaces is kept whole.  The rank selects the
        # delimiters wrapped around the reference.
        my @ref_list = map {
            my $cut = rindex $_, ' ';
            my ($ref, $rank) = ( substr( $_, 0, $cut ), substr( $_, $cut + 1 ) );
            "$ldelim[$rank]$ref$rdelim[$rank]"
        } @ordered;

        # Turn the key back into a run of {NN} character codes.  "C" yields
        # the unsigned code (0..255), so characters above 127 do not come
        # out as negative numbers; /s lets "." cover a newline character too.
        my $ukey = $key;
        $ukey =~ s/(.)/"\{".(unpack "C1", $1)."\}"/egs;

        # The part after the % repeats the key and the references in
        # readable form.
        print FILE_OUT "\\GI{$entry->{nspace}}{$entry->{type}}{$ukey}, ",
             (join ', ', @ref_list), ". \% $key, sec nos. ", (join ', ', @ref_list ), "\n";

    }
}

