#!/usr/local/bin/perl -w

use XML::Twig;
use Getopt::Std;
use File::Basename;
use strict;

my %opt;
getopts('do:i:', \%opt);

# Debug mode (-d): announce the program version and report every
# setting on STDERR as it is worked out.
my $DEBUG = defined($opt{d}) ? 1 : 0;
print STDERR 'This is bib2xml, $Id: bib2xml,v 1.16 2004/05/09 11:07:07 twid Exp $', "\n"
  if $DEBUG;

# Output encoding (-o).  Three values travel together: the I/O layer used
# when opening the output file, the XML::Twig output filter, and the
# encoding name handed to XML::Twig's set_encoding.  utf8 is the default.
my ($layer, $filter, $encoding) = ('utf8', undef, 'UTF-8');
if (defined($opt{o}) && $opt{o} ne 'utf8') {
  my %output_setup = (
    latin1 => [ 'bytes', 'latin1',   'ISO-8859-1' ],
    ascii  => [ 'bytes', 'safe_hex', 'US-ASCII'   ],
  );
  my $setup = $output_setup{$opt{o}}
    or die "Unknown output encoding '$opt{o}'.";
  ($layer, $filter, $encoding) = @$setup;
}
if ($DEBUG) {
  print STDERR "Output encoding: $encoding\n";
  print STDERR "Output layer: $layer\n" if $layer;
  print STDERR "Output filter: $filter\n" if $filter;
}

# Input encoding (-i).  $accect8bit (sic) tells the conversion loop whether
# bytes above 0x7f are allowed in the BibTeX output at all.
my ($accect8bit, $inputenc) = (0, 'ascii');
if (defined($opt{i}) && $opt{i} ne 'ascii') {
  $opt{i} eq 'latin1'
    or die "Unknown input encoding '$opt{i}'.";
  ($accect8bit, $inputenc) = (1, 'latin1');
}
print STDERR "Input encoding: $inputenc\n" if $DEBUG;

# Get the filename.
my $input_filename = shift @ARGV;
die "Usage: $0 filename" unless $input_filename;

# $basename has both the directory and a trailing .bib removed (the output
# therefore lands in the current directory); $input_basename keeps the
# directory and only loses the .bib suffix.
my ($basename, $path) = fileparse($input_filename, '\.bib');
(my $input_basename = $input_filename) =~ s/\.bib$//;
my $output_filename = "$basename.xml";
if ($DEBUG) {
  print STDERR "Input file name: $input_filename\n";
  print STDERR "Input file name without suffix: $input_basename\n";
  print STDERR "Output file name: $output_filename\n";
}

# Don't bother dealing with existing files -- exit
# and let the user sort it out:
die "I won't overwrite $output_filename." if -e $output_filename;

# Create a LaTeX-style aux file to instruct BibTeX: cite every entry
# (\citation{*}), format with the bib2xml style, and read the database
# named on the command line.
open(my $aux, '>', "tmp$$.aux")
  or die "Cannot write fake LaTeX aux file tmp$$.aux: $!.";
print {$aux} <<"END_AUX";
\\citation{*}
\\bibstyle{bib2xml}
\\bibdata{$input_basename}
END_AUX
# Buffered write errors (e.g. a full disk) only show up at close time.
close($aux)
  or die "Cannot write fake LaTeX aux file tmp$$.aux: $!.";

# Run BibTeX:
# As before, BibTeX's own exit status is not treated as fatal; if it
# produced no .bbl file, the open further down reports that.  Only a
# failure to start the program at all (system returning -1) is reported
# here, because the later "cannot open bbl file" message would hide the
# real cause.
system("bibtex -min-crossrefs=1 tmp$$") == -1
  and die "Cannot run bibtex: $!.";

# Now transform the BibTeX output into valid XML:
#
# Every line of tmp$$.bbl is cleaned up and appended to tmp$$.xml.  The
# temporary file is declared as US-ASCII; bytes above 0x7f are either
# rejected or rewritten as numeric character references below.
open(my $tmp_xml, '>', "tmp$$.xml")
  or die "Cannot create temporary XML file tmp$$.xml: $!.";
print {$tmp_xml} "<?xml version='1.0' encoding='US-ASCII'?>\n";
print {$tmp_xml} "<!DOCTYPE biblio SYSTEM \"bibulus.dtd\" []>\n";
open(my $bbl, '<', "tmp$$.bbl")
  or die "Cannot open bbl file tmp$$.bbl: $!.";
while (<$bbl>) {

  if (/([\x80-\xff])/) {        # handle 8-bit characters
    if (!$accect8bit) {
      die "Illegal 8-bit character (did you forget -i?): $1.";
    }
    if ($inputenc eq 'latin1') {
      # 0x80-0x9f are not printable characters in ISO-8859-1.  0xa0 is:
      # it is the no-break space, so it is converted, not rejected.
      if (/([\x80-\x9f])/) {
        die "This character does not exist in Latin-1: $1.";
      }
      # Fortunately, it's very easy to convert Latin-1 to Unicode:
      s/([\xa0-\xff])/eschex($1)/eg; # '\xab' -> '&#xab;'
    }
  }

  # Clean up BibTeX output:
  s/\%\n//;                     # pseudo line breaks
  s/\\-//g;                     # hyphenation points
  # Non-breakable spaces.  A tilde directly after a backslash is the \~
  # accent command and must survive until the accent table below.
  s/(?<!\\)~/ /g;
  s/\\\&/\&amp;/g;              # &
  s/[\t\ ]+/ /g;                # collapse runs of blanks and tabs

  # fix dates
  s/month='(\d+) (\d+)'/day='$1' month='$2'/;   # 'D M' -> day and month
  s/month='(\d+)(-|, )(\d+)'/month='$3'/;       # 'A-B' / 'A, B' -> keep B
  # Drop {\noopsort{...}} groups.  The braces are escaped because they are
  # literal characters here (newer Perl versions complain about unescaped
  # literal left braces in patterns).
  s/\{\\noopsort\{.*?\}\}//g;
  # Replace the text of <edition> by a number and the value of xml:lang by
  # a language code (first occurrence on the line only).
  s/(?<=\<edition\>)(.*?)(?=\<\/edition\>)/ordinal($1)/e;
  s/(?<=xml:lang=\")(.*?)(?=\")/language($1)/e;

  # Non-ASCII characters (conversion to Unicode):
  # This list was produced automatically -- should be checked!
  # What other character sequences are often used in LaTeX files?
  #
  # The substitutions run strictly in this order.  The circumflex accent
  # is written \\\^ because a bare ^ inside a pattern is the start-of-line
  # anchor, not a literal character.
  s/\\`\{?A\}?/\&#x00c0;/g;
  s/\\'\{?A\}?/\&#x00c1;/g;
  s/\\\^\{?A\}?/\&#x00c2;/g;
  s/\\~\{?A\}?/\&#x00c3;/g;
  s/\\"\{?A\}?/\&#x00c4;/g;
  s/\\`\{?a\}?/\&#x00e0;/g;
  s/\\'\{?a\}?/\&#x00e1;/g;
  s/\\\^\{?a\}?/\&#x00e2;/g;
  s/\\~\{?a\}?/\&#x00e3;/g;
  s/\\"\{?a\}?/\&#x00e4;/g;
  s/\\=\{?A\}?/\&#x0100;/g;
  s/\\=\{?a\}?/\&#x0101;/g;
  s/\\\.\{?A\}?/\&#x0226;/g;
  s/\\\.\{?a\}?/\&#x0227;/g;
  s/\\AE/\&#x00c6;/g;
  s/\\ae/\&#x00e6;/g;
  s/\\=\{?AE\}?/\&#x01e2;/g;
  s/\\=\{?ae\}?/\&#x01e3;/g;
  s/\\'\{?AE\}?/\&#x01fc;/g;
  s/\\'\{?ae\}?/\&#x01fd;/g;
  s/\\\.\{?B\}?/\&#x1e02;/g;
  s/\\\.\{?b\}?/\&#x1e03;/g;
  s/\\b\{?B\}?/\&#x1e06;/g;
  s/\\b\{?b\}?/\&#x1e07;/g;
  s/\\b\{?D\}?/\&#x1e0e;/g;
  s/\\b\{?d\}?/\&#x1e0f;/g;
  s/\\b\{?h\}?/\&#x1e96;/g;
  s/\\b\{?K\}?/\&#x1e34;/g;
  s/\\b\{?k\}?/\&#x1e35;/g;
  s/\\b\{?L\}?/\&#x1e3a;/g;
  s/\\b\{?l\}?/\&#x1e3b;/g;
  s/\\b\{?N\}?/\&#x1e48;/g;
  s/\\b\{?n\}?/\&#x1e49;/g;
  s/\\b\{?R\}?/\&#x1e5e;/g;
  s/\\b\{?r\}?/\&#x1e5f;/g;
  s/\\b\{?T\}?/\&#x1e6e;/g;
  s/\\b\{?t\}?/\&#x1e6f;/g;
  s/\\b\{?Z\}?/\&#x1e94;/g;
  s/\\b\{?z\}?/\&#x1e95;/g;
  s/\\c(\{| )C\}?/\&#x00c7;/g;
  s/\\c(\{| )c\}?/\&#x00e7;/g;
  s/\\c(\{| )D\}?/\&#x1e10;/g;
  s/\\c(\{| )d\}?/\&#x1e11;/g;
  s/\\c(\{| )E\}?/\&#x0228;/g;
  s/\\c(\{| )e\}?/\&#x0229;/g;
  s/\\c(\{| )G\}?/\&#x0122;/g;
  s/\\c(\{| )g\}?/\&#x0123;/g;
  s/\\c(\{| )H\}?/\&#x1e28;/g;
  s/\\c(\{| )h\}?/\&#x1e29;/g;
  s/\\c(\{| )K\}?/\&#x0136;/g;
  s/\\c(\{| )k\}?/\&#x0137;/g;
  s/\\c(\{| )L\}?/\&#x013b;/g;
  s/\\c(\{| )l\}?/\&#x013c;/g;
  s/\\c(\{| )N\}?/\&#x0145;/g;
  s/\\c(\{| )n\}?/\&#x0146;/g;
  s/\\c(\{| )R\}?/\&#x0156;/g;
  s/\\c(\{| )r\}?/\&#x0157;/g;
  s/\\c(\{| )S\}?/\&#x015e;/g;
  s/\\c(\{| )s\}?/\&#x015f;/g;
  s/\\c(\{| )T\}?/\&#x0162;/g;
  s/\\c(\{| )t\}?/\&#x0163;/g;
  s/\\'\{?C\}?/\&#x0106;/g;
  s/\\'\{?c\}?/\&#x0107;/g;
  s/\\\^\{?C\}?/\&#x0108;/g;
  s/\\\^\{?c\}?/\&#x0109;/g;
  s/\\\.\{?C\}?/\&#x010a;/g;
  s/\\\.\{?c\}?/\&#x010b;/g;
  s/\\\.\{?D\}?/\&#x1e0a;/g;
  s/\\\.\{?d\}?/\&#x1e0b;/g;
  # \DH and \dh (eth) have to be handled before the dot-below rules:
  # the pattern for \d + h further down also matches the text "\dh".
  s/\\DH/\&#x00d0;/g;
  s/\\dh/\&#x00f0;/g;
  s/\\d\{?A\}?/\&#x1ea0;/g;
  s/\\d\{?a\}?/\&#x1ea1;/g;
  s/\\d\{?B\}?/\&#x1e04;/g;
  s/\\d\{?b\}?/\&#x1e05;/g;
  s/\\d\{?D\}?/\&#x1e0c;/g;
  s/\\d\{?d\}?/\&#x1e0d;/g;
  s/\\d\{?E\}?/\&#x1eb8;/g;
  s/\\d\{?e\}?/\&#x1eb9;/g;
  s/\\d\{?H\}?/\&#x1e24;/g;
  s/\\d\{?h\}?/\&#x1e25;/g;
  s/\\d\{?I\}?/\&#x1eca;/g;
  s/\\d\{?i\}?/\&#x1ecb;/g;
  s/\\d\{?K\}?/\&#x1e32;/g;
  s/\\d\{?k\}?/\&#x1e33;/g;
  s/\\d\{?L\}?/\&#x1e36;/g;
  s/\\d\{?l\}?/\&#x1e37;/g;
  s/\\d\{?M\}?/\&#x1e42;/g;
  s/\\d\{?m\}?/\&#x1e43;/g;
  s/\\d\{?N\}?/\&#x1e46;/g;
  s/\\d\{?n\}?/\&#x1e47;/g;
  s/\\d\{?O\}?/\&#x1ecc;/g;
  s/\\d\{?o\}?/\&#x1ecd;/g;
  s/\\i/\&#x0131;/g;
  s/\\d\{?R\}?/\&#x1e5a;/g;
  s/\\d\{?r\}?/\&#x1e5b;/g;
  s/\\d\{?S\}?/\&#x1e62;/g;
  s/\\d\{?s\}?/\&#x1e63;/g;
  s/\\d\{?T\}?/\&#x1e6c;/g;
  s/\\d\{?t\}?/\&#x1e6d;/g;
  s/\\d\{?U\}?/\&#x1ee4;/g;
  s/\\d\{?u\}?/\&#x1ee5;/g;
  s/\\d\{?V\}?/\&#x1e7e;/g;
  s/\\d\{?v\}?/\&#x1e7f;/g;
  s/\\d\{?W\}?/\&#x1e88;/g;
  s/\\d\{?w\}?/\&#x1e89;/g;
  s/\\d\{?Y\}?/\&#x1ef4;/g;
  s/\\d\{?y\}?/\&#x1ef5;/g;
  s/\\d\{?Z\}?/\&#x1e92;/g;
  s/\\d\{?z\}?/\&#x1e93;/g;
  s/\\`\{?E\}?/\&#x00c8;/g;
  s/\\'\{?E\}?/\&#x00c9;/g;
  s/\\\^\{?E\}?/\&#x00ca;/g;
  s/\\"\{?E\}?/\&#x00cb;/g;
  s/\\`\{?e\}?/\&#x00e8;/g;
  s/\\'\{?e\}?/\&#x00e9;/g;
  s/\\\^\{?e\}?/\&#x00ea;/g;
  s/\\"\{?e\}?/\&#x00eb;/g;
  s/\\=\{?E\}?/\&#x0112;/g;
  s/\\=\{?e\}?/\&#x0113;/g;
  s/\\\.\{?E\}?/\&#x0116;/g;
  s/\\\.\{?e\}?/\&#x0117;/g;
  s/\\~\{?E\}?/\&#x1ebc;/g;
  s/\\~\{?e\}?/\&#x1ebd;/g;
  s/\\NG/\&#x014a;/g;
  s/\\ng/\&#x014b;/g;
  s/\\\.\{?F\}?/\&#x1e1e;/g;
  s/\\\.\{?f\}?/\&#x1e1f;/g;
  s/\\\^\{?G\}?/\&#x011c;/g;
  s/\\\^\{?g\}?/\&#x011d;/g;
  s/\\\.\{?G\}?/\&#x0120;/g;
  s/\\\.\{?g\}?/\&#x0121;/g;
  s/\\'\{?G\}?/\&#x01f4;/g;
  s/\\'\{?g\}?/\&#x01f5;/g;
  s/\\=\{?G\}?/\&#x1e20;/g;
  s/\\=\{?g\}?/\&#x1e21;/g;
  s/\\\^\{?H\}?/\&#x0124;/g;
  s/\\\^\{?h\}?/\&#x0125;/g;
  s/\\\.\{?H\}?/\&#x1e22;/g;
  s/\\\.\{?h\}?/\&#x1e23;/g;
  s/\\"\{?H\}?/\&#x1e26;/g;
  s/\\"\{?h\}?/\&#x1e27;/g;
  s/\\H\{?O\}?/\&#x0150;/g;
  s/\\H\{?o\}?/\&#x0151;/g;
  s/\\H\{?U\}?/\&#x0170;/g;
  s/\\H\{?u\}?/\&#x0171;/g;
  s/\\`\{?I\}?/\&#x00cc;/g;
  s/\\'\{?I\}?/\&#x00cd;/g;
  s/\\\^\{?I\}?/\&#x00ce;/g;
  s/\\"\{?I\}?/\&#x00cf;/g;
  s/\\`\{?i\}?/\&#x00ec;/g;
  s/\\'\{?i\}?/\&#x00ed;/g;
  s/\\\^\{?i\}?/\&#x00ee;/g;
  s/\\"\{?i\}?/\&#x00ef;/g;
  s/\\~\{?I\}?/\&#x0128;/g;
  s/\\~\{?i\}?/\&#x0129;/g;
  s/\\=\{?I\}?/\&#x012a;/g;
  s/\\=\{?i\}?/\&#x012b;/g;
  s/\\\.\{?I\}?/\&#x0130;/g;
  s/\\\^\{?J\}?/\&#x0134;/g;
  s/\\\^\{?\\j\}?/\&#x0135;/g;
  s/\\k(\{| )A\}?/\&#x0104;/g;
  s/\\k(\{| )a\}?/\&#x0105;/g;
  s/\\k(\{| )E\}?/\&#x0118;/g;
  s/\\k(\{| )e\}?/\&#x0119;/g;
  s/\\k(\{| )I\}?/\&#x012e;/g;
  s/\\k(\{| )i\}?/\&#x012f;/g;
  s/\\k(\{| )O\}?/\&#x01ea;/g;
  s/\\k(\{| )o\}?/\&#x01eb;/g;
  s/\\k(\{| )U\}?/\&#x0172;/g;
  s/\\k(\{| )u\}?/\&#x0173;/g;
  s/\\'\{?K\}?/\&#x1e30;/g;
  s/\\'\{?k\}?/\&#x1e31;/g;
  s/\\'\{?L\}?/\&#x0139;/g;
  s/\\'\{?l\}?/\&#x013a;/g;
  s/\\OE/\&#x0152;/g;
  s/\\oe/\&#x0153;/g;
  s/\\'\{?M\}?/\&#x1e3e;/g;
  s/\\'\{?m\}?/\&#x1e3f;/g;
  s/\\\.\{?M\}?/\&#x1e40;/g;
  s/\\\.\{?m\}?/\&#x1e41;/g;
  s/\\~\{?N\}?/\&#x00d1;/g;
  s/\\~\{?n\}?/\&#x00f1;/g;
  s/\\'\{?N\}?/\&#x0143;/g;
  s/\\'\{?n\}?/\&#x0144;/g;
  s/\\`\{?N\}?/\&#x01f8;/g;
  s/\\`\{?n\}?/\&#x01f9;/g;
  s/\\\.\{?N\}?/\&#x1e44;/g;
  s/\\\.\{?n\}?/\&#x1e45;/g;
  s/\\`\{?O\}?/\&#x00d2;/g;
  s/\\'\{?O\}?/\&#x00d3;/g;
  s/\\\^\{?O\}?/\&#x00d4;/g;
  s/\\~\{?O\}?/\&#x00d5;/g;
  s/\\"\{?O\}?/\&#x00d6;/g;
  s/\\`\{?o\}?/\&#x00f2;/g;
  s/\\'\{?o\}?/\&#x00f3;/g;
  s/\\\^\{?o\}?/\&#x00f4;/g;
  s/\\~\{?o\}?/\&#x00f5;/g;
  s/\\"\{?o\}?/\&#x00f6;/g;
  s/\\=\{?O\}?/\&#x014c;/g;
  s/\\=\{?o\}?/\&#x014d;/g;
  s/\\\.\{?O\}?/\&#x022e;/g;
  s/\\\.\{?o\}?/\&#x022f;/g;
  s/\\'\{?P\}?/\&#x1e54;/g;
  s/\\'\{?p\}?/\&#x1e55;/g;
  s/\\\.\{?P\}?/\&#x1e56;/g;
  s/\\\.\{?p\}?/\&#x1e57;/g;
  s/\\r(\{| )A\}?/\&#x00c5;/g;
  s/\\r(\{| )a\}?/\&#x00e5;/g;
  s/\\r(\{| )U\}?/\&#x016e;/g;
  s/\\r(\{| )u\}?/\&#x016f;/g;
  s/\\r(\{| )w\}?/\&#x1e98;/g;
  s/\\'\{?R\}?/\&#x0154;/g;
  s/\\'\{?r\}?/\&#x0155;/g;
  s/\\\.\{?R\}?/\&#x1e58;/g;
  s/\\\.\{?r\}?/\&#x1e59;/g;
  s/\\r(\{| )y\}?/\&#x1e99;/g;
  s/\\'\{?S\}?/\&#x015a;/g;
  s/\\'\{?s\}?/\&#x015b;/g;
  s/\\\^\{?S\}?/\&#x015c;/g;
  s/\\\^\{?s\}?/\&#x015d;/g;
  s/\\\.\{?S\}?/\&#x1e60;/g;
  s/\\\.\{?s\}?/\&#x1e61;/g;
  s/\\ss/\&#x00df;/g;
  s/\\\.\{?T\}?/\&#x1e6a;/g;
  s/\\\.\{?t\}?/\&#x1e6b;/g;
  s/\\"\{?t\}?/\&#x1e97;/g;
  s/\\TH/\&#x00de;/g;
  s/\\th/\&#x00fe;/g;
  s/\\u(\{| )A\}?/\&#x0102;/g;
  s/\\u(\{| )a\}?/\&#x0103;/g;
  s/\\u(\{| )E\}?/\&#x0114;/g;
  s/\\u(\{| )e\}?/\&#x0115;/g;
  s/\\u(\{| )G\}?/\&#x011e;/g;
  s/\\u(\{| )g\}?/\&#x011f;/g;
  s/\\u(\{| )I\}?/\&#x012c;/g;
  s/\\u(\{| )i\}?/\&#x012d;/g;
  s/\\u(\{| )O\}?/\&#x014e;/g;
  s/\\u(\{| )o\}?/\&#x014f;/g;
  s/\\u(\{| )U\}?/\&#x016c;/g;
  s/\\u(\{| )u\}?/\&#x016d;/g;
  s/\\`\{?U\}?/\&#x00d9;/g;
  s/\\'\{?U\}?/\&#x00da;/g;
  s/\\\^\{?U\}?/\&#x00db;/g;
  s/\\"\{?U\}?/\&#x00dc;/g;
  s/\\`\{?u\}?/\&#x00f9;/g;
  s/\\'\{?u\}?/\&#x00fa;/g;
  s/\\\^\{?u\}?/\&#x00fb;/g;
  s/\\"\{?u\}?/\&#x00fc;/g;
  s/\\~\{?U\}?/\&#x0168;/g;
  s/\\~\{?u\}?/\&#x0169;/g;
  s/\\=\{?U\}?/\&#x016a;/g;
  s/\\=\{?u\}?/\&#x016b;/g;
  s/\\v(\{| )A\}?/\&#x01cd;/g;
  s/\\v(\{| )a\}?/\&#x01ce;/g;
  s/\\v(\{| )C\}?/\&#x010c;/g;
  s/\\v(\{| )c\}?/\&#x010d;/g;
  s/\\v(\{| )D\}?/\&#x010e;/g;
  s/\\v(\{| )d\}?/\&#x010f;/g;
  s/\\v(\{| )DZ\}?/\&#x01c4;/g;
  s/\\v(\{| )dz\}?/\&#x01c6;/g;
  s/\\v(\{| )E\}?/\&#x011a;/g;
  s/\\v(\{| )e\}?/\&#x011b;/g;
  s/\\v(\{| )EZH\}?/\&#x01ee;/g;
  s/\\v(\{| )ezh\}?/\&#x01ef;/g;
  s/\\v(\{| )G\}?/\&#x01e6;/g;
  s/\\v(\{| )g\}?/\&#x01e7;/g;
  s/\\v(\{| )H\}?/\&#x021e;/g;
  s/\\v(\{| )h\}?/\&#x021f;/g;
  s/\\v(\{| )I\}?/\&#x01cf;/g;
  s/\\v(\{| )i\}?/\&#x01d0;/g;
  s/\\v(\{| )j\}?/\&#x01f0;/g;
  s/\\v(\{| )K\}?/\&#x01e8;/g;
  s/\\v(\{| )k\}?/\&#x01e9;/g;
  s/\\v(\{| )L\}?/\&#x013d;/g;
  s/\\v(\{| )l\}?/\&#x013e;/g;
  s/\\v(\{| )N\}?/\&#x0147;/g;
  s/\\v(\{| )n\}?/\&#x0148;/g;
  s/\\v(\{| )O\}?/\&#x01d1;/g;
  s/\\v(\{| )o\}?/\&#x01d2;/g;
  s/\\v(\{| )R\}?/\&#x0158;/g;
  s/\\v(\{| )r\}?/\&#x0159;/g;
  s/\\v(\{| )S\}?/\&#x0160;/g;
  s/\\v(\{| )s\}?/\&#x0161;/g;
  s/\\v(\{| )T\}?/\&#x0164;/g;
  s/\\v(\{| )t\}?/\&#x0165;/g;
  s/\\v(\{| )U\}?/\&#x01d3;/g;
  s/\\v(\{| )u\}?/\&#x01d4;/g;
  s/\\~\{?V\}?/\&#x1e7c;/g;
  s/\\~\{?v\}?/\&#x1e7d;/g;
  s/\\v(\{| )Z\}?/\&#x017d;/g;
  s/\\v(\{| )z\}?/\&#x017e;/g;
  s/\\\^\{?W\}?/\&#x0174;/g;
  s/\\\^\{?w\}?/\&#x0175;/g;
  s/\\`\{?W\}?/\&#x1e80;/g;
  s/\\`\{?w\}?/\&#x1e81;/g;
  s/\\'\{?W\}?/\&#x1e82;/g;
  s/\\'\{?w\}?/\&#x1e83;/g;
  s/\\"\{?W\}?/\&#x1e84;/g;
  s/\\"\{?w\}?/\&#x1e85;/g;
  s/\\\.\{?W\}?/\&#x1e86;/g;
  s/\\\.\{?w\}?/\&#x1e87;/g;
  s/\\\.\{?X\}?/\&#x1e8a;/g;
  s/\\\.\{?x\}?/\&#x1e8b;/g;
  s/\\"\{?X\}?/\&#x1e8c;/g;
  s/\\"\{?x\}?/\&#x1e8d;/g;
  s/\\'\{?Y\}?/\&#x00dd;/g;
  s/\\'\{?y\}?/\&#x00fd;/g;
  s/\\"\{?y\}?/\&#x00ff;/g;
  s/\\\^\{?Y\}?/\&#x0176;/g;
  s/\\\^\{?y\}?/\&#x0177;/g;
  s/\\"\{?Y\}?/\&#x0178;/g;
  s/\\=\{?Y\}?/\&#x0232;/g;
  s/\\=\{?y\}?/\&#x0233;/g;
  s/\\\.\{?Y\}?/\&#x1e8e;/g;
  s/\\\.\{?y\}?/\&#x1e8f;/g;
  s/\\`\{?Y\}?/\&#x1ef2;/g;
  s/\\`\{?y\}?/\&#x1ef3;/g;
  s/\\~\{?Y\}?/\&#x1ef8;/g;
  s/\\~\{?y\}?/\&#x1ef9;/g;
  s/\\'\{?Z\}?/\&#x0179;/g;
  s/\\'\{?z\}?/\&#x017a;/g;
  s/\\\.\{?Z\}?/\&#x017b;/g;
  s/\\\.\{?z\}?/\&#x017c;/g;
  s/\\\^\{?Z\}?/\&#x1e90;/g;
  s/\\\^\{?z\}?/\&#x1e91;/g;

  # Now deal with LaTeX fonts and other scoping stuff:
  # one brace group (with an optional \command directly in front of it) is
  # resolved per iteration until no group is left on the line.  The
  # captures are $2 = command name and $3 = text between the braces; the
  # pattern has no fourth group.
  1 while s/(\\(\w+))?\{(.*?)\}/repl($2,$3)/e;

  print {$tmp_xml} $_;
}
close($bbl);
# Buffered write errors only show up at close time.
close($tmp_xml)
  or die "Cannot write temporary XML file tmp$$.xml: $!.";

# Now prettyprint it
#
# The temporary file is parsed before the output file is opened: if the
# generated XML is not well-formed the parser dies, and no empty output
# file is left behind to trip the "won't overwrite" check on the next run.
my $bib = XML::Twig->new(pretty_print => 'indented',
                         output_filter => $filter);
$bib->parsefile("tmp$$.xml");
$bib->set_encoding($encoding);

open(my $out, ">:$layer", $output_filename)
  or die "Cannot create XML file $output_filename: $!.";
$bib->flush($out);
# Buffered write errors (e.g. a full disk) only show up at close time.
close($out)
  or die "Cannot write XML file $output_filename: $!.";

# clean up the mess
if ($DEBUG) {
  print STDERR "Since you're running in debugging mode,\n";
  print STDERR "I'll leave the tmp$$.* files around.\n";
} else {
  unlink glob("tmp$$.*");
}

exit 0;                         # Goodbye!


# Transform an edition description into digits.
#
# English ordinal words from "first" to "sixteenth" are looked for first
# (case-insensitively, anywhere in the text, in ascending order); failing
# that, the first run of digits in the text is returned.  Text containing
# neither is returned unchanged after a warning.
sub ordinal {
  my ($t) = @_;

  # Position in this list (counting from 1) is the edition number.
  # I guess this list should be continued...
  my @words = qw(first second third fourth fifth sixth seventh eighth
                 ninth tenth eleventh twelfth thirteenth fourteenth
                 fifteenth sixteenth);

  my $number = 0;
  for my $word (@words) {
    $number++;
    return $number if $t =~ /$word/i;
  }

  return $1 if $t =~ /(\d+)/;

  warn "Odd edition number: $t\n";

  return $t;
}

# Translate a language name as found in the xml:lang attribute into a
# language code.
#
# The name is compared case-insensitively against the prefixes below, in
# order; the first prefix that matches the start of the name decides.  A
# name consisting of exactly two word characters is passed through as is.
# Anything else is returned unchanged after a warning.
sub language {
  my ($t) = @_;

  # Order matters: earlier prefixes win (e.g. 'brazil' already covers
  # 'brazilian', 'german' covers 'germanb').
  my @code_for_prefix = (
    [ UKenglish  => 'en_UK' ],
    [ acadian    => 'fr_CA' ],
    [ afrikaans  => 'af'    ],
    [ american   => 'en_US' ],
    [ austrian   => 'de_AT' ],
    [ bahasa     => 'id'    ],
    [ basque     => 'eu'    ],
    [ brazil     => 'pt_BR' ],
    [ brazilian  => 'pt_BR' ],
    [ breton     => 'br'    ],
    [ british    => 'en_UK' ],
    [ bulgarian  => 'bg'    ],
    [ canadian   => 'en_CA' ],
    [ canadien   => 'fr_CA' ],
    [ catalan    => 'ca'    ],
    [ croatian   => 'hr'    ],
    [ czech      => 'cs'    ],
    [ danish     => 'da'    ],
    [ dutch      => 'nl'    ],
    [ english    => 'en'    ],
    [ esperanto  => 'eo'    ],
    [ estonian   => 'et'    ],
    [ finnish    => 'fi'    ],
    [ francais   => 'fr'    ],
    [ french     => 'fr'    ],
    [ frenchb    => 'fr'    ],
    [ galician   => 'gl'    ],
    [ german     => 'de'    ],
    [ germanb    => 'de'    ],
    [ greek      => 'el'    ],
    [ hebrew     => 'he'    ],
    [ hungarian  => 'hu'    ],
    [ icelandic  => 'is'    ],
    [ irish      => 'ga'    ],
    [ italian    => 'it'    ],
    [ latin      => 'la'    ],
    [ lsorbian   => 'wen'   ],
    [ magyar     => 'hu'    ],
    [ naustrian  => 'de_AT' ],
    [ ngerman    => 'de'    ],
    [ ngermanb   => 'de'    ],
    [ norsk      => 'no'    ],
    [ nynorsk    => 'nno'   ],
    [ polish     => 'pl'    ],
    [ portuges   => 'pt'    ],
    [ portuguese => 'pt'    ],
    [ romanian   => 'ro'    ],
    [ russian    => 'ru'    ],
    [ russianb   => 'ru'    ],
    [ samin      => 'smi'   ],
    [ scottish   => 'gd'    ],
    [ serbian    => 'sr'    ],
    [ slovak     => 'sk'    ],
    [ slovene    => 'sl'    ],
    [ spanish    => 'es'    ],
    [ swedish    => 'sv'    ],
    [ turkish    => 'tr'    ],
    [ ukraineb   => 'uk'    ],
    [ ukrainian  => 'uk'    ],
    [ usorbian   => 'wen'   ],
    [ welsh      => 'cy'    ],
  );

  for my $entry (@code_for_prefix) {
    my ($prefix, $code) = @$entry;
    return $code if $t =~ /^\Q$prefix\E/i;
  }

  # Two-character values are taken to be codes already.
  return $t if $t =~ /^\w\w$/;

  warn "Odd language: $t\n";

  return $t;
}

# Turn one LaTeX brace group into XML.  Meant to be called from the
# replacement side of the brace-stripping s///e in the main loop.
#
#   $command - name of the control word directly in front of the group
#              (without the backslash), or undef for a bare {...} group
#   $content - the text between the braces
#
# Returns $content wrapped in <it>, <bo> or <tt> for the commands emph,
# textbf and texttt respectively.  For any other command, or none, the
# bare content is returned, i.e. the command and the braces are dropped.
sub repl {
  my ($command, $content) = @_;

  # This routine used to ignore $content and read the caller's third
  # capture group ($3) directly, which only worked when called from inside
  # that particular substitution.  The argument is now authoritative; $3
  # is still consulted when no content is passed, so a call that hands
  # over an undefined second argument keeps working.  This must happen
  # before any pattern match in this sub, which would reset $3.
  $content = $3 unless defined $content;
  $content = '' unless defined $content;

  my %tag_for = (emph => 'it', textbf => 'bo', texttt => 'tt');
  my $tag = '';
  if (defined($command) && exists $tag_for{$command}) {
    $tag = $tag_for{$command};
  }

  return $tag ? "<$tag>$content</$tag>" : $content;
}

# Returns its argument (a single character) as a hexadecimal XML character
# reference, e.g. "\xab" becomes '&#xab;'.
sub eschex {
  my ($char) = @_;
  my $codepoint = ord $char;
  return '&#x' . sprintf('%x', $codepoint) . ';';
}

# Not reached in normal operation: the main program already calls exit
# before the subroutine definitions above.
exit 0;

__END__

=head1 NAME

bib2xml - a program to convert BibTeX databases to Bibulus XML

=head1 SYNOPSIS

  bib2xml [-d]
          [-i input-encoding]
          [-o output-encoding] bibtex-database-filename

=head1 DESCRIPTION

C<bib2xml> converts a BibTeX database (normally with the extension C<.bib>)
to Bibulus XML.  It does so by running BibTeX on the original database
with a special style file that generates XML instead of LaTeX output,
cleaning it up a little afterwards.  This means that you need to have
a working BibTeX to run this program.

The C<-d> option enables debugging information.

Valid output encodings for the C<-o> option are C<utf8>, C<latin1> and
C<ascii>.  The default is C<utf8>.

Valid input encodings for the C<-i> option are C<ascii> and C<latin1>.
The default is C<ascii>.

=head1 SEE ALSO

The DTD is defined in F<doc/bibulus.dtd>.

The homepage is L<http://www.nongnu.org/bibulus/>.

=head1 AUTHOR

Thomas M. Widmann, E<lt>twid@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2003 by Thomas M. Widmann

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.

=cut
