#!/usr/bin/perl -w
use strict;

# Root directory of the recordings.  The lookups further down expect the
# layout <DATAPATH>/<language>/<subdirectory>/<number>.ogg.
my $DATAPATH = "../../data/data";

#pause(4);
#exit(0);

# Exactly two arguments are needed: the language to translate from and the
# language to translate to.  Anything else gets the usage line and exit
# status 1.
unless (@ARGV == 2) {
  print "Syntax: taper.pl <from-language> <to-language>\n";
  exit(1);
}

# Find the words that have a recording in both languages.
#
# %words maps a word number to a hash with two entries:
#   from => path of the recording in the from-language ($ARGV[0])
#   to   => path of the first matching recording in the to-language ($ARGV[1])
# Words recorded in only one of the two languages are left out.

my %words;

# glob() expands the patterns inside perl, so no `ls` process is spawned per
# file and nothing is printed on stderr when a word has no counterpart.
foreach my $word (glob("$DATAPATH/$ARGV[0]/*/*.ogg"))
{
  # The word number is the run of digits directly in front of ".ogg".
  # Files without one cannot be paired up, so skip them instead of
  # searching for an undefined number.
  my ($number) = $word =~ /(\d+)\.ogg\z/;
  next unless defined $number;

  # Look for the same number in any subdirectory of the to-language.
  my @matches = glob("$DATAPATH/$ARGV[1]/*/$number.ogg");
  next unless @matches;

  print "$number\n";
  $words{$number}{from} = $word;
  $words{$number}{to}   = $matches[0];
}

# Remove leftover files named <from>-<to>.<anything>.ogg.  unlink + glob does
# this without a shell and is silent when nothing matches.
unlink glob("$ARGV[0]-$ARGV[1].*.ogg");


# Everything printed to OUTFILE is fed to oggenc as raw audio and ends up in
# <from>-<to>.ogg.  The list form of open passes the arguments straight to
# oggenc (no shell), and a failure to start the encoder is fatal instead of
# being discovered one failed print at a time.
open(OUTFILE, '|-', 'oggenc', '--raw', '--raw-rate=22050', '--raw-chan=2',
     '-q', '0', "--output=$ARGV[0]-$ARGV[1].ogg", '-')
  or die "Cannot start oggenc: $!\n";
binmode OUTFILE;   # raw audio bytes, never translate line endings

# Can't just concatenate the different ogg files, because the sequence numbers
# are not unique enough. Ogg Bug #333. This is not going to get fixed soon.
foreach my $word (sort {$a <=> $b} keys %words) {
  my $fromfile = $words{$word}{from};
  my $tofile   = $words{$word}{to};
  print "$fromfile $tofile\n";
  my $fromduration = getDuration($fromfile);
  my $toduration = getDuration($tofile);

  # Decode both recordings before writing anything, so a word that cannot
  # be decoded is skipped as a whole rather than leaving half an entry (or a
  # stale tmp.raw from the previous word) in the output.
  my $fromraw = decodeToRaw($fromfile);
  my $toraw   = decodeToRaw($tofile);
  unless (defined $fromraw && defined $toraw) {
    warn "Skipping word $word: could not decode $fromfile / $tofile\n";
    next;
  }

  # Sequence per word: from-recording, silence, to-recording, longer silence.
  # NOTE(review): the first pause is computed from the to-duration only --
  # presumably time for the listener to say the translation; confirm.
  print OUTFILE $fromraw;
  pause(($toduration)*2+1);
  print OUTFILE $toraw;
  pause(($toduration+$fromduration)*2+3);
}

# Closing the pipe waits for oggenc to finish; report if it did not end well.
close(OUTFILE)
  or warn($! ? "Error closing oggenc pipe: $!\n"
             : "oggenc exited with status $?\n");

# The scratch files are only needed while encoding.
unlink 'tmp.wav', 'tmp.raw';

# Decode one ogg file and return its audio as a string of raw bytes.
# Returns undef when oggdec or sox fails or the result cannot be read.
#
# Need to do this funky 3 way conversion (ogg -> tmp.wav -> tmp.raw) because
# oggdec does not accept the raw option right now.
sub decodeToRaw
{
  my ($oggfile) = @_;

  # List-form system(): file names reach the tools untouched by a shell.
  system('oggdec', '-o', 'tmp.wav', $oggfile) == 0 or return;
  system('sox', 'tmp.wav', 'tmp.raw') == 0 or return;

  open(my $raw, '<', 'tmp.raw') or return;
  binmode $raw;
  local $/;            # slurp the whole file in one read
  my $data = <$raw>;
  close $raw;
  return $data;
}

# Return the playback length of an ogg file in whole seconds.
#
# Runs `ogginfo` on $file and parses its "Playback length: <M>m:<S>s" line.
# Both the whole-second form ("0m:07s") and the form with a fractional part
# ("2m:05.500s") are understood; the fraction is dropped and minutes are
# converted to seconds.
#
# Returns 0, after a warning, when ogginfo cannot be run or prints no
# playback length, so callers can keep doing arithmetic on the result.
sub getDuration
  {
    my ($file) = @_;

    # The label is matched literally, so ask ogginfo for untranslated output.
    local $ENV{LC_ALL} = 'C';

    # List-form pipe open: $file is handed to ogginfo as a single argument
    # without going through a shell, so spaces or shell metacharacters in the
    # name are harmless.
    my $info;
    unless (open($info, '-|', 'ogginfo', $file)) {
      warn "Cannot run ogginfo on $file: $!\n";
      return 0;
    }
    my $output = do { local $/; <$info> };
    close $info;
    $output = '' unless defined $output;

    # The match must run in list context to obtain the captures; in scalar
    # context it only reports whether the pattern matched.
    my ($minutes, $seconds) =
      $output =~ /Playback length:\s*(\d+)m:(\d+)(?:\.\d+)?s/;
    unless (defined $seconds) {
      warn "No playback length found for $file\n";
      return 0;
    }
    return $minutes * 60 + $seconds;
  }

# Write silence to OUTFILE: two zero bytes for each of $length*22050 steps.
# A fractional step count is rounded up; a zero or negative one writes
# nothing.
# NOTE(review): $length is presumably in seconds at a 22050 Hz rate --
# confirm against the raw format given to the encoder.
sub pause
{
  my ($length) = @_;

  # Count the whole steps the range [0, $length*22050) contains.
  my $limit = $length*22050;
  my $steps = int($limit);
  $steps++ if $steps < $limit;

  # Emit all the silence in a single write.
  print OUTFILE "\0\0" x $steps if $steps > 0;
}
