#!/usr/bin/perl

# This is mk-archiver, a program to archive records from one MySQL table to
# a file and/or another table.
#
# This program is copyright (c) 2007 Baron Schwartz.  Feedback and
# improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.

use strict;
use warnings FATAL => 'all';

# ###########################################################################
# This is a combination of modules and programs in one -- a runnable module.
# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition.
# ###########################################################################

# ###########################################################################
# OptionParser package 1178
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package OptionParser;

use Getopt::Long;
use List::Util qw(max);
use English qw(-no_match_vars);

# Constructs an OptionParser from a list of option specs and instructions.
#
# Each hashref in @opts is an option spec.  Keys supplied by the caller:
#    s  Getopt::Long-style spec, e.g. 'dest|d=d'
#    d  description; the words 'required', 'default' / 'default VALUE' and
#       'disables ...' found in it are interpreted here
#    g  optional group key (defaults to 'o')
# Keys added to each spec by this constructor:
#    k  key under which the value is stored (short name if any, else long)
#    l  long name          t  short name (may be undef)
#    n  true if the spec contains '!' (negatable)
#    y  custom type letter (one of m d H h A a z); the spec itself is
#       rewritten to '=s' so Getopt::Long accepts a plain string
#    r  true if the description contains 'required'
#
# Each plain string in @opts is an instruction.  Instructions containing
# 'at least one', 'mutually exclusive' or 'one and only one' define option
# groups that are checked later; an instruction containing 'default to' says
# that the first named option copies from the second.
#
# Specs for --help and --version are always prepended.  Dies on duplicate
# option names and on any reference to an option that was not defined before
# the instruction that mentions it.
sub new {
   my ( $class, @opts ) = @_;
   my %key_seen;
   my %long_seen;
   my %key_for;
   my %defaults;
   my @mutex;
   my @atleast1;
   my %long_for;
   my %disables;
   my %copyfrom;
   unshift @opts,
      { s => 'help',    d => 'Show this help message' },
      { s => 'version', d => 'Output version information and exit' };
   foreach my $opt ( @opts ) {
      if ( ref $opt ) {
         my ( $long, $short ) = $opt->{s} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
         $opt->{k} = $short || $long;
         $key_for{$long} = $opt->{k};
         # %long_for can be looked up by either the key or the long name.
         $long_for{$opt->{k}} = $long;
         $long_for{$long} = $long;
         $opt->{l} = $long;
         die "Duplicate option $opt->{k}" if $key_seen{$opt->{k}}++;
         die "Duplicate long option $opt->{l}" if $long_seen{$opt->{l}}++;
         $opt->{t} = $short;
         $opt->{n} = $opt->{s} =~ m/!/;
         $opt->{g} ||= 'o';
         if ( (my ($y) = $opt->{s} =~ m/=([mdHhAaz])/) ) {
            $opt->{y} = $y;
            $opt->{s} =~ s/=./=s/;
         }
         $opt->{r} = $opt->{d} =~ m/required/;
         if ( (my ($def) = $opt->{d} =~ m/default(?: ([^)]+))?/) ) {
            $defaults{$opt->{k}} = defined $def ? $def : 1;
         }
         if ( (my ($dis) = $opt->{d} =~ m/(disables .*)/) ) {
            $disables{$opt->{k}} = [ $class->get_participants($dis) ];
         }
      }
      else { # It's an instruction.

         if ( $opt =~ m/at least one|mutually exclusive|one and only one/ ) {
            my @participants = map {
                  die "No such option '$_' in $opt" unless $long_for{$_};
                  $long_for{$_};
               } $class->get_participants($opt);
            if ( $opt =~ m/mutually exclusive|one and only one/ ) {
               push @mutex, \@participants;
            }
            if ( $opt =~ m/at least one|one and only one/ ) {
               push @atleast1, \@participants;
            }
         }
         elsif ( $opt =~ m/default to/ ) {
            # A participant may be written in long or short form.  %key_for
            # is keyed by long name only, so normalize through %long_for
            # first; otherwise a short-form participant yields an undef key.
            my @participants = map {
                  die "No such option '$_' in $opt" unless $long_for{$_};
                  $key_for{ $long_for{$_} };
               } $class->get_participants($opt);
            $copyfrom{$participants[0]} = $participants[1];
         }

      }
   }

   # Options named in 'disables ...' may be defined after the option that
   # disables them, so they are resolved only once every spec has been seen.
   foreach my $dis ( keys %disables ) {
      $disables{$dis} = [ map {
            die "No such option '$_' while processing $dis" unless $long_for{$_};
            $long_for{$_};
         } @{$disables{$dis}} ];
   }

   return bless {
      specs => [ grep { ref $_ } @opts ],
      notes => [],
      instr => [ grep { !ref $_ } @opts ],
      mutex => \@mutex,
      defaults => \%defaults,
      long_for => \%long_for,
      atleast1 => \@atleast1,
      disables => \%disables,
      key_for  => \%key_for,
      copyfrom => \%copyfrom,
      strict   => 1,
      groups   => [ { k => 'o', d => 'Options' } ],
   }, $class;
}

# Returns the option names mentioned in $str.  A token of the form --name
# contributes 'name'; a token of the form -abc contributes each of its
# non-dash characters ('a', 'b', 'c') as a separate short option.
sub get_participants {
   my ( $self, $str ) = @_;
   my @names;
   for my $token ( $str =~ m/(--?[\w-]+)/g ) {
      if ( $token =~ m/--(.+)/ ) {
         push @names, $1;
      }
      else {
         push @names, $token =~ m/([^-])/g;
      }
   }
   return @names;
}

# Parses @ARGV with Getopt::Long and returns a hash of option key => value.
#
# %defaults (key => value) overrides the defaults that the constructor found
# in the option descriptions; dies if a key does not name a known option.
# Problems with the command line are not fatal here: they are recorded via
# $self->error() so the caller can report them later.  --version prints the
# version line and exits immediately.
#
# After Getopt::Long has run, these steps happen in order: disabled options
# are cleared, required / mutually-exclusive / at-least-one rules are checked,
# and values of options with a custom type are converted:
#    m  time with s/m/h/d suffix, converted to seconds
#    d  DSN, parsed by $self->{dsn} (optionally defaulting from another option)
#    z  size with optional k/M/G suffix and optional leading sign
#    H, h  comma-separated list turned into a hashref (h only if defined)
#    A, a  comma-separated list turned into an arrayref (a only if defined)
sub parse {
   my ( $self, %defaults ) = @_;
   my @specs = @{$self->{specs}};
   my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);

   # Start from description-derived defaults, overlay the caller's defaults,
   # and make sure every known option has an entry (undef if unset).
   my %opt_seen;
   my %vals = %{$self->{defaults}};
   @vals{keys %defaults} = values %defaults;
   foreach my $spec ( @specs ) {
      $vals{$spec->{k}} = undef unless defined $vals{$spec->{k}};
      $opt_seen{$spec->{k}} = 1;
   }

   foreach my $key ( keys %defaults ) {
      die "Cannot set default for non-existent option '$key'\n"
         unless $opt_seen{$key};
   }

   # Getopt::Long writes each option's value straight into %vals.
   Getopt::Long::Configure('no_ignore_case', 'bundling');
   GetOptions( map { $_->{s} => \$vals{$_->{k}} } @specs )
      or $self->error('Error parsing options');

   if ( $vals{version} ) {
      my $prog = $self->prog;
      printf("%s  Ver %s Distrib %s Changeset %s\n",
         $prog, $main::VERSION, $main::DISTRIB, $main::SVN_REV);
      exit(0);
   }

   # Anything Getopt::Long left in @ARGV is an error in strict mode.
   if ( @ARGV && $self->{strict} ) {
      $self->error("Unrecognized command-line options @ARGV");
   }

   # An option that is set un-sets every option it is declared to disable.
   foreach my $dis ( grep { defined $vals{$_} } keys %{$self->{disables}} ) {
      my @disses = map { $self->{key_for}->{$_} } @{$self->{disables}->{$dis}};
      @vals{@disses} = map { undef } @disses;
   }

   foreach my $spec ( grep { $_->{r} } @specs ) {
      if ( !defined $vals{$spec->{k}} ) {
         $self->error("Required option --$spec->{l} must be specified");
      }
   }

   # Each mutex group is a list of long names; more than one set is an error.
   foreach my $mutex ( @{$self->{mutex}} ) {
      my @set = grep { defined $vals{$self->{key_for}->{$_}} } @$mutex;
      if ( @set > 1 ) {
         my $note = join(', ',
            map { "--$self->{long_for}->{$_}" }
                @{$mutex}[ 0 .. scalar(@$mutex) - 2] );
         $note .= " and --$self->{long_for}->{$mutex->[-1]}"
               . " are mutually exclusive.";
         $self->error($note);
      }
   }

   # Each at-least-one group must have at least one member set.
   foreach my $required ( @{$self->{atleast1}} ) {
      my @set = grep { defined $vals{$self->{key_for}->{$_}} } @$required;
      if ( !@set ) {
         my $note = join(', ',
            map { "--$self->{long_for}->{$_}" }
                @{$required}[ 0 .. scalar(@$required) - 2] );
         $note .= " or --$self->{long_for}->{$required->[-1]}";
         $self->error("Specify at least one of $note");
      }
   }

   # Convert scalar-valued custom types (time, DSN, size) that were given.
   foreach my $spec ( grep { $_->{y} && defined $vals{$_->{k}} } @specs ) {
      my $val = $vals{$spec->{k}};
      if ( $spec->{y} eq 'm' ) {
         my ( $num, $suffix ) = $val =~ m/(\d+)([smhd])$/;
         if ( $suffix ) {
            $val = $suffix eq 's' ? $num            # Seconds
                 : $suffix eq 'm' ? $num * 60       # Minutes
                 : $suffix eq 'h' ? $num * 3600     # Hours
                 :                  $num * 86400;   # Days
            $vals{$spec->{k}} = $val;
         }
         else {
            $self->error("Invalid --$spec->{l} argument");
         }
      }
      elsif ( $spec->{y} eq 'd' ) {
         # If a 'default to' instruction names a source option for this DSN,
         # that option's value is re-parsed and passed as the second argument
         # to the DSN parser.
         my $from_key = $self->{copyfrom}->{$spec->{k}};
         my $default = {};
         if ( $from_key ) {
            $default = $self->{dsn}->parse($self->{dsn}->as_string($vals{$from_key}));
         }
         $vals{$spec->{k}} = $self->{dsn}->parse($val, $default);
      }
      elsif ( $spec->{y} eq 'z' ) {
         my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
         if ( defined $num ) {
            if ( $factor ) {
               $num *= $factor_for{$factor};
            }
            $vals{$spec->{k}} = ($pre || '') . $num;
         }
         else {
            $self->error("Invalid --$spec->{l} argument");
         }
      }
   }

   # Convert list-valued custom types.  Uppercase types always yield a
   # (possibly empty) container; lowercase types only when a value was given.
   foreach my $spec ( grep { $_->{y} } @specs ) {
      my $val = $vals{$spec->{k}};
      if ( $spec->{y} eq 'H' || (defined $val && $spec->{y} eq 'h') ) {
         $vals{$spec->{k}} = { map { $_ => 1 } split(',', ($val || '')) };
      }
      elsif ( $spec->{y} eq 'A' || (defined $val && $spec->{y} eq 'a') ) {
         $vals{$spec->{k}} = [ split(',', ($val || '')) ];
      }
   }

   return %vals;
}

# Records a command-line error: sets the error flag on the parser and
# appends $note to the list of notes reported by errors().
sub error {
   my $self = shift;
   my ( $note ) = @_;
   $self->{__error__} = 1;
   return push @{ $self->{notes} }, $note;
}

# Returns the trailing run of letters, dots and dashes in $PROGRAM_NAME
# (normally the script's base name), or all of $PROGRAM_NAME if it does not
# end in such a run.
sub prog {
   return $1 if $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
   return $PROGRAM_NAME;
}

# Returns the one-line "Usage:" banner.  The argument summary comes from
# $self->{prompt}, falling back to '<options>'.
sub prompt {
   my ( $self ) = @_;
   my $name = $self->prog;
   my $args = $self->{prompt} || '<options>';
   return sprintf("Usage: %s %s\n", $name, $args);
}

# Returns the program description followed by a pointer to --help and
# perldoc, wrapped at whitespace into lines of at most 80 characters, with
# trailing spaces removed from every line.
sub descr {
   my ( $self ) = @_;
   my $name = $self->prog;
   my $text = join('',
      $name, ' ', ($self->{descr} || ''),
      "  For more details, please use the --help option, ",
      "or try 'perldoc $name' for complete documentation.",
   );
   my @lines = $text =~ m/(.{0,80})(?:\s+|$)/g;
   $text = join("\n", @lines);
   $text =~ s/ +$//mg;
   return $text;
}

# If --help was given (per %opts), prints the full usage text and exits.
# Otherwise, if any command-line error was recorded, prints the error report
# and exits.  Both cases exit with status 0.  Returns normally when neither
# applies.
sub usage_or_errors {
   my ( $self, %opts ) = @_;
   if ( $opts{help} ) {
      my $text = $self->usage(%opts);
      print $text;
      exit(0);
   }
   if ( $self->{__error__} ) {
      my $text = $self->errors();
      print $text;
      exit(0);
   }
}

# Builds the error report: the usage banner, a bulleted list of recorded
# error notes (if any), and the program description.
sub errors {
   my ( $self ) = @_;
   my @parts = ( $self->prompt(), "\n" );
   my @notes = @{ $self->{notes} };
   if ( @notes ) {
      push @parts,
         join("\n  * ", 'Errors in command-line arguments:', @notes), "\n";
   }
   push @parts, "\n", $self->descr();
   return join('', @parts);
}

# Builds and returns the full --help text: the description and usage banner,
# every option grouped and sorted by long name with its wrapped description,
# the instruction strings, the DSN usage (when a DSN parser is attached as
# $self->{dsn}), and finally a table of every option's value after argument
# processing.  %vals is the hash returned by parse().
sub usage {
   my ( $self, %vals ) = @_;
   my @specs = @{$self->{specs}};

   # Width of the longest long option; negatable ones are shown with a
   # 4-character '[no]' prefix.
   my $maxl = max(map { length($_->{l}) + ($_->{n} ? 4 : 0)} @specs);

   # Same, but only among options that also have a short form.
   my $maxs = max(0,
      map { length($_->{l}) + ($_->{n} ? 4 : 0)}
      grep { $_->{t} } @specs);

   # Left column must fit either a long-only option or long + ' -x'; the
   # right column gets what remains of an 80-character line.
   my $lcol = max($maxl, ($maxs + 3));
   my $rcol = 80 - $lcol - 6;
   my $rpad = ' ' x ( 80 - $rcol );

   $maxs = max($lcol - 3, $maxs);

   my $usage = $self->descr() . "\n" . $self->prompt();
   foreach my $g ( @{$self->{groups}} ) {
      $usage .= "\n$g->{d}:\n";
      foreach my $spec ( sort { $a->{l} cmp $b->{l} } grep { $_->{g} eq $g->{k} } @specs ) {
         my $long  = $spec->{n} ? "[no]$spec->{l}" : $spec->{l};
         my $short = $spec->{t};
         my $desc  = $spec->{d};
         # Wrap the description to the right column; continuation lines are
         # indented with $rpad so they line up under the first line.
         $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g);
         $desc =~ s/ +$//mg;
         if ( $short ) {
            $usage .= sprintf("  --%-${maxs}s -%s  %s\n", $long, $short, $desc);
         }
         else {
            $usage .= sprintf("  --%-${lcol}s  %s\n", $long, $desc);
         }
      }
   }

   if ( (my @instr = @{$self->{instr}}) ) {
      $usage .= join("\n", map { "  $_" } @instr) . "\n";
   }
   if ( $self->{dsn} ) {
      $usage .= "\n" . $self->{dsn}->usage();
   }
   $usage .= "\nOptions and values after processing arguments:\n";
   foreach my $spec ( sort { $a->{l} cmp $b->{l} } @specs ) {
      my $val   = $vals{$spec->{k}};
      my $type  = $spec->{y} || '';
      # A spec with no '=' part (just names and an optional '!') is a flag.
      my $bool  = $spec->{s} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
      $val      = $bool                     ? ( $val ? 'TRUE' : 'FALSE' )
                : !defined $val             ? '(No value)'
                : $type eq 'd'              ? $self->{dsn}->as_string($val)
                : $type =~ m/H|h/           ? join(',', sort keys %$val)
                : $type =~ m/A|a/           ? join(',', @$val)
                :                             $val;
      $usage .= sprintf("  --%-${lcol}s  %s\n", $spec->{l}, $val);
   }
   return $usage;
}

# Prints $prompt and reads one line from STDIN with terminal echo turned off
# (for passwords).  May be called as a function or as a method.  Returns the
# line without its trailing newline.
#
# Dies if Term::ReadKey cannot be loaded or if nothing can be read.  The
# terminal is always put back into 'normal' mode once echo has been turned
# off, even when the read fails, so a failure never leaves the user's
# terminal with echo disabled.
sub prompt_noecho {
   shift @_ if ref $_[0] eq __PACKAGE__;
   my ( $prompt ) = @_;
   local $OUTPUT_AUTOFLUSH = 1;
   print $prompt;
   my $response;
   my $echo_off = 0;
   eval {
      require Term::ReadKey;
      Term::ReadKey::ReadMode('noecho');
      $echo_off = 1;
      $response = <STDIN>;
      # EOF or a closed STDIN yields undef; report it rather than chomp it.
      die "no input available on STDIN\n" unless defined $response;
      chomp $response;
   };
   # Save the error before the cleanup eval below can overwrite it.
   my $read_error = $EVAL_ERROR;
   if ( $echo_off ) {
      eval { Term::ReadKey::ReadMode('normal'); };
      print "\n";
   }
   if ( $read_error ) {
      die "Cannot read response; is Term::ReadKey installed? $read_error";
   }
   return $response;
}

# Appends option-group definitions to the parser's list of groups.  usage()
# reads the 'k' (group key) and 'd' (heading) entries of each group hashref.
sub groups {
   my $self = shift;
   return push @{ $self->{groups} }, @_;
}

1;

# ###########################################################################
# End OptionParser package
# ###########################################################################

# ###########################################################################
# TableParser package 1149
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package TableParser;

# Constructs a TableParser; the object carries no state.
sub new {
   my ( $class ) = @_;
   return bless {}, $class;
}

# Parses the output of SHOW CREATE TABLE ($ddl) and returns a hashref
# describing the table:
#    cols          arrayref of column names, in definition order
#    col_posn      column name => ordinal position
#    is_col        column name => 1
#    null_cols     arrayref of nullable column names
#    is_nullable   column name => 1 for nullable columns
#    is_autoinc    column name => 1/0
#    numeric_cols  arrayref of numeric column names
#    is_numeric    column name => 1 for numeric columns
#    type_for      column name => base type (e.g. 'int', 'varchar')
#    defs          column name => full definition line
#    keys          index name => { colnames, cols, unique, is_col,
#                                  is_nullable, type }
#    engine        storage engine name, or undef if the DDL does not say
#
# $opts->{mysql_version}, if given, is compared as a zero-padded version
# string to detect servers older than 4.1.
#
# Dies if the DDL does not use backtick quoting or a column type cannot be
# determined.
sub parse {
   my ( $self, $ddl, $opts ) = @_;

   if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
      die "Cannot parse table definition; is ANSI quoting enabled or SQL_QUOTE_SHOW_CREATE disabled?";
   }

   my ( $engine ) = $ddl =~ m/\) (?:ENGINE|TYPE)=(\w+)/;
   # The DDL may carry no ENGINE=/TYPE= clause; match against an empty string
   # in that case instead of an undefined value.
   my $engine_name = defined $engine ? $engine : '';

   my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
   # Take only the FIRST backtick-quoted word of each definition.  A /g match
   # would also pick up backtick-quoted text later in the line (for example
   # inside a COMMENT), making @cols longer than @defs and misaligning the
   # slice assignment below.
   my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;

   my %def_for;
   @def_for{@cols} = @defs;

   my (@nums, @null);
   my (%type_for, %is_nullable, %is_numeric, %is_autoinc);
   foreach my $col ( @cols ) {
      my $def = $def_for{$col};
      my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
      die "Can't determine column type for $def" unless $type;
      $type_for{$col} = $type;
      if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
         push @nums, $col;
         $is_numeric{$col} = 1;
      }
      if ( $def !~ m/NOT NULL/ && $def !~ m/text$/ ) {
         push @null, $col;
         $is_nullable{$col} = 1;
      }
      $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
   }

   my %keys;
   foreach my $key ( $ddl =~ m/^  ((?:[A-Z]+ )?KEY .*)$/gm ) {

      # For engines other than MEMORY/HEAP, a key declared USING HASH is
      # recorded as BTREE.
      if ( $engine_name !~ m/MEMORY|HEAP/ ) {
         $key =~ s/USING HASH/USING BTREE/;
      }

      my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
      my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
      $type = $type || $special || 'BTREE';
      if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000'
         && $engine_name =~ m/HEAP|MEMORY/i )
      {
         $type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP
      }

      my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
      my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
      # Splitting on backticks leaves the names plus '' and ',' separators;
      # keep only the pieces containing something other than a comma.
      my @cols   = grep { m/[^,]/ } split('`', $cols);
      $name      =~ s/`//g;

      $keys{$name} = {
         colnames    => $cols,
         cols        => \@cols,
         unique      => $unique,
         is_col      => { map { $_ => 1 } @cols },
         is_nullable => scalar(grep { $is_nullable{$_} } @cols),
         type        => $type,
      };
   }

   return {
      cols           => \@cols,
      col_posn       => { map { $cols[$_] => $_ } 0..$#cols },
      is_col         => { map { $_ => 1 } @cols },
      null_cols      => \@null,
      is_nullable    => \%is_nullable,
      is_autoinc     => \%is_autoinc,
      keys           => \%keys,
      defs           => \%def_for,
      numeric_cols   => \@nums,
      is_numeric     => \%is_numeric,
      engine         => $engine,
      type_for       => \%type_for,
   };
}

# Returns the CREATE TABLE statement for `$db`.`$tbl` as reported by
# SHOW CREATE TABLE, or undef if the result has no "Create Table" column.
#
# On servers that honor /*!40101 ... */ comments, ANSI_QUOTES is removed from
# SQL_MODE and SQL_QUOTE_SHOW_CREATE is enabled for the query so the result
# is backtick-quoted; both settings are restored afterwards.
sub get_ddl {
   my ( $self, $dbh, $db, $tbl ) = @_;
   # Double any backtick inside the identifiers so that such a name cannot
   # terminate the quoted identifier early.
   my ( $quoted_db, $quoted_tbl ) = map {
         my $name = $_;
         $name =~ s/`/``/g;
         $name;
      } ( $db, $tbl );
   $dbh->do('/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
      . '@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, "ANSI_QUOTES", ""), ",,", ","), '
      . '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
      . '@@SQL_QUOTE_SHOW_CREATE := 1 */');
   my $href = $dbh->selectrow_hashref(
      "SHOW CREATE TABLE `$quoted_db`.`$quoted_tbl`");
   $dbh->do('/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
      . '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */');
   # The column holding the DDL is located case-insensitively by name.
   my ($key) = grep { m/create table/i } keys %$href;
   return defined $key ? $href->{$key} : undef;
}

1;

# ###########################################################################
# End TableParser package
# ###########################################################################

# ###########################################################################
# DSNParser package 1149
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package DSNParser;

# Constructs a DSNParser.  Seven DSN keys are built in (D, F, h, p, P, S, u),
# each with a description, the name of the corresponding DBI connection
# parameter ('dsn'), and copy => 1.  Each hashref in @opts ({ key, desc,
# copy }) adds a key or replaces a built-in one; such entries carry only
# 'desc' and 'copy'.
sub new {
   my ( $class, @opts ) = @_;
   my @builtin = (
      [ D => 'database',                'Database to use' ],
      [ F => 'mysql_read_default_file', 'Only read default options from the given file' ],
      [ h => 'host',                    'Connect to host' ],
      [ p => 'password',                'Password to use when connecting' ],
      [ P => 'port',                    'Port number to use for connection' ],
      [ S => 'mysql_socket',            'Socket file to use for connection' ],
      [ u => 'user',                    'User for login if not current user' ],
   );
   my %opt_for;
   for my $entry ( @builtin ) {
      my ( $key, $dsn_name, $desc ) = @$entry;
      $opt_for{$key} = { desc => $desc, dsn => $dsn_name, copy => 1 };
   }
   for my $extra ( @opts ) {
      $opt_for{ $extra->{key} } = {
         desc => $extra->{desc},
         copy => $extra->{copy},
      };
   }
   return bless { opts => \%opt_for }, $class;
}

# Gets or sets a property of the parser.  With a value argument (even an
# undefined one) the property is set first; the current value is returned
# either way.
sub prop {
   my ( $self, $prop, $value ) = @_;
   $self->{$prop} = $value if scalar(@_) >= 3;
   return $self->{$prop};
}

# Parses a DSN string of the form key=value[,key=value...] into a hashref of
# key => value.  Returns nothing if $dsn is empty.
#
# A DSN without any '=' is stored whole under the 'autokey' property's key
# when that property is set.  Otherwise every known key gets a value: the one
# given in the DSN, else the one in $prev if the key is marked 'copy', else
# the one in $defaults.  Dies on a DSN key that is not known, and on a key
# listed in the 'required' property that has no true value.
sub parse {
   my ( $self, $dsn, $prev, $defaults ) = @_;
   return unless $dsn;
   $prev     ||= {};
   $defaults ||= {};
   my $known = $self->{opts};
   my %vals;

   my $autokey = $dsn !~ m/=/ ? $self->prop('autokey') : undef;
   if ( $autokey ) {
      $vals{$autokey} = $dsn;
   }
   else {
      my %given = map { m/^(.)=(.*)$/g } split(/,/, $dsn);
      for my $key ( keys %$known ) {
         my $val = $given{$key};
         if ( !defined $val && defined $prev->{$key} && $known->{$key}->{copy} ) {
            $val = $prev->{$key};
         }
         $val = $defaults->{$key} unless defined $val;
         $vals{$key} = $val;
      }
      for my $key ( keys %given ) {
         die "Unrecognized DSN part '$key' in '$dsn'\n"
            unless exists $known->{$key};
      }
   }

   my $required = $self->prop('required');
   if ( $required ) {
      for my $key ( keys %$required ) {
         die "Missing DSN part '$key' in '$dsn'\n" unless $vals{$key};
      }
   }
   return \%vals;
}

# Turns a parsed DSN hashref back into 'key=value,...' form, with keys in
# sorted order and undefined values omitted.  A non-reference argument is
# returned unchanged.
sub as_string {
   my ( $self, $dsn ) = @_;
   return $dsn unless ref $dsn;
   my @pairs;
   for my $key ( sort keys %$dsn ) {
      next unless defined $dsn->{$key};
      push @pairs, $key . '=' . $dsn->{$key};
   }
   return join(',', @pairs);
}

# Returns help text describing the DSN syntax: one line per known key (in
# sorted order) showing whether it is copied and its description, plus a note
# about bareword DSNs when the 'autokey' property is set.
sub usage {
   my ( $self ) = @_;
   my @lines = (
      "DSN syntax is key=value[,key=value...]  Allowable DSN keys:\n",
      "  KEY  COPY  MEANING\n",
      "  ===  ====  =============================================\n",
   );
   my $opt_for = $self->{opts};
   for my $key ( sort keys %$opt_for ) {
      my $copy = $opt_for->{$key}->{copy} ? 'yes   ' : 'no    ';
      my $desc = $opt_for->{$key}->{desc} || '[No description]';
      push @lines, "  $key    $copy$desc\n";
   }
   my $autokey = $self->prop('autokey');
   if ( $autokey ) {
      push @lines,
         "  If the DSN is a bareword, the word is treated as the '$autokey' key.\n";
   }
   return join('', @lines);
}

# Builds the arguments for DBI->connect from a parsed DSN ($info) and returns
# them as ($dsn_string, $user, $password).  When the 'dbidriver' property is
# 'Pg', a DBI:Pg string with host and port is built; otherwise a DBI:mysql
# string with defaults file, host, port and socket, always reading the
# [mysql] group of the option files.
sub get_cxn_params {
   my ( $self, $info ) = @_;
   my $opt_for = $self->{opts};
   my $driver  = $self->prop('dbidriver') || '';
   my $db      = $info->{D} || '';

   # Renders 'name=value' for each listed key that has a defined value.
   my $pairs = sub {
      return join(';',
         map  { "$opt_for->{$_}->{dsn}=$info->{$_}" }
         grep { defined $info->{$_} }
         @_);
   };

   my $dsn = $driver eq 'Pg'
      ? 'DBI:Pg:dbname=' . $db . ';' . $pairs->(qw(h P))
      : 'DBI:mysql:' . $db . ';' . $pairs->(qw(F h P S))
         . ';mysql_read_default_group=mysql';
   return ($dsn, $info->{u}, $info->{p});
}

1;

# ###########################################################################
# End DSNParser package
# ###########################################################################

# ###########################################################################
# VersionParser package 1149
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package VersionParser;

# Constructs a VersionParser.  The object starts empty; version_ge() stores
# parsed server versions in it.
sub new {
   my $class = shift;
   my $self  = {};
   return bless $self, $class;
}

# Converts a version string such as '5.0.45-log' into a nine-digit,
# zero-padded string ('005000045') that compares correctly with string
# comparison operators.
#
# Only the first three numbers found in $str are used, and missing ones count
# as 0.  Passing sprintf fewer or more arguments than its three conversions
# raises a "Missing argument" or "Redundant argument" warning, which is fatal
# under 'use warnings FATAL', for inputs such as '4.1' or '5.0.51a-24+lenny1'.
sub parse {
   my ( $self, $str ) = @_;
   my @parts = $str =~ m/(\d+)/g;
   push @parts, 0 while @parts < 3;
   return sprintf('%03d%03d%03d', @parts[0 .. 2]);
}

# Returns true if the server behind $dbh runs at least version $target (a
# string such as '4.1.0').  The server's parsed version is stored in the
# object under the stringified handle, so SELECT VERSION() is not repeated
# while a true value is stored for that handle.
sub version_ge {
   my ( $self, $dbh, $target ) = @_;
   if ( !$self->{$dbh} ) {
      my ( $server_version ) = $dbh->selectrow_array('SELECT VERSION()');
      $self->{$dbh} = $self->parse($server_version);
   }
   my $wanted = $self->parse($target);
   return $self->{$dbh} ge $wanted;
}

1;

# ###########################################################################
# End VersionParser package
# ###########################################################################

# ###########################################################################
# Quoter package 1149
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package Quoter;

# Constructs a Quoter; the object carries no state.
sub new {
   my $class = shift;
   my $self  = {};
   return bless $self, $class;
}

# Quotes each of @vals as a backtick-delimited identifier, doubling any
# backtick inside it, and joins the results with '.', so that
# quote('db', 'tbl') gives `db`.`tbl`.
sub quote {
   my ( $self, @vals ) = @_;
   my @quoted;
   for my $name ( @vals ) {
      ( my $escaped = $name ) =~ s/`/``/g;
      push @quoted, '`' . $escaped . '`';
   }
   return join('.', @quoted);
}

1;

# ###########################################################################
# End Quoter package
# ###########################################################################

# ###########################################################################
# TableNibbler package 1149
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package TableNibbler;

# Constructs a TableNibbler; the object carries no state.
sub new {
   my ( $class ) = @_;
   return bless {}, $class;
}

# Returns the names of the table's BTREE indexes, best candidate first.
# Ordering: PRIMARY before everything else, then unique before non-unique,
# then fewer nullable columns, then fewer columns.  The names are sorted
# alphabetically before this ordering is applied.
sub sort_indexes {
   my ( $self, $tbl ) = @_;
   my $key_for = $tbl->{keys};
   my @btree   = grep { $key_for->{$_}->{type} eq 'BTREE' } sort keys %$key_for;
   my @ordered = sort {
      my ( $x, $y ) = ( $key_for->{$a}, $key_for->{$b} );
         ( ($a ne 'PRIMARY') <=> ($b ne 'PRIMARY') )
      || ( !$x->{unique} <=> !$y->{unique} )
      || ( $x->{is_nullable} <=> $y->{is_nullable} )
      || ( scalar(@{$x->{cols}}) <=> scalar(@{$y->{cols}}) )
   } @btree;
   return @ordered;
}

# Returns the name of the index to use on $tbl.  If $index is given, the
# table's index of that name (compared case-insensitively) is returned, and
# it is an error if there is none.  Otherwise the first index from
# sort_indexes() is used, and it is an error if the table has no suitable
# index at all.
sub find_best_index {
   my ( $self, $tbl, $index ) = @_;
   if ( $index ) {
      my ( $match ) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
      if ( !$match ) {
         die "Index '$index' does not exist in table";
      }
      return $match;
   }
   my ( $best ) = $self->sort_indexes($tbl);
   if ( !$best ) {
      die "Cannot find an ascendable index in table";
   }
   return $best;
}

# Generates the pieces of a WHERE clause that resumes an index scan after the
# last row seen ("ascending" the index).  Arguments (named, in %opts):
#    tbl       table structure with 'cols', 'keys' and 'is_nullable' entries
#    cols      optional arrayref of columns being selected (default: all)
#    quoter    object whose quote() method quotes a column name
#    index     optional index name; otherwise find_best_index() chooses
#    ascfirst  if true, ascend only the first column of the index
#    asconly   if true, use a strict '>' on the last column instead of '>='
#
# Returns a hashref:
#    cols   the selected columns, with any missing index columns appended
#    index  name of the index used
#    where  the WHERE clause text, with '?' placeholders
#    slice  for each placeholder, the position in 'cols' of the value to bind
#    scols  for each placeholder, the name of that column
sub generate_asc_stmt {
   my ( $self, %opts ) = @_;

   my $tbl  = $opts{tbl};
   my @cols = $opts{cols} ? @{$opts{cols}} : @{$tbl->{cols}};
   my $q    = $opts{quoter};

   my @asc_cols;
   my @asc_slice;

   my $index = $self->find_best_index($tbl, $opts{index});

   @asc_cols = @{$tbl->{keys}->{$index}->{cols}};
   if ( $opts{ascfirst} ) {
      @asc_cols = $asc_cols[0];
   }

   # Make sure every index column is among the selected columns, and record
   # where each one sits in @cols.
   my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
   foreach my $col ( @asc_cols ) {
      if ( !exists $col_posn{$col} ) {
         push @cols, $col;
         $col_posn{$col} = $#cols;
      }
      push @asc_slice, $col_posn{$col};
   }

   my $asc_stmt = {
      cols  => \@cols,
      index => $index,
      where => '',
      slice => [],
      scols => [],
   };

   if ( @asc_slice ) {
      # One OR-ed clause per index column i: columns 0..i-1 equal the last
      # seen values AND column i is beyond its last seen value.
      my @clauses;
      foreach my $i ( 0 .. $#asc_slice ) {
         my @clause;

         # Equality tests on the leading columns.  A nullable column needs
         # two placeholders, so it is added twice to slice and scols.
         foreach my $j ( 0 .. $i - 1 ) {
            my $ord = $asc_slice[$j];
            my $col = $cols[$ord];
            my $quo = $q->quote($col);
            if ( $tbl->{is_nullable}->{$col} ) {
               push @clause, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))";
               push @{$asc_stmt->{slice}}, $ord, $ord;
               push @{$asc_stmt->{scols}}, $col, $col;
            }
            else {
               push @clause, "$quo = ?";
               push @{$asc_stmt->{slice}}, $ord;
               push @{$asc_stmt->{scols}}, $col;
            }
         }

         # Inequality test on column i.  Only the final column of the final
         # clause may use '>=' (and only when asconly is not set); all others
         # use a strict '>'.
         my $ord = $asc_slice[$i];
         my $col = $cols[$ord];
         my $quo = $q->quote($col);
         my $end = $i == $#asc_slice; # Last clause of the whole group.
         if ( $tbl->{is_nullable}->{$col} ) {
            if ( !$opts{asconly} && $end ) {
               push @clause, "(? IS NULL OR $quo >= ?)";
            }
            else {
               push @clause, "((? IS NULL AND $quo IS NOT NULL) OR ($quo > ?))";
            }
            push @{$asc_stmt->{slice}}, $ord, $ord;
            push @{$asc_stmt->{scols}}, $col, $col;
         }
         else {
            push @{$asc_stmt->{slice}}, $ord;
            push @{$asc_stmt->{scols}}, $col;
            push @clause, (!$opts{asconly} && $end ? "$quo >= ?" : "$quo > ?");
         }

         push @clauses, '(' . join(' AND ', @clause) . ')';
      }
      $asc_stmt->{where} = '(' . join(' OR ', @clauses) . ')';
   }

   return $asc_stmt;
}

# Generates the pieces of a WHERE clause that identifies a single row by the
# columns of an index, for use in a DELETE.  Arguments (named, in %opts):
#    tbl     table structure with 'cols', 'keys' and 'is_nullable' entries
#    cols    optional arrayref of columns being selected (default: none)
#    quoter  object whose quote() method quotes a column name
#    index   optional index name; otherwise find_best_index() chooses
#
# Returns a hashref with the same shape as generate_asc_stmt's: cols, index,
# where, slice and scols.  A nullable column is tested with a NULL-safe
# comparison that needs two placeholders, so it appears twice in slice and
# scols.
sub generate_del_stmt {
   my ( $self, %opts ) = @_;

   my $tbl    = $opts{tbl};
   my $quoter = $opts{quoter};
   my @cols   = $opts{cols} ? @{$opts{cols}} : ();

   my $index    = $self->find_best_index($tbl, $opts{index});
   my @key_cols = $index ? @{$tbl->{keys}->{$index}->{cols}}
                :          @{$tbl->{cols}};

   # Position of each selected column; for a repeated name the last wins.
   my %posn_of;
   @posn_of{@cols} = ( 0 .. $#cols );

   my ( @clauses, @slice, @scols );
   foreach my $col ( @key_cols ) {
      # Append key columns that are not already being selected.
      if ( !exists $posn_of{$col} ) {
         push @cols, $col;
         $posn_of{$col} = $#cols;
      }
      my $ord    = $posn_of{$col};
      my $quoted = $quoter->quote($cols[$ord]);
      my $times  = $tbl->{is_nullable}->{$col} ? 2 : 1;
      push @clauses, $times == 2
         ? "((? IS NULL AND $quoted IS NULL) OR ($quoted = ?))"
         : "$quoted = ?";
      push @slice, ($ord) x $times;
      push @scols, ($col) x $times;
   }

   return {
      cols  => \@cols,
      index => $index,
      where => '(' . join(' AND ', @clauses) . ')',
      slice => \@slice,
      scols => \@scols,
   };
}

# Works out which of the requested columns ($opts{cols}) exist in the table
# ($opts{tbl}) and returns a hashref:
#    cols   the requested columns that exist in the table, in request order
#    slice  for each of those, its position in the table's column list
# Dies if no columns were requested.
sub generate_ins_stmt {
   my ( $self, %opts ) = @_;

   my @wanted = @{$opts{cols}};
   die "You didn't specify any columns" unless @wanted;

   my @tbl_cols = @{$opts{tbl}->{cols}};
   my %posn_of;
   @posn_of{@tbl_cols} = ( 0 .. $#tbl_cols );

   my @ins_cols  = grep { exists $posn_of{$_} } @wanted;
   my @ins_slice = @posn_of{@ins_cols};

   return {
      cols  => \@ins_cols,
      slice => \@ins_slice,
   };
}

1;

# ###########################################################################
# End TableNibbler package
# ###########################################################################

# ###########################################################################
# And now for the "program".
# ###########################################################################
package main;

use DBI;
use English qw(-no_match_vars);
use List::Util qw(max);
use IO::File;
use sigtrap qw(handler finish untrapped normal-signals);

our $VERSION = '1.0.3';
our $DISTRIB = '1316';
# Numeric revision extracted from the SVN keyword.  The match has to run in
# list context to yield the captured digits; as the left operand of "||" it
# ran in scalar context and only produced true/false, so the value was 1.
our $SVN_REV = sprintf("%d", (q$Revision: 1308 $ =~ m/(\d+)/)[0] || 0);

# File-scoped state shared between the main program and the subs below.
# Kept to a minimum.
my %opts;                  # Parsed command-line options
my $oktorun   = 1;         # Cleared by the signal handler to stop the main loop
my $can_retry = 1;
my $cnt       = 0;         # Rows processed so far
my $txn_cnt   = 0;         # Rows processed since the last commit
my ( $file, $src, $dst, $get_sth );
# Bind values for the current fetch statement, saved so that the statement can
# easily be re-executed after a rollback.
my @beginning_of_txn;

# DSN parts understood by --source and --dest.
my $dp = DSNParser->new(
   { key => 'D', copy => 1, desc => 'Database that contains the table' },
   { key => 't', copy => 1, desc => 'Table to archive from' },
   { key => 'a', copy => 0, desc => 'Database to USE when executing queries' },
   { key => 'i', copy => 1, desc => 'Index to use' },
   { key => 'm', copy => 0, desc => 'Plugin module name' },
   { key => 'b', copy => 0, desc => 'Disable binary logging with SET SQL_LOG_BIN=0' },
);
$dp->prop('required', { t => 1 });

my $q  = Quoter->new;
my $vp = VersionParser->new;

if ( !caller ) {

   # ############################################################################
   # Get configuration information.
   # ############################################################################

   # Define the command-line options.  Each hashref carries a Getopt-style
   # spec (s) and its help text (d); the plain strings at the end are rules
   # passed to OptionParser verbatim.
   # NOTE(review): the "default ..." fragments in the help text may be what
   # OptionParser reads default values from -- confirm before rewording them.
   my $opt_parser = OptionParser->new(
      { s => 'analyze|A=s', d => 'Run ANALYZE TABLE afterwards on --source and/or --dest' },
      { s => 'ascend|N!',   d => 'Use ascending index optimization (default)' },
      { s => 'ascendfirst', d => 'Ascend only first column of index' },
      { s => 'askpass',     d => 'Prompt for password for connections' },
      { s => 'buffer|b',    d => 'Buffer output to --file and flush at commit' },
      { s => 'chkcols|C!',  d => 'Ensure --source and --dest have same columns (default)' },
      { s => 'columns|c=a', d => 'Comma-separated list of columns to archive' },
      { s => 'commit-each', d => 'Commit each set of fetched and archived rows (disables -z)' },
      { s => 'dest|d=d',    d => 'Table to archive to' },
      { s => 'delayedins',  d => 'Add the DELAYED modifier to INSERT statements' },
      { s => 'file|f=s',    d => 'File to archive to, with DATE_FORMAT()-like formatting' },
      { s => 'forupdate',   d => 'Adds the FOR UPDATE modifier to SELECT statements' },
      { s => 'header|h',    d => 'Print column header at top of --file' },
      { s => 'hpselect',    d => 'Adds the HIGH_PRIORITY modifier to SELECT statements' },
      { s => 'ignore|i',    d => 'Use IGNORE for INSERT statements' },
      { s => 'limit|l=i',   d => 'Number of rows to fetch and archive per statement (default 1)' },
      { s => 'local|L',     d => 'Do not write OPTIMIZE or ANALYZE queries to binlog' },
      { s => 'lpdel',       d => 'Add the LOW_PRIORITY modifier to DELETE statements' },
      { s => 'lpins',       d => 'Add the LOW_PRIORITY modifier to INSERT statements' },
      { s => 'optimize|O=s',d => 'Run OPTIMIZE TABLE afterwards on --source and/or --dest' },
      { s => 'pkonly|k',    d => 'Primary key columns only' },
      { s => 'progress|P=i',d => 'Print progress information every X rows' },
      { s => 'purge|p',     d => 'Purge, not archive; allows to omit --file and --dest' },
      { s => 'quickdel',    d => 'Add the QUICK modifier to DELETE statements' },
      { s => 'replace|r',   d => 'Use REPLACE instead of INSERT statements' },
      { s => 'retries|R=i', d => 'Number of retries per timeout or deadlock (default 1)' },
      { s => 'safeautoinc!',d => 'Do not archive row with max AUTO_INCREMENT (default)'},
      { s => 'sentinel|S=s',d => 'Sentinel file; default /tmp/mk-archiver-sentinel' },
      { s => 'sharelock',   d => 'Adds LOCK IN SHARE MODE to SELECT statements' },
      { s => 'skipfkchk|K', d => 'Turn off foreign key checks' },
      { s => 'sleep|e=i',   d => 'Sleep time between fetches' },
      { s => 'source|s=d',  d => 'Table to archive from (required)' },
      { s => 'statistics',  d => 'Collect and print timing statistics' },
      { s => 'stop',        d => 'Stop running instances by creating the sentinel file' },
      { s => 'test|t',      d => 'Test: print queries and exit without doing anything' },
      { s => 'time|m=m',    d => 'Time to run before exiting (suffix: s/m/h/d)' },
      { s => 'txnsize|z=i', d => 'Number of rows per transaction; disable with 0; default 1)' },
      { s => 'where|W=s',   d => 'WHERE clause to limit which rows to archive (required)' },
      { s => 'whyquit|q',   d => 'Print reason for exiting unless rows exhausted' },
      '--ignore and --replace are mutually exclusive.',
      '--txnsize and --commit-each are mutually exclusive.',
      '--lpins and --delayedins are mutually exclusive.',
      '--sharelock and --forupdate are mutually exclusive.',
      '--analyze and --optimize are mutually exclusive.',
      'Specify at least one of --dest, --file, or --purge.',
      'DSN values in --dest default to values from --source if COPY is yes.',
   );

   # Extra settings for the parser: the DSN parser for the "=d" options, and
   # the texts used in the usage message.
   $opt_parser->{dsn}    = $dp;
   $opt_parser->{prompt} = '--source DSN --where WHERE <options>';
   $opt_parser->{descr}  = 'nibbles records from a MySQL table.  The --source '
                         . q{and --dest arguments use DSN syntax; if COPY is }
                         . q{yes, --dest defaults to the key's value from --source.};

   %opts = $opt_parser->parse();

   # --stop takes precedence over everything else: append a line to the
   # sentinel file (creating it if necessary), report, and exit.
   if ( $opts{stop} ) {
      my $sentinel_fh = IO::File->new($opts{S}, ">>");
      if ( !$sentinel_fh ) {
         die "Cannot open $opts{S}: $OS_ERROR\n";
      }
      print {$sentinel_fh} "Remove this file to permit mk-archiver to run\n"
         or die "Cannot write to $opts{S}: $OS_ERROR\n";
      close $sentinel_fh
         or die "Cannot close $opts{S}: $OS_ERROR\n";
      print STDOUT "Successfully created file $opts{S}\n";
      exit(0);
   }

   $src = $opts{s};
   $dst = $opts{d};

   # Expand the %-codes in --file: date and time parts of the current local
   # time, plus %D and %t for the source database and table (empty if unset).
   if ( $opts{f} ) {
      my ( $sec, $min, $hour, $mday, $mon, $year ) = localtime();
      my %fmt = (
         Y => $year + 1900,
         m => sprintf('%02d', $mon + 1),
         d => sprintf('%02d', $mday),
         H => sprintf('%02d', $hour),
         i => sprintf('%02d', $min),
         s => sprintf('%02d', $sec),
         D => ( $src && $src->{D} ? $src->{D} : '' ),
         t => ( $src && $src->{t} ? $src->{t} : '' ),
      );
      $opts{f} =~ s/%([dHimsYDt])/$fmt{$1}/g;
   }

   # Refuse to run when --source and --dest point at the same place.  The two
   # are considered different as soon as any of these DSN parts is defined on
   # both sides with different values.
   if ( !$opts{help} && $opts{d} ) {
      my @differing = grep {
         defined $src->{$_} && defined $dst->{$_} && $src->{$_} ne $dst->{$_}
      } qw(h P D t S);
      if ( !@differing ) {
         $opt_parser->error("--source and --dest refer to the same table");
      }
   }

   $opt_parser->usage_or_errors(%opts);

   # ############################################################################
   # Set up statistics.
   # ############################################################################
   my %statistics = ();
   my $stat_start;

   # trace($thing, $sub) runs $sub and returns its result.  With --statistics
   # it also accumulates "${thing}_time" and "${thing}_count" in %statistics.
   if ( $opts{statistics} ) {
      require Time::HiRes;
      my $start    = Time::HiRes::gettimeofday();
      my $obs_cost = Time::HiRes::gettimeofday() - $start; # cost of observation

      *trace = sub {
         my ( $thing, $sub ) = @_;
         my $start  = Time::HiRes::gettimeofday();
         # Keep the callback's result: callers such as the is_archivable
         # check test it.  Returning the bookkeeping value below instead
         # (always true) made every row look archivable.
         my $result = $sub->();
         $statistics{$thing . '_time'} += (Time::HiRes::gettimeofday() - $start - $obs_cost);
         ++$statistics{$thing . '_count'};
         $stat_start ||= $start;
         return $result;
      };
   }
   else { # Generate a version that doesn't do any timing
      *trace = sub {
         my ( $thing, $sub ) = @_;
         return $sub->();
      };
   }

   # ############################################################################
   # Inspect DB servers and tables.
   # ############################################################################

   # For each of --source and --dest that was given: connect, record the
   # handle and server settings in the DSN hash, load the plugin if one is
   # named, and parse the table's definition.
   my $tp = new TableParser;
   foreach my $table ( grep { $_ } ($src, $dst) ) {
      my $dbh        = get_dbh($table);
      $table->{dbh}  = $dbh;
      $table->{irot} = get_irot($dbh);

      # Retrying stays possible only while no inspected server reports irot.
      $can_retry = $can_retry && !$table->{irot};

      # Quoted db.tbl name, built from whichever of D and t are set.  The
      # substitution strips a leading/trailing backtick and acts on $_, so it
      # presumably changes the stored D and t values as well.
      $table->{db_tbl} = $q->quote(
         map  { $_ =~ s/(^`|`$)//g; $_; }
         grep { $_ }
         ( $table->{D}, $table->{t} )
      );

      # Create objects for archivable and dependency handling, BEFORE getting the
      # tbl structure (because the object might do some setup, including creating
      # the table to be archived).
      if ( $table->{m} ) {
         # The module name comes from the DSN, hence the string eval.
         eval "require $table->{m}";
         die $EVAL_ERROR if $EVAL_ERROR;

         trace('plugin_start', sub {
            $table->{plugin} = $table->{m}->new(
               dbh => $table->{dbh},
               db  => $table->{D},
               tbl => $table->{t},
            );
         });
      }

      $table->{info} = $tp->parse(
         $tp->get_ddl($dbh, $table->{D}, $table->{t}));
   }

   # --chkcols: every column of --dest must exist in --source and vice versa.
   # The --dest side is checked first; the first mismatch found is fatal.
   if ( $dst && $opts{C} ) {
      my @checks = (
         [ $dst, $src, "The following columns exist in --dest but not --source: " ],
         [ $src, $dst, "The following columns exist in --source but not --dest: " ],
      );
      foreach my $check ( @checks ) {
         my ( $have, $lack, $msg ) = @{$check};
         my @missing = grep {
            !$lack->{info}->{is_col}->{$_}
         } @{$have->{info}->{cols}};
         if ( @missing ) {
            die $msg . join(', ', @missing) . "\n";
         }
      }
   }

   # ############################################################################
   # Design SQL statements.
   # ############################################################################
   my $dbh     = $src->{dbh};
   my $nibbler = TableNibbler->new;
   my ($first_sql, $next_sql, $del_sql, $ins_sql);
   my ($sel_stmt, $ins_stmt, $del_stmt);
   my (@asc_slice, @sel_slice, @del_slice, @ins_slice);

   # Columns to archive: an explicit --columns list wins, then --pkonly,
   # otherwise every column of the source table.
   my @sel_cols;
   if ( $opts{c} ) {
      @sel_cols = @{$opts{c}};
   }
   elsif ( $opts{k} ) {
      @sel_cols = @{$src->{info}->{keys}->{PRIMARY}->{cols}};
   }
   else {
      @sel_cols = @{$src->{info}->{cols}};
   }

   # The DELETE statement is designed first; its column list and index are
   # what the SELECT is then built from.
   $del_stmt = $nibbler->generate_del_stmt(
      tbl    => $src->{info},
      cols   => \@sel_cols,
      quoter => $q,
      index  => $src->{i},
   );
   @del_slice = @{$del_stmt->{slice}};

   if ( $opts{N} ) {
      # Ascending-index SELECT.
      $sel_stmt = $nibbler->generate_asc_stmt(
         tbl      => $src->{info},
         cols     => $del_stmt->{cols},
         index    => $del_stmt->{index},
         ascfirst => $opts{ascendfirst},
         quoter   => $q,
         # Might prevent rows in the source from being deleted.
         asconly  => $src->{m},
      );
   }
   else {
      # No ascending: the WHERE clause has no bind variables at all.
      $sel_stmt = {
         cols  => $del_stmt->{cols},
         index => undef,
         where => '1=1',
         slice => [],
         scols => [],
      };
   }
   @asc_slice = @{$sel_stmt->{slice}};
   @sel_slice = 0..$#sel_cols;

   # SELECT that fetches the first batch of rows.
   $first_sql
      = 'SELECT' . ( $opts{hpselect} ? ' HIGH_PRIORITY' : '' )
      . ' /*!40001 SQL_NO_CACHE */ '
      . join(',', map { $q->quote($_) } @{$sel_stmt->{cols}} )
      . " FROM $src->{db_tbl}"
      . ( $sel_stmt->{index}
         ? (($vp->version_ge($dbh, '4.0.9') ? " FORCE" : " USE")
            . " INDEX(`$sel_stmt->{index}`)")
         : '')
      . " WHERE ($opts{W})";

   # --safeautoinc: when the chosen index is a single AUTO_INCREMENT column,
   # exclude the row holding its current maximum value.
   if ( $opts{safeautoinc}
         && $sel_stmt->{index}
         && scalar(@{$src->{info}->{keys}->{$sel_stmt->{index}}->{cols}}) == 1
         && $src->{info}->{is_autoinc}->{
            $src->{info}->{keys}->{$sel_stmt->{index}}->{cols}->[0]
         }
   ) {
      my $col = $q->quote($sel_stmt->{scols}->[0]);
      my ($val) = $dbh->selectrow_array("SELECT MAX($col) FROM $src->{db_tbl}");
      # MAX() is NULL (undef here) when the table has no rows.  Substituting
      # on undef is a fatal warning, so compare against NULL instead; that
      # matches no rows, which keeps the "never archive the max row" promise.
      my $max_literal = 'NULL';
      if ( defined $val ) {
         $val =~ s/'/''/g;
         $max_literal = "'$val'";
      }
      $first_sql .= " AND ($col < $max_literal)";
   }

   # SELECT for every later batch: same as the first, plus the ascending
   # WHERE clause when ascending is enabled.
   $next_sql = $first_sql;
   if ( $opts{N} ) {
      $next_sql .= " AND $sel_stmt->{where}";
   }

   # LIMIT and locking modifiers go on both statements ($thing aliases them).
   foreach my $thing ( $first_sql, $next_sql ) {
      $thing .= " LIMIT $opts{l}";
      if ( $opts{forupdate} ) {
         $thing .= ' FOR UPDATE';
      }
      elsif ( $opts{sharelock} ) {
         $thing .= ' LOCK IN SHARE MODE';
      }
   }

   # Every DELETE is LIMIT 1, no matter how many rows a SELECT fetches: rows
   # are written to the file and INSERTed one at a time, so deleting more than
   # one row per statement could remove rows that were never archived.
   $del_sql = join('',
      'DELETE',
      ( $opts{lpdel}    ? ' LOW_PRIORITY' : '' ),
      ( $opts{quickdel} ? ' QUICK'        : '' ),
      " FROM $src->{db_tbl} WHERE $del_stmt->{where}",
      " LIMIT 1",
   );

   # The INSERT (or REPLACE) exists only when there is a destination table.
   $ins_sql = '';
   if ( $dst ) {
      $ins_stmt = $nibbler->generate_ins_stmt(
         tbl  => $dst->{info},
         cols => \@sel_cols,
      );
      @ins_slice = @{$ins_stmt->{slice}};

      my $col_list     = join(",", map { $q->quote($_) } @{$ins_stmt->{cols}} );
      my $placeholders = join(",", map { "?" } @{$ins_stmt->{cols}} );
      $ins_sql = join('',
         ( $opts{r}          ? 'REPLACE'       : 'INSERT' ),
         ( $opts{lpins}      ? ' LOW_PRIORITY' : '' ),
         ( $opts{delayedins} ? ' DELAYED'      : '' ),
         ( $opts{i}          ? ' IGNORE'       : '' ),
         " INTO $dst->{db_tbl}(",
         $col_list,
         ") VALUES (" . $placeholders . ")",
      );
   }

   # --test: print the file name and the generated statements, then stop.
   if ( $opts{t} ) {
      print join("\n", ($opts{f} || ''), $first_sql, $next_sql, $del_sql, $ins_sql), "\n";
      exit(0);
   }

   my $get_first = $dbh->prepare($first_sql);
   my $get_next  = $dbh->prepare($next_sql);
   my $del_row   = $dbh->prepare($del_sql);
   # Declared separately from the conditional assignment: "my ... if ..." has
   # undefined behaviour.  Stays undef when there is no destination table.
   my $ins_row;
   $ins_row = $dst->{dbh}->prepare($ins_sql) if $dst; # Different $dbh!

   # ############################################################################
   # Set MySQL options.
   # ############################################################################

   # --skipfkchk: turn foreign key checks off on the source connection and,
   # if there is one, on the destination connection.
   if ( $opts{K} ) {
      my @handles = ( $src->{dbh}, ( $dst ? $dst->{dbh} : () ) );
      foreach my $handle ( @handles ) {
         $handle->do("/*!40014 SET FOREIGN_KEY_CHECKS=0 */");
      }
   }

   # ############################################################################
   # Set up the plugins
   # ############################################################################
   # Destination plugin first, then source.  Each is told which columns are
   # being archived (cols) and the full SELECT list (allcols).
   foreach my $table ( grep { $_ && $_->{plugin} } $dst, $src ) {
      trace('before_begin', sub {
         $table->{plugin}->before_begin(
            cols    => \@sel_cols,
            allcols => $sel_stmt->{cols},
         );
      });
   }

   # ############################################################################
   # Start archiving.
   # ############################################################################
   my $start   = time();
   my $now     = $start;
   my $end     = $start + ($opts{m} || 0); # When mk-archiver should exit
   my $retries = $opts{R};

   # Progress header and the initial line.
   if ( $opts{P} ) {
      printf("%-19s %7s %7s\n", 'TIME', 'ELAPSED', 'COUNT');
      printf("%19s %7d %7d\n", ts($now), $now - $start, $cnt);
   }

   # Fetch the first row.  $get_sth starts as the "first" statement and is
   # switched to the "next" statement later.
   $get_sth = $get_first;
   trace('select', sub {
      $get_sth->execute;
   });
   my $row = $get_sth->fetchrow_arrayref();

   # Nothing to do at all: clean up the connections and leave.
   unless ( $row ) {
      $get_sth->finish;
      $src->{dbh}->disconnect();
      if ( $dst ) {
         $dst->{dbh}->disconnect();
      }
      exit(0);
   }

   # Open --file for appending.  The header is wanted only with --header and
   # only if the file did not exist before it was opened.
   if ( $opts{f} ) {
      my $need_hdr = $opts{h} && !-f $opts{f};
      $file = IO::File->new($opts{f}, ">>");
      if ( !$file ) {
         die "Cannot open $opts{f}: $OS_ERROR\n";
      }
      $file->autoflush(1) unless $opts{b};
      if ( $need_hdr ) {
         print {$file} '', escape(\@sel_cols), "\n"
            or die "Cannot write to $opts{f}: $OS_ERROR\n";
      }
   }

   # Main loop: one iteration per fetched row.  Each row is (optionally)
   # written to the file, INSERTed into the destination and DELETEd from the
   # source; then counters are updated, a commit may happen, and the next row
   # is fetched -- re-running the SELECT when the current batch is exhausted.
   ROW:
   while (                          # Quit if:
      $row                          # There is no data
      && $retries >= 0              # or retries are exceeded
      && (!$opts{m} || $now < $end) # or time is exceeded
      && !-f $opts{S}               # or the sentinel is set
      && $oktorun                   # or instructed to quit
      )
   {
      # Remembered because $row may become undef below, and its values are
      # the bind values for the next SELECT.
      my $lastrow = $row;

      # Without a source plugin every row is archived; with one, the plugin
      # decides.  This relies on trace() returning the callback's value.
      if ( !$src->{plugin}
         || trace('is_archivable', sub {
            $src->{plugin}->is_archivable(row => $row)
         })
      ) {

         # Do the archiving.  Write to the file first, since like the file, MyISAM
         # and other tables cannot be rolled back etc.  If there is a problem,
         # hopefully the data has at least made it to the file.
         if ( $file ) {
            trace('print_file', sub {
               print $file '', escape([@{$row}[@sel_slice]]), "\n"
                  or die "Cannot write to $opts{f}: $OS_ERROR\n";
            });
         }

         # INSERT must come first, to be as safe as possible.
         if ( $dst ) {
            my $ins_sth; # Allow the plugin to change which sth is used for the INSERT.
            if ( $dst->{plugin} ) {
               trace('before_insert', sub {
                  $dst->{plugin}->before_insert(row => $row);
               });
               trace('custom_sth', sub {
                  $ins_sth = $dst->{plugin}->custom_sth(row => $row, sql => $ins_sql);
               });
            }
            $ins_sth ||= $ins_row; # Default to the sth decided before.
            my $success = do_with_retries( 'inserting', sub {
               $ins_sth->execute(@{$row}[@ins_slice])
            });
            # 0: retries exhausted, give up entirely.  -1: rolled back; use
            # up one retry and go around again.
            # NOTE(review): "next ROW" re-enters the loop with the same $row
            # although do_with_retries re-executed the SELECT -- confirm that
            # this is intended.
            if ( $success == 0 ) {
               $retries = -1;
               last ROW;
            }
            elsif ( $success == -1 ) {
               --$retries;
               next ROW;
            }
         }

         # DELETE comes after INSERT for safety.
         if ( $src->{plugin} ) {
            trace('before_delete', sub {
               $src->{plugin}->before_delete(row => $row);
            });
         }
         my $success = do_with_retries( 'deleting', sub {
            $del_row->execute(@{$row}[@del_slice])
         });
         # Same handling of the result as for the INSERT above.
         if ( $success == 0 ) {
            $retries = -1;
            last ROW;
         }
         elsif ( $success == -1 ) {
            --$retries;
            next ROW;
         }

      }

      # The row is done (archived or skipped): count it and reset the retry
      # budget.
      $now = time();
      ++$cnt;
      ++$txn_cnt;
      $retries = $opts{R};

      # Possibly flush the file and commit the insert and delete.
      commit() unless $opts{'commit-each'};

      # Report on progress.
      if ( $opts{P} && $cnt % $opts{P} == 0 ) {
         printf("%19s %7d %7d\n", ts($now), $now - $start, $cnt);
      }

      # Get the next row.
      if ( $get_sth->{Active} ) { # Fetch until exhausted
         $row = $get_sth->fetchrow_arrayref();
      }
      if ( !$row ) {
         # The batch is exhausted: commit it (--commit-each), sleep if asked
         # to, then fetch the next batch starting from the last row seen.
         commit(1) if $opts{'commit-each'};
         if( $opts{e} ) {
            trace('sleep', sub {
               sleep($opts{e});
            });
         }
         $get_sth = $get_next;

         trace('select', sub {
            $get_next->execute(@{$lastrow}[@asc_slice]);
         });

         # Only at a transaction boundary: remember the bind values so the
         # SELECT can be re-executed after a rollback.
         @beginning_of_txn = @{$lastrow}[@asc_slice] unless $txn_cnt;
         $row              = $get_sth->fetchrow_arrayref();
      }
   }

   # A transaction may still be open: force a final commit when running with
   # --txnsize or --commit-each.
   commit($opts{z} || $opts{'commit-each'});

   if ( $opts{f} && $file ) {
      close $file or die "Cannot close $opts{f}: $OS_ERROR\n";
   }

   # Final progress line.
   printf("%19s %7d %7d\n", ts($now), $now - $start, $cnt) if $opts{P};

   # Tear down the plugins, destination first.
   foreach my $table ( grep { $_ && $_->{plugin} } $dst, $src ) {
      trace('after_finish', sub {
         $table->{plugin}->after_finish();
      });
   }

   # Table maintenance, skipped when a signal asked us to stop.  The option's
   # value says which tables to work on: 's' for the source, 'd' for the
   # destination (if there is one).
   if ( $oktorun && ($opts{A} || $opts{O}) ) {
      my $action = $opts{A} || $opts{O};
      my $verb   = $opts{A} ? 'ANALYZE' : 'OPTIMIZE';
      my $maint  = $verb . ($opts{L} ? ' /*!40101 NO_WRITE_TO_BINLOG*/' : '');

      my $wants_src = $action =~ m/s/i;
      if ( $wants_src ) {
         trace($maint, sub {
            $src->{dbh}->do("$maint TABLE $src->{db_tbl}");
         });
      }

      my $wants_dst = $action =~ m/d/i;
      if ( $wants_dst && $dst ) {
         trace($maint, sub {
            $dst->{dbh}->do("$maint TABLE $dst->{db_tbl}");
         });
      }
   }

   # --statistics: one line per traced action, slowest first, followed by an
   # "other" line for the time not accounted for by any action.
   if ( $opts{statistics} ) {
      my $stat_total = Time::HiRes::gettimeofday() - $stat_start;

      # Regroup the flat "<action>_count" / "<action>_time" entries by action
      # and find the widest action name for the first column.
      my %by_action;
      my $maxlen = 0;
      foreach my $key ( keys %statistics ) {
         my ( $action, $type ) = $key =~ m/^(.*?)_(count|time)$/;
         $by_action{$action}->{$type}  = $statistics{$key};
         $by_action{$action}->{action} = $action;
         $maxlen = max($maxlen, length($action));
      }

      printf("%-${maxlen}s \%10s %10s %10s\n", qw(Action Count Time Pct));
      my $fmt = "%-${maxlen}s \%10d %10.4f %10.2f\n";

      my $accounted = 0;
      my @slowest_first
         = reverse sort { $a->{time} <=> $b->{time} } values %by_action;
      foreach my $stat ( @slowest_first ) {
         my $pct = $stat->{time} / $stat_total * 100;
         printf($fmt, @{$stat}{qw(action count time)}, $pct);
         $accounted += $stat->{time};
      }
      printf($fmt, 'other', 0, $stat_total - $accounted,
         ($stat_total - $accounted) / $stat_total * 100);
   }

   # --whyquit: say why the loop ended, unless it simply ran out of rows.
   # The first matching reason wins.
   if ( $opts{q} ) {
      my $reason
         = $retries < 0                 ? "Exiting because retries exceeded.\n"
         : ( $opts{m} && $now >= $end ) ? "Exiting because time exceeded.\n"
         : ( -f $opts{S} )              ? "Exiting because sentinel file $opts{S} exists.\n"
         :                                undef;
      print $reason if defined $reason;
   }
}

# ############################################################################
# Subroutines.
# ############################################################################

# Signal handler: announces the signal on STDERR and clears $oktorun so that
# the main loop stops and mk-archiver can exit gracefully.
sub finish {
   my $signal = $_[0];
   print STDERR "Exiting on SIG$signal.\n";
   $oktorun = 0;
}

# Flushes the buffered file (--buffer) and commits on the destination, then
# the source, connection.  Does so when $force is true, or when --txnsize is
# set, rows are pending, and the row count is a multiple of --txnsize.
# Works on the file-level globals so that this logic lives in one place.
sub commit {
   my ( $force ) = @_;

   my $txn_is_full = $opts{z} && $txn_cnt && $cnt % $opts{z} == 0;
   return unless $force || $txn_is_full;

   if ( $opts{b} && $file ) {
      trace('flush', sub {
         $file->flush or die "Cannot flush $opts{f}: $OS_ERROR\n";
      });
   }

   # Destination (if any) first, then source.
   foreach my $table ( ( $dst ? $dst : () ), $src ) {
      trace('commit', sub {
         $table->{dbh}->commit;
      });
   }

   $txn_cnt = 0;
}

# Runs $code through trace() under the label $doing, retrying on lock wait
# timeouts and deadlocks until it succeeds, the --retries budget runs out, or
# the error is one after which retrying the single statement makes no sense.
# Any other error is re-thrown.  This sub uses lots of global variables; it
# only exists to factor out some repeated code.
#
# Returns:
#    1  all is well
#    0  retries were exhausted; rolled back
#   -1  gave up without (further) retrying; rolled back
#
# On 0 and -1 it finishes the current fetch statement, rolls back the
# destination (if there is one) and the source, re-executes the fetch
# statement with @beginning_of_txn, and takes the rolled-back rows off $cnt.
sub do_with_retries {
   my ( $doing, $code ) = @_;
   my $retries = $opts{R};
   my $success = 0;

   RETRY:
   while ( !$success && $retries >= 0 ) {
      eval {
         trace($doing, $code);
         $success = 1;
      };
      if ( $EVAL_ERROR ) {
         if ( $EVAL_ERROR =~ m/Lock wait timeout exceeded|Deadlock found/ ) {
            if (
               (($opts{z} && $opts{z} > 1) || ($opts{'commit-each'} && $opts{l} > 1))  # More than one row per txn
               && $txn_cnt                                                             # Not first row
               && (!$can_retry || $EVAL_ERROR =~ m/Deadlock/)                          # And it's not retry-able
            ) {
               # The txn, which is more than 1 statement, was rolled back.
               last RETRY;
            }
            else {
               #  Only one statement had trouble, and the rest of the txn was not
               #  rolled back.  The statement can be retried.
               --$retries;
            }
         }
         else {
            # Not a lock problem: propagate the error in $EVAL_ERROR.
            die;
         }
      }
   }

   if ( $success != 1 ) {
      # Must throw away everything and start the transaction over.
      if ( $retries >= 0 ) {
         warn "Deadlock or non-retryable lock wait while $doing; rolling back $txn_cnt rows.\n";
         $success = -1;
      }
      else {
         warn "Exhausted retries while $doing; rolling back $txn_cnt rows.\n";
         $success = 0;
      }
      $get_sth->finish;
      # There is no destination when only purging or archiving to a file.
      # Dereferencing $dst unguarded would autovivify it and then die calling
      # a method on undef, so guard it the same way commit() does.
      if ( $dst ) {
         trace('rollback', sub {
            $dst->{dbh}->rollback;
         });
      }
      trace('rollback', sub {
         $src->{dbh}->rollback;
      });
      # I wish: $file->rollback
      trace('select', sub {
         $get_sth->execute(@beginning_of_txn);
      });
      $cnt -= $txn_cnt;
      $txn_cnt = 0;
   }
   return $success;
}

# Formats a row the same way SELECT INTO OUTFILE does by default.  This is
# described in the LOAD DATA INFILE section of the MySQL manual,
# http://dev.mysql.com/doc/refman/5.0/en/load-data.html
#
# Takes an arrayref of values and returns them as one tab-separated string:
# tabs, newlines and backslashes inside a value are prefixed with a backslash,
# and undef becomes \N.  The caller's array is left untouched.
sub escape {
   my ($row) = @_;
   return join("\t", map {
      # Work on a copy: $_ aliases the caller's element, and substituting on
      # it directly would rewrite the caller's data.
      my $val = $_;
      if ( defined $val ) {
         $val =~ s/([\t\n\\])/\\$1/g;  # Escape tabs etc
      }
      else {
         $val = '\N';                  # NULL = \N
      }
      $val;
   } @$row);
}

# Connects to the server described by the DSN hashref $info and returns the
# handle.  Prompts for the password when the DSN has none and --askpass was
# given.  Errors raise exceptions (RaiseError); AutoCommit is on only when
# neither --txnsize nor --commit-each is in effect.
sub get_dbh {
   my ( $info ) = @_;

   if ( $opts{askpass} && !defined $info->{p} ) {
      $info->{p} = OptionParser::prompt_noecho("Enter password: ");
   }

   my $in_txn = $opts{z} || $opts{'commit-each'};
   my $dbh    = DBI->connect(
      $dp->get_cxn_params($info),
      {  AutoCommit => !$in_txn,
         RaiseError => 1,
         PrintError => 0,
      },
   );

   # Set options that can enable removing data on the master and archiving it
   # on the slaves: the 'a' DSN part names a database to USE, and 'b' turns
   # binary logging off for this session.
   $dbh->do("USE $info->{a}")    if $info->{a};
   $dbh->do("SET SQL_LOG_BIN=0") if $info->{b};

   return $dbh;
}

# Formats an epoch time as a local-time timestamp, YYYY-MM-DDTHH:MM:SS.
sub ts {
   my ( $time ) = @_;
   # Year, month, day, hour, minute, second -- in output order.
   my @parts = ( localtime($time) )[ 5, 4, 3, 2, 1, 0 ];
   $parts[0] += 1900;
   $parts[1] += 1;
   return sprintf("%d-%02d-%02dT%02d:%02d:%02d", @parts);
}

# Reports the server's innodb_rollback_on_timeout setting as a boolean.
# Servers older than 5.0.13 are always reported as true.  Otherwise the result
# is false when the variable is not listed, and true when its value is
# anything other than 'OFF'.
sub get_irot {
   my ( $dbh ) = @_;

   if ( !$vp->version_ge($dbh, '5.0.13') ) {
      return 1;
   }

   my $rows = $dbh->selectall_arrayref(
      "show variables like 'innodb_rollback_on_timeout'",
      { Slice => {} });
   return 0 unless $rows;
   return 0 unless @$rows;
   return $rows->[0]->{Value} ne 'OFF';
}

# ############################################################################
# Documentation.
# ############################################################################

=pod

=head1 NAME

mk-archiver - Archive rows from a MySQL table into another table or a file.

=head1 SYNOPSIS

 mk-archiver --source h=oltp_server,D=test,t=tbl --dest h=olap_server \
    --file '/var/log/archive/%Y-%m-%d-%D.%t' --limit 1000 --commit-each

=head1 DESCRIPTION

mk-archiver is the tool I use to archive tables as described in
L<http://www.xaprb.com/blog/2006/05/02/how-to-write-efficient-archiving-and-purging-jobs-in-sql/>.
The goal is a low-impact, forward-only job to nibble old data out of the table
without impacting OLTP queries much.  You can insert the data into another
table, which need not be on the same server.  You can also write it to a file
in a format suitable for LOAD DATA INFILE.  Or you can do neither, in which
case it's just an incremental DELETE.

mk-archiver is extensible via a plugin mechanism.  You can inject your own
code to add advanced archiving logic that could be useful for archiving
dependent data, applying complex business rules, or building a data warehouse
during the archiving process.

You need to choose values carefully for some options.  The most important are
L<"--limit">, L<"--retries">, and L<"--txnsize">.

The strategy is to find the first row(s), then scan some index forward-only to
find more rows efficiently.  Each subsequent query should not scan the entire
table; it should seek into the index, then scan until it finds more archivable
rows.  Specifying the index with the 'i' part of the L<"--source"> argument can be
crucial for this; use L<"--test"> to examine the generated queries and be sure to
EXPLAIN them to see if they are efficient (most of the time you probably want
to scan the PRIMARY key, which is the default).  Even better, profile
mk-archiver with mk-query-profiler and make sure it is not scanning the
whole table every query.

You can disable the seek-then-scan optimizations partially or wholly with
L<"--noascend"> and L<"--ascendfirst">.  Sometimes this may be more efficient for
multi-column keys.

=head1 ERROR-HANDLING

mk-archiver tries to catch signals and exit gracefully; for example, if you
send it SIGINT (Ctrl-C on UNIX-ish systems), it will catch the signal, print a
message about the signal, and exit fairly normally.  It will not execute
L<"--analyze"> or L<"--optimize">, because these may take a long time to finish.
It will run all other code normally, including calling after_finish() on any
plugins (see L<"EXTENDING">).

In other words, a signal, if caught, will only break out of the main archiving
loop and skip optimize/analyze.

=head1 OPTIONS

Some options are negatable by specifying them in their long form with a --no
prefix.

=over

=item --analyze

Runs ANALYZE TABLE after finishing.  The argument is an arbitrary string.  If it
contains the letter 's', the source will be analyzed.  If it contains 'd', the
destination will be analyzed.  You can specify either or both.  For example, the
following will analyze both:

  --analyze=ds

See L<http://dev.mysql.com/doc/en/analyze-table.html> for details on ANALYZE
TABLE.

=item --ascend

Causes mk-archiver to optimize repeated SELECT queries so they seek into the
index where the previous query ended, then scan along it, rather than scanning
from the beginning of the table every time.  This is enabled by default because
it is generally a good strategy for repeated accesses.

Large, multiple-column indexes may cause the WHERE clause to be complex enough
that this could actually be less efficient.  Consider for example a four-column
PRIMARY KEY on (a, b, c, d).  The WHERE clause to start where the last query
ended is as follows:

   WHERE (a > ?)
      OR (a = ? AND b > ?)
      OR (a = ? AND b = ? AND c > ?)
      OR (a = ? AND b = ? AND c = ? AND d >= ?)

Populating the placeholders with values uses memory and CPU, adds network
traffic and parsing overhead, and may make the query harder for MySQL to
optimize.  A four-column key isn't a big deal, but a ten-column key in which
every column allows C<NULL> might be.

Ascending the index might not be necessary if you know you are simply removing
rows from the beginning of the table in chunks, but not leaving any holes, so
starting at the beginning of the table is actually the most efficient thing to
do.

See also L<"--ascendfirst">.  See L<"EXTENDING"> for a discussion of how this
interacts with plugins.

=item --ascendfirst

If you do want to use the ascending index optimization (see L<"--ascend">), but do
not want to incur the overhead of ascending a large multi-column index, you can
use this option to tell mk-archiver to ascend only the leftmost column of the
index.  This can provide a significant performance boost over not ascending the
index at all, while avoiding the cost of ascending the whole index.

See L<"EXTENDING"> for a discussion of how this interacts with plugins.

=item --askpass

Prompt for password for connections.

=item --buffer

Disables autoflushing to L<"--file"> and flushes L<"--file"> to disk only when a
transaction commits.  This typically means the file is block-flushed by the
operating system, so there may be some implicit flushes to disk between
commits as well.  The default is to flush L<"--file"> to disk after every row.

The danger is that a crash might cause lost data.

The performance increase I have seen from using L<"--buffer"> is around 5 to 15
percent.  Your mileage may vary.

=item --chkcols

Enabled by default; causes mk-archiver to check that the source and destination
tables have the same columns.  It does not check column order, data type, etc.
It just checks that all columns in the source exist in the destination and
vice versa.  If there are any differences, mk-archiver will exit with an
error.

=item --columns

Specify a comma-separated list of columns to fetch, write to the file, and
insert into the destination table.  If specified, mk-archiver ignores other
columns unless it needs to add them to the C<SELECT> statement for ascending an
index or deleting rows.  It fetches and uses these extra columns internally, but
does not write them to the file or to the destination table.  It I<does> pass
them to plugins.

See also L<"--pkonly">.

=item --commit-each

Commits transactions and flushes L<"--file"> after each set of rows has been
archived, before fetching the next set of rows, and before sleeping if L<"--sleep">
is specified.  Disables L<"--txnsize">; use L<"--limit"> to control the transaction size
with L<"--commit-each">.

This option is useful as a shortcut to make L<"--limit"> and L<"--txnsize"> the same
value, but more importantly it avoids transactions being held open while
searching for more rows.  For example, imagine you are archiving old rows from
the beginning of a very large table, with L<"--limit"> 1000 and L<"--txnsize"> 1000.
After some period of finding and archiving 1000 rows at a time, mk-archiver
finds the last 999 rows and archives them, then executes the next SELECT to find
more rows.  This scans the rest of the table, but never finds any more rows.  It
has held open a transaction for a very long time, only to determine it is
finished anyway.  You can use L<"--commit-each"> to avoid this.

=item --delayedins

Adds the DELAYED modifier to INSERT or REPLACE statements.  See
L<http://dev.mysql.com/doc/en/insert.html> for details.

=item --dest

This item specifies a table into which mk-archiver will insert rows
archived from L<"--source">.  It uses the same key=val argument format as
L<"--source">.  Most missing values default to the same values as
L<"--source">, so you don't have to repeat options that are the same in
L<"--source"> and L<"--dest">.  Use the L<"--help"> option to see which values
are copied from L<"--source">.

=item --file

Filename to write archived rows to.  A subset of MySQL's DATE_FORMAT()
formatting codes is allowed in the filename, as follows:

   %d    Day of the month, numeric (01..31)
   %H    Hour (00..23)
   %i    Minutes, numeric (00..59)
   %m    Month, numeric (01..12)
   %s    Seconds (00..59)
   %Y    Year, numeric, four digits

You can use the following extra format codes too:

   %D    Database name
   %t    Table name

Example:

   --file '/var/log/archive/%Y-%m-%d-%D.%t'

The file's contents are in the same format used by SELECT INTO OUTFILE, as
documented in the MySQL manual: rows terminated by newlines, columns
terminated by tabs, NULL values are represented by \N, and special
characters are escaped by \.  This lets you reload a file with LOAD DATA
INFILE's default settings.

If you want a column header at the top of the file, see L<"--header">.  The file is
auto-flushed by default; see L<"--buffer">.

=item --forupdate

Adds the FOR UPDATE modifier to SELECT statements.  For details, see
L<http://dev.mysql.com/doc/en/innodb-locking-reads.html>.

=item --header

Writes column names as the first line in the file given by L<"--file">.  If the
file exists, does not write headers; this keeps the file loadable with LOAD
DATA INFILE in case you append more output to it.

=item --help

Displays a help message.

=item --hpselect

Adds the HIGH_PRIORITY modifier to SELECT statements.  See
L<http://dev.mysql.com/doc/en/select.html> for details.

=item --ignore

Causes INSERTs into L<"--dest"> to be INSERT IGNORE.

=item --limit

Limits the number of rows returned by the SELECT statements that retrieve rows
to archive.  Default is one row.  It may be more efficient to increase the
limit, but be careful if you are archiving sparsely, skipping over many rows;
this can potentially cause more contention with other queries, depending on the
storage engine, transaction isolation level, and options such as L<"--forupdate">.

=item --local

Adds the NO_WRITE_TO_BINLOG modifier to ANALYZE and OPTIMIZE queries.  See
L<"--analyze"> for details.

=item --lpdel

Adds the LOW_PRIORITY modifier to DELETE statements.  See
L<http://dev.mysql.com/doc/en/delete.html> for details.

=item --lpins

Adds the LOW_PRIORITY modifier to INSERT or REPLACE statements.  See
L<http://dev.mysql.com/doc/en/insert.html> for details.

=item --optimize

Runs OPTIMIZE TABLE after finishing.  See L<"--analyze"> for the option syntax and
L<http://dev.mysql.com/doc/en/optimize-table.html> for details on
OPTIMIZE TABLE.

=item --pkonly

A shortcut for specifying L<"--columns"> with the primary key columns.  This is
more efficient if you just want to purge rows; it avoids fetching the entire row
when only the primary key columns are needed for C<DELETE> statements.  See also
L<"--purge">.

=item --progress

Prints current time, elapsed time, and rows archived every X rows.

=item --purge

Allows archiving without a L<"--file"> or L<"--dest"> argument, which is effectively a
purge since the rows are just deleted.

If you just want to purge rows, consider specifying the table's primary key
columns with L<"--pkonly">.  This will prevent fetching all columns from the
server for no reason.

=item --quickdel

Adds the QUICK modifier to DELETE statements.  See
L<http://dev.mysql.com/doc/en/delete.html> for details.  As stated in the
documentation, in some cases it may be faster to use DELETE QUICK followed by
OPTIMIZE TABLE.  You can use L<"--optimize"> for this.

=item --replace

Causes INSERTs into L<"--dest"> to be written as REPLACE.

=item --retries

Specifies the number of times mk-archiver should retry when there is an
InnoDB lock wait timeout or deadlock.  When retries are exhausted,
mk-archiver will exit with an error.

Consider carefully what you want to happen when you are archiving between a
mixture of transactional and non-transactional storage engines.  The INSERT to
L<"--dest"> and DELETE from L<"--source"> are on separate connections, so they do not
actually participate in the same transaction even if they're on the same
server.  However, mk-archiver implements simple distributed transactions in
code, so commits and rollbacks should happen as desired across the two
connections.

At this time I have not written any code to handle errors with transactional
storage engines other than InnoDB.  Request that feature if you need it.

=item --safeautoinc

Adds an extra WHERE clause to prevent mk-archiver from removing the newest
row when ascending a single-column AUTO_INCREMENT key.  This guards against
re-using AUTO_INCREMENT values if the server restarts, and is enabled by
default.

The extra WHERE clause contains the maximum value of the auto-increment column
as of the beginning of the archive or purge job.  If new rows are inserted while
mk-archiver is running, it will not see them.

=item --sentinel

The presence of the file specified by L<"--sentinel"> will cause mk-archiver to
stop archiving and exit.  The default is /tmp/mk-archiver-sentinel.  You
might find this handy to stop cron jobs gracefully if necessary.  See also
L<"--stop">.

=item --sharelock

Adds the LOCK IN SHARE MODE modifier to SELECT statements.  For details, see
L<http://dev.mysql.com/doc/en/innodb-locking-reads.html>.

=item --skipfkchk

Disables foreign key checks with SET FOREIGN_KEY_CHECKS=0.

=item --sleep

Specifies how long to sleep between SELECT statements.  Default is not to
sleep at all.  Transactions are NOT committed, and the L<"--file"> file is NOT
flushed, before sleeping.  See L<"--txnsize"> to control that.

If L<"--commit-each"> is specified, committing and flushing happens before sleeping.

=item --source

Specifies a table to archive from.  This argument is specially formatted as a
key=value,key=value string.  Keys are a single letter.  Most options control
how mk-archiver connects to MySQL:

   KEY MEANING
   === =======
   h   Connect to host
   P   Port number to use for connection
   S   Socket file to use for connection
   u   User for login if not current user
   p   Password to use when connecting
   F   Only read default options from the given file

The following options select a table to archive:

   KEY MEANING
   === =======
   D   Database to archive
   t   Table to archive
   i   Index to use

The following options specify pluggable actions, which an external Perl module
can provide:

   KEY MEANING
   === =======
   m   Package name of an external Perl module (see EXTENDING).

The following actions set other options:

   KEY MEANING
   === =======
   a   Database to set as the connection's default with USE
   b   Disable binary logging with SET SQL_LOG_BIN=0

The only required part is the table; other parts may be read from various
places in the environment (such as options files).  Here is an example:

   --source h=my_server,D=my_database,t=my_tbl

The 'i' part deserves special mention.  This tells mk-archiver which index
it should scan to archive.  This appears in a FORCE INDEX or USE INDEX hint in
the SELECT statements used to fetch archivable rows.  If you don't specify
anything, mk-archiver will auto-discover a good index, preferring a C<PRIMARY
KEY> if one exists.  In my experience this usually works well, so most of the
time you can probably just omit the 'i' part.

The index is used to optimize repeated accesses to the table; mk-archiver
remembers the last row it retrieves from each SELECT statement, and uses it to
construct a WHERE clause, using the columns in the specified index, that should
allow MySQL to start the next SELECT where the last one ended, rather than
potentially scanning from the beginning of the table with each successive
SELECT.  If you are using external plugins, please see L<"EXTENDING"> for a
discussion of how they interact with ascending indexes.

The 'a' and 'b' options allow you to control how statements flow through the
binary log.  If you specify the 'b' option, binary logging will be disabled on
the specified connection.  If you specify the 'a' option, the connection will
C<USE> the specified database, which you can use to prevent slaves from
executing the binary log events with C<--replicate-ignore-db> options.  These
two options can be used as different methods to achieve the same goal: archive
data off the master, but leave it on the slave.  For example, you can run a
purge job on the master and prevent it from happening on the slave using your
method of choice.

=item --statistics

Causes mk-archiver to collect and print timing statistics about what it does.
The statistics look like this:

 Action         Count       Time        Pct
 commit            10     0.1079      88.27
 select             5     0.0047       3.87
 deleting           4     0.0028       2.29
 inserting          4     0.0028       2.28
 other              0     0.0040       3.29

The columns are the action, the total number of times that action was timed, the
total time it took, and the percent of the program's total runtime.  The rows
are sorted in order of descending total time.  The last row is the rest of the
time not explicitly attributed to anything.  Actions will vary depending on
command-line options.

This option requires the standard Time::HiRes module, which is part of core Perl
on reasonably new Perl releases.

=item --stop

Causes mk-archiver to create the sentinel file specified by L<"--sentinel"> and
exit.  This should have the effect of stopping all running instances which are
watching the same sentinel file.

=item --test

Causes mk-archiver to exit after printing the filename and SQL statements
it will use.

=item --time

Causes mk-archiver to stop after the specified time has elapsed.  The
argument can have a suffix of s, m, h, or d, indicating seconds, minutes, hours,
or days.  The number is interpreted as seconds if there is no suffix.

=item --txnsize

Specifies the size, in number of rows, of each transaction.  Default is one
row.  Zero disables transactions altogether.  After mk-archiver processes
this many rows, it commits both the L<"--source"> and the L<"--dest"> if given, and
flushes the file given by L<"--file">.

This parameter is critical to performance.  If you are archiving from a live
server, which for example is doing heavy OLTP work, you need to choose a good
balance between transaction size and commit overhead.  Larger transactions
create the possibility of more lock contention and deadlocks, but smaller
transactions cause more frequent commit overhead, which can be significant.  To
give an idea, on a small test set I worked with while writing mk-archiver, a
value of 500 caused archiving to take about 2 seconds per 1000 rows on an
otherwise quiet MySQL instance on my desktop machine, archiving to disk and to
another table.  Disabling transactions with a value of zero, which turns on
autocommit, dropped performance to 38 seconds per thousand rows.

If you are not archiving from or to a transactional storage engine, you may
want to disable transactions so mk-archiver doesn't try to commit.

=item --version

Output version information and exit.

=item --where

Specifies a WHERE clause to limit which rows are archived.  Do not include the
word WHERE.  You may need to quote the argument to prevent your shell from
interpreting it.  For example:

   --where 'ts < current_date - interval 90 day'

For safety, L<"--where"> is required.  If you do not require a WHERE clause, use
L<"--where"> 1=1.

=item --whyquit

Causes mk-archiver to print a message if it exits for any reason other than
running out of rows to archive.  This can be useful if you have a cron job with
L<"--time"> specified, for example, and you want to be sure mk-archiver is
finishing before running out of time.

=back

=head1 EXTENDING

mk-archiver is extensible by plugging in external Perl modules to handle
some logic and/or actions.  You can specify a module for both the L<"--source"> and
the L<"--dest">, with the 'm' part of the specification.  For example:

   --source D=test,t=test1,m=My::Module1 --dest m=My::Module2,t=test2

This will cause mk-archiver to load the My::Module1 and My::Module2 packages,
create instances of them, and then make calls to them during the archiving
process.  The module must provide this interface:

=over

=item new(dbh => $dbh, db => $db_name, tbl => $tbl_name)

The plugin's constructor is passed a reference to the database handle, the
database name, and table name.  The plugin is created just after mk-archiver
opens the connection, and before it examines the table given in the arguments.
This gives the plugin a chance to create and populate temporary tables, or do
other setup work.

=item before_begin(cols => \@cols, allcols => \@allcols)

This method is called just before mk-archiver begins iterating through rows
and archiving them, but after it does all other setup work (examining table
structures, designing SQL queries, and so on).  This is the only time
mk-archiver tells the plugin column names for the rows it will pass the
plugin while archiving.

The C<cols> argument is the column names the user requested to be archived,
either by default or by the L<"--columns"> option.  The C<allcols> argument is
the list of column names for every row mk-archiver will fetch from the source
table.  It may fetch more columns than the user requested, because it needs some
columns for its own use.  When subsequent plugin functions receive a row, it is
the full row containing all the extra columns, if any, added to the end.

=item is_archivable(row => \@row)

This method is called for each row to determine whether it is archivable.  This
only applies to L<"--source">.  The argument is the row itself, as an arrayref.  If
the method returns true, the row will be archived; otherwise it will be skipped.

Skipping a row adds complications for non-unique indexes.  Normally
mk-archiver uses a WHERE clause designed to target the last processed row as
the place to start the scan for the next SELECT statement.  If you have skipped
the row by returning false from is_archivable(), mk-archiver could get into
an infinite loop because the row still exists.  Therefore, when you specify a
plugin for the L<"--source"> argument, mk-archiver will change its WHERE clause
slightly.  Instead of starting at "greater than or equal to" the last processed
row, it will start "strictly greater than."  This will work fine on unique
indexes such as primary keys, but it may skip rows (leave holes) on non-unique
indexes or when ascending only the first column of an index.

=item before_delete(row => \@row)

This method is called for each row just before it is deleted.  This only applies
to L<"--source">.  This is a good place for you to handle dependencies, such as
deleting things that are foreign-keyed to the row you are about to delete.  You
could also use this to recursively archive all dependent tables.

=item before_insert(row => \@row)

This method is called for each row just before it is inserted.  This only
applies to L<"--dest">.  You could use this to insert the row into multiple tables,
perhaps with an ON DUPLICATE KEY UPDATE clause to build summary tables in a data
warehouse.

=item custom_sth(row => \@row, sql => $sql)

This method is called just before inserting the row, but after
L<"before_insert()">.  It allows the plugin to specify a different C<INSERT>
statement if desired.  The return value (if any) should be a DBI statement
handle.  The C<sql> parameter is the SQL text used to prepare the default
C<INSERT> statement.

If no value is returned, the default C<INSERT> statement handle is used.

This method only applies to the plugin specified for L<"--dest">, so if your
plugin isn't doing what you expect, check that you've specified it for the
destination and not the source.

=item after_finish()

This method is called after mk-archiver exits the archiving loop, commits all
database handles, closes L<"--file">, and prints the final statistics, but before
mk-archiver runs ANALYZE or OPTIMIZE (see L<"--analyze"> and L<"--optimize">).

=back

If you specify a plugin for both L<"--source"> and L<"--dest">, mk-archiver constructs,
calls before_begin(), and calls after_finish() on the two plugins in the order
L<"--source">, L<"--dest">.

mk-archiver assumes it controls transactions, and that the plugin will NOT
commit or roll back the database handle.  The database handle passed to the
plugin's constructor is the same handle mk-archiver uses itself.  Remember
that L<"--source"> and L<"--dest"> are separate handles.

A sample module might look like this:

   package My::Module;
   
   sub new {
      my ( $class, %args ) = @_;
      return bless(\%args, $class);
   }
   
   sub before_begin {
      my ( $self, %args ) = @_;
      # Save column names for later
      $self->{cols} = $args{cols};
   }
   
   sub is_archivable {
      my ( $self, %args ) = @_;
      # Do some advanced logic with $args{row}
      return 1;
   }
   
   sub before_delete {} # Take no action
   sub before_insert {} # Take no action
   sub custom_sth    {} # Take no action
   sub after_finish  {} # Take no action
   
   1;

=head1 SYSTEM REQUIREMENTS

You need Perl, DBI, DBD::mysql, and some core packages that ought to be
installed in any reasonably new version of Perl.

=head1 OUTPUT

If you specify L<"--progress">, the output is a header row, plus status output
at intervals.  Each row in the status output lists the current date and time,
how many seconds mk-archiver has been running, and how many rows it has
archived.

=head1 BUGS

Please use the Sourceforge bug tracker, forums, and mailing lists to request
support or report bugs: L<http://sourceforge.net/projects/maatkit/>.

=head1 ACKNOWLEDGEMENTS

Thanks to the following people, and apologies to anyone I've omitted:

Andrew O'Brien,

=head1 COPYRIGHT, LICENSE AND WARRANTY

This program is copyright (c) 2007 Baron Schwartz.  Feedback and improvements
are welcome.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA  02111-1307  USA.

=head1 AUTHOR

Baron Schwartz

=head1 VERSION

This manual page documents Ver 1.0.3 Distrib 1316 $Revision: 1308 $.

=cut
