#!/usr/bin/perl

# This program synchronizes data efficiently between two MySQL tables, which
# can be on different servers.
#
# This program is copyright (c) 2007 Baron Schwartz.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.

use strict;
use warnings FATAL => 'all';

# ###########################################################################
# OptionParser package 1178
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package OptionParser;

use Getopt::Long;
use List::Util qw(max);
use English qw(-no_match_vars);

# Build an OptionParser from a list of option specs and instructions.
#
# Each element of @opts is either
#   * a hashref describing one option, with at least
#       s => Getopt::Long-style spec (e.g. 'columns|c=h'), and
#       d => human-readable description; or
#   * a plain string "instruction" naming options, recognized when it contains
#     "at least one", "mutually exclusive", "one and only one" or "default to".
#
# While parsing, the following keys are added to every option hashref:
#   k => key used in the parsed-values hash (short name if any, else long name)
#   l => long name
#   t => short name (undef when there is none)
#   n => true when the spec contains '!' (negatable)
#   g => group key, defaulting to 'o'
#   y => custom type letter (one of m d H h A a z); the spec itself is rewritten
#        to '=s' so Getopt::Long reads it as a plain string
#   r => true when the description contains the word "required"
#
# Options for --help and --version are always prepended.  Dies on duplicate
# option names and on instructions that reference unknown options.
sub new {
   my ( $class, @opts ) = @_;
   my %key_seen;
   my %long_seen;
   my %key_for;
   my %defaults;
   my @mutex;
   my @atleast1;
   my %long_for;
   my %disables;
   my %copyfrom;
   unshift @opts,
      { s => 'help',    d => 'Show this help message' },
      { s => 'version', d => 'Output version information and exit' };
   foreach my $opt ( @opts ) {
      if ( ref $opt ) {
         # Split 'long|short<type>' into its long and (optional) short names.
         my ( $long, $short ) = $opt->{s} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
         $opt->{k} = $short || $long;
         # %key_for maps long name => key; %long_for maps both the key and the
         # long name => long name, so either form can be looked up.
         $key_for{$long} = $opt->{k};
         $long_for{$opt->{k}} = $long;
         $long_for{$long} = $long;
         $opt->{l} = $long;
         die "Duplicate option $opt->{k}" if $key_seen{$opt->{k}}++;
         die "Duplicate long option $opt->{l}" if $long_seen{$opt->{l}}++;
         $opt->{t} = $short;
         $opt->{n} = $opt->{s} =~ m/!/;
         $opt->{g} ||= 'o';
         # Custom types are not known to Getopt::Long; remember the letter and
         # hand Getopt::Long a string option instead.
         if ( (my ($y) = $opt->{s} =~ m/=([mdHhAaz])/) ) {
            $opt->{y} = $y;
            $opt->{s} =~ s/=./=s/;
         }
         $opt->{r} = $opt->{d} =~ m/required/;
         # "default VALUE" in the description (up to a closing paren) sets the
         # default; a bare "default" means the option defaults to 1.
         if ( (my ($def) = $opt->{d} =~ m/default(?: ([^)]+))?/) ) {
            $defaults{$opt->{k}} = defined $def ? $def : 1;
         }
         # "disables --foo ..." in the description: names are resolved to long
         # names after all options have been seen (see the loop below).
         if ( (my ($dis) = $opt->{d} =~ m/(disables .*)/) ) {
            $disables{$opt->{k}} = [ $class->get_participants($dis) ];
         }
      }
      else { # It's an instruction.

         if ( $opt =~ m/at least one|mutually exclusive|one and only one/ ) {
            # Participants are stored by long name.
            my @participants = map {
                  die "No such option '$_' in $opt" unless $long_for{$_};
                  $long_for{$_};
               } $class->get_participants($opt);
            # "one and only one" is recorded as both mutex and at-least-one.
            if ( $opt =~ m/mutually exclusive|one and only one/ ) {
               push @mutex, \@participants;
            }
            if ( $opt =~ m/at least one|one and only one/ ) {
               push @atleast1, \@participants;
            }
         }
         elsif ( $opt =~ m/default to/ ) {
            # "--a default to --b": first participant copies from the second.
            # Participants are stored by key here, not by long name.
            my @participants = map {
                  die "No such option '$_' in $opt" unless $long_for{$_};
                  $key_for{$_};
               } $class->get_participants($opt);
            $copyfrom{$participants[0]} = $participants[1];
         }

      }
   }

   # Now that every option is known, normalize the disabled-option lists to
   # long names and reject references to options that do not exist.
   foreach my $dis ( keys %disables ) {
      $disables{$dis} = [ map {
            die "No such option '$_' while processing $dis" unless $long_for{$_};
            $long_for{$_};
         } @{$disables{$dis}} ];
   }

   return bless {
      specs => [ grep { ref $_ } @opts ],   # option hashrefs only
      notes => [],                          # error messages added by error()
      instr => [ grep { !ref $_ } @opts ],  # instruction strings only
      mutex => \@mutex,
      defaults => \%defaults,
      long_for => \%long_for,
      atleast1 => \@atleast1,
      disables => \%disables,
      key_for  => \%key_for,
      copyfrom => \%copyfrom,
      strict   => 1,                        # parse() rejects leftover @ARGV
      groups   => [ { k => 'o', d => 'Options' } ],
   }, $class;
}

# Extract the option names mentioned in a piece of text.  A token written as
# --name yields the long name; a token written with a single dash (for example
# -ab) yields each of its characters as a separate short name.  Names are
# returned in the order they appear in $str.
sub get_participants {
   my ( $self, $str ) = @_;
   my @names = map {
      my ($long_name) = m/--(.+)/;
      defined $long_name ? $long_name : m/([^-])/g;
   } $str =~ m/(--?[\w-]+)/g;
   return @names;
}

# Parse @ARGV with Getopt::Long and return a hash of option key => value.
#
# %defaults (key => value) overrides the defaults gathered from the option
# descriptions in new(); it dies if a key does not correspond to an option.
#
# Problems with the command line are not thrown: they are recorded with
# error() so the caller can report them later.  --version prints the version
# line and exits immediately.
sub parse {
   my ( $self, %defaults ) = @_;
   my @specs = @{$self->{specs}};
   my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);

   my %opt_seen;
   my %vals = %{$self->{defaults}};
   @vals{keys %defaults} = values %defaults;
   # Make sure every option has an entry (undef if no default) so that
   # GetOptions has a scalar to write into.
   foreach my $spec ( @specs ) {
      $vals{$spec->{k}} = undef unless defined $vals{$spec->{k}};
      $opt_seen{$spec->{k}} = 1;
   }

   foreach my $key ( keys %defaults ) {
      die "Cannot set default for non-existent option '$key'\n"
         unless $opt_seen{$key};
   }

   Getopt::Long::Configure('no_ignore_case', 'bundling');
   GetOptions( map { $_->{s} => \$vals{$_->{k}} } @specs )
      or $self->error('Error parsing options');

   if ( $vals{version} ) {
      my $prog = $self->prog;
      printf("%s  Ver %s Distrib %s Changeset %s\n",
         $prog, $main::VERSION, $main::DISTRIB, $main::SVN_REV);
      exit(0);
   }

   # In strict mode anything Getopt::Long left in @ARGV is an error.
   if ( @ARGV && $self->{strict} ) {
      $self->error("Unrecognized command-line options @ARGV");
   }

   # An option that has a value clears every option it "disables".
   foreach my $dis ( grep { defined $vals{$_} } keys %{$self->{disables}} ) {
      my @disses = map { $self->{key_for}->{$_} } @{$self->{disables}->{$dis}};
      @vals{@disses} = map { undef } @disses;
   }

   foreach my $spec ( grep { $_->{r} } @specs ) {
      if ( !defined $vals{$spec->{k}} ) {
         $self->error("Required option --$spec->{l} must be specified");
      }
   }

   # Mutually exclusive groups: at most one member may have a value.
   foreach my $mutex ( @{$self->{mutex}} ) {
      my @set = grep { defined $vals{$self->{key_for}->{$_}} } @$mutex;
      if ( @set > 1 ) {
         my $note = join(', ',
            map { "--$self->{long_for}->{$_}" }
                @{$mutex}[ 0 .. scalar(@$mutex) - 2] );
         $note .= " and --$self->{long_for}->{$mutex->[-1]}"
               . " are mutually exclusive.";
         $self->error($note);
      }
   }

   # At-least-one groups: at least one member must have a value.
   foreach my $required ( @{$self->{atleast1}} ) {
      my @set = grep { defined $vals{$self->{key_for}->{$_}} } @$required;
      if ( !@set ) {
         my $note = join(', ',
            map { "--$self->{long_for}->{$_}" }
                @{$required}[ 0 .. scalar(@$required) - 2] );
         $note .= " or --$self->{long_for}->{$required->[-1]}";
         $self->error("Specify at least one of $note");
      }
   }

   # Convert values of the custom scalar types (only for options that were
   # given a value).
   foreach my $spec ( grep { $_->{y} && defined $vals{$_->{k}} } @specs ) {
      my $val = $vals{$spec->{k}};
      if ( $spec->{y} eq 'm' ) {
         # Time value: digits followed by a mandatory s/m/h/d suffix, converted
         # to seconds.
         # NOTE(review): the pattern is anchored only at the end, so leading
         # junk such as "abc10s" is accepted as 10 seconds.
         my ( $num, $suffix ) = $val =~ m/(\d+)([smhd])$/;
         if ( $suffix ) {
            $val = $suffix eq 's' ? $num            # Seconds
                 : $suffix eq 'm' ? $num * 60       # Minutes
                 : $suffix eq 'h' ? $num * 3600     # Hours
                 :                  $num * 86400;   # Days
            $vals{$spec->{k}} = $val;
         }
         else {
            $self->error("Invalid --$spec->{l} argument");
         }
      }
      elsif ( $spec->{y} eq 'd' ) {
         # DSN value, parsed by the object stored in $self->{dsn}.  If a
         # "default to" instruction named a source option, that option's value
         # is parsed first and passed as the second argument.
         # NOTE(review): if $self->{dsn} is this file's DSNParser, as_string()
         # renders the 'p' part as '...' for an already-parsed DSN, so a copied
         # password would be lost here -- confirm before relying on it.
         my $from_key = $self->{copyfrom}->{$spec->{k}};
         my $default = {};
         if ( $from_key ) {
            $default = $self->{dsn}->parse($self->{dsn}->as_string($vals{$from_key}));
         }
         $vals{$spec->{k}} = $self->{dsn}->parse($val, $default);
      }
      elsif ( $spec->{y} eq 'z' ) {
         # Size value: optional sign, digits, optional k/M/G multiplier.  The
         # sign, if any, is kept as a prefix on the resulting number.
         my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
         if ( defined $num ) {
            if ( $factor ) {
               $num *= $factor_for{$factor};
            }
            $vals{$spec->{k}} = ($pre || '') . $num;
         }
         else {
            $self->error("Invalid --$spec->{l} argument");
         }
      }
   }

   # Convert comma-separated list types.  Upper-case H and A always produce a
   # (possibly empty) hashref/arrayref; lower-case h and a only when a value
   # was given, otherwise the value stays undef.
   foreach my $spec ( grep { $_->{y} } @specs ) {
      my $val = $vals{$spec->{k}};
      if ( $spec->{y} eq 'H' || (defined $val && $spec->{y} eq 'h') ) {
         $vals{$spec->{k}} = { map { $_ => 1 } split(',', ($val || '')) };
      }
      elsif ( $spec->{y} eq 'A' || (defined $val && $spec->{y} eq 'a') ) {
         $vals{$spec->{k}} = [ split(',', ($val || '')) ];
      }
   }

   return %vals;
}

# Record a command-line error: flag the parser as being in an error state and
# append $note to the list of messages that errors() will report.
sub error {
   my $self = shift;
   my ( $note ) = @_;
   $self->{__error__} = 1;
   push @{ $self->{notes} }, $note;
}

# Return the program's short name: the trailing run of letters, dots and
# dashes in $PROGRAM_NAME.  If the name does not end in such a run, the whole
# of $PROGRAM_NAME is returned instead.
sub prog {
   if ( $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/ ) {
      return $1;
   }
   return $PROGRAM_NAME;
}

# Return the one-line "Usage: PROGRAM ARGS" synopsis.  ARGS comes from
# $self->{prompt} and falls back to '<options>'.
sub prompt {
   my $self = shift;
   return 'Usage: '
      . $self->prog
      . ' '
      . ( $self->{prompt} || '<options>' )
      . "\n";
}

# Return the program description: program name, $self->{descr} (if any) and a
# pointer to --help/perldoc, wrapped into lines of at most 80 characters with
# trailing spaces removed from each line.
sub descr {
   my $self = shift;
   my $prog = $self->prog;
   my $text = join('',
      $prog, ' ', ( $self->{descr} || '' ),
      "  For more details, please use the --help option, ",
      "or try 'perldoc $prog' for complete documentation.",
   );
   # Break on whitespace so that no chunk is longer than 80 characters.
   my @chunks = $text =~ m/(.{0,80})(?:\s+|$)/g;
   my $descr  = join("\n", @chunks);
   $descr =~ s/ +$//mg;
   return $descr;
}

# Print the help text or the accumulated command-line errors, then exit.
#
# %opts is the hash returned by parse().  When --help was given, the full
# usage text is printed and the program exits with status 0.  Otherwise, if
# error() has been called, the error report is printed and the program exits
# with status 1 so that shells and scripts can detect the failure.  When
# neither applies the sub simply returns.
sub usage_or_errors {
   my ( $self, %opts ) = @_;
   if ( $opts{help} ) {
      print $self->usage(%opts);
      exit(0);
   }
   elsif ( $self->{__error__} ) {
      print $self->errors();
      # Invalid arguments are a failure; do not report success to the caller.
      exit(1);
   }
   return;
}

# Build the error report: the usage synopsis, a bulleted list of the recorded
# error notes (omitted when there are none), and the program description.
sub errors {
   my $self  = shift;
   my @parts = ( $self->prompt(), "\n" );
   my @notes = @{ $self->{notes} };
   if ( @notes ) {
      push @parts,
         join("\n  * ", 'Errors in command-line arguments:', @notes),
         "\n";
   }
   push @parts, "\n", $self->descr();
   return join('', @parts);
}

# Build the full --help text.
#
# %vals is the hash returned by parse(); it is used for the final section that
# shows each option's value after processing.  The text consists of the
# description, the usage synopsis, one section per option group, the
# instruction strings, the DSN help (when $self->{dsn} is set) and the
# processed values.
sub usage {
   my ( $self, %vals ) = @_;
   my @specs = @{$self->{specs}};

   # Width of the longest long option; negatable options get 4 extra columns
   # for the "[no]" prefix.
   my $maxl = max(map { length($_->{l}) + ($_->{n} ? 4 : 0)} @specs);

   # Same, but only over options that also have a short form.
   my $maxs = max(0,
      map { length($_->{l}) + ($_->{n} ? 4 : 0)}
      grep { $_->{t} } @specs);

   # Left column must fit the longest long option, or the longest long option
   # plus the 3 characters of " -x".  The right column gets what is left of an
   # 80-column line.
   my $lcol = max($maxl, ($maxs + 3));
   my $rcol = 80 - $lcol - 6;
   my $rpad = ' ' x ( 80 - $rcol );

   $maxs = max($lcol - 3, $maxs);

   my $usage = $self->descr() . "\n" . $self->prompt();
   foreach my $g ( @{$self->{groups}} ) {
      $usage .= "\n$g->{d}:\n";
      foreach my $spec ( sort { $a->{l} cmp $b->{l} } grep { $_->{g} eq $g->{k} } @specs ) {
         my $long  = $spec->{n} ? "[no]$spec->{l}" : $spec->{l};
         my $short = $spec->{t};
         my $desc  = $spec->{d};
         # Wrap the description to the right column; continuation lines are
         # indented with $rpad.
         $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g);
         $desc =~ s/ +$//mg;
         if ( $short ) {
            $usage .= sprintf("  --%-${maxs}s -%s  %s\n", $long, $short, $desc);
         }
         else {
            $usage .= sprintf("  --%-${lcol}s  %s\n", $long, $desc);
         }
      }
   }

   if ( (my @instr = @{$self->{instr}}) ) {
      $usage .= join("\n", map { "  $_" } @instr) . "\n";
   }
   if ( $self->{dsn} ) {
      $usage .= "\n" . $self->{dsn}->usage();
   }
   $usage .= "\nOptions and values after processing arguments:\n";
   foreach my $spec ( sort { $a->{l} cmp $b->{l} } @specs ) {
      my $val   = $vals{$spec->{k}};
      my $type  = $spec->{y} || '';
      # An option is shown as a boolean when its Getopt spec is just a name,
      # an optional one-character short form and an optional '!'.
      my $bool  = $spec->{s} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
      $val      = $bool                     ? ( $val ? 'TRUE' : 'FALSE' )
                : !defined $val             ? '(No value)'
                : $type eq 'd'              ? $self->{dsn}->as_string($val)
                : $type =~ m/H|h/           ? join(',', sort keys %$val)
                : $type =~ m/A|a/           ? join(',', @$val)
                :                             $val;
      $usage .= sprintf("  --%-${lcol}s  %s\n", $spec->{l}, $val);
   }
   return $usage;
}

# Print $prompt and read one line from STDIN with terminal echo turned off
# (for passwords).  May be called as a plain function or as a method.
#
# Returns the line without its trailing newline.  Dies if Term::ReadKey cannot
# be loaded, if the terminal mode cannot be changed, or if STDIN is at EOF.
# The terminal is put back into normal mode on failure as well as on success,
# so an error can no longer leave the user's terminal with echo disabled.
sub prompt_noecho {
   shift @_ if ref $_[0] eq __PACKAGE__;
   my ( $prompt ) = @_;
   local $OUTPUT_AUTOFLUSH = 1;
   print $prompt;
   my $response;
   my $ok = eval {
      require Term::ReadKey;
      Term::ReadKey::ReadMode('noecho');
      $response = <STDIN>;
      die "no input (EOF on STDIN)\n" unless defined $response;
      chomp $response;
      Term::ReadKey::ReadMode('normal');
      print "\n";
      1;
   };
   if ( !$ok ) {
      # Save the error first: the cleanup eval below resets $EVAL_ERROR.
      my $err = $EVAL_ERROR || 'unknown error';
      # Best effort; this also fails harmlessly if Term::ReadKey never loaded.
      eval { Term::ReadKey::ReadMode('normal') };
      die "Cannot read response; is Term::ReadKey installed? $err";
   }
   return $response;
}

# Append one or more option groups (hashrefs with k => key, d => heading) to
# the list of groups that usage() prints.
sub groups {
   my $self = shift;
   push @{ $self->{groups} }, @_;
}

1;

# ###########################################################################
# End OptionParser package
# ###########################################################################

# ###########################################################################
# Quoter package 1149
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package Quoter;

# Construct a Quoter.  The object carries no state.
sub new {
   my $class = shift;
   my $self  = {};
   return bless $self, $class;
}

# Quote one or more identifier parts for use in SQL: each part has its
# backticks doubled and is wrapped in backticks, and the parts are joined with
# dots (so quote('db', 'tbl') gives `db`.`tbl`).  The caller's values are not
# modified.
sub quote {
   my ( $self, @vals ) = @_;
   my @quoted;
   foreach my $name ( @vals ) {
      ( my $escaped = $name ) =~ s/`/``/g;
      push @quoted, '`' . $escaped . '`';
   }
   return join('.', @quoted);
}

1;

# ###########################################################################
# End Quoter package
# ###########################################################################

# ###########################################################################
# DSNParser package 1216
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package DSNParser;

# Construct a DSNParser.
#
# The parser starts with the built-in single-letter DSN parts D, F, h, p, P, S
# and u.  Each element of @opts is a hashref that adds a part or overrides a
# built-in one:
#   key  => single-letter part name
#   desc => description shown by usage()
#   dsn  => DBI connection attribute name used by get_cxn_params() (optional)
#   copy => true if parse() may copy the value from a previous DSN
#
# The dsn property is stored along with desc and copy.  When an override of a
# built-in part does not supply dsn, the built-in attribute name is kept, so
# re-describing a part such as 'h' does not break connection strings.
sub new {
   my ( $class, @opts ) = @_;
   my $self = {
      opts => {
         D => {
            desc => 'Database to use',
            dsn  => 'database',
            copy => 1,
         },
         F => {
            desc => 'Only read default options from the given file',
            dsn  => 'mysql_read_default_file',
            copy => 1,
         },
         h => {
            desc => 'Connect to host',
            dsn  => 'host',
            copy => 1,
         },
         p => {
            desc => 'Password to use when connecting',
            dsn  => 'password',
            copy => 1,
         },
         P => {
            desc => 'Port number to use for connection',
            dsn  => 'port',
            copy => 1,
         },
         S => {
            desc => 'Socket file to use for connection',
            dsn  => 'mysql_socket',
            copy => 1,
         },
         u => {
            desc => 'User for login if not current user',
            dsn  => 'user',
            copy => 1,
         },
      },
   };
   foreach my $opt ( @opts ) {
      my $key     = $opt->{key};
      my $builtin = $self->{opts}->{$key} || {};
      $self->{opts}->{$key} = {
         desc => $opt->{desc},
         dsn  => defined $opt->{dsn} ? $opt->{dsn} : $builtin->{dsn},
         copy => $opt->{copy},
      };
   }
   return bless $self, $class;
}

# Combined getter/setter for a parser property.  With a value argument (even
# undef) the property is set; in both cases the current value is returned.
sub prop {
   my $self = shift;
   my $prop = shift;
   # Anything left over means a value was passed explicitly.
   $self->{$prop} = shift if @_;
   return $self->{$prop};
}

# Parse a DSN string of the form key=value[,key=value...] into a hashref with
# one entry per known part.
#
#   $dsn      - the string to parse; a false value makes the sub return nothing
#   $prev     - optional hashref; a part missing from $dsn is taken from here
#               when that part is marked copy
#   $defaults - optional hashref; last-resort values for parts still undefined
#
# If $dsn contains no '=' and the 'autokey' property is set, the whole string
# is treated as the value of that key.  Dies on parts that are not known and
# on parts listed in the 'required' property that end up without a true value.
sub parse {
   my ( $self, $dsn, $prev, $defaults ) = @_;
   return if !$dsn;
   $prev     = {} unless $prev;
   $defaults = {} unless $defaults;
   my $known = $self->{opts};

   # A bareword DSN becomes "<autokey>=<word>".
   my $autokey = $dsn !~ m/=/ && $self->prop('autokey');
   if ( $autokey ) {
      $dsn = $autokey . "=$dsn";
   }

   # Pieces that do not look like x=value are ignored.
   my %given;
   foreach my $piece ( split(/,/, $dsn) ) {
      my ( $part, $value ) = $piece =~ m/^(.)=(.*)$/;
      $given{$part} = $value if defined $part;
   }

   # Precedence for each known part: given value, then previous DSN (copyable
   # parts only), then defaults.
   my %vals;
   foreach my $key ( keys %$known ) {
      my $value = $given{$key};
      if ( !defined $value && defined $prev->{$key} && $known->{$key}->{copy} ) {
         $value = $prev->{$key};
      }
      $value = $defaults->{$key} if !defined $value;
      $vals{$key} = $value;
   }

   foreach my $key ( keys %given ) {
      next if exists $known->{$key};
      die "Unrecognized DSN part '$key' in '$dsn'\n";
   }

   my $required = $self->prop('required');
   if ( $required ) {
      foreach my $key ( keys %$required ) {
         next if $vals{$key};
         die "Missing DSN part '$key' in '$dsn'\n";
      }
   }
   return \%vals;
}

# Turn a parsed DSN hashref back into a key=value,... string.  Only parts that
# are known to this parser and have a defined value are included, in sorted
# key order, and the password ('p') is shown as '...'.  A non-reference
# argument is returned unchanged.
sub as_string {
   my ( $self, $dsn ) = @_;
   return $dsn unless ref $dsn;
   my @pairs;
   foreach my $part ( sort keys %$dsn ) {
      next unless defined $dsn->{$part} && $self->{opts}->{$part};
      my $shown = $part eq 'p' ? '...' : $dsn->{$part};
      push @pairs, "$part=" . $shown;
   }
   return join(',', @pairs);
}

# Return the help text describing the DSN syntax: a table of every known part
# (sorted by key) with its copy flag and description, plus a note about the
# bareword shortcut when the 'autokey' property is set.
sub usage {
   my ( $self ) = @_;
   my @lines = (
      "DSN syntax is key=value[,key=value...]  Allowable DSN keys:\n",
      "  KEY  COPY  MEANING\n",
      "  ===  ====  =============================================\n",
   );
   my $opts = $self->{opts};
   foreach my $key ( sort keys %$opts ) {
      my $copy = $opts->{$key}->{copy} ? 'yes   ' : 'no    ';
      my $desc = $opts->{$key}->{desc} || '[No description]';
      push @lines, "  $key    " . $copy . $desc . "\n";
   }
   my $key = $self->prop('autokey');
   if ( $key ) {
      push @lines,
         "  If the DSN is a bareword, the word is treated as the '$key' key.\n";
   }
   return join('', @lines);
}

# Build the argument list for DBI->connect from a parsed DSN hashref.
# Returns ( $dbi_dsn, $user, $password ).
#
# When the 'dbidriver' property is 'Pg' a DBI:Pg string is built from the
# database, host and port.  Otherwise a DBI:mysql string is built from the
# database, defaults file, host, port and socket, and the [mysql] group of the
# defaults file is requested.
sub get_cxn_params {
   my ( $self, $info ) = @_;
   my %opts     = %{$self->{opts}};
   my $driver   = $self->prop('dbidriver') || '';
   my $database = $info->{D} || '';

   # Render the given parts as attr=value pairs, skipping undefined values.
   my $attrs_for = sub {
      return join(';',
         map  { "$opts{$_}->{dsn}=$info->{$_}" }
         grep { defined $info->{$_} }
         @_);
   };

   my $dsn;
   if ( $driver eq 'Pg' ) {
      $dsn = 'DBI:Pg:dbname=' . $database . ';' . $attrs_for->(qw(h P));
   }
   else {
      $dsn = 'DBI:mysql:' . $database . ';' . $attrs_for->(qw(F h P S))
         . ';mysql_read_default_group=mysql';
   }
   return ($dsn, $info->{u}, $info->{p});
}

1;

# ###########################################################################
# End DSNParser package
# ###########################################################################

# ###########################################################################
# VersionParser package 1149
# ###########################################################################
use strict;
use warnings FATAL => 'all';

package VersionParser;

# Construct a VersionParser.  The object starts empty; version_ge() stores
# parsed server versions in it.
sub new {
   my $class = shift;
   my $self  = {};
   return bless $self, $class;
}

# Convert a version string such as '5.0.45-Debian_1ubuntu3-log' into a
# zero-padded, string-comparable form ('005000045').
#
# Only the first three numbers in the string are used.  Any further numbers
# (for example from a distribution suffix) are ignored, and missing components
# count as 0, so '5.1' becomes '005001000'.  Passing exactly three values to
# sprintf avoids its "Redundant argument" and "Missing argument" warnings,
# which are fatal in this file.
sub parse {
   my ( $self, $str ) = @_;
   my @numbers = defined $str ? $str =~ m/(\d+)/g : ();
   my @parts   = map { defined $_ ? $_ : 0 } @numbers[ 0 .. 2 ];
   return sprintf('%03d%03d%03d', @parts);
}

# Return true if the server behind $dbh runs at least version $target (a
# string such as '4.1.0').  The server's version is fetched with
# SELECT VERSION() the first time and cached in the object under the
# stringified handle.
sub version_ge {
   my ( $self, $dbh, $target ) = @_;
   if ( !$self->{$dbh} ) {
      my @row = $dbh->selectrow_array('SELECT VERSION()');
      $self->{$dbh} = $self->parse(@row);
   }
   my $wanted = $self->parse($target);
   return $self->{$dbh} ge $wanted;
}

1;

# ###########################################################################
# End VersionParser package
# ###########################################################################

package main;

use DBI;
use English qw(-no_match_vars);
use List::Util qw(sum max min);
use POSIX qw(ceil);

# Version information printed by --version.
our $VERSION = '0.9.9';
our $DISTRIB = '1316';
# Extract the number from the Subversion keyword.  The match has to run in
# list context: written as "m/(\d+)/g || 0" it is evaluated in scalar context
# and yields 1 (true) rather than the captured revision number.
our $SVN_REV = do {
   my ( $rev ) = q$Revision: 1308 $ =~ m/(\d+)/;
   sprintf("%d", $rev || 0);
};

$OUTPUT_AUTOFLUSH = 1;

# Current local time; $mon is zero-based and $year counts from 1900.
my ( $sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst ) = localtime(time);

# TODO: unify timestamp format: add a 0 to the timestamp.
# TODO: must add a slice for rows greater/less in drilldown mode.

# TODO: write a helper to analyze indexes and suggest a drilldown strategy.
# TODO: CRC32 can be used for checksums.
# TODO: grouping strategy: first chars of char column.
# TODO: save memory by using more efficient bind methods from DBI.
# TODO: autodetect index violation and rewrite all UPD to DEL/INS
# TODO: check the results of the following query:
# set @CRC := null;
# 
# select 
#    CONCAT(
#       LPAD(CONV(BIT_XOR(
#          CAST(CONV(LEFT(@CRC, 16), 16, 10) AS UNSIGNED)
#          ), 10, 16), 16, '0'),
#       LPAD(CONV(BIT_XOR(
#          CAST(CONV(RIGHT(@CRC:=md5(9), 16), 16, 10) AS UNSIGNED)
#          ), 10, 16), 16, '0')
#    ) AS crc;


# ############################################################################
# Get configuration information.
# ############################################################################

# Generate a quasi-random string that's constant in a given day.  It is built
# by running crypt() over the current date and replacing each run of non-word
# characters with an underscore, and serves as the default for --prefix.
my $prefix = '__cmp' . crypt(sprintf('%d%02d%02d', $year + 1900, $mon, $mday), '00');
$prefix =~ s/\W+/_/g;

# Command-line options.  Each entry has s => Getopt::Long spec and
# d => description.  OptionParser also reads the description: "(default X)" or
# "default X" sets the option's default value, and a bare "(default)" makes
# the option default to true.
my @opt_spec = (
   { s => 'algorithm|a=s',    d => 'Algorithm to use (default topdown)'},
   { s => 'analyze|A!',       d => 'Analyze (find/fix/print) in bottom-up algorithm (default)' },
   { s => 'askpass',          d => 'Prompt for password for connections' },
   { s => 'branchfactor|B=i', d => 'Branch factor for bottom-up algorithm (default 128)' },
   { s => 'bufferresults',    d => 'Fetch all rows from MySQL before comparing' },
   { s => 'build|U!',         d => 'Build tables for bottom-up algorithm (default)' },
   { s => 'cleanup|C!',       d => 'Clean up scratch tables for bottom-up algorithm (default)' },
   { s => 'collate|O!',       d => 'Use MySQL to compare strings if necessary (default)' },
   { s => 'columns|c=h',      d => 'Comma-separated column list' },
   { s => 'debug|b',          d => 'Print debugging output to STDOUT' },
   { s => 'deleteinsert|l',   d => 'Convert all UPDATES to DELETE and INSERT' },
   { s => 'queries|q',        d => 'Make debugging output executable SQL' },
   { s => 'drilldown|d=s',    d => 'Drilldown groupings for top-down algorithm' },
   { s => 'engine|E=s',       d => 'Storage engine for bottom-up tables; default InnoDB' },
   { s => 'execute|x',        d => 'Execute queries to sync tables' },
   { s => 'forupdate|F',      d => 'Use SELECT FOR UPDATE or LOCK IN SHARE MODE for checksums' },
   { s => 'function|f=s',     d => 'Cryptographic hash function (SHA1, MD5...)' },
   { s => 'lock|k!',          d => 'Lock tables when beginning work' },
   { s => 'maxcost|m=i',      d => 'Maximum rowcount before aborting' },
   { s => 'onlydo|o=s',       d => 'Only do INS/UPD/DEL (default ins,upd,del)' },
   { s => 'prefix|P=s',       d => "Tablename prefix for bottom-up algorithm (default $prefix)" },
   { s => 'print|p',          d => 'Print all sync queries to STDOUT' },
   { s => 'separator|e=s',    d => 'Separator for CONCAT_WS (default #)' },
   { s => 'singletxn|1',      d => 'Do in a single transaction' },
   { s => 'size|S=i',         d => 'Table size in bottom-up algorithm, not usually needed' },
   { s => 'skipbinlog',       d => 'Do not log to the binary log' },
   { s => 'skipforeignkey|K', d => 'Turn off foreign key checks' },
   { s => 'skipuniquekey',    d => 'Turn off UNIQUE key checks' },
   { s => 'strategy|s=s',     d => 'Query strategy when syncing (r=replace, s=ins/upd/del) (default s)' },
   { s => 'synctomaster|r',   d => 'Sync a slave to its master. Implies --strategy=r and --wait 60'},
   { s => 'temp|T',           d => 'Use temporary tables in bottom-up algorithm' },
   { s => 'timeoutok|t',      d => 'Keep going if --wait fails' },
   { s => 'verbose|v+',       d => 'Explain differences found; specify up to three times; default 0' },
   { s => 'verify|V!',        d => 'Verify checksum compatibility across servers (default)' },
   { s => 'wait|w=i',         d => 'Make slave wait for master pos (implies --lock)' },
   { s => 'where|W=s',        d => 'WHERE clause to restrict syncing to part of the table' },
);

# Helper objects used throughout the script.
my $q          = Quoter->new();
my $vp         = VersionParser->new();

# DSNs for this tool carry three parts on top of the standard connection
# parts: the database (D), the table (t) and the index to drill into (i).
# All three may be copied from SOURCE to DEST.
my $dsn_parser = DSNParser->new(
   map { +{ key => $_->[0], desc => $_->[1], dsn => $_->[2], copy => 1 } }
   (
      [ 'D', 'Database containing the table to be synced', 'database' ],
      [ 't', 'Table to be synced',                         undef      ],
      [ 'i', 'Index to drill into when syncing',           undef      ],
   )
);
# Every DSN must name a table.
$dsn_parser->prop('required', { t => 1 });

# Non-strict parsing leaves the SOURCE and DEST arguments in @ARGV for the
# code that follows.
my $opt_parser = OptionParser->new(@opt_spec);
@{$opt_parser}{qw(dsn strict prompt descr)} = (
   $dsn_parser,
   0,
   '[OPTION].. [SOURCE] DEST',
   'finds and resolves data differences between two MySQL tables.  SOURCE '
   . 'and DEST are DSNs.  Values for DEST default to the values for SOURCE.',
);
my %opts = $opt_parser->parse();

# Derive options that are implied by other options.  --wait implies --lock
# unless --lock/--nolock was given explicitly, and --synctomaster implies
# --wait 60 unless --wait was given.
# NOTE(review): --lock is derived from --wait before the implied --wait 60 is
# applied, so --synctomaster on its own does not turn locking on -- confirm
# that this order is intended.
$opts{k}    = $opts{w} if !defined $opts{k};
$opts{w}    = 60       if $opts{r} && !defined $opts{w};

# TODO: when --columns is specified, and --synctomaster is given, you end up
# with REPLACE INTO (partial columns) which will destroy data.  Yet maybe we
# don't want to update the whole row, just columns we know to be bad (the DBA
# knows this externally to mk-table-sync).  This should turn into INSERT
# and UPDATE.  Or REPLACE should be disallowed when not all columns are
# included.

# --temp becomes the SQL keyword itself (or an empty string).
$opts{T} = $opts{T} ? 'TEMPORARY' : '';
# Without an explicit --cleanup/--nocleanup, clean up unless temporary tables
# are in use.
if ( !defined $opts{C} ) {
   $opts{C} = !$opts{T};
}
# --synctomaster forces the REPLACE strategy.
if ( $opts{r} ) {
   $opts{s} = 'r';
}
# Turn the --onlydo list into a lowercase alternation of its three-letter
# words, e.g. 'ins,upd,del' becomes 'ins|upd|del'.
$opts{o} = lc(join('|', $opts{o} =~ m/(\w{3})/g)); # TODO this is ugly

# The remaining arguments are the SOURCE and DEST DSNs; DEST inherits the
# copyable parts of SOURCE.  parse() returns nothing for a missing argument.
my $source = $dsn_parser->parse(shift(@ARGV), {}     );
my $dest   = $dsn_parser->parse(shift(@ARGV), $source);

# Only the first character of --strategy is checked.
if ( $opts{s} && $opts{s} !~ m/^(s|r)/ ) {
   $opt_parser->error('--strategy must be either r or s');
}

if ( !$source || (!$dest && !$opts{r}) ) {
   $opt_parser->error('SOURCE is required, and DEST is required unless --synctomaster is specified');
}

# Prints help or the collected errors and exits if either applies.
$opt_parser->usage_or_errors(%opts);

# ############################################################################
# Lookup tables
# ############################################################################

# Dispatch table: value of --algorithm => sub that implements it.
my %code_for_algorithm = (
   topdown  => \&topdown,
   bottomup => \&bottomup,
);

# ############################################################################
# Do the work.
# ############################################################################
# Status passed to exit() at the end of the main script.
my $exit_status = 0;

if ( !$dest ) {
   # The user specified --synctomaster and no master, so the argument is the
   # slave, and master info comes from SHOW SLAVE STATUS.
   $dest = $source;
   undef $source;
   $dest->{dbh} = get_dbh($dest);

   # I want column names to be lowercased for just this one statement.
   $dest->{dbh}->{FetchHashKeyName} = 'NAME_lc';
   my $sth = $dest->{dbh}->prepare('SHOW SLAVE STATUS', { FetchHashKeyName => 'NAME_lc' } );
   $dest->{dbh}->{FetchHashKeyName} = 'NAME';

   $sth->execute();
   my $status = $sth->fetchrow_hashref();

   # SHOW SLAVE STATUS returns no row on a server that is not configured as a
   # slave.  Fail with a clear message instead of an uninitialized-value error
   # while building the master's DSN below.
   if ( !$status
      || !defined $status->{master_host}
      || !defined $status->{master_port} )
   {
      die "Cannot determine the master from SHOW SLAVE STATUS; "
         . "is the destination a replication slave?\n";
   }

   # The master's DSN inherits everything else (user, database, table...)
   # from the slave's DSN.
   my $spec   = "h=$status->{master_host},P=$status->{master_port}";
   $source    = $dsn_parser->parse($spec, $dest);
}

# When run interactively (STDIN and STDOUT are both terminals), ask for
# confirmation before syncing between tables with different names.  Any
# answer that does not contain a 'y' (in either case) aborts with exit
# status 1.
if ( $source->{t} ne $dest->{t} && -t STDIN && -t STDOUT ) {
   print "Source and destination table have different names.  Continue?  y/n: ";
   my $answer = <STDIN>;
   if ( $answer !~ m/y/i ) {
      exit(1);
   }
}

# Connect to both servers and gather the table information needed later.
foreach my $table ( $source, $dest ) {

   # It is necessary to get a $dbh to run the sync queries, and a $dbh to use
   # while the main $dbh is in use (for checking collated string comparison).
   $table->{dbh}  ||= get_dbh($table);
   $table->{dbh2} ||= get_dbh($table);

   # Build the quoted `db`.`tbl` name from whichever of D and t are set,
   # first removing a leading/trailing backtick the user may have typed.
   # NOTE(review): $_ is aliased to the hash values inside map, so the
   # backticks appear to be stripped from $table->{D} and $table->{t}
   # themselves as well -- confirm.
   $table->{db_tbl} =
      join('.',
      map  { $q->quote($_) }
      grep { $_ }
      map  { $_ =~ s/(^`|`$)//g; $_; }
      grep { $_ }
      ( $table->{D}, $table->{t} ));
   $table->{info} = get_tbl_struct($table);
   $table->{cols} = col_list(@{$table->{info}->{cols}});
}

# Decide what hash function to use.  The function given with --function is
# tried on the source server; if that query fails or returns a false value,
# MD5 is used instead (with a warning), and if MD5 itself cannot be used the
# program dies.
my $func = $opts{f} || 'MD5';
{
   my $res = eval { $source->{dbh}->do("SELECT $func('test-string')") };
   if ( !$res ) {
      my $err = '';
      # Pull the server's own message out of the DBI error text, if present.
      if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) {
         $err = qq{ because "$1"};
      }
      if ( lc $func eq 'md5' ) {  # There's nothing to fall back to
         die "Cryptographic function $func cannot be used$err\n";
      }
      warn "Cryptographic function $func cannot be used$err; falling back to MD5()\n";
      $func = 'MD5';
   }
}

# Length of the hash function's output on the source server, but never less
# than 16.
my $crc_wid   = max(16, length(($source->{dbh}->selectrow_array("SELECT $func('a')"))[0]));
my $crc_slice = 0;
my $sanity    = '';

# Figure out which slice in a sliced BIT_XOR checksum should have the actual
# concat-columns-and-checksum, and which should just get variable references.
{
   my $desired = uc $source->{dbh}->selectall_arrayref("SELECT $func('a')")->[0]->[0];
   my $result  = '';
   my $start   = 1;

   do { # Try different positions till sliced result equals non-sliced.
      $source->{dbh}->do('SET @crc := NULL, @cnt := 0');
      # make_slices() is defined elsewhere in the file; presumably it reads
      # $crc_slice to decide where the expression goes -- TODO confirm.
      my $slices = make_slices("\@crc := $func('a')");
      # This "my" declares a new $sanity local to the loop body; the
      # file-level $sanity declared above is not modified here.
      my $sanity = "SELECT CONCAT($slices) as test from (select null) as t";
      $result    = $source->{dbh}->selectall_arrayref($sanity)->[0]->[0];
      if ( $result ne $desired ) {
         # Move on to the next 16-character slice.
         $start += 16;
         ++$crc_slice;
      }
   } while ( $start < $crc_wid && $result ne $desired );
}

# --verify (on by default): run the same checksum query on both servers and
# refuse to continue if the results differ.
if ( $opts{V} ) {
   # Verify that CONCAT_WS is compatible across all servers.  On older versions
   # of MySQL it skips both empty strings and NULL; on newer just NULL.
   my @verify_sums;
   foreach my $host ( $source, $dest ) {
      my $cks = $host->{dbh}->selectall_arrayref("SELECT MD5(CONCAT_WS(',', '1', ''))")->[0]->[0];
      # Keep the server version next to the checksum for the error message.
      push @verify_sums, { ver => $host->{dbh}->{mysql_serverinfo}, sum => $cks };
   }
   # unique() is defined elsewhere in the file; more than one distinct
   # checksum means the servers disagree.
   if ( unique(map { $_->{sum} } @verify_sums ) > 1 ) {
      die "The servers have incompatible versions.  They return different\n"
         . "checksum values for the same query, and cannot be compared.  This\n"
         . "behavior changed in MySQL 4.0.14.  Here is info on each host:\n\n"
         . join("\n", map { "$_->{sum}    $_->{ver}" } @verify_sums)
         . "\n\nYou can disable this check with --noverify.\n";
   }
}

# User wants us to lock for consistency.  But only lock on source initially;
# might have to wait for the slave to catch up before locking on the dest.
if ( $opts{k} ) {
   # With --synctomaster the source is locked in WRITE mode, otherwise READ.
   my $lock_mode = $opts{r} ? 'WRITE' : 'READ';
   if ( $opts{v} ) {
      print "-- Locking table on source in $lock_mode mode\n";
   }
   my $query = "LOCK TABLES $source->{db_tbl} $lock_mode";
   debug_print($query);
   $source->{dbh}->do($query);
   if ( $opts{v} ) {
      print "-- Acquired table lock on source in $lock_mode mode\n";
   }
}

# --wait: read the source's current binlog position and make the destination
# wait, for up to --wait seconds, until it has replicated that far.
if ( $opts{w} ) {
   my $query = 'SHOW MASTER STATUS';
   debug_print($query);
   my $ms = $source->{dbh}->selectrow_hashref($query);
   if ( $opts{v} ) {
      print "-- Waiting $opts{w} sec for $ms->{File}, $ms->{Position}\n";
   }
   $query = "SELECT MASTER_POS_WAIT('$ms->{File}', $ms->{Position}, $opts{w})";
   debug_print($query);
   my $stat = $dest->{dbh}->selectall_arrayref($query)->[0]->[0];
   $stat = 'NULL' unless defined $stat;
   # && binds tighter than ||: a NULL result is always fatal, while a negative
   # result is fatal only without --timeoutok.
   if ( $stat eq 'NULL' || $stat < 0 && !$opts{t} ) {
      die "MASTER_POS_WAIT failed: $stat";
   }
   if ( $opts{v} ) {
      print "-- Result of waiting: $stat\n";
   }
}

# Now lock on dest if desired (see above).  In this case don't lock at all on
# destination if it's a replication slave, or the replication thread will be
# locked out... we assume there is nothing else writing on the slave.
if ( $opts{k} && !$opts{r} ) {
   # The destination only needs a WRITE lock when queries will be executed.
   my $lock_mode = $opts{x} ? 'WRITE' : 'READ';
   if ( $lock_mode eq 'WRITE' && $opts{a} eq 'bottomup' ) {
      die "Sorry, you can't lock tables with --execute in bottomup mode "
      . "because of mk-table-sync bug #1819744.  Run with --no-lock "
      . "or use --algorithm=topdown instead.\n";
   }
   if ( $opts{v} ) {
      print "-- Locking table on destination in $lock_mode mode\n";
   }
   my $query = "LOCK TABLES $dest->{db_tbl} $lock_mode";
   debug_print($query);
   $dest->{dbh}->do($query);
   if ( $opts{v} ) {
      print "-- Acquired $lock_mode table lock on destination\n";
   }
}

# Will be used later by the different algorithms.
my %collation_for = ();
my %collation_sth = ();
my %charset_for   = ();
# With --collate on a server of version 4.1.0 or newer, remember which
# character set each collation belongs to.
if ( $opts{O} && $vp->version_ge($source->{dbh}, '4.1.0') ) {
   my @collations = @{$source->{dbh}->selectall_arrayref('SHOW COLLATION', {Slice => {}})};
   foreach my $collation ( @collations ) {
      $charset_for{$collation->{Collation}} = $collation->{Charset};
   }
}

# Set some SQL options as requested.  Each statement is wrapped in a
# version-conditional comment and is issued on both connections.
if ( $opts{skipbinlog} ) {
   $source->{dbh}->do("/*!32316 SET SQL_LOG_BIN=0 */");
   $dest->{dbh}->do("/*!32316 SET SQL_LOG_BIN=0 */");
}
if ( $opts{skipuniquekey} ) {
   $source->{dbh}->do("/*!40014 SET UNIQUE_CHECKS=0 */");
   $dest->{dbh}->do("/*!40014 SET UNIQUE_CHECKS=0 */");
}
if ( $opts{K} ) {
   $source->{dbh}->do("/*!40014 SET FOREIGN_KEY_CHECKS=0 */");
   $dest->{dbh}->do("/*!40014 SET FOREIGN_KEY_CHECKS=0 */");
}

# In verbose mode, say what is about to be synced.
if ( $opts{v} ) {
   print '-- Syncing ' . $dsn_parser->as_string($source)
      . ' with ' . $dsn_parser->as_string($dest) . "\n";
}

# Run the algorithm chosen with --algorithm, or stop if it is not one of the
# known ones.
die "Sorry, algorithm '$opts{a}' unknown; try one of "
   . join('|', keys %code_for_algorithm)
   unless $code_for_algorithm{$opts{a}};
$code_for_algorithm{$opts{a}}->();

# Keep it from complaining
foreach my $sth ( values %collation_sth ) {
   $sth->finish();
}

# Commit if locking, FOR UPDATE or a single transaction was in use, then
# close the main connection to each server.
foreach my $table ( $source, $dest ) {
   my $dbh = $table->{dbh};
   if ( $opts{k} || $opts{F} || $opts{1} ) {
      $dbh->commit;
   }
   $dbh->disconnect;
}

exit $exit_status;

# ############################################################################
# Top-down algorithm
# ############################################################################

# Top-down sync.  Checksums both tables grouped by progressively finer
# groupings, drills into the groups whose checksum or row count differs, and
# collects WHERE-criteria hashes for rows/groups to DELETE, UPDATE and INSERT.
# The collected changes are applied (or printed) through
# td_handle_data_change once the whole comparison is done.
#
# Takes no arguments; works on the file-level $source, $dest and %opts.
# Returns 0 early when the --maxcost limit ($opts{m}) is exceeded below the
# top level; otherwise the return value is that of the final
# td_handle_data_change call and is not meaningful.
sub topdown {

   # Design a grouping strategy: user-defined, then finally primary key.
   # Element 0 is the chosen key's columns; the user's $opts{d} columns are
   # appended in reverse, so the first user column is the top (last) level.
   my @groupings = { cols => $source->{info}->{keys}->{$source->{i}} };
   if ( $opts{d} ) {
      push @groupings, reverse map { { cols => [$_] } } $opts{d} =~ m/(\w+)/g;
   }

   # Now that the keys are known, find out the collation on the source.
   my @grp_cols = unique(map { @{$_->{cols}} } @groupings);
   find_collation($source->{dbh}, $source->{db_tbl}, \@grp_cols);

   # Array indices
   my ($WHERE, $LEVEL, $COUNT) = (0, 1, 2);

   # Queue of groups to drill into on next iteration.  Managing as a queue, not
   # stack, is breadth-first search, not depth-first.
   # Each entry is [ where-hash, grouping level, row count of the group ].
   my @to_examine = [ {}, $#groupings, 0 ];

   # Lists of rows that differ in the target tables.
   my (@to_update, @to_delete, @to_insert);

   # Counters
   my %count = map { $_ => 0 } qw(ins upd del bad);

   do {
      my $work  = shift @to_examine;
      my $level = $work->[$LEVEL];
      my $where = $work->[$WHERE];

      my $grouping = $groupings[$level]->{cols};
      my $src_sth  = td_fetch_level($source, $level, $grouping, $where, 'source');
      my $dst_sth  = td_fetch_level($dest,   $level, $grouping, $where, 'dest');

      my ($sr, $dr);       # Source row, dest row
      my %this_level = ( rows => 0, cnt => 0 );

      # TODO: keep track of what the last change was, and accumulate adjacent
      # INSERT and UPDATE statements into IN() lists as I go.

      # The statements fetch in order, so use a 'merge' algorithm of advancing
      # after rows match.  This is essentially a FULL OUTER JOIN.
      MERGE:
      while ( 1 ) { # Exit this loop via 'last'

         # Refill whichever side was consumed on the previous pass.
         if ( !$sr && $src_sth->{Active} ) {
            $sr = $src_sth->fetchrow_hashref;
         }
         if ( !$dr && $dst_sth->{Active} ) {
            $dr = $dst_sth->fetchrow_hashref;
         }

         # Compare the rows if both exist.  The result is used several places.
         my $cmp;
         if ( $sr && $dr ) {
            $cmp = key_cmp($source, $sr, $dr, $grouping);
         }

         # Both cursors are exhausted.
         last MERGE unless $sr || $dr;

         my %new_where = %$where;  # Will get more cols added and used below.

         # If the current row is the "same row" on both sides...
         if ( $sr && $dr && defined $cmp && $cmp == 0 ) {
            # The "same" row descends from parents that differ.
            # Above level 0 a differing row count also marks the group as bad.
            if ( $sr->{__crc} ne $dr->{__crc} || ($level && ($sr->{__cnt} != $dr->{__cnt})) ) {
               @new_where{@$grouping} = @{$sr}{@$grouping};
               if ( $level ) {
                  # Special case: push $level - 1 because this will be processed
                  # later.
                  push @to_examine, [ \%new_where, $level - 1, $sr->{__cnt} ];
                  $this_level{cnt}++;
                  $this_level{rows} += $sr->{__cnt};
                  if ( $level && $opts{v} > 2 ) {
                     printf("-- Level %1d: CHECK  group of  %5d rows %s\n",
                        $level, $sr->{__cnt}, make_where_clause($source->{dbh}, \%new_where));
                  }
               }
               else {
                  # Level 0 is the key itself: this single row needs an UPDATE.
                  push @to_update, \%new_where;
                  $count{upd}++;
                  $count{bad}++;
                  if ( $opts{v} > 2 ) {
                     printf("-- Level %1d: UPDATE              1 row  %s\n",
                        $level, make_where_clause($source->{dbh}, \%new_where));
                  }
               }
            }
            $sr = $dr = undef;
         }

         # The row in the source doesn't exist at the destination
         elsif ( !$dr || ( defined $cmp && $cmp < 0 ) ) {
            @new_where{@$grouping} = @{$sr}{@$grouping};
            push @to_insert, \%new_where;
            # Level-0 rows carry no __cnt, so they count as one row.
            $count{ins} += $sr->{__cnt} || 1;
            $count{bad} += $sr->{__cnt} || 1;
            if ( $level && $opts{v} > 2 ) {
               printf("-- Level %1d: INSERT group of  %5d rows %s\n",
                  $level, $sr->{__cnt}, make_where_clause($source->{dbh}, \%new_where));
            }
            $sr = undef;
         }

         # Symmetric to the above
         elsif ( !$sr || ( defined $cmp && $cmp > 0 ) ) {
            @new_where{@$grouping} = @{$dr}{@$grouping};
            push @to_delete, \%new_where;
            $count{del} += $dr->{__cnt} || 1;
            $count{bad} += $dr->{__cnt} || 1;
            if ( $level && $opts{v} > 2 ) {
               printf("-- Level %1d: DELETE group of  %5d rows %s\n",
                  $level, $dr->{__cnt}, make_where_clause($source->{dbh}, \%new_where));
            }
            $dr = undef;
         }

         else {
            die "This code should never have run.  This is a bug.";
         }

         # Give up once more bad rows than --maxcost have been found, except
         # while still on the top grouping level.
         if ( $level < $#groupings && $opts{m} && $opts{m} < $count{bad}) {
            print "-- Level $level halt: $count{bad} rows, --maxcost=$opts{m}\n";
            return 0;
         }

      }

      if ( $opts{v} ) {
         printf("--          Level %1d total:   %5d bad rows      %5d to inspect\n",
            $level, $count{bad}, sum(map { $_->[$COUNT] } @to_examine) || 0);
      }
      if ( $opts{v} > 1 ) {
         printf("--          Level %1d summary: %5d bad groups in %5d src groups %5d dst groups\n",
            $level, scalar(@to_examine), $src_sth->rows, $dst_sth->rows);
         printf("--          Level %1d changes: %5d updates       %5d inserts    %5d deletes\n",
            $level, scalar(@to_update), $count{ins}, $count{del});
      }

      # Has no lasting effect: $level is a fresh lexical taken from the work
      # item at the top of every iteration.
      $level--;
   } while ( @to_examine );

   # Release locks/close transaction as soon as possible.
   if ( $opts{r} && $opts{F} && !$opts{1} ) {
      $dest->{dbh}->commit;
   }

   td_handle_data_change('DELETE', @to_delete);
   # Do UPDATE before INSERT because the current (bad) values may conflict with
   # newly INSERTed rows otherwise.
   if ( $opts{l} ) {
      # With $opts{l} an update is carried out as DELETE followed by INSERT.
      td_handle_data_change('DELETE', @to_update);
      td_handle_data_change('INSERT', @to_update);
   }
   else {
      td_handle_data_change('UPDATE', @to_update);
   }
   td_handle_data_change('INSERT', @to_insert);
}

# Applies $action ('DELETE', 'UPDATE' or 'INSERT') to every WHERE-criteria
# hash in @rows via handle_data_change.  Does nothing unless $action matches
# the $opts{o} pattern (case-insensitively).
sub td_handle_data_change {
   my ( $action, @criteria ) = @_;
   if ( $action !~ m/$opts{o}/i ) {
      return;
   }
   for my $criterion ( @criteria ) {
      # TODO I'm worried this is double-fetching rows that need to be
      # mass-inserted or updated.
      handle_data_change($action, $criterion);
   }
}

# Builds and executes the checksum query for one level of the top-down
# algorithm and returns the executed statement handle.
#
#   $info    - server/table hash ($source or $dest)
#   $level   - grouping level; 0 means one row per key with an MD5 checksum,
#              anything else a GROUP BY with COUNT(*) and an XORed checksum
#   $groupby - arrayref of the columns to select, group and order by
#   $where   - hashref of column => value criteria restricting the rows
#   $which   - 'source' or 'dest'; used for the SQL comment and lock choice
sub td_fetch_level {
   my ( $info, $level, $groupby, $where, $which ) = @_;
   my $dbh    = $info->{dbh};
   my $struct = $info->{info};

   # Columns that need to be in the checksum list.
   # TODO: why remove columns that are in the WHERE clause?
   my @chk_cols = grep { !exists($where->{$_}) } @{$struct->{cols}};
   my $col_sql  = col_list(@chk_cols);

   # To handle nulls, make a bitmap of nullable columns that are null.
   my @nullable = grep { $struct->{null_hash}->{$_} } @chk_cols;
   my $null_sql = '';
   if ( @nullable ) {
      $null_sql = ", CONCAT(" . join(', ', map  { "ISNULL(`$_`)" } @nullable) . ")";
   }

   my $group_sql = col_list(@$groupby);
   my $where_sql = make_where_clause($dbh, $where);

   # When the user wants locking for consistency ($opts{F} without $opts{k}),
   # lock exclusively on the server where changes will happen and in share
   # mode on the other one.
   my $lock_sql = '';
   if ( $opts{F} && !$opts{k} ) {
      my $target = $opts{r} ? 'source' : 'dest';
      $lock_sql  = $which eq $target ? ' FOR UPDATE' : ' LOCK IN SHARE MODE';
   }

   # Maxia's approach used SUM() as the aggregate function.  This is not a good
   # aggregate function; though it commutes, and is therefore order-independent,
   # the law of large numbers will cause checksum collisions on large data sets.
   # BIT_XOR() is really just a bitwise parity.  It is also order-independent,
   # but you expect any given bit in the result to be essentially a random coin
   # flip over the group.

   my $select = "SELECT /*$which:$info->{db_tbl}*/ $group_sql, ";
   my $query;
   if ( !$level ) {
      # Row level: checksum each row on its own.
      $query = $select
         . "MD5(CONCAT_WS('$opts{e}', $col_sql$null_sql)) AS __crc "
         . "FROM $info->{db_tbl} $where_sql "
         . "ORDER BY $group_sql$lock_sql";
   }
   else {
      # Group level: aggregate the per-row checksums slice by slice.
      my $slices = make_slices("$func(CONCAT_WS('$opts{e}', $col_sql$null_sql))");
      $query = $select
         . "COUNT(*) AS __cnt, "
         . "CONCAT($slices) AS __crc "
         . "FROM $info->{db_tbl} $where_sql GROUP BY $group_sql ORDER BY $group_sql$lock_sql";
   }
   debug_print($query);

   my $sth = $dbh->prepare($query, { mysql_use_result => !$opts{bufferresults}});
   $sth->execute();
   return $sth;
}

# ############################################################################
# Bottom-up algorithm
# ############################################################################

# Bottom-up sync.  Optionally ($opts{U}) builds the level-0 checksum tables
# and the summary-table trees on both servers, optionally ($opts{A}) merges
# the two trees to find and apply the differences, and optionally ($opts{C})
# drops the helper tables afterwards if the merge finished.
# Takes no arguments; works on the file-level $source, $dest and %opts.
sub bottomup {

   # Ensure branch factor is a power of two (and never less than 2).
   my $log2_branch = round( log($opts{B}) / log(2) );
   $opts{B} = max(2, 2 ** $log2_branch);

   # Store table prefix in hashes
   $source->{prefix} = "$opts{P}_s_";
   $dest->{prefix}   = "$opts{P}_d_";

   my $levels = 0;
   if ( !$opts{U} ) {
      # Reuse tables from an earlier run.
      $levels = bu_existing_levels( $source );
   }
   else {

      # Begin with estimates of table size to allow calculating the checksum
      # remainder on the first level.
      my $est_size    = $opts{S} || max( estimate_size($source), estimate_size($dest) );
      my $level_est_1 = bu_num_levels($est_size);

      # Determine the data type needed for the remainder column.
      my $rem_col = bu_size_to_type(( $opts{B} ** ($level_est_1 + 2)) - 1);
      $_->{rem_col} = $rem_col for $source, $dest;

      # Build the initial checksum tables and calculate how many summary
      # tables to build.  The destination gets the benefit of the source's
      # actual row count.
      my $src_size    = bu_build_checksum( $source, $level_est_1 );
      my $level_est_2 = bu_num_levels( max( $est_size, $src_size ) );
      my $dst_size    = bu_build_checksum( $dest, $level_est_2 );
      my $true_size   = max( $src_size, $dst_size );
      $levels         = bu_num_levels( $true_size );

      # Similar to the above, choose a type for the __cnt columns
      my $cnt_col = bu_size_to_type($true_size);
      $_->{cnt_col} = $cnt_col for $source, $dest;

      # The initial estimated number of levels caused the first-level tables
      # to have too-small data types, and I don't want to run ALTER TABLE; I'd
      # rather ask the user to re-run.
      die "Table size estimates ($est_size) were too small; specify --size $true_size"
         if $levels > $level_est_1 + 2;

      # Recompute remainders on whichever side was built with a different
      # number of levels than the final one.
      bu_rebuild_remainder($source, $levels) if $level_est_1 != $levels;
      bu_rebuild_remainder($dest,   $levels) if $level_est_2 != $levels;

      # Build the trees, merge them, and clean them up. TODO this part can be
      # parallelized with fork.
      bu_build_tree($source, $levels);
      bu_build_tree($dest,   $levels);
   }

   my $finished_work = 1;
   if ( $opts{A} ) {
      # Determine the collation of the primary key columns on the source, then
      # do the comparison.
      my $key_cols = $source->{info}->{keys}->{$source->{i}};
      find_collation($source->{dbh}, "$source->{prefix}_0", $key_cols);
      $finished_work = bu_merge_tree($dest,   $source, $levels);
   }
   if ( $finished_work && $opts{C} ) {
      bu_cleanup_tree($dest, $source);
   }
}

# Builds the first-level (level 0) checksum table for one server and returns
# the number of rows inserted into it.  The table has the key columns of the
# real table, a per-row MD5 (__crc) and a remainder (__rem) derived from the
# MD5 of the key.
#
# The bitwise & operator in the __rem calculation is essentially the same as
# MOD().  In unsigned arithmetic, num MOD 128 is the same as num & 127.  It has
# the advantage of taking the absolute value of the modulo though, so there will
# be no negative values.
#
#   $info   - server/table hash; must already have {prefix} and {rem_col}
#   $levels - number of levels, which determines the remainder mask
sub bu_build_checksum {
   my ($info, $levels) = @_;
   my $dbh     = $info->{dbh};
   my $struct  = $info->{info};
   my @key     = @{ $struct->{keys}->{$info->{i}} };
   my @cols    = @{ $struct->{cols} };
   my $pk      = col_list(@key);
   my $cols    = col_list(@cols);
   my $pk_defs = join( ',', @{ $struct->{defs} }{ @key } );

   # Bitmap of nullable columns that are null, so NULL and '' differ.
   my @nullable = grep { $struct->{null_hash}->{$_} } @cols;
   my $null     = '';
   if ( @nullable ) {
      $null = ", CONCAT(" . join( ', ', map {"ISNULL(`$_`)"} @nullable ) . ")";
   }

   my $table = "$info->{prefix}_0";
   my $mask  = ($opts{B} ** ($levels - 1)) - 1;

   # (Re)create the table
   my $drop = "DROP TABLE IF EXISTS `$table`";
   debug_print($drop);
   $dbh->do($drop);

   ( my $create = <<"   END") =~ s/\s+/ /g;
      CREATE $opts{T} TABLE `$table` (
         $pk_defs,
         __crc CHAR(32) NOT NULL,
         __rem $info->{rem_col} UNSIGNED NOT NULL,
         KEY(__rem),
         PRIMARY KEY($pk)
      ) ENGINE=$opts{E}
   END
   debug_print($create);
   $dbh->do($create);

   # Populate it
   ( my $populate = <<"   END") =~ s/\s+/ /g;
      INSERT /*$info->{db_tbl}*/ INTO `$table`($pk, __crc, __rem)
      SELECT $pk,
         MD5(CONCAT_WS('$opts{e}', $cols$null)) AS __crc,
         CAST(CONV(RIGHT(MD5(CONCAT_WS('$opts{e}', $pk)), 16), 16, 10) AS UNSIGNED) & $mask AS __rem
      FROM $info->{db_tbl}
   END
   debug_print($populate);
   my $sth = $dbh->prepare($populate);
   $sth->execute();
   return $sth->rows;
}

# Recomputes the __rem column of an existing level-0 checksum table for a new
# number of levels (and therefore a new mask).
#
# The expression must produce exactly the value bu_build_checksum would have
# produced for the same $levels, so it hashes the same 16 rightmost hex digits
# of the key's MD5.  Using fewer digits gives the same result only while the
# mask fits in the shorter value, and would otherwise make a rebuilt table
# disagree with one built directly.
#
#   $info   - server/table hash; must already have {prefix}
#   $levels - the final number of levels
sub bu_rebuild_remainder {
   my ( $info, $levels ) = @_;
   my $pk   = col_list( @{ $info->{info}->{keys}->{$info->{i}} } );
   my $mask = ($opts{B} ** ($levels - 1)) - 1;
   my $name = "$info->{prefix}_0";
   my $query = "UPDATE `$name` SET __rem = "
      . "CAST(CONV(RIGHT(MD5(CONCAT_WS('$opts{e}', $pk)), 16), 16, 10) AS UNSIGNED) & $mask";
   debug_print($query);
   $info->{dbh}->do($query);
}

# Builds the summary tables for levels 1 .. $levels on one server.  Each
# table groups the rows of the table one level below by their __rem; the
# group's __rem becomes the new row's __par, the checksums are combined with
# BIT_XOR over two 16-hex-digit halves, and row counts are accumulated.
# TODO: allow to use other hash functions like SHA1, and genericize the substringing code
# and the required size of the columns.
#
#   $info   - server/table hash; must have {prefix}, {rem_col} and {cnt_col}
#   $levels - number of summary levels to build
sub bu_build_tree {
   my ($info, $levels) = @_;
   my $dbh = $info->{dbh};

   # Do from 1 because level 0 has already been built.
   for my $depth ( 1 .. $levels ) {
      my $child_tbl  = "$info->{prefix}_" . ( $depth - 1 );
      my $parent_tbl = "$info->{prefix}_" . $depth;
      my $modulo     = int($opts{B} ** ( $levels - $depth - 1 ));
      my $mask       = max(0, $modulo - 1);

      # Level 0 has no __cnt column, so level 1 counts rows instead of
      # summing counts.
      my $cnt_sum    = $depth > 1 ? 'SUM(__cnt)' : 'COUNT(*)';

      # (Re)create the table
      my $drop = "DROP TABLE IF EXISTS `$parent_tbl`";
      debug_print($drop);
      $dbh->do($drop);

      ( my $create = <<"      END" ) =~ s/\s+/ /g;
         CREATE $opts{T} TABLE `$parent_tbl` (
            __par INT NOT NULL,
            __crc CHAR(32) NOT NULL,
            __rem $info->{rem_col} UNSIGNED NOT NULL,
            __cnt $info->{cnt_col} UNSIGNED NOT NULL,
            KEY(__rem),
            PRIMARY KEY(__par)
         ) ENGINE=$opts{E}
      END
      debug_print($create);
      $dbh->do($create);

      # Populate it
      ( my $populate = <<"      END" ) =~ s/\s+/ /g;
         INSERT /*$info->{db_tbl}*/ INTO `$parent_tbl`
            (__par, __crc, __rem, __cnt)
         SELECT __rem,
            CONCAT(
               LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(__crc, 1,  16), 16, 10) AS UNSIGNED)), 10, 16), 16, '0'),
               LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(__crc, 17, 16), 16, 10) AS UNSIGNED)), 10, 16), 16, '0')
            ) AS this_crc,
            __rem & $mask AS this_remainder,
            $cnt_sum AS total_rows
         FROM `$child_tbl`
         GROUP BY __rem
         ORDER BY NULL
      END
      debug_print($populate);
      $dbh->do($populate);
   }
}

# Walks the two summary-table trees from the top level down to level 0,
# following only the parents whose checksums differ, and then applies the
# differences found.
#
# There are actually 1 more than $levels summary tables; there are tables 0 ..
# $levels (see bu_build_tree).  Level 0 has a different structure.  It has
# primary keys instead of a __par pointer.
#
#   $dest, $source - server/table hashes (note the argument order)
#   $levels        - number of the top summary level
#
# Returns true if it finished working, or 0 if it stopped because the
# --maxcost limit ($opts{m}) was exceeded below the top level.
# TODO: there is a lot of shared code here with topdown, maybe factor out the
# FULL OUTER JOIN-ish code into a subroutine?
sub bu_merge_tree {
   my ($dest, $source, $levels) = @_;

   my $level = $levels;
   my @bad_parents; # List of parents that must differ at current level
   my ( $rows_in_src, $rows_in_dst ) = (0,0);

   # Lists of rows that differ in the target tables.
   # The bulk lists hold whole summary groups (level > 0) that exist on one
   # side only; they are expanded to real rows by bu_handle_bulk_change.
   my (@to_update, @to_delete, @to_insert);
   my (@bulk_insert, @bulk_delete);

   # Counters
   my %count = map { $_ => 0 } qw(ins upd del bad);

   do {
      my $src_sth = bu_fetch_level($source, $level, @bad_parents);
      my $dst_sth = bu_fetch_level($dest,   $level, @bad_parents);

      # Reset for next loop, once used to fetch this loop
      @bad_parents = ();
      $rows_in_src = $rows_in_dst = 0;

      # Summary levels are keyed by __par; level 0 by the real key columns.
      my @key = $level ? '__par' : @{$source->{info}->{keys}->{$source->{i}}};
      my ($sr, $dr); # Source row, dest row

      # The statements fetch in order, so use a 'merge' algorithm of advancing
      # after rows match.  This is essentially a FULL OUTER JOIN.
      MERGE:
      while ( 1 ) { # Exit this loop via 'last'

         # Refill whichever side was consumed on the previous pass.  Level-0
         # rows have no __cnt and count as one row each.
         if ( !$sr && $src_sth->{Active} ) {
            $sr = $src_sth->fetchrow_hashref;
            if ( $sr ) {
               $rows_in_src += $sr->{__cnt} || 1;
            }
         }
         if ( !$dr && $dst_sth->{Active} ) {
            $dr = $dst_sth->fetchrow_hashref;
            if ( $dr ) {
               $rows_in_dst += $dr->{__cnt} || 1;
            }
         }

         # Compare the rows if both exist.  The result is used several places.
         my $cmp;
         if ( $sr && $dr ) {
            $cmp = key_cmp($source, $sr, $dr, \@key);
         }

         # Both cursors are exhausted.
         last MERGE unless $sr || $dr;

         # If the current row is the "same row" on both sides...
         if ( $sr && $dr && defined $cmp && $cmp == 0 ) {
            # The "same" row descends from parents that differ.
            if ( $sr->{__crc} ne $dr->{__crc} ) {
               if ( $level ) {
                  # Remember the group so the next level down is restricted
                  # to its children.
                  push @bad_parents, $sr;
                  if ( $level && $opts{v} > 2 ) {
                     printf("-- Level %1d UPDATE parent:   %5d\n",
                        $level, $sr->{__par});
                  }
               }
               else {
                  $count{upd}++;
                  $count{bad}++;
                  push @to_update, $sr;
               }
            }
            $sr = $dr = undef;
         }

         # The row in the source doesn't exist at the destination
         elsif ( !$dr || ( defined $cmp && $cmp < 0 ) ) {
            if ( $level ) {
               push @bulk_insert, $sr;
               if ( $level && $opts{v} > 2 ) {
                  printf("-- Level %1d BULKIN parent:   %5d\n",
                     $level, $sr->{__par});
               }
            }
            else {
               push @to_insert, $sr;
               # Never prints: $level is false in this branch.
               if ( $level && $opts{v} > 2 ) {
                  printf("-- Level %1d INSERT parent:   %5d\n",
                     $level, $sr->{__par});
               }
            }
            $count{ins} += $sr->{__cnt} || 1;
            $count{bad} += $sr->{__cnt} || 1;
            $sr = undef;
         }

         # Symmetric to the above
         elsif ( !$sr || ( defined $cmp && $cmp > 0 ) ) {
            if ( $level ) {
               push @bulk_delete, $dr;
               if ( $level && $opts{v} > 2 ) {
                  printf("-- Level %1d BULKDE parent:   %5d\n",
                     $level, $dr->{__par});
               }
            }
            else {
               push @to_delete, $dr;
               # Never prints: $level is false in this branch.
               if ( $level && $opts{v} > 2 ) {
                  printf("-- Level %1d DELETE parent:   %5d\n",
                     $level, $dr->{__par});
               }
            }
            $count{del} += $dr->{__cnt} || 1;
            $count{bad} += $dr->{__cnt} || 1;
            $dr = undef;
         }

         else {
            die "This code should never have run.  This is a bug.";
         }

         # Give up once more bad rows than --maxcost have been found, except
         # while still on the top level.
         if ( $level < $levels && $opts{m} && $opts{m} < $count{bad} ) {
            print "-- Level $level halt: $count{bad} rows, --maxcost=$opts{m}\n";
            return 0;
         }

      }

      # Totals for the verbose reports.  The lists accumulate across levels,
      # so these are running totals, not per-level figures.
      my $sum_bulk_ins = sum(map { $_->{__cnt} } @bulk_insert) || 0;
      my $sum_bulk_del = sum(map { $_->{__cnt} } @bulk_delete) || 0;
      my $sum_parents  = sum(map { $_->{__cnt} || 1 } @bad_parents) || 0;
      my $num_bad_rows = scalar(@to_update) + scalar(@to_insert) + $sum_bulk_ins
                       + scalar(@to_delete) + $sum_bulk_del + $sum_parents;

      if ( $opts{v} ) {
         printf("--         Level %1d total:   %5d rows\n", $level, $num_bad_rows);
      }
      if ( $opts{v} > 1 ) {
         printf("--         Level %1d summary: %5d parents %5d src rows %5d dst rows\n",
            $level, scalar(@bad_parents), $rows_in_src, $rows_in_dst);
         printf("--         Level %1d changes: %5d updates %5d inserts  %5d deletes %5d total\n",
            $level, scalar(@to_update), scalar(@to_insert) + $sum_bulk_ins,
            scalar(@to_delete) + $sum_bulk_del,
            scalar(@to_update) + scalar(@to_insert) + $sum_bulk_ins
               + scalar(@to_delete) + $sum_bulk_del
         );
         printf("--         Level %1d bulk-op: %5d inserts %5d ins-rows %5d deletes %5d del-rows\n",
            $level, scalar(@bulk_insert), $sum_bulk_ins,
            scalar(@bulk_delete), $sum_bulk_del);
      }

      $level--;
      # Stop after level 0, or as soon as no differing parents remain.
   } while ( $level >= 0 && @bad_parents );

   bu_handle_data_change('DELETE', @to_delete);
   bu_handle_bulk_change('DELETE', $levels, $dest,   @bulk_delete);
   # Do UPDATE before INSERT because the current (bad) values may conflict with
   # newly INSERTed rows otherwise.
   if ( $opts{l} ) {
      # With $opts{l} an update is carried out as DELETE followed by INSERT.
      bu_handle_data_change('DELETE', @to_update);
      bu_handle_data_change('INSERT', @to_update);
   }
   else {
      bu_handle_data_change('UPDATE', @to_update);
   }
   bu_handle_data_change('INSERT', @to_insert);
   bu_handle_bulk_change('INSERT', $levels, $source, @bulk_insert);

   return 1; # Finished the work.
}

# Drops the checksum and summary tables (named "<prefix>_<number>") on each
# of the given servers.
#
#   @servers - server/table hashes; each must have {dbh} and {prefix}
#
# The prefix comes from the command line and is matched literally (\Q...\E):
# a regex metacharacter in it must never cause an unrelated table to match
# and be dropped.
sub bu_cleanup_tree {
   my @servers = @_;
   foreach my $info ( @servers ) {
      my @tables = @{$info->{dbh}->selectcol_arrayref('SHOW TABLES')};
      foreach my $table ( grep { m/^\Q$info->{prefix}\E_\d+$/ } @tables ) {
         my $query = "DROP TABLE IF EXISTS `$table`";
         debug_print($query);
         $info->{dbh}->do($query);
      }
   }
}

# Finds atomic rows that got folded into an entirely insertable or deleteable
# part of the tree, and applies $action to them.
#
#   $action - 'INSERT' or 'DELETE'; ignored unless it matches $opts{o}
#   $levels - number of levels, which determines the __rem mask
#   $info   - server/table hash whose level-0 table is searched
#   @rows   - summary rows (with a __par value) to expand into real rows
#
# NOTE(review): the mask is always the level-0 mask, whatever level a row was
# found at, so for parents found above level 1 this looks like it matches
# only __rem = __par exactly -- confirm.  Likewise, the table of matching
# values below corresponds to a mask of 3 rather than 255.
sub bu_handle_bulk_change {
   my ( $action, $levels, $info, @rows ) = @_;
   return if $action !~ m/$opts{o}/i;

   my $pk   = col_list( @{ $info->{info}->{keys}->{$info->{i}} } );
   my $mask = ($opts{B} ** ($levels - 1)) - 1;
   my @atomic_rows;

   # TODO: optimization.
   # The query below is logically correct, but MySQL won't use indexes:
   # "SELECT $pk FROM $info->{prefix}_0 WHERE __rem & $mask = $row->{__par}"
   # This ends up looking like __rem & 255 = 3.  This will match any of the
   # following (partial list):
   # +-------+--------+
   # | __rem | binary |
   # +-------+--------+
   # |     3 |     11 |
   # |    11 |   1011 |
   # |    15 |   1111 |
   # |    19 |  10011 |
   # |    31 |  11111 |
   # |    51 | 110011 |
   # |    59 | 111011 |
   # +-------+--------+
   # Notice the rightmost two bits are the same in each number.  All these
   # combinations can be generated by adding 3 and every number from 4 to the
   # maximum possible __rem value.  This is easiest to do by mentally
   # left-shifting by the appropriate number of digits and adding.  Suppose
   # $levels is such that the maximum __rem is 63; something like
   # $i = 1; while ( $i * 4 < 63 ) { print 3 + $i * 4; $i++; }
   # If the list is really long, it'll be less efficient for MySQL, so I'd
   # say only do this if the list is less than 20% of the number of __rem
   # values.
   for my $group ( @rows ) {
      my $query = "SELECT $pk FROM $info->{prefix}_0 WHERE __rem & $mask = $group->{__par}";
      debug_print($query);
      my $found = $info->{dbh}->selectall_arrayref($query, { Slice => {} });
      push @atomic_rows, @$found;
   }

   bu_handle_data_change($action, @atomic_rows);
}

# Applies $action to each row hash in @rows via handle_data_change.  Does
# nothing unless $action matches the $opts{o} pattern (case-insensitively).
# The __crc entry is removed from each row hash in place.
sub bu_handle_data_change {
   my ( $action, @rows ) = @_;
   return if $action !~ m/$opts{o}/i;

   for my $criteria ( @rows ) {
      # Without the checksum, the row's remaining columns can be used as a
      # WHERE clause.
      delete $criteria->{__crc};
      handle_data_change($action, $criteria);
   }
}

# Executes the query that reads one level of a server's summary tree and
# returns the executed statement handle.
#
#   $info        - server/table hash; must have {dbh} and {prefix}
#   $level       - level to read; 0 selects the key columns, any other level
#                  selects __par and __cnt.  __crc is always selected.
#   @bad_parents - optional rows from the level above; when given, only rows
#                  whose __rem is one of their __par values are fetched
sub bu_fetch_level {
   my ( $info, $level, @bad_parents ) = @_;
   my $table = "$info->{prefix}_" . $level;

   # Rows come back ordered by the same columns the merge compares on.
   my ( $select_cols, $order_by );
   if ( $level ) {
      ( $select_cols, $order_by ) = ( '__par, __cnt', '__par' );
   }
   else {
      my $pk = col_list( @{ $info->{info}->{keys}->{$info->{i}} } );
      ( $select_cols, $order_by ) = ( $pk, $pk );
   }

   my $where = '';
   if ( @bad_parents ) {
      my @parents = map { $_->{__par} } @bad_parents;
      $where = "WHERE __rem IN(" . join(',', @parents) . ")";
   }

   my $query = "SELECT $select_cols, __crc FROM $table $where ORDER BY $order_by";
   debug_print($query);
   my $sth = $info->{dbh}->prepare($query, { mysql_use_result => !$opts{bufferresults}});
   $sth->execute();
   return $sth;
}

# Returns how many levels of summary tables to build for a table of a given
# size: the logarithm of $size to base $opts{B}, truncated to an integer.
# Level 0, which is row-for-row with the real table, is not included.
# NOTE(review): an earlier version of this comment said a B factor of 4 and a
# size of 100 needs 4 levels (mod 64, 16, 4, 1); this expression yields 3 for
# those inputs -- confirm which is intended.
sub bu_num_levels {
   my ( $size ) = @_;
   my $log_b_size = log($size) / log($opts{B});
   return int( $log_b_size );
}

# Returns the name of the smallest MySQL integer type whose unsigned range
# can hold $size: TINYINT below 2**8, SMALLINT below 2**16, MEDIUMINT below
# 2**24, INT below 2**32, and BIGINT otherwise.
sub bu_size_to_type {
   my ( $size ) = @_;
   my @type_limits = (
      [ 256,        'TINYINT'   ],
      [ 65536,      'SMALLINT'  ],
      [ 16777216,   'MEDIUMINT' ],
      [ 4294967296, 'INT'       ],
   );
   foreach my $entry ( @type_limits ) {
      my ( $limit, $type ) = @$entry;
      return $type if $size < $limit;
   }
   return 'BIGINT';
}

# Figure out how many levels exist for pre-existing tables: returns the
# highest numeric suffix among the server's tables named "<prefix>_<number>".
# Dies if no such table exists.
#
#   $info - server/table hash; must have {dbh} and {prefix}
#
# The prefix is matched literally (\Q...\E) so that regex metacharacters in a
# user-supplied prefix cannot make unrelated tables count as levels.
sub bu_existing_levels {
   my ($info) = @_;
   my @tables = @{$info->{dbh}->selectcol_arrayref("SHOW TABLES")};
   @tables    = grep { m/^\Q$info->{prefix}\E_\d+$/ } @tables;
   die "No existing tables with prefix $info->{prefix} found" unless @tables;
   return max(map { $_ =~ m/(\d+)$/g } @tables);
}

# ############################################################################
# Subroutines
# ############################################################################

# Compares two rows on the given key columns and returns a negative number,
# 0, or a positive number, like <=> and cmp.
#
#   $info - server/table hash; {info}{num_hash} says which columns are numeric
#   $r1   - first row (hashref)
#   $r2   - second row (hashref)
#   $key  - arrayref of column names to compare, most significant first
#
# NULL sorts before defined values in MySQL, so I consider undef "less than."
# Two NULLs in the same column are equal, and the comparison moves on to the
# next column; otherwise a group whose key is NULL on both sides would never
# be recognized as the same group.
# Numbers are easy to compare.  Otherwise string comparison is tricky.  This
# function must match MySQL exactly or the merge algorithm runs off the rails,
# so when in doubt I ask MySQL to compare strings for me.  See
# http://dev.mysql.com/doc/refman/5.0/en/charset-literal.html
sub key_cmp {
   my ( $info, $r1, $r2, $key ) = @_;
   foreach my $c ( @$key ) {
      my $l = $r1->{$c};
      my $r = $r2->{$c};
      if ( !defined $l || !defined $r ) {
         next if !defined $l && !defined $r;   # NULL on both sides: a tie
         return defined $l ? 1 : -1;           # The NULL side sorts first
      }
      else {
         if ($c eq '__par' || $info->{info}->{num_hash}->{$c} ) {   # Numeric column
            my $cmp = $l <=> $r;
            return $cmp unless $cmp == 0;
         }
         elsif ( $l ne $r ) { # Do case-sensitive cmp, expecting most will be eq
            # TODO: any of the latin1 collations...
            # TODO: in pre-4.0, detect BINARY columns
            # Only plain printable-ASCII strings in latin1_swedish_ci (or with
            # no known collation) are compared locally, case-insensitively;
            # everything else is handed to MySQL.
            my $coll = $collation_for{$c};
            my $send_to_db = $coll
               && ( $coll ne 'latin1_swedish_ci' || $l =~ m/[^\040-\177]/ || $r =~ m/[^\040-\177]/);
            my $cmp = $send_to_db ? db_cmp($coll, $l, $r) : lc($l) cmp lc($r);
            return $cmp unless $cmp == 0;
         }
      }
   }
   return 0;
}

# Send the strings to MySQL to get a consistent cmp that matches MySQL's own
# sort order.  Without this, the algorithm may do a great deal of extra work.
#
#   $coll  - collation name; its character set is looked up in %charset_for
#   $l, $r - the two strings to compare
#
# Returns the result of MySQL's STRCMP().  The statement is prepared once per
# collation on the source's second connection ({dbh2}) and cached in
# %collation_sth.
sub db_cmp {
   my ( $coll, $l, $r ) = @_;
   if ( !$collation_sth{$coll} ) {
      my $charset = $charset_for{$coll};
      $collation_sth{$coll} = $source->{dbh2}->prepare(
         "SELECT STRCMP(_$charset? COLLATE $coll, _$charset? COLLATE $coll) as res");
   }
   my $sth = $collation_sth{$coll};
   $sth->execute($l, $r);
   return $sth->fetchall_arrayref()->[0]->[0];
}

# Prints its arguments as a debugging line, but only when $opts{b} is set.
# All output has to be prefixed with SQL comments so the output can be piped
# right into MySQL if desired; when $opts{q} is set the text is instead
# printed bare and terminated with a semicolon.
sub debug_print {
   return if !$opts{b};
   if ( $opts{q} ) {
      print @_;
      print ";\n";
   }
   else {
      print '-- ';
      print @_;
   }
   print "\n";
}

# Code factored out of bu_handle_data_change and td_handle_data_change.
# Prints ($opts{p}) and/or executes ($opts{x}) the statements needed to carry
# out one change.
#
#   $action - 'DELETE', 'INSERT' or 'UPDATE'
#   $where  - hashref of column => value criteria identifying the row(s)
#
# Changes are made on the source when $opts{r} is set, otherwise on the
# destination.  For INSERT and UPDATE the current values are always read from
# the source, one statement per matching source row.  Dies if a statement
# fails.
sub handle_data_change {
   my ( $action, $where) = @_;
   my $target = $opts{r} ? $source : $dest;
   my $dbh    = $target->{dbh};
   my $crit   = make_where_clause($dbh, $where);

   if ( $action eq 'DELETE' ) {
      my $delete = "DELETE FROM $target->{db_tbl} $crit";
      print STDOUT $delete, ";\n" if $opts{p};
      $dbh->do($delete)          if $opts{x};
      return;
   }

   my $fetch = "SELECT $source->{cols} FROM $source->{db_tbl} $crit";
   debug_print($fetch);
   my $sth = $source->{dbh}->prepare($fetch);
   $sth->execute();

   ROW:
   while ( my $res = $sth->fetchrow_hashref() ) {
      my $change;
      if ( $opts{s} eq 'r' || $action eq 'INSERT' ) {
         # Strategy 'r' turns every change into a REPLACE of the whole row.
         my $verb   = $opts{s} eq 'r' ? 'REPLACE' : 'INSERT';
         my @values = map { $dbh->quote($res->{$_}) } @{$target->{info}->{cols}};
         $change = "$verb INTO $target->{db_tbl}($target->{cols}) VALUES("
            . join(',', @values) . ")";
      }
      else {
         # Only set the columns that are not already fixed by the criteria.
         my @assignments =
            map  { $q->quote($_) . '=' .  $dbh->quote($res->{$_}) }
            grep { !exists($where->{$_}) } @{$target->{info}->{cols}};
         $change = "UPDATE $target->{db_tbl} SET "
            . join(',', @assignments)
            . ' ' . $crit;
      }

      print STDOUT $change, ";\n" if $opts{p};
      next ROW unless $opts{x};

      eval { $dbh->do($change) };
      next ROW unless $EVAL_ERROR;
      die "Your tables probably have some differences "
         . "that cannot be resolved with UPDATE statements.  "
         . "Re-run mk-table-sync with --deleteinsert to proceed.\n"
         if $EVAL_ERROR =~ m/Duplicate entry/;
      die $EVAL_ERROR;
   }
}

# Returns a comma-separated list of SQL expressions that together aggregate a
# hex checksum with BIT_XOR.  The checksum is cut into slices of at most 16
# hex digits (using the file-level $crc_wid); each slice is converted to an
# unsigned integer, XORed over the group, and converted back to zero-padded
# hex.
#
#   $chks - SQL expression that computes one row's checksum
#
# When $crc_slice indexes one of the slices, that slice assigns the checksum
# to the user variable @crc and the others read @crc; otherwise every slice
# computes the checksum itself.
sub make_slices {
   my ( $chks ) = @_;

   # Split the CRC result up into slices and glue them together.
   my @slices;
   my $start = 1;
   while ( $start < $crc_wid ) {
      my $len = min(16, $crc_wid - $start + 1);
      push @slices,
         "LPAD(CONV(BIT_XOR("
         . "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
         . ", 10, 16), $len, '0')";
      $start += 16;
   }

   if ( $crc_slice < @slices ) {
      $slices[$crc_slice] =~ s/\@crc/\@crc := $chks/;
   }
   else {
      s/\@crc/$chks/ for @slices;
   }

   return join(', ', @slices);
}

# Parses the output of SHOW CREATE TABLE for $info->{db_tbl} and returns a
# hashref describing the table:
#
#   cols / col_hash  - column names, restricted to $opts{c} when it is given
#   null / null_hash - columns whose definition lacks NOT NULL
#   nums / num_hash  - columns with an integer, float, double or decimal type
#   keys             - key name => arrayref of the key's column names
#   defs             - column name => that column's definition line, verbatim
#
# Also defaults $info->{i} to 'PRIMARY'.  Dies if that key does not exist, or
# if REPLACE ($opts{s} eq 'r') would be executed against a table with more
# than one unique key.
sub get_tbl_struct {
   my ( $info ) = @_;
   my $ddl = ($info->{dbh}->selectrow_array("SHOW CREATE TABLE $info->{db_tbl}"))[1];
   my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
   my @cols = map { $_ =~ m/`([^`]+)`/g } @defs;

   # Pair every definition with its own column name before @cols is filtered.
   # Pairing by position afterwards would attach the wrong definition to every
   # column that follows a filtered-out one.
   my %def_for;
   foreach my $def ( @defs ) {
      my ($name) = $def =~ m/`([^`]+)`/;
      $def_for{$name} = $def if defined $name && !exists $def_for{$name};
   }

   if ( $opts{c} ) { # Keep only the columns the user listed
      @cols = grep { exists($opts{c}->{$_}) } @cols;
   }
   my @nums = map  { $_ =~ m/`([^`]+)`/g }
              grep { $_ =~ m/`[^`]+` (?:(?:tiny|big|medium|small)?int|float|double|decimal)/ } @defs;
   my @null = map { $_ =~ m/`([^`]+)`/g } grep { $_ !~ m/NOT NULL/ } @defs;
   my %keys =
      map {
         my ($name) = $_ =~ m/(PRIMARY|`[^`]*`)/;
         my ($cols) = $_ =~ m/\((.+)\),?$/;
         $name =~ s/`//g;
         ($name, [ grep { m/[^,]/ } split('`', $cols) ])
      }
      $ddl =~ m/^  ((?:[A-Z]+ )?KEY .*)$/gm;

   # Die if REPLACE is in use and there's more than one unique key.
   if ( $opts{s} eq 'r' && $opts{x} ) {
      my @matches = $ddl =~ m/(UNIQUE|PRIMARY)/g;
      if ( @matches > 1 ) {
         die "It isn't safe to use --strategy=r when the table has multiple unique keys";
      }
   }

   # Save the column definitions *exactly* for use when calculating summary
   # tables.
   my %alldefs;
   @alldefs{@cols} = @def_for{@cols};

   $info->{i} ||= 'PRIMARY';
   if ( !exists $keys{$info->{i}} ) {
      die "No such key $info->{i} in table $info->{h}/$info->{db_tbl}";
   }

   return {
      cols      => \@cols,
      col_hash  => { map { $_ => 1 } @cols },
      null      => \@null,
      null_hash => { map { $_ => 1 } @null },
      keys      => \%keys,
      defs      => \%alldefs,
      nums      => \@nums,
      num_hash  => { map { $_ => 1 } @nums },
   };
}

# Returns an estimated row count for $info->{db_tbl}, taken from the 'rows'
# column of an EXPLAIN rather than from an exact (and possibly very slow) count.
# A plain COUNT(*) is not usable because the server might optimize it away and
# leave 'rows' null.  The same can happen on tables with no NULL column, so the
# query counts every column and carries a WHERE clause on the first column of
# the chosen index that defeats the optimization.
# TODO: add --where to this query.
sub estimate_size {
   my ($info) = @_;

   # First column of the index named by $info->{i}.
   my ( $pkcol ) = @{$info->{info}->{keys}->{$info->{i}}};

   # One COUNT() per quoted column name.
   my @quoted = split(',', col_list(@{$info->{info}->{cols}}));
   my $counts = join(', ', map { "COUNT($_)" } @quoted);

   my $query = "EXPLAIN SELECT $counts FROM $info->{db_tbl} "
      . "WHERE COALESCE(`$pkcol`, `$pkcol`) = `$pkcol`";
   debug_print($query);

   my $plan = $info->{dbh}->selectrow_hashref($query);
   return $plan->{rows};
}

# Rounds $number to the nearest integer, with halves rounded away from zero.
# int() truncates toward zero, so the half must be added for non-negative
# values but subtracted for negative ones; always adding it would round
# negative numbers the wrong way (-1.4 would become 0).
sub round {
   my ($number) = @_;
   return $number < 0
      ? int( $number - .5 )
      : int( $number + .5 );
}

# Builds a WHERE clause from a hashref of column => value pairs.
#
# Each pair becomes "column = value", or "column IS value" when the value is
# undefined.  Column names are quoted with $q and values with $dbh->quote().
# The pairs are ANDed together inside one pair of parentheses.  If the global
# $opts{W} is set it is appended as an extra parenthesized condition.
#
# Returns the clause, including the WHERE keyword, or the empty string when
# there is nothing to filter on.
sub make_where_clause {
   my ( $dbh, $where ) = @_;

   my @clauses;
   foreach my $col ( keys %$where ) {
      my $val = $where->{$col};
      my $op  = defined $val ? ' = ' : ' IS ';
      push @clauses, $q->quote($col) . $op . $dbh->quote($val);
   }

   my $clause = '';
   if ( @clauses ) {
      $clause = 'WHERE (' . join(' AND ', @clauses) . ')';
   }

   # Add in the global WHERE clause from command-line arguments
   if ( $opts{W} ) {
      my $glue = @clauses ? ' AND ' : 'WHERE ';
      $clause .= $glue . "($opts{W})";
   }

   return $clause;
}

# Records the collation of each column in @$cols of table $tbl in the global
# %collation_for, as reported by SHOW FULL COLUMNS.  Columns for which the
# server reports no collation are left untouched.  Does nothing unless
# $opts{O} is set and the server is at least version 4.1.2.
sub find_collation {
   my ( $dbh, $tbl, $cols ) = @_;
   if ( $opts{O} && $vp->version_ge($dbh, '4.1.2') ) {
      my $col_info
         = $dbh->selectall_hashref("SHOW FULL COLUMNS FROM $tbl", 'Field');
      for my $name ( @$cols ) {
         my $collation = $col_info->{$name}->{Collation};
         next unless $collation;
         $collation_for{$name} = $collation;
      }
   }
}

# Returns the arguments with duplicates removed, keeping the first occurrence
# of each value and preserving the original order.  In scalar context the
# result is the number of distinct values.
sub unique {
   my %seen;
   my @distinct;
   foreach my $item ( @_ ) {
      next if $seen{$item}++;
      push @distinct, $item;
   }
   return @distinct;
}

# Builds a single string key for a row: the values of the columns listed in
# @$cols, taken from the hashref $row in that order and joined with '#'.
# Undefined values are represented by the literal string 'NULL'.
sub make_key {
   my ( $row, $cols ) = @_;
   my @parts;
   foreach my $col ( @$cols ) {
      my $val = $row->{$col};
      push @parts, defined $val ? $val : 'NULL';
   }
   return join('#', @parts);
}

# Opens and returns a DBI connection for the server described by $info, using
# the connection parameters produced by $dsn_parser->get_cxn_params($info).
#
# When $info has no password ($info->{p}) and $opts{askpass} is set, the user
# is prompted for one first and it is stored back into $info->{p}.
# Errors are raised as exceptions (RaiseError) and not printed (PrintError).
sub get_dbh {
   my ( $info ) = @_;

   if ( $opts{askpass} && !$info->{p} ) {
      $info->{p} = OptionParser::prompt_noecho("Enter password for $info->{h}: ");
   }

   my %attr = (
      RaiseError => 1,
      PrintError => 0,
      # AutoCommit is switched off as soon as any of these options is set.
      # NOTE(review): presumably --lock, --forupdate and --singletxn; confirm
      # against the option definitions.
      AutoCommit => !( $opts{k} || $opts{F} || $opts{1} ),
      # TODO: enable support for this on old versions of DBD::mysql?
      # http://www.perlmonks.org/?node_id=620803
      mysql_enable_utf8 => 1,
   );

   return DBI->connect($dsn_parser->get_cxn_params($info), \%attr);
}

# Returns the arguments as a comma-separated list of backtick-quoted
# identifiers, e.g. `a`,`b`.  With no arguments the result is a bare pair of
# backticks.
sub col_list {
   my $inner = join('`,`', @_);
   return sprintf('`%s`', $inner);
}

# Returns the first defined argument, or undef when every argument is
# undefined or there are no arguments at all.
sub coalesce {
   foreach my $candidate ( @_ ) {
      return $candidate if defined $candidate;
   }
   # Deliberately an explicit undef so that, as before, a caller in list
   # context always receives exactly one element.
   return undef;
}

# ############################################################################
# Documentation
# ############################################################################
=pod

=head1 NAME

mk-table-sync - Efficiently synchronize data between two MySQL tables.

=head1 SYNOPSIS

To compare two tables, try one of these:

 mk-table-sync -d col1,col2 u=user,p=pass,h=host1,D=db,t=tbl host2
 mk-table-sync -a bottomup -B 128 u=user,p=pass,h=host1,D=db,t=tbl host2

To sync the destination to the source, try

 mk-table-sync -x -d col1,col2 u=user,p=pass,h=host1,D=db,t=tbl host2

To show the differences between the tables, use the L<"--verbose"> option.
Issue this option multiple times for more detail.  To see queries that will
make the second table the same as the first, use the L<"--print"> option.  To
see the queries issued while searching for differences, use the L<"--debug">
option.

There is a special syntax for connecting to MySQL servers.  Each server name
on the command line can be either just a hostname, or a key=value,key=value
string.  Keys are a single letter:

   KEY MEANING
   === =======
   h   Connect to host
   P   Port number to use for connection
   S   Socket file to use for connection
   u   User for login if not current user
   p   Password to use when connecting
   F   Only read default options from the given file
   D   Database containing the table to be synced
   t   Table to be synced
   i   Index to drill into when syncing

=head1 OPTIONS

Many options are enabled by default and can be disabled by prefixing with --no.

=over

=item --algorithm

The algorithm to use when comparing the tables.  Top-down drills into the tables
with GROUP BY queries.  Bottom-up builds levels of summary tables with
logarithmically fewer rows at each successive level, and then navigates the
tables in reverse to find differences.  Top-down is explained in L<"FINDING
DIFFERENCES TOP-DOWN"> and bottom-up in L<"FINDING DIFFERENCES BOTTOM-UP">.

=item --analyze

Only applies to bottom-up search.  Specifies that mk-table-sync should examine the
summary tables and find table differences, and if desired, issue queries to
resolve them.  This is the default behavior, but you can disable it if you just
want to build the summary tables.

=item --askpass

Prompts the user for a password when connecting to MySQL.

=item --branchfactor

Branch factor for bottom-up algorithm.  Each successive summary table will group
this many rows together in the next summary table.  This means each summary
table will be approximately this fraction of the size of the previous table.  If
you don't specify an exact power of two, mk-table-sync will round it to the
nearest power of two.  The default is 128.

=item --bufferresults

Fetch all rows from MySQL before comparing.  This is disabled by default.  If
enabled, all rows will be fetched into memory for comparing.  This may result in
the results "cursor" being held open for a shorter time on the server, but if
the tables are large, it could take a long time anyway, and eat all your memory.
For most non-trivial data sizes, you want to leave this disabled.

=item --build

Build summary tables for bottom-up algorithm.  Enabled by default.  You can
disable to use existing summary tables.

=item --cleanup

Clean up scratch tables for bottom-up algorithm.  Enabled by default.  You can
disable this to leave the scratch (summary) tables for later analysis or
troubleshooting.

=item --collate

Use MySQL to compare strings if necessary.  Enabled by default.  Perl cannot
reliably compare character strings the same way MySQL does, so if it's unsure
how two strings should be sorted, mk-table-sync will send them to MySQL and
ask its opinion.  You should leave this enabled.

=item --columns

Comma-separated list of columns to compare and synchronize.  If you specify this
list, mk-table-sync will completely ignore any other columns.  Specifying
a column that doesn't exist is not an error.

=item --debug

Print debugging output to STDOUT.  This is mostly the queries used to compare
data.  It is not complete at the moment.

=item --deleteinsert

Convert all UPDATES to DELETE and INSERT.  This can help prevent conflicts due
to unique indexes on columns other than the primary key.  This option will
become obsolete when I make mk-table-sync auto-detect this situation and handle it
itself.

=item --drilldown

Drilldown groupings for the top-down algorithm.  This governs how the
comparison algorithm searches for differences.  If you specify col2,col3 it will
group rows by col2, find groups that differ, search within them grouped by col3,
and then search within these rows on primary key.  The behavior of this option
is likely to change in the future.

=item --engine

Storage engine to use for the bottom-up summary tables.  If you don't specify,
it is controlled by the server's default storage engine.

=item --execute

After finding differences, execute the queries required to sync the tables.

=item --forupdate

Use SELECT FOR UPDATE or LOCK IN SHARE MODE for checksums.  This only applies to
the top-down algorithm.  It can help prevent the tables being changed while
you're examining them.  mk-table-sync automatically decides which type of
locks to acquire, based on other command-line options.  This doesn't do anything
on some storage engines, such as MyISAM.

=item --help

Show a brief help message.

=item --lock

Lock tables when beginning work.  This uses table-level LOCK TABLES.  This can
help prevent tables being changed while you're examining them.

=item --maxcost

Maximum rowcount before aborting.  In either algorithm, if you specify this
option mk-table-sync will abort if it discovers more than this many rows need
to be changed to sync the tables.  There may be more rows to change than can be
definitely discovered up front, but this might be a good way to stop before
doing too much work.

=item --onlydo

Only do INSERT, UPDATE, or DELETE queries while syncing tables.  The default is
to do all three, but if you want to, you can specify any combination of actions.
For example, if you want to do a two-way sync of rows that have been added to
either table, one way to do it is specify 'L<"--onlydo"> iu' and then run
mk-table-sync again in the "other direction" with the same iu argument.  That
will avoid deleting extra rows from the destination table, which the second
invocation will then copy back to the source table.

=item --prefix

Tablename prefix for bottom-up algorithm.  In case the default prefix clashes
with existing tables for some reason, or in case you want to use work tables
that were built previously.  The default is a pseudo-random value that is
constant within a given day.  If you run mk-table-sync again the next day, it
will choose a different prefix.

=item --print

Print all sync queries to STDOUT.  If you don't trust mk-table-sync, or just
want to see what it will do, this is a good way.  These queries are valid SQL
and you can run them yourself if you want to sync the tables manually.

=item --queries

Output the L<"--debug"> debugging output as SQL that can be executed.  By
default it's commented out with SQL comments, so it won't interfere with the
output of L<"--print">.  This option removes the comments.

=item --separator

Separator for CONCAT_WS, which is used to concatenate all columns in each row
for checksumming.  The default is '#', but you might want to change it if
you're suspicious about this for any reason (possible collisions, for example).

=item --singletxn

Do all work in a single transaction.  This has no effect unless you're syncing
tables that use a transactional storage engine.

=item --size

Table size in bottom-up algorithm.  Size estimates used to build the summary
tables may be off.  If this happens, mk-table-sync will die and tell you what
value to use for this parameter.  Most of the time the size estimates should be
accurate enough (they just have to be within some logarithmic order of
magnitude), so estimating size up front is a good optimization.  It prevents
COUNT() queries that aren't needed.

=item --strategy

Query strategy when syncing (r=replace, s=ins/upd/del).  If you specify r,
INSERT and UPDATE queries will be issued as REPLACE.  This is most useful when
you're syncing a slave to its master via replication (see
L<"--synctomaster">).

=item --synctomaster

Change the table on the master/source server, and let replication propagate the
changes to the destination table.  If you're syncing a master and slave, this is
almost certainly the option you should use.  Implies L<"--strategy">=r.

=item --temp

Use temporary tables in bottom-up algorithm.  The default is to use real tables.

=item --timeoutok

Keep going if L<"--wait"> fails.  If you specify L<"--wait"> and the slave doesn't catch
up to the master's position before the wait times out, the default behavior is
to abort.  This option makes mk-table-sync keep going anyway.  Warning: if
you are trying to get a consistent comparison between the two servers, you
probably don't want to keep going after a timeout.

=item --verbose

Explain the differences found while comparing the tables.  Specify up to three
times for more verbosity.  See L<"OUTPUT"> for more details about the output.

=item --verify

Verify checksum compatibility across servers.  Some older versions of MySQL
won't concatenate columns the same, so checksums might differ.
mk-table-sync checks for this and aborts if the servers have different
behavior.  If you know your columns will never contain the empty string, you can
safely disable this check.

=item --version

Output version information and exit.

=item --wait

Make the slave wait to catch up to the master before comparing the tables.  The
value is the number of seconds to wait before timing out (see also L<"--timeoutok">).
mk-table-sync issues LOCK TABLES on the master, then finds the master's
binlog position and waits for the slave to catch up to that position before
comparing tables.  This is useful for guaranteeing a consistent comparison
between the servers.

=item --where

Use this option to limit the portion of the table being synchronized.  The
value will be added to queries in the WHERE clause.  Do not include the
keyword WHERE in the option; that will be added automatically.  Be sure to
quote and escape as required by your shell.  For example:

  mk-table-sync [OPTIONS...] --where "id between 5 and 10"

=back

=head1 DESCRIPTION

I wrote this tool to help me resync slaves that "drift" from their masters,
which can happen for any number of reasons.  I wanted a solution that would work
well for MySQL replication, so I didn't have to re-initialize the slaves, which
can be prohibitively expensive -- if there's enough data, even stopping and
restarting the slave is costly, as it takes a while to "warm up" the server.
Add to that the overhead of copying huge amounts of data over the network, and
the time involved, and a way to resync the slaves "live" is very attractive
indeed.  There are also many constraints introduced by replication, which I
wanted to either avoid or use to my advantage.

I know not everyone has exactly these needs, so I made mk-table-sync much more
generic than I'd need to patch a table that's out of sync on a slave.

The DBA must choose the algorithm and parameters to use when reconciling
differences between the tables.  See below for help making this decision.
Different algorithms have more or less network traffic, impact on the servers,
or work better in certain circumstances.  The tool supports a variety of
algorithms so you can resolve the differences as efficiently as possible within
whatever parameters matter to you.

=head1 SYSTEM REQUIREMENTS

You need Perl, DBI, DBD::mysql, and some core packages that ought to be
installed in any reasonably new version of Perl.

You need MySQL version 4.1.1 or greater; earlier versions lack the BIT_XOR()
function required to take a groupwise checksum.

=head1 OVERVIEW

This tool implements two algorithms to find differences between two MySQL
tables, which need not be on the same server.  One is "bottom-up" and builds
summary tables from each table, then traverses them to find rows and chunks of
rows that differ.  The other is "top-down" and builds no summary tables, but
repeatedly queries the target table.  Each algorithm has strengths and
limitations, and is suitable for different situations.

Once you've identified the differences, you can also choose from several methods
of resolving them.  One method is to do inserts, updates and deletes to the
destination table.  The other assumes the destination table is on a replication
slave and makes the changes on the source (master) server, counting on
replication to propagate the changes.  Again it's up to a smart DBA to decide
which method is best.

There are also variations on all the techniques, in support of locking,
master/slave consistency, partial-row updates, and so forth.

=head1 FINDING DIFFERENCES BOTTOM-UP

The bottom-up method of finding differences begins by checksumming every row in
the source and destination target tables.  The result of the checksum is stored
in a scratch table on the server.

This scratch table is what I call the "level 0" table.  It contains the target
table's primary key columns and a checksum of all the columns, concatenated.
This checksum is easy to compare and makes it possible to see whether the rows
differ.  Level 0 contains one row per row in the target table.

Table "level 1" is derived from level 0 by grouping a number of rows together
and checksumming the group.  How many rows are grouped together is up to the
user, but it must be a power of two.  128 is a suggestion I've seen.  I refer to
this number as the "branch factor" because the summary tables conceptually build
a tree.

The grouping works by dividing the checksum of each row in level 0 by some power
of the branch factor and taking the remainder.  A checksum is a number, though
it is usually written as a string of hex digits, such as
acbd18db4cc2f85cedef654fccc4a4d8, so you can divide and take the remainder
(modulus) easily.  (This tool actually uses some bitwise arithmetic to optimize
this, but I won't go into it here).  The power of the branch factor decreases as
the levels are built, so the remainders get smaller and smaller, grouping the
rows into fewer and fewer summary rows.

Level 1's primary key is not the target table's primary key.  It is the modulus
of the group from which the row was derived.  For example, if a number of rows
in level 0 have a modulus of 11, they will be grouped together into a single row
in level 1, with the primary key value of 11.

Assuming a branch factor of 128, level 1 has 1/128th as many rows as level 0,
give or take.  Level 2 is built from level 1, and has 1/128th as many rows in it
again, and so on until level N, which has just a single row.

After building scratch tables 0 through N, mk-table-sync begins at level N and works
backwards.  At level N, there is just a single row.  If the checksum in this row
matches on source and destination, the tables must be identical, and there is no
more work to do.  If they differ though, some rows in level N-1 must differ, and
mk-table-sync examines the "parent" rows in level N-1.  It continues to do this until
it travels all the way back "up the tree" to level 0 and identifies exactly
which rows in the target tables are different.  It uses breadth-first search.

I've glossed over many subtleties.  For example, as the summary tables are
built, not only their checksums but the remainders are computed on the fly; the
remainders are stored in the summary tables and are indexed for efficient lookup
as the algorithm traverses back up the tree seeking differences.  A running
count is also stored so at any given point you know how many rows in the target
table got rolled into the one row you're examining, no matter what level you're
at in the tree.  Some optimizations can be used to short-circuit the process
when entire chunks of rows are missing from one of the tables, and so on.  But
these optimizations and subtleties are just efficiencies, and are not necessary
for correctness.

Here are some details about the table structures: The __crc column contains the
checksum of the row from which it was derived.  The __cnt column contains the
running count of rows from which it was derived, except in the case of level 0
where each row is derived from one row.  The __rem column contains the checksum
modulo the power of the branching factor.  The __par column in level N is a
"pointer" to the __rem column in level N-1.

=head1 FINDING DIFFERENCES TOP-DOWN

The top-down algorithm is nearly the reverse of the bottom-up algorithm.
Instead of building summary tables bottom-up from many rows and ending with one
row, then searching top-down back through the summaries, it does an n-ary search
on clusters of the tables, which I refer to as "groupings."  The search begins
with grouped data and ends with single rows, instead of beginning with single
rows and working towards summaries.  There are no summary tables.

The basic idea is to choose an appropriate grouping strategy which will allow
MySQL to use indexes to drill down through regions of the table, grouping each
region together at first and comparing whole chunks of data between the source
and destination.  Suppose the tables contain day-level data for many client
accounts; clients can have many accounts.  Day, client, account and whatever the
primary key is, are all indexed.  The drill-down strategy might first group the
table by day and see which days differ between the tables, then within the days
that differ group by client, then account, and finally descend to the individual
row level, using the primary key.

This approach is also a breadth-first search as I've implemented it.  At each
level in the drill-down, mk-table-sync knows a set of truths, such as the value of
certain columns in the rows, the number of rows that might be bad, and so on.
In this respect it is fairly similar to the bottom-up approach.

=head1 PROS AND CONS OF THE TWO SEARCH METHODS

Each method for finding the rows that differ has its own strengths and
weaknesses, and is suited for different scenarios.

The bottom-up approach has these advantages:

=over

=item *

The checksum and modulo arithmetic ensure a uniform hierarchy of rows in the
"tree" of N-level summary tables.  Taking the modulo of a checksum is
essentially a random number, which will distribute rows approximately evenly in
each successive summary.

=item *

The algorithm makes no assumptions about keys or data types, and will work on
any table with a primary key or a user-specified index.  All you need is a way
to identify a row, in the final analysis.

=item *

The technique works the same on every table structure; there's no need to think
about the "best" way to do it for a specific table.

=item *

The summary tables can be kept and re-used for successive analyses, or to
restart an analysis that fails for some reason (for example, you specified a
maximum cost before halting, and it was exceeded but you've changed your mind;
no need to rebuild the summaries, you can just restart).

=item *

If you assume there are occasional "bad" rows scattered through the table, the
entire tree of summary tables will need to be examined to find them.
Pre-calculating this is an up-front penalty that pays off in efficiency once you
try to find the bad rows.

=item *

This algorithm's best and worst cases, in terms of pre-computing the summary
tables, are identical.  Given that you know the table size, you know how
expensive it will be.

=item *

Parts of the algorithm can be parallelized readily, though I have not yet done
so, as I want to make sure the implementation is correct first (I plan to do
this soon).

=item *

This algorithm is network-efficient, as the potentially large rows in the target
tables (suppose each row has very large BLOBS in it) are not sent across the
network.  Only the checksums are sent across the network, until the bad rows
themselves are identified.

=back

The algorithm has its shortcomings too, though:

=over

=item *

It is necessary to do some possibly significant work up front to design the
summary tables properly.  For example, you need to know the maximum possible
number of rows to be examined.  I have tried to optimize this process as much as
possible by examining index statistics and making estimates.  This is fairly
cheap, and should work in nearly every case, but it makes the coding much more
complex, just to avoid things like a COUNT(*) query, which is notoriously slow
on InnoDB.

=item *

All the summary tables may add up to a LOT of data on very large tables.  If the
target table is narrow, the summary tables may be even larger than the target
tables, though there will never be more rows in level 0 than the target table
has.

=item *

It's hard to make this approach play nicely with replication.  If you build
temporary tables in memory, you're playing havoc with statement-based
replication should the slave crash.  Even if you build them on disk, which is
durable and restartable, the summary tables built on the master will replicate
to the slave.  The slave server will be doing double work with the master's
queries running on it.  Either way, building summary tables on the server is
anathema to replication.

=item *

It's hard to lock the table for consistency, should you wish to.  You can't
design the summary tables until you start querying the target tables, yet you
can't write to the summary tables while only holding a lock on the target table,
which would require releasing and re-acquiring the lock on the target -- race
conditions abound.  I do have a workaround to this problem in mind, but have not
implemented it yet (and it would not solve the problem in cases where the
destination table is being written to).

=item *

The original algorithm, as designed by Coelho, didn't use any indexes or
pre-computed and cached remainders and counts on the summary tables.  This is
extremely inefficient on large tables, causing repeated table scans.  While I
have modified the algorithm to avoid this, it comes at the cost of larger table
and index size on the server.

=item *

The checksum/modulo approach destroys locality of reference in the target
tables.  Suppose the rows that differ between the source and destination tables
are concentrated in a small region of the table; the checksum/modulo math will
randomly scatter these neighbors throughout the summary tables.  This precludes
some types of optimizations.  This is very important on extremely large tables,
as it causes lots of random I/O during the search phase.  It's also a realistic
scenario for large tables, which may tend to be append-only logs or similar
(credit card transactions, for example).

=item *

The reverse of something I mentioned earlier as a benefit becomes a drawback
when only a small part of the table is bad.  Building the entire summary table
tree for just one bad row is wasteful.  Most of the tree will compare equal, but
there's no chance for early optimizations by pruning those branches; they've
already been built by the time the search notices they aren't needed.

=item *

All the INSERT .. SELECT statements necessary to build the summary tables will
acquire shared row locks on InnoDB tables.  This overhead adds up.

=back

The top-down approach is quite different, both in behavior and implementation
details.  Here are some of its strengths:

=over

=item *

This approach is network-efficient.  Large rows are not sent across the network,
just as with the bottom-up approach.  However, it is also memory and space
efficient on the servers, as there are no summary tables to build.

=item *

This approach does allow for early optimizations such as tree-pruning in the
search.

=item *

The target table's natural groupings, created by its indexes, can (and should)
be exploited.

=item *

The queries are not replicated because they don't affect any data.  The queries
on the master will not cause extra work on the slave.

=item *

It's easy to lock the tables with one of several strategies, including
intentionally locking InnoDB tables with SELECT FOR UPDATE or SELECT LOCK IN
SHARE MODE, and MySQL's own table locks.

=item *

There is no need in this algorithm to compute branching factors and numbers of
summary levels needed according to the size of the target tables.  There's no
up-front analysis to do.  You don't need to pre-compute anything before
searching for differences.

=item *

Spatial locality can be exploited.  Adjacent "bad" rows stay adjacent during the
search.  This can help avoid random I/O during the search.

=item *

There are more opportunities for bulk operations, such as noticing a large chunk
of the table can be inserted or deleted en masse.  While there are some in the
bottom-up approach, there are not as many, and they're harder to optimize.

=item *

The best case for this algorithm is not the same cost as the worst case.  This
can be good or bad, depending on the scenario.

=item *

The grouping strategy determines the "branch factor" of the search tree, and it
can vary from level to level in the search, depending on the data and the
strategy chosen.  This can be a good thing in the hands of a smart DBA, or a bad
thing in a novice's hands.

=item *

As the search descends through the tree of groupings, it needs to examine fewer
and fewer columns in the target tables.

=back

The top-down approach is not without its weaknesses:

=over

=item *

The search strategy can be simpler or more complex.  It's up to the DBA.  There
are certainly more possibilities, and choosing a good top-down strategy might be
hard.

=item *

If there's no locality to exploit, the top-down approach might not be able to
prune the search tree, and it might end up doing more work than the bottom-up.

=item *

The drill-down must repeatedly checksum the entire row (except the columns it
holds as constants in each level of the search).  If the rows are very large,
for example if there are large text columns, this might be a lot of work for the
server to do over and over.

=back

=head1 RESOLVING DIFFERENCES

Once mk-table-sync has found the differences, you probably want to resolve them.
There are two major ways I know to do this, and several variations.

The most obvious way is to simply issue INSERT, UPDATE, and DELETE statements
against the destination server.

A more subtle approach is to take advantage of replication and issue the
statements on the master, letting replication propagate them to the slave.  In
this case a slightly different approach is needed.  If a row is missing on the
slave, you can't just INSERT it on the master, or you'll presumably get a
duplicate key error.  You could do INSERT IGNORE or REPLACE instead.

The tool uses REPLACE by default when you're using replication.  To tell it to
use INSERT and UPDATE instead, use the L<"--strategy">=s option.  You can use
L<"--strategy">=r to use REPLACE even when you're not using replication to fix the
destination table.

You can also use the L<"--onlydo"> option to only issue some kinds of statements --
for example, suppose you want to run the INSERT and UPDATE statements but not
the DELETE.

=head1 SPECIAL CASES

There are cases where no combination of UPDATE statements can resolve
differences without violating some unique key.  For example, suppose there's a
primary key on column a and a unique key on column b.  Then there is no way to
sync these two tables with straightforward UPDATE statements:

 +---+---+  +---+---+
 | a | b |  | a | b |
 +---+---+  +---+---+
 | 1 | 2 |  | 1 | 1 | 
 | 2 | 1 |  | 2 | 2 | 
 +---+---+  +---+---+

If you run into this, you can use the L<"--deleteinsert"> option to convert all
UPDATE statements into DELETE followed by INSERT.  The tool orders statements
such that this will succeed.

If necessary, you can use the L<"--singletxn"> option to make sure synchronizing
either succeeds or rolls back entirely.  Of course this will have no effect on
non-transactional tables such as MyISAM.  Running the entire sync in a single
transaction causes extra overhead.

Another special case is comparing strings between servers.  If your primary key
or drilldown columns are character data, mk-table-sync will potentially issue queries
to the source server when it doesn't think it can reliably compare two strings
exactly the same way MySQL does.  The collation used is the column's collation
on the source server, so if the character set or collation differ between the
tables being synced, it's hard to say what the results will be.  This feature
will cause more network traffic between the servers as it compares the tables.
This is a feature I don't have fully baked yet.  You can disable it with the
L<"--nocollate"> option.

=head1 GUARANTEEING CONSISTENCY

I wrote this tool to synchronize tables live, without stopping the servers
they're on.  This requires some kind of locking to guarantee a consistent write
after reading.  This tool supports several methods.

The first is simple table locks.  If you specify the L<"--lock"> option, it will lock
the table for reading or writing, depending on how you want to update.  If you
want to update on a master and let replication propagate the changes, it locks
for write on the master and read on the slave; if you want to make changes on
the slave, it locks for read on the master and doesn't lock on the slave, to
avoid blocking the replication thread on the slave.

If you are using InnoDB tables, you can get consistency without locking the
whole table.  This is especially useful if you're only trying to synchronize
part of the table that you know to be bad.  Use the L<"--forupdate"> option to make
the SELECT statements acquire locks.  As above, it gets either shared (LOCK IN
SHARE MODE) or exclusive (FOR UPDATE) locks, depending on how you are syncing.
This only applies to the top-down algorithm.  The bottom-up algorithm creates
tables, which implicitly commits and releases locks unless you use temporary
tables (the L<"--temp"> option).  If you're using temporary tables with the bottom-up
algorithm, there are implicit shared locks on the target tables if they're
InnoDB.

Finally, if you're working on a master and slave server, you should probably
specify the L<"--wait"> option in conjunction with one of the above. This locks on
the master, finds the master's position, and then waits for the slave to catch
up to that position.  The argument to the option is the number of seconds the
slave should wait before timing out.  By default L<"--wait"> implies L<"--lock">, but you
can specify L<"--nolock"> if you want to override this and use InnoDB's row-level
locks.  Also by default, if the wait timeout is exceeded or another
MASTER_POS_WAIT error occurs, the program will exit with an error, but you can
use the L<"--timeoutok"> option to control this.

You can use the L<"--singletxn"> option to make the entire sync run in one
transaction.

=head1 OUTPUT

Output varies greatly depending on the command-line options you specify.
There are several different kinds of output: debugging, status, and query.  I
have tried to ensure it will always be valid SQL, though much of it will be
commented out.

If you specify the L<"--print"> option, the queries needed to sync the destination
table with the source table will be printed to STDOUT.

If you specify the L<"--debug"> option, the queries needed to discover the
differences between source and destination will be printed to STDOUT, commented
out.  You can use this to see how many queries are executed for a given
strategy, or whatever other debugging you want.  If you remove the comment
characters with the L<"--queries"> option, you can also replay the process of finding
the differences.

If you specify the L<"--verbose"> option, you'll see information about the process of
discovering the differences between the tables.  The output is quite different
for top-down and bottom-up algorithms.  Specify this option multiple times to
increase the amount of information you see.  This output is complex enough
that I'll need to explain it separately.

=head2 TOP-DOWN OUTPUT

At its most verbose, the top-down output may resemble this:

   -- Level 2: CHECK  group of    81 rows WHERE `col2` = '20'
   -- Level 2: INSERT group of    18 rows WHERE `col2` = '42'
   --          Level 2 total:     18 bad rows        81 to inspect
   --          Level 2 summary:    1 bad groups in    3 src groups    2 dst groups
   --          Level 2 changes:    0 updates         18 inserts       0 deletes
   -- Level 1: CHECK  group of     6 rows WHERE `col3` = '737696900' AND `col2` = '20'
   -- Level 1: CHECK  group of     1 rows WHERE `col3` = '737953400' AND `col2` = '20'
   -- Level 1: CHECK  group of     1 rows WHERE `col3` = '737955900' AND `col2` = '20'
   --          Level 1 total:     18 bad rows         8 to inspect
   --          Level 1 summary:    3 bad groups in   32 src groups   32 dst groups
   --          Level 1 changes:    0 updates         18 inserts       0 deletes
   --          Level 0 total:     18 bad rows         2 to inspect
   --          Level 0 summary:    2 bad groups in    6 src groups    6 dst groups
   --          Level 0 changes:    0 updates         18 inserts       0 deletes
   -- Level 0: UPDATE             1 row  WHERE `col3` = '737953400' AND `col1` = '87551' AND `col2` = '20'
   --          Level 0 total:     19 bad rows         1 to inspect
   --          Level 0 summary:    1 bad groups in    1 src groups    1 dst groups
   --          Level 0 changes:    1 updates         18 inserts       0 deletes
   --          Level 0 total:     19 bad rows         0 to inspect
   --          Level 0 summary:    0 bad groups in    1 src groups    1 dst groups
   --          Level 0 changes:    1 updates         18 inserts       0 deletes

Outdented lines are actions that must be taken later; indented lines are
play-by-play status as differences between the tables are found.  There will be
one group of indented lines for each group of rows drilled into and found to
have differences.

The first two lines are details of level 2.  At level 2, col2 is held as a
constant.  There is one group of 81 rows where col2 = 20, which does not match
from source to destination.  It needs further checking and is marked as CHECK.
The next level will drill down into this group.  Also at level 2, there is one
group of 18 rows that needs to be inserted to sync the destination table.  This
does not need to be drilled into on the next level.

The next three lines summarize the findings at level 2, and the work that
remains to be done.  Line 1 shows level 2 found a total of 18 rows known to
differ between the source and destination, and there are 81 more to inspect
further.  Line 2 shows level 2 found 1 entire group of rows known to be bad
(the group that must be inserted) after inspecting 3 groups from the source
table and 2 from the destination table.  The difference, 3-2, is the one group
that must be inserted.  Line 3 shows 18 rows have been queued for insertion
en masse.

The next six lines are what happens in the one group at level 1.  At level 1,
the group of rows where col2 = 20 is drilled into, grouped on col3.  The first
three lines of output show the algorithm finds three groups of rows that don't
match.  The next three show the total bad-row count still at 18, so no new bad
rows have been found, but the number of rows that must be drilled into is much
smaller now -- only 8 rows.  Level 1 found 3 bad groups by checking 32 groups
from each table, and queued no new rows into the known-bad list.

The level 0 output shows these 3 groups being examined a row at a time, with no
more drilldown possible.  And you can see the one bad row being found.
Eventually the last line of output shows 18 rows must be inserted (no change
from before) and 1 row must be updated to sync the destination table.

It might help to see what happens with only one level of verbosity, this time on
a 50,000-row table with 5 rows missing from the destination:

   --          Level 2 total:       0 bad rows       6385 to inspect
   --          Level 1 total:       0 bad rows       5142 to inspect
   --          Level 1 total:       1 bad rows       4235 to inspect
   --          Level 1 total:       1 bad rows       2919 to inspect
   --          Level 1 total:       1 bad rows       2560 to inspect
   --          Level 1 total:       1 bad rows        325 to inspect
   --          Level 0 total:       2 bad rows        142 to inspect
   --          Level 0 total:       2 bad rows        141 to inspect
   --          Level 0 total:       3 bad rows         61 to inspect
   --          Level 0 total:       4 bad rows         37 to inspect
   --          Level 0 total:       5 bad rows          0 to inspect

Now you can see it progressing from 0 known bad rows, with 6385 to do, all the
way to 5 known bad rows and 0 left to do.

=head2 BOTTOM-UP OUTPUT

At maximum verbosity, the output from the bottom-up algorithm may look like this
(this output is from the same 100-row tables as above).

   -- Level 2 UPDATE parent:      0
   --         Level 2 total:    100 rows
   --         Level 2 summary:    1 parents  100 src rows   82 dst rows
   --         Level 2 changes:    0 updates    0 inserts     0 deletes    0 total
   --         Level 2 bulk-op:    0 inserts    0 ins-rows    0 deletes    0 del-rows
   -- Level 1 UPDATE parent:      0
   -- Level 1 BULKIN parent:      1
   -- Level 1 UPDATE parent:      2
   -- Level 1 UPDATE parent:      3
   -- Level 1 UPDATE parent:      4
   -- Level 1 UPDATE parent:      5
   -- Level 1 UPDATE parent:      8
   -- Level 1 UPDATE parent:      9
   -- Level 1 UPDATE parent:     10
   -- Level 1 UPDATE parent:     11
   -- Level 1 UPDATE parent:     14
   -- Level 1 UPDATE parent:     15
   --         Level 1 total:     84 rows
   --         Level 1 summary:   11 parents  100 src rows   82 dst rows
   --         Level 1 changes:    0 updates    1 inserts     0 deletes    1 total
   --         Level 1 bulk-op:    1 inserts    1 ins-rows    0 deletes    0 del-rows
   --         Level 0 total:     19 rows
   --         Level 0 summary:    0 parents   83 src rows   66 dst rows
   --         Level 0 changes:    1 updates   18 inserts     0 deletes   19 total
   --         Level 0 bulk-op:    1 inserts    1 ins-rows    0 deletes    0 del-rows

The first level, level 2, says the parent row whose remainder is 0 (this will
always be the case at the first level in bottom-up) differs.  At this point it
looks like the parent must be updated to reconcile source and destination
tables, but it's not yet known which individual rows must be changed.  The level
2 summary says there are 100 rows grouped together from parent rows; that is, 1
parent value with 100 rows in the source and 82 in the destination (the
difference is the 18 rows that must be inserted, but that is not yet known).
The next two lines of output show what work is queued to do -- row-level
updates, inserts and deletes, and bulk inserts and deletes.  Each bulk insert or
delete knows how many rows it will affect.

The next set of output, for level 1, shows that this level of the tree has 11
rows that don't match between source and destination.  These are again marked as
UPDATE because they differ, but it's still not known why.  One row doesn't exist
in the destination and is marked as BULKIN, for "bulk insert."  This level of
drill-down was able to narrow the part of the table possibly bad from 100 to 84
rows.

At level 0, this narrows down to just 19 rows.  Most of these are inserted
singly, and there is one update.

Again, here is what happens with just one level of verbosity on the same
50,000-row tables as above:

   --         Level 2 total:   50000 rows
   --         Level 1 total:    1945 rows
   --         Level 0 total:       5 rows

At the beginning, all 50,000 rows look bad, but as it navigates the tree, it
narrows it down to just the 5 missing rows.

=head1 COMPATIBILITY

My goal is a superb solution for MySQL.  However, I think you can probably
make some minor changes and use this tool on other platforms.

=head1 HISTORY AND ACKNOWLEDGEMENTS

My work is based in part on Giuseppe Maxia's work on distributed databases,
L<http://www.sysadminmag.com/articles/2004/0408/> and code derived from that
article.  There is more explanation, and a link to the code, at
L<http://www.perlmonks.org/?node_id=381053>.

Another programmer extended Maxia's work even further.  Fabien Coelho changed
and generalized Maxia's technique, introducing symmetry and avoiding some
problems that might have caused too-frequent checksum collisions.  This work
grew into pg_comparator, L<http://www.coelho.net/pg_comparator/>.  Coelho also
explained the technique further in a paper titled "Remote Comparison of Database
Tables" (L<http://cri.ensmp.fr/classement/doc/A-375.pdf>).

This existing literature mostly addressed how to find the differences between
the tables, not how to resolve them once found.  I needed a tool that would not
only find them efficiently, but would then resolve them.  I first began thinking
about how to improve the technique further with my article
L<http://www.xaprb.com/blog/2007/03/05/an-algorithm-to-find-and-resolve-data-differences-between-mysql-tables/>,
where I discussed a number of problems with the Maxia/Coelho "bottom-up"
algorithm.  After writing that article, I began to write this tool.  I wanted to
actually implement their algorithm with some improvements so I was sure I
understood it completely.  I discovered it is not what I thought it was, and is
considerably more complex than it appeared to me at first.  Fabien Coelho was
kind enough to address some questions over email.

The improvements to the bottom-up algorithm are my original work, as is the
top-down algorithm.  The techniques to actually resolve the differences are also
my own work.

Another tool that can synchronize tables is the SQLyog Job Agent from Webyog.
Thanks to Rohit Nadhani, SJA's author, for the conversations about the general
techniques.  There is a comparison of mk-table-sync and SJA at
L<http://www.xaprb.com/blog/2007/04/05/mysql-table-sync-vs-sqlyog-job-agent/>.

Thanks to the following people and organizations for helping in many ways:

Alan Rimm-Kaufman and John Miller (The Rimm-Kaufman Group),
Dane Miller,
Fabien Coelho,
Giuseppe Maxia and others at MySQL AB,
Kristian Koehntopp (MySQL AB),
Rohit Nadhani (WebYog),
The helpful monks at Perlmonks,
Travis Rodak.

=head1 BUGS

Please use the SourceForge bug tracker, forums, and mailing lists to request
support or report bugs: L<http://sourceforge.net/projects/maatkit/>.

=head1 COPYRIGHT, LICENSE AND WARRANTY

This program is copyright (c) 2007 Baron Schwartz.
Feedback and improvements are welcome.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA  02111-1307  USA.

=head1 AUTHOR

Baron Schwartz.

=head1 VERSION

This manual page documents Ver 0.9.9 Distrib 1316 $Revision: 1308 $.

=cut
