3
# This program is copyright 2011-@CURRENTYEAR@ Percona Inc.
4
# This program is copyright 2007-2011 Baron Schwartz.
5
# Feedback and improvements are welcome.
7
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
8
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
9
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
11
# This program is free software; you can redistribute it and/or modify it under
12
# the terms of the GNU General Public License as published by the Free Software
13
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
14
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
17
# You should have received a copy of the GNU General Public License along with
18
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
19
# Place, Suite 330, Boston, MA 02111-1307 USA.
22
use warnings FATAL => 'all';
24
our $VERSION = '@VERSION@';
25
our $DISTRIB = '@DISTRIB@';
26
our $SVN_REV = sprintf("%d", (q$Revision: 7527 $ =~ m/(\d+)/g, 0));
28
# ###########################################################################
29
# TableParser package 7156
30
# This package is a copy without comments from the original. The original
31
# with comments and its test file can be found in the SVN repository at,
32
# trunk/common/TableParser.pm
33
# trunk/common/t/TableParser.t
34
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
35
# ###########################################################################
40
use warnings FATAL => 'all';
41
use English qw(-no_match_vars);
43
$Data::Dumper::Indent = 1;
44
$Data::Dumper::Sortkeys = 1;
45
$Data::Dumper::Quotekeys = 0;
47
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
50
my ( $class, %args ) = @_;
51
my @required_args = qw(Quoter);
52
foreach my $arg ( @required_args ) {
53
die "I need a $arg argument" unless $args{$arg};
56
return bless $self, $class;
60
my ( $self, $ddl, $opts ) = @_;
62
if ( ref $ddl eq 'ARRAY' ) {
63
if ( lc $ddl->[0] eq 'table' ) {
73
if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
74
die "Cannot parse table definition; is ANSI quoting "
75
. "enabled or SQL_QUOTE_SHOW_CREATE disabled?";
78
my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/;
79
(undef, $name) = $self->{Quoter}->split_unquote($name) if $name;
81
$ddl =~ s/(`[^`]+`)/\L$1/g;
83
my $engine = $self->get_engine($ddl);
85
my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
86
my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
87
MKDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
90
@def_for{@cols} = @defs;
93
my (%type_for, %is_nullable, %is_numeric, %is_autoinc);
94
foreach my $col ( @cols ) {
95
my $def = $def_for{$col};
96
my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
97
die "Can't determine column type for $def" unless $type;
98
$type_for{$col} = $type;
99
if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
101
$is_numeric{$col} = 1;
103
if ( $def !~ m/NOT NULL/ ) {
105
$is_nullable{$col} = 1;
107
$is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
110
my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable);
112
my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/;
117
col_posn => { map { $cols[$_] => $_ } 0..$#cols },
118
is_col => { map { $_ => 1 } @cols },
120
is_nullable => \%is_nullable,
121
is_autoinc => \%is_autoinc,
122
clustered_key => $clustered_key,
125
numeric_cols => \@nums,
126
is_numeric => \%is_numeric,
128
type_for => \%type_for,
134
my ( $self, $tbl ) = @_;
138
(($a ne 'PRIMARY') <=> ($b ne 'PRIMARY'))
139
|| ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} )
140
|| ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} )
141
|| ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) )
144
$tbl->{keys}->{$_}->{type} eq 'BTREE'
146
sort keys %{$tbl->{keys}};
148
MKDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes));
152
sub find_best_index {
153
my ( $self, $tbl, $index ) = @_;
156
($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
160
die "Index '$index' does not exist in table";
163
($best) = $self->sort_indexes($tbl);
166
MKDEBUG && _d('Best index found is', $best);
170
sub find_possible_keys {
171
my ( $self, $dbh, $database, $table, $quoter, $where ) = @_;
172
return () unless $where;
173
my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table)
174
. ' WHERE ' . $where;
176
my $expl = $dbh->selectrow_hashref($sql);
177
$expl = { map { lc($_) => $expl->{$_} } keys %$expl };
178
if ( $expl->{possible_keys} ) {
179
MKDEBUG && _d('possible_keys =', $expl->{possible_keys});
180
my @candidates = split(',', $expl->{possible_keys});
181
my %possible = map { $_ => 1 } @candidates;
182
if ( $expl->{key} ) {
183
MKDEBUG && _d('MySQL chose', $expl->{key});
184
unshift @candidates, grep { $possible{$_} } split(',', $expl->{key});
185
MKDEBUG && _d('Before deduping:', join(', ', @candidates));
187
@candidates = grep { !$seen{$_}++ } @candidates;
189
MKDEBUG && _d('Final list:', join(', ', @candidates));
193
MKDEBUG && _d('No keys in possible_keys');
199
my ( $self, %args ) = @_;
200
my @required_args = qw(dbh db tbl);
201
foreach my $arg ( @required_args ) {
202
die "I need a $arg argument" unless $args{$arg};
204
my ($dbh, $db, $tbl) = @args{@required_args};
205
my $q = $self->{Quoter};
206
my $db_tbl = $q->quote($db, $tbl);
207
MKDEBUG && _d('Checking', $db_tbl);
209
my $sql = "SHOW TABLES FROM " . $q->quote($db)
210
. ' LIKE ' . $q->literal_like($tbl);
214
$row = $dbh->selectrow_arrayref($sql);
217
MKDEBUG && _d($EVAL_ERROR);
220
if ( !$row->[0] || $row->[0] ne $tbl ) {
221
MKDEBUG && _d('Table does not exist');
225
MKDEBUG && _d('Table exists; no privs to check');
226
return 1 unless $args{all_privs};
228
$sql = "SHOW FULL COLUMNS FROM $db_tbl";
231
$row = $dbh->selectrow_hashref($sql);
234
MKDEBUG && _d($EVAL_ERROR);
237
if ( !scalar keys %$row ) {
238
MKDEBUG && _d('Table has no columns:', Dumper($row));
241
my $privs = $row->{privileges} || $row->{Privileges};
243
$sql = "DELETE FROM $db_tbl LIMIT 0";
248
my $can_delete = $EVAL_ERROR ? 0 : 1;
250
MKDEBUG && _d('User privs on', $db_tbl, ':', $privs,
251
($can_delete ? 'delete' : ''));
253
if ( !($privs =~ m/select/ && $privs =~ m/insert/ && $privs =~ m/update/
255
MKDEBUG && _d('User does not have all privs');
259
MKDEBUG && _d('User has all privs');
264
my ( $self, $ddl, $opts ) = @_;
265
my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
266
MKDEBUG && _d('Storage engine:', $engine);
267
return $engine || undef;
271
my ( $self, $ddl, $opts, $is_nullable ) = @_;
272
my $engine = $self->get_engine($ddl);
274
my $clustered_key = undef;
277
foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) {
279
next KEY if $key =~ m/FOREIGN/;
282
MKDEBUG && _d('Parsed key:', $key_ddl);
284
if ( $engine !~ m/MEMORY|HEAP/ ) {
285
$key =~ s/USING HASH/USING BTREE/;
288
my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
289
my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
290
$type = $type || $special || 'BTREE';
291
if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000'
292
&& $engine =~ m/HEAP|MEMORY/i )
294
$type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP
297
my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
298
my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
301
foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) {
302
my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/;
304
push @col_prefixes, $prefix;
308
MKDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols));
315
col_prefixes => \@col_prefixes,
316
is_unique => $unique,
317
is_nullable => scalar(grep { $is_nullable->{$_} } @cols),
318
is_col => { map { $_ => 1 } @cols },
322
if ( $engine =~ m/InnoDB/i && !$clustered_key ) {
323
my $this_key = $keys->{$name};
324
if ( $this_key->{name} eq 'PRIMARY' ) {
325
$clustered_key = 'PRIMARY';
327
elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) {
328
$clustered_key = $this_key->{name};
330
MKDEBUG && $clustered_key && _d('This key is the clustered key');
334
return $keys, $clustered_key;
338
my ( $self, $ddl, $opts ) = @_;
342
$ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg )
344
my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/;
345
my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/;
346
my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/;
348
if ( $parent !~ m/\./ && $opts->{database} ) {
349
$parent = "`$opts->{database}`.$parent";
355
cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ],
356
parent_tbl => $parent,
357
parent_colnames=> $parent_cols,
358
parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ],
366
sub remove_auto_increment {
367
my ( $self, $ddl ) = @_;
368
$ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m;
372
sub remove_secondary_indexes {
373
my ( $self, $ddl ) = @_;
375
my $tbl_struct = $self->parse($ddl);
377
if ( ($tbl_struct->{engine} || '') =~ m/InnoDB/i ) {
378
my $clustered_key = $tbl_struct->{clustered_key};
379
$clustered_key ||= '';
381
my @sec_indexes = map {
382
my $key_def = $_->{ddl};
383
$key_def =~ s/([\(\)])/\\$1/g;
384
$ddl =~ s/\s+$key_def//i;
386
my $key_ddl = "ADD $_->{ddl}";
387
$key_ddl .= ',' unless $key_ddl =~ m/,$/;
390
grep { $_->{name} ne $clustered_key }
391
values %{$tbl_struct->{keys}};
392
MKDEBUG && _d('Secondary indexes:', Dumper(\@sec_indexes));
394
if ( @sec_indexes ) {
395
$sec_indexes_ddl = join(' ', @sec_indexes);
396
$sec_indexes_ddl =~ s/,$//;
399
$ddl =~ s/,(\n\) )/$1/s;
402
MKDEBUG && _d('Not removing secondary indexes from',
403
$tbl_struct->{engine}, 'table');
406
return $ddl, $sec_indexes_ddl, $tbl_struct;
410
my ($package, undef, $line) = caller 0;
411
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
412
map { defined $_ ? $_ : 'undef' }
414
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
419
# ###########################################################################
420
# End TableParser package
421
# ###########################################################################
423
# ###########################################################################
424
# TableChecksum package 7080
425
# This package is a copy without comments from the original. The original
426
# with comments and its test file can be found in the SVN repository at,
427
# trunk/common/TableChecksum.pm
428
# trunk/common/t/TableChecksum.t
429
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
430
# ###########################################################################
431
package TableChecksum;
434
use warnings FATAL => 'all';
435
use English qw(-no_match_vars);
436
use List::Util qw(max);
438
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
441
CHECKSUM => { pref => 0, hash => 0 },
442
BIT_XOR => { pref => 2, hash => 1 },
443
ACCUM => { pref => 3, hash => 1 },
447
my ( $class, %args ) = @_;
448
foreach my $arg ( qw(Quoter VersionParser) ) {
449
die "I need a $arg argument" unless defined $args{$arg};
451
my $self = { %args };
452
return bless $self, $class;
456
my ( $self, $string ) = @_;
457
my $poly = 0xEDB88320;
458
my $crc = 0xFFFFFFFF;
459
foreach my $char ( split(//, $string) ) {
460
my $comp = ($crc ^ ord($char)) & 0xFF;
462
$comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1;
464
$crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp;
466
return $crc ^ 0xFFFFFFFF;
470
my ( $self, $dbh, $func ) = @_;
472
if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
474
my ($val) = $dbh->selectrow_array("SELECT $func('a')");
475
$crc_wid = max(16, length($val));
482
my ( $self, $dbh, $func ) = @_;
485
my $sql = "SELECT $func('a')";
486
my $sth = $dbh->prepare($sql);
489
$type = $sth->{mysql_type_name}->[0];
490
$length = $sth->{mysql_length}->[0];
491
MKDEBUG && _d($sql, $type, $length);
492
if ( $type eq 'bigint' && $length < 20 ) {
497
MKDEBUG && _d('crc_type:', $type, 'length:', $length);
498
return ($type, $length);
502
my ( $self, %args ) = @_;
503
my ( $alg, $dbh ) = @args{ qw(algorithm dbh) };
504
my $vp = $self->{VersionParser};
505
my @choices = sort { $ALGOS{$a}->{pref} <=> $ALGOS{$b}->{pref} } keys %ALGOS;
506
die "Invalid checksum algorithm $alg"
507
if $alg && !$ALGOS{$alg};
510
$args{where} || $args{chunk} # CHECKSUM does whole table
511
|| $args{replicate} # CHECKSUM can't do INSERT.. SELECT
512
|| !$vp->version_ge($dbh, '4.1.1')) # CHECKSUM doesn't exist
514
MKDEBUG && _d('Cannot use CHECKSUM algorithm');
515
@choices = grep { $_ ne 'CHECKSUM' } @choices;
518
if ( !$vp->version_ge($dbh, '4.1.1') ) {
519
MKDEBUG && _d('Cannot use BIT_XOR algorithm because MySQL < 4.1.1');
520
@choices = grep { $_ ne 'BIT_XOR' } @choices;
523
if ( $alg && grep { $_ eq $alg } @choices ) {
524
MKDEBUG && _d('User requested', $alg, 'algorithm');
528
if ( $args{count} && grep { $_ ne 'CHECKSUM' } @choices ) {
529
MKDEBUG && _d('Not using CHECKSUM algorithm because COUNT desired');
530
@choices = grep { $_ ne 'CHECKSUM' } @choices;
533
MKDEBUG && _d('Algorithms, in order:', @choices);
537
sub is_hash_algorithm {
538
my ( $self, $algorithm ) = @_;
539
return $ALGOS{$algorithm} && $ALGOS{$algorithm}->{hash};
542
sub choose_hash_func {
543
my ( $self, %args ) = @_;
544
my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);
545
if ( $args{function} ) {
546
unshift @funcs, $args{function};
548
my ($result, $error);
552
$func = shift(@funcs);
553
my $sql = "SELECT $func('test-string')";
555
$args{dbh}->do($sql);
558
if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) {
559
$error .= qq{$func cannot be used because "$1"\n};
560
MKDEBUG && _d($func, 'cannot be used because', $1);
562
} while ( @funcs && !$result );
564
die $error unless $result;
565
MKDEBUG && _d('Chosen hash func:', $result);
570
my ( $self, %args ) = @_;
571
my ($dbh, $func) = @args{qw(dbh function)};
573
die "$func never needs the BIT_XOR optimization"
574
if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;
577
my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
580
my $crc_wid = length($unsliced) < 16 ? 16 : length($unsliced);
582
do { # Try different positions till sliced result equals non-sliced.
583
MKDEBUG && _d('Trying slice', $opt_slice);
584
$dbh->do('SET @crc := "", @cnt := 0');
585
my $slices = $self->make_xor_slices(
586
query => "\@crc := $func('a')",
588
opt_slice => $opt_slice,
591
my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
592
$sliced = ($dbh->selectrow_array($sql))[0];
593
if ( $sliced ne $unsliced ) {
594
MKDEBUG && _d('Slice', $opt_slice, 'does not work');
598
} while ( $start < $crc_wid && $sliced ne $unsliced );
600
if ( $sliced eq $unsliced ) {
601
MKDEBUG && _d('Slice', $opt_slice, 'works');
605
MKDEBUG && _d('No slice works');
610
sub make_xor_slices {
611
my ( $self, %args ) = @_;
612
foreach my $arg ( qw(query crc_wid) ) {
613
die "I need a $arg argument" unless defined $args{$arg};
615
my ( $query, $crc_wid, $opt_slice ) = @args{qw(query crc_wid opt_slice)};
618
for ( my $start = 1; $start <= $crc_wid; $start += 16 ) {
619
my $len = $crc_wid - $start + 1;
625
. "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
626
. ", 10, 16), $len, '0')";
629
if ( defined $opt_slice && $opt_slice < @slices ) {
630
$slices[$opt_slice] =~ s/\@crc/\@crc := $query/;
633
map { s/\@crc/$query/ } @slices;
636
return join(', ', @slices);
639
sub make_row_checksum {
640
my ( $self, %args ) = @_;
641
my ( $tbl_struct, $func ) = @args{ qw(tbl_struct function) };
642
my $q = $self->{Quoter};
644
my $sep = $args{sep} || '#';
648
my $ignorecols = $args{ignorecols} || {};
650
my %cols = map { lc($_) => 1 }
651
grep { !exists $ignorecols->{$_} }
652
($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}});
656
my $type = $tbl_struct->{type_for}->{$_};
657
my $result = $q->quote($_);
658
if ( $type eq 'timestamp' ) {
661
elsif ( $args{float_precision} && $type =~ m/float|double/ ) {
662
$result = "ROUND($result, $args{float_precision})";
664
elsif ( $args{trim} && $type =~ m/varchar/ ) {
665
$result = "TRIM($result)";
670
$cols{$_} && !$seen{$_}++
672
@{$tbl_struct->{cols}};
675
if ( !$args{no_cols} ) {
679
if ( $col =~ m/\+ 0/ ) {
680
my ($real_col) = /^(\S+)/;
681
$col .= " AS $real_col";
683
elsif ( $col =~ m/TRIM/ ) {
684
my ($real_col) = m/TRIM\(([^\)]+)\)/;
685
$col .= " AS $real_col";
692
if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
693
my @nulls = grep { $cols{$_} } @{$tbl_struct->{null_cols}};
695
my $bitmap = "CONCAT("
696
. join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls)
702
? "$func(CONCAT_WS('$sep', " . join(', ', @cols) . '))'
706
my $fnv_func = uc $func;
707
$query .= "$fnv_func(" . join(', ', @cols) . ')';
713
sub make_checksum_query {
714
my ( $self, %args ) = @_;
715
my @required_args = qw(db tbl tbl_struct algorithm crc_wid crc_type);
716
foreach my $arg( @required_args ) {
717
die "I need a $arg argument" unless $args{$arg};
719
my ( $db, $tbl, $tbl_struct, $algorithm,
720
$crc_wid, $crc_type) = @args{@required_args};
721
my $func = $args{function};
722
my $q = $self->{Quoter};
725
die "Invalid or missing checksum algorithm"
726
unless $algorithm && $ALGOS{$algorithm};
728
if ( $algorithm eq 'CHECKSUM' ) {
729
return "CHECKSUM TABLE " . $q->quote($db, $tbl);
732
my $expr = $self->make_row_checksum(%args, no_cols=>1);
734
if ( $algorithm eq 'BIT_XOR' ) {
735
if ( $crc_type =~ m/int$/ ) {
736
$result = "COALESCE(LOWER(CONV(BIT_XOR(CAST($expr AS UNSIGNED)), 10, 16)), 0) AS crc ";
739
my $slices = $self->make_xor_slices( query => $expr, %args );
740
$result = "COALESCE(LOWER(CONCAT($slices)), 0) AS crc ";
744
if ( $crc_type =~ m/int$/ ) {
745
$result = "COALESCE(RIGHT(MAX("
746
. "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), "
747
. "CONV(CAST($func(CONCAT(\@crc, $expr)) AS UNSIGNED), 10, 16))"
748
. "), $crc_wid), 0) AS crc ";
751
$result = "COALESCE(RIGHT(MAX("
752
. "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), "
753
. "$func(CONCAT(\@crc, $expr)))"
754
. "), $crc_wid), 0) AS crc ";
757
if ( $args{replicate} ) {
758
$result = "REPLACE /*PROGRESS_COMMENT*/ INTO $args{replicate} "
759
. "(db, tbl, chunk, boundaries, this_cnt, this_crc) "
760
. "SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, $result";
764
. ($args{buffer} ? 'SQL_BUFFER_RESULT ' : '')
765
. "/*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, $result";
767
return $result . "FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/";
770
sub find_replication_differences {
771
my ( $self, $dbh, $table ) = @_;
773
(my $sql = <<" EOF") =~ s/\s+/ /gm;
774
SELECT db, tbl, chunk, boundaries,
775
COALESCE(this_cnt-master_cnt, 0) AS cnt_diff,
777
this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc),
780
this_cnt, master_cnt, this_crc, master_crc
782
WHERE master_cnt <> this_cnt OR master_crc <> this_crc
783
OR ISNULL(master_crc) <> ISNULL(this_crc)
787
my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} });
792
my ($package, undef, $line) = caller 0;
793
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
794
map { defined $_ ? $_ : 'undef' }
796
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
801
# ###########################################################################
802
# End TableChecksum package
803
# ###########################################################################
805
# ###########################################################################
806
# OptionParser package 7102
807
# This package is a copy without comments from the original. The original
808
# with comments and its test file can be found in the SVN repository at,
809
# trunk/common/OptionParser.pm
810
# trunk/common/t/OptionParser.t
811
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
812
# ###########################################################################
814
package OptionParser;
817
use warnings FATAL => 'all';
818
use List::Util qw(max);
819
use English qw(-no_match_vars);
820
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
824
my $POD_link_re = '[LC]<"?([^">]+)"?>';
827
my ( $class, %args ) = @_;
828
my @required_args = qw();
829
foreach my $arg ( @required_args ) {
830
die "I need a $arg argument" unless $args{$arg};
833
my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
834
$program_name ||= $PROGRAM_NAME;
835
my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.';
847
head1 => 'OPTIONS', # These args are used internally
848
skip_rules => 0, # to instantiate another Option-
849
item => '--(.*)', # Parser obj that parses the
850
attributes => \%attributes, # DSN OPTIONS section. Tools
851
parse_attributes => \&_parse_attribs, # don't tinker with these args.
855
strict => 1, # disabled by a special rule
856
program_name => $program_name,
862
allowed_groups => {},
864
rules => [], # desc of rules for --help
865
mutex => [], # rule: opts are mutually exclusive
866
atleast1 => [], # rule: at least one opt is required
867
disables => {}, # rule: opt disables other opts
868
defaults_to => {}, # rule: opt defaults to value of other opt
871
"/etc/maatkit/maatkit.conf",
872
"/etc/maatkit/$program_name.conf",
873
"$home/.maatkit.conf",
874
"$home/.$program_name.conf",
877
string => 's', # standard Getopt type
878
int => 'i', # standard Getopt type
879
float => 'f', # standard Getopt type
880
Hash => 'H', # hash, formed from a comma-separated list
881
hash => 'h', # hash as above, but only if a value is given
882
Array => 'A', # array, similar to Hash
883
array => 'a', # array, similar to hash
885
size => 'z', # size with kMG suffix (powers of 2^10)
886
time => 'm', # time, with an optional suffix of s/h/m/d
890
return bless $self, $class;
894
my ( $self, $file ) = @_;
895
$file ||= $self->{file} || __FILE__;
896
my @specs = $self->_pod_to_specs($file);
897
$self->_parse_specs(@specs);
899
open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
900
my $contents = do { local $/ = undef; <$fh> };
902
if ( $contents =~ m/^=head1 DSN OPTIONS/m ) {
903
MKDEBUG && _d('Parsing DSN OPTIONS');
908
my $parse_dsn_attribs = sub {
909
my ( $self, $option, $attribs ) = @_;
911
my $val = $attribs->{$_};
913
$val = $val eq 'yes' ? 1
916
$attribs->{$_} = $val;
924
my $dsn_o = new OptionParser(
925
description => 'DSN OPTIONS',
926
head1 => 'DSN OPTIONS',
927
dsn => 0, # XXX don't infinitely recurse!
928
item => '\* (.)', # key opts are a single character
929
skip_rules => 1, # no rules before opts
930
attributes => $dsn_attribs,
931
parse_attributes => $parse_dsn_attribs,
935
key => $_->{spec}->{key},
936
dsn => $_->{spec}->{dsn},
937
copy => $_->{spec}->{copy},
941
} $dsn_o->_pod_to_specs($file);
942
$self->{DSNParser} = DSNParser->new(opts => \@dsn_opts);
950
return $self->{DSNParser};
953
sub get_defaults_files {
955
return @{$self->{default_files}};
959
my ( $self, $file ) = @_;
960
$file ||= $self->{file} || __FILE__;
961
open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR";
967
local $INPUT_RECORD_SEPARATOR = '';
968
while ( $para = <$fh> ) {
969
next unless $para =~ m/^=head1 $self->{head1}/;
973
while ( $para = <$fh> ) {
974
last if $para =~ m/^=over/;
975
next if $self->{skip_rules};
978
$para =~ s/$POD_link_re/$1/go;
979
MKDEBUG && _d('Option rule:', $para);
983
die "POD has no $self->{head1} section" unless $para;
986
if ( my ($option) = $para =~ m/^=item $self->{item}/ ) {
988
MKDEBUG && _d($para);
991
$para = <$fh>; # read next paragraph, possibly attributes
993
if ( $para =~ m/: / ) { # attributes
996
my ( $attrib, $val) = split(/: /, $_);
997
die "Unrecognized attribute for --$option: $attrib"
998
unless $self->{attributes}->{$attrib};
1000
} split(/; /, $para);
1001
if ( $attribs{'short form'} ) {
1002
$attribs{'short form'} =~ s/-//;
1004
$para = <$fh>; # read next paragraph, probably short help desc
1007
MKDEBUG && _d('Option has no attributes');
1010
$para =~ s/\s+\Z//g;
1012
$para =~ s/$POD_link_re/$1/go;
1014
$para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s;
1015
MKDEBUG && _d('Short help:', $para);
1017
die "No description after option spec $option" if $para =~ m/^=item/;
1019
if ( my ($base_option) = $option =~ m/^\[no\](.*)/ ) {
1020
$option = $base_option;
1021
$attribs{'negatable'} = 1;
1025
spec => $self->{parse_attributes}->($self, $option, \%attribs),
1027
. (defined $attribs{default} ? " (default $attribs{default})" : ''),
1028
group => ($attribs{'group'} ? $attribs{'group'} : 'default'),
1031
while ( $para = <$fh> ) {
1033
if ( $para =~ m/^=head1/ ) {
1034
$para = undef; # Can't 'last' out of a do {} block.
1037
last if $para =~ m/^=item /;
1041
die "No valid specs in $self->{head1}" unless @specs;
1044
return @specs, @rules;
1048
my ( $self, @specs ) = @_;
1049
my %disables; # special rule that requires deferred checking
1051
foreach my $opt ( @specs ) {
1052
if ( ref $opt ) { # It's an option spec, not a rule.
1053
MKDEBUG && _d('Parsing opt spec:',
1054
map { ($_, '=>', $opt->{$_}) } keys %$opt);
1056
my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
1058
die "Cannot parse long option from spec $opt->{spec}";
1060
$opt->{long} = $long;
1062
die "Duplicate long option --$long" if exists $self->{opts}->{$long};
1063
$self->{opts}->{$long} = $opt;
1065
if ( length $long == 1 ) {
1066
MKDEBUG && _d('Long opt', $long, 'looks like short opt');
1067
$self->{short_opts}->{$long} = $long;
1071
die "Duplicate short option -$short"
1072
if exists $self->{short_opts}->{$short};
1073
$self->{short_opts}->{$short} = $long;
1074
$opt->{short} = $short;
1077
$opt->{short} = undef;
1080
$opt->{is_negatable} = $opt->{spec} =~ m/!/ ? 1 : 0;
1081
$opt->{is_cumulative} = $opt->{spec} =~ m/\+/ ? 1 : 0;
1082
$opt->{is_required} = $opt->{desc} =~ m/required/ ? 1 : 0;
1084
$opt->{group} ||= 'default';
1085
$self->{groups}->{ $opt->{group} }->{$long} = 1;
1087
$opt->{value} = undef;
1090
my ( $type ) = $opt->{spec} =~ m/=(.)/;
1091
$opt->{type} = $type;
1092
MKDEBUG && _d($long, 'type:', $type);
1095
$opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ );
1097
if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) {
1098
$self->{defaults}->{$long} = defined $def ? $def : 1;
1099
MKDEBUG && _d($long, 'default:', $def);
1102
if ( $long eq 'config' ) {
1103
$self->{defaults}->{$long} = join(',', $self->get_defaults_files());
1106
if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) {
1107
$disables{$long} = $dis;
1108
MKDEBUG && _d('Deferring check of disables rule for', $opt, $dis);
1111
$self->{opts}->{$long} = $opt;
1113
else { # It's an option rule, not a spec.
1114
MKDEBUG && _d('Parsing rule:', $opt);
1115
push @{$self->{rules}}, $opt;
1116
my @participants = $self->_get_participants($opt);
1119
if ( $opt =~ m/mutually exclusive|one and only one/ ) {
1121
push @{$self->{mutex}}, \@participants;
1122
MKDEBUG && _d(@participants, 'are mutually exclusive');
1124
if ( $opt =~ m/at least one|one and only one/ ) {
1126
push @{$self->{atleast1}}, \@participants;
1127
MKDEBUG && _d(@participants, 'require at least one');
1129
if ( $opt =~ m/default to/ ) {
1131
$self->{defaults_to}->{$participants[0]} = $participants[1];
1132
MKDEBUG && _d($participants[0], 'defaults to', $participants[1]);
1134
if ( $opt =~ m/restricted to option groups/ ) {
1136
my ($groups) = $opt =~ m/groups ([\w\s\,]+)/;
1137
my @groups = split(',', $groups);
1138
%{$self->{allowed_groups}->{$participants[0]}} = map {
1143
if( $opt =~ m/accepts additional command-line arguments/ ) {
1145
$self->{strict} = 0;
1146
MKDEBUG && _d("Strict mode disabled by rule");
1149
die "Unrecognized option rule: $opt" unless $rule_ok;
1153
foreach my $long ( keys %disables ) {
1154
my @participants = $self->_get_participants($disables{$long});
1155
$self->{disables}->{$long} = \@participants;
1156
MKDEBUG && _d('Option', $long, 'disables', @participants);
1162
sub _get_participants {
1163
my ( $self, $str ) = @_;
1165
foreach my $long ( $str =~ m/--(?:\[no\])?([\w-]+)/g ) {
1166
die "Option --$long does not exist while processing rule $str"
1167
unless exists $self->{opts}->{$long};
1168
push @participants, $long;
1170
MKDEBUG && _d('Participants for', $str, ':', @participants);
1171
return @participants;
1176
my %opts = %{$self->{opts}};
1182
my %short_opts = %{$self->{short_opts}};
1187
my ( $self, %defaults ) = @_;
1188
$self->{defaults} = {};
1189
foreach my $long ( keys %defaults ) {
1190
die "Cannot set default for nonexistent option $long"
1191
unless exists $self->{opts}->{$long};
1192
$self->{defaults}->{$long} = $defaults{$long};
1193
MKDEBUG && _d('Default val for', $long, ':', $defaults{$long});
1200
return $self->{defaults};
1205
return $self->{groups};
1209
my ( $self, $opt, $val ) = @_;
1210
my $long = exists $self->{opts}->{$opt} ? $opt
1211
: exists $self->{short_opts}->{$opt} ? $self->{short_opts}->{$opt}
1212
: die "Getopt::Long gave a nonexistent option: $opt";
1214
$opt = $self->{opts}->{$long};
1215
if ( $opt->{is_cumulative} ) {
1219
$opt->{value} = $val;
1222
MKDEBUG && _d('Got option', $long, '=', $val);
1228
foreach my $long ( keys %{$self->{opts}} ) {
1229
$self->{opts}->{$long}->{got} = 0;
1230
$self->{opts}->{$long}->{value}
1231
= exists $self->{defaults}->{$long} ? $self->{defaults}->{$long}
1232
: $self->{opts}->{$long}->{is_cumulative} ? 0
1235
$self->{got_opts} = 0;
1237
$self->{errors} = [];
1239
if ( @ARGV && $ARGV[0] eq "--config" ) {
1241
$self->_set_option('config', shift @ARGV);
1243
if ( $self->has('config') ) {
1245
foreach my $filename ( split(',', $self->get('config')) ) {
1247
push @extra_args, $self->_read_config_file($filename);
1249
if ( $EVAL_ERROR ) {
1250
if ( $self->got('config') ) {
1258
unshift @ARGV, @extra_args;
1261
Getopt::Long::Configure('no_ignore_case', 'bundling');
1263
map { $_->{spec} => sub { $self->_set_option(@_); } }
1264
grep { $_->{long} ne 'config' } # --config is handled specially above.
1265
values %{$self->{opts}}
1266
) or $self->save_error('Error parsing options');
1268
if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) {
1269
printf("%s Ver %s Distrib %s Changeset %s\n",
1270
$self->{program_name}, $main::VERSION, $main::DISTRIB, $main::SVN_REV)
1271
or die "Cannot print: $OS_ERROR";
1275
if ( @ARGV && $self->{strict} ) {
1276
$self->save_error("Unrecognized command-line options @ARGV");
1279
foreach my $mutex ( @{$self->{mutex}} ) {
1280
my @set = grep { $self->{opts}->{$_}->{got} } @$mutex;
1282
my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
1283
@{$mutex}[ 0 .. scalar(@$mutex) - 2] )
1284
. ' and --'.$self->{opts}->{$mutex->[-1]}->{long}
1285
. ' are mutually exclusive.';
1286
$self->save_error($err);
1290
foreach my $required ( @{$self->{atleast1}} ) {
1291
my @set = grep { $self->{opts}->{$_}->{got} } @$required;
1293
my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
1294
@{$required}[ 0 .. scalar(@$required) - 2] )
1295
.' or --'.$self->{opts}->{$required->[-1]}->{long};
1296
$self->save_error("Specify at least one of $err");
1300
$self->_check_opts( keys %{$self->{opts}} );
1301
$self->{got_opts} = 1;
1306
my ( $self, @long ) = @_;
1307
my $long_last = scalar @long;
1309
foreach my $i ( 0..$#long ) {
1310
my $long = $long[$i];
1312
my $opt = $self->{opts}->{$long};
1313
if ( $opt->{got} ) {
1314
if ( exists $self->{disables}->{$long} ) {
1315
my @disable_opts = @{$self->{disables}->{$long}};
1316
map { $self->{opts}->{$_}->{value} = undef; } @disable_opts;
1317
MKDEBUG && _d('Unset options', @disable_opts,
1318
'because', $long,'disables them');
1321
if ( exists $self->{allowed_groups}->{$long} ) {
1323
my @restricted_groups = grep {
1324
!exists $self->{allowed_groups}->{$long}->{$_}
1325
} keys %{$self->{groups}};
1327
my @restricted_opts;
1328
foreach my $restricted_group ( @restricted_groups ) {
1330
foreach my $restricted_opt (
1331
keys %{$self->{groups}->{$restricted_group}} )
1333
next RESTRICTED_OPT if $restricted_opt eq $long;
1334
push @restricted_opts, $restricted_opt
1335
if $self->{opts}->{$restricted_opt}->{got};
1339
if ( @restricted_opts ) {
1341
if ( @restricted_opts == 1 ) {
1342
$err = "--$restricted_opts[0]";
1346
map { "--$self->{opts}->{$_}->{long}" }
1348
@restricted_opts[0..scalar(@restricted_opts) - 2]
1350
. ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long};
1352
$self->save_error("--$long is not allowed with $err");
1357
elsif ( $opt->{is_required} ) {
1358
$self->save_error("Required option --$long must be specified");
1361
$self->_validate_type($opt);
1362
if ( $opt->{parsed} ) {
1366
MKDEBUG && _d('Temporarily failed to parse', $long);
1370
die "Failed to parse options, possibly due to circular dependencies"
1371
if @long == $long_last;
1378
sub _validate_type {
1379
my ( $self, $opt ) = @_;
1382
if ( !$opt->{type} ) {
1387
my $val = $opt->{value};
1389
if ( $val && $opt->{type} eq 'm' ) { # type time
1390
MKDEBUG && _d('Parsing option', $opt->{long}, 'as a time value');
1391
my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
1393
my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/;
1394
$suffix = $s || 's';
1395
MKDEBUG && _d('No suffix given; using', $suffix, 'for',
1396
$opt->{long}, '(value:', $val, ')');
1398
if ( $suffix =~ m/[smhd]/ ) {
1399
$val = $suffix eq 's' ? $num # Seconds
1400
: $suffix eq 'm' ? $num * 60 # Minutes
1401
: $suffix eq 'h' ? $num * 3600 # Hours
1402
: $num * 86400; # Days
1403
$opt->{value} = ($prefix || '') . $val;
1404
MKDEBUG && _d('Setting option', $opt->{long}, 'to', $val);
1407
$self->save_error("Invalid time suffix for --$opt->{long}");
1410
elsif ( $val && $opt->{type} eq 'd' ) { # type DSN
1411
MKDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN');
1413
my $from_key = $self->{defaults_to}->{ $opt->{long} };
1415
MKDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN');
1416
if ( $self->{opts}->{$from_key}->{parsed} ) {
1417
$prev = $self->{opts}->{$from_key}->{value};
1420
MKDEBUG && _d('Cannot parse', $opt->{long}, 'until',
1421
$from_key, 'parsed');
1425
my $defaults = $self->{DSNParser}->parse_options($self);
1426
$opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults);
1428
elsif ( $val && $opt->{type} eq 'z' ) { # type size
1429
MKDEBUG && _d('Parsing option', $opt->{long}, 'as a size value');
1430
$self->_parse_size($opt, $val);
1432
elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) {
1433
$opt->{value} = { map { $_ => 1 } split(/(?<!\\),\s*/, ($val || '')) };
1435
elsif ( $opt->{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) {
1436
$opt->{value} = [ split(/(?<!\\),\s*/, ($val || '')) ];
1439
MKDEBUG && _d('Nothing to validate for option',
1440
$opt->{long}, 'type', $opt->{type}, 'value', $val);
1448
my ( $self, $opt ) = @_;
1449
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1450
die "Option $opt does not exist"
1451
unless $long && exists $self->{opts}->{$long};
1452
return $self->{opts}->{$long}->{value};
1456
my ( $self, $opt ) = @_;
1457
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1458
die "Option $opt does not exist"
1459
unless $long && exists $self->{opts}->{$long};
1460
return $self->{opts}->{$long}->{got};
1464
my ( $self, $opt ) = @_;
1465
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1466
return defined $long ? exists $self->{opts}->{$long} : 0;
1470
my ( $self, $opt, $val ) = @_;
1471
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1472
die "Option $opt does not exist"
1473
unless $long && exists $self->{opts}->{$long};
1474
$self->{opts}->{$long}->{value} = $val;
1479
my ( $self, $error ) = @_;
1480
push @{$self->{errors}}, $error;
1486
return $self->{errors};
1491
warn "No usage string is set" unless $self->{usage}; # XXX
1492
return "Usage: " . ($self->{usage} || '') . "\n";
1497
warn "No description string is set" unless $self->{description}; # XXX
1498
my $descr = ($self->{description} || $self->{program_name} || '')
1499
. " For more details, please use the --help option, "
1500
. "or try 'perldoc $PROGRAM_NAME' "
1501
. "for complete documentation.";
1502
$descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g)
1503
unless $ENV{DONT_BREAK_LINES};
1504
$descr =~ s/ +$//mg;
1508
sub usage_or_errors {
1509
my ( $self, $file, $return ) = @_;
1510
$file ||= $self->{file} || __FILE__;
1512
if ( !$self->{description} || !$self->{usage} ) {
1513
MKDEBUG && _d("Getting description and usage from SYNOPSIS in", $file);
1514
my %synop = $self->_parse_synopsis($file);
1515
$self->{description} ||= $synop{description};
1516
$self->{usage} ||= $synop{usage};
1517
MKDEBUG && _d("Description:", $self->{description},
1518
"\nUsage:", $self->{usage});
1521
if ( $self->{opts}->{help}->{got} ) {
1522
print $self->print_usage() or die "Cannot print usage: $OS_ERROR";
1523
exit 0 unless $return;
1525
elsif ( scalar @{$self->{errors}} ) {
1526
print $self->print_errors() or die "Cannot print errors: $OS_ERROR";
1527
exit 0 unless $return;
1535
my $usage = $self->usage() . "\n";
1536
if ( (my @errors = @{$self->{errors}}) ) {
1537
$usage .= join("\n * ", 'Errors in command-line arguments:', @errors)
1540
return $usage . "\n" . $self->descr();
1545
die "Run get_opts() before print_usage()" unless $self->{got_opts};
1546
my @opts = values %{$self->{opts}};
1550
length($_->{long}) # option long name
1551
+ ($_->{is_negatable} ? 4 : 0) # "[no]" if opt is negatable
1552
+ ($_->{type} ? 2 : 0) # "=x" where x is the opt type
1559
+ ($self->{opts}->{$_}->{is_negatable} ? 4 : 0)
1560
+ ($self->{opts}->{$_}->{type} ? 2 : 0)
1562
values %{$self->{short_opts}});
1564
my $lcol = max($maxl, ($maxs + 3));
1565
my $rcol = 80 - $lcol - 6;
1566
my $rpad = ' ' x ( 80 - $rcol );
1568
$maxs = max($lcol - 3, $maxs);
1570
my $usage = $self->descr() . "\n" . $self->usage();
1572
my @groups = reverse sort grep { $_ ne 'default'; } keys %{$self->{groups}};
1573
push @groups, 'default';
1575
foreach my $group ( reverse @groups ) {
1576
$usage .= "\n".($group eq 'default' ? 'Options' : $group).":\n\n";
1578
sort { $a->{long} cmp $b->{long} }
1579
grep { $_->{group} eq $group }
1582
my $long = $opt->{is_negatable} ? "[no]$opt->{long}" : $opt->{long};
1583
my $short = $opt->{short};
1584
my $desc = $opt->{desc};
1586
$long .= $opt->{type} ? "=$opt->{type}" : "";
1588
if ( $opt->{type} && $opt->{type} eq 'm' ) {
1589
my ($s) = $desc =~ m/\(suffix (.)\)/;
1591
$desc =~ s/\s+\(suffix .\)//;
1592
$desc .= ". Optional suffix s=seconds, m=minutes, h=hours, "
1593
. "d=days; if no suffix, $s is used.";
1595
$desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g);
1598
$usage .= sprintf(" --%-${maxs}s -%s %s\n", $long, $short, $desc);
1601
$usage .= sprintf(" --%-${lcol}s %s\n", $long, $desc);
1606
$usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n";
1608
if ( (my @rules = @{$self->{rules}}) ) {
1609
$usage .= "\nRules:\n\n";
1610
$usage .= join("\n", map { " $_" } @rules) . "\n";
1612
if ( $self->{DSNParser} ) {
1613
$usage .= "\n" . $self->{DSNParser}->usage();
1615
$usage .= "\nOptions and values after processing arguments:\n\n";
1616
foreach my $opt ( sort { $a->{long} cmp $b->{long} } @opts ) {
1617
my $val = $opt->{value};
1618
my $type = $opt->{type} || '';
1619
my $bool = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
1620
$val = $bool ? ( $val ? 'TRUE' : 'FALSE' )
1621
: !defined $val ? '(No value)'
1622
: $type eq 'd' ? $self->{DSNParser}->as_string($val)
1623
: $type =~ m/H|h/ ? join(',', sort keys %$val)
1624
: $type =~ m/A|a/ ? join(',', @$val)
1626
$usage .= sprintf(" --%-${lcol}s %s\n", $opt->{long}, $val);
1632
shift @_ if ref $_[0] eq __PACKAGE__;
1633
my ( $prompt ) = @_;
1634
local $OUTPUT_AUTOFLUSH = 1;
1636
or die "Cannot print: $OS_ERROR";
1639
require Term::ReadKey;
1640
Term::ReadKey::ReadMode('noecho');
1641
chomp($response = <STDIN>);
1642
Term::ReadKey::ReadMode('normal');
1644
or die "Cannot print: $OS_ERROR";
1646
if ( $EVAL_ERROR ) {
1647
die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR";
1653
print '# ', $^X, ' ', $], "\n";
1654
my $uname = `uname -a`;
1656
$uname =~ s/\s+/ /g;
1659
printf("# %s Ver %s Distrib %s Changeset %s line %d\n",
1660
$PROGRAM_NAME, ($main::VERSION || ''), ($main::DISTRIB || ''),
1661
($main::SVN_REV || ''), __LINE__);
1662
print('# Arguments: ',
1663
join(' ', map { my $a = "_[$_]_"; $a =~ s/\n/\n# /g; $a; } @ARGV), "\n");
1666
sub _read_config_file {
1667
my ( $self, $filename ) = @_;
1668
open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n";
1674
while ( my $line = <$fh> ) {
1676
next LINE if $line =~ m/^\s*(?:\#|\;|$)/;
1677
$line =~ s/\s+#.*$//g;
1678
$line =~ s/^\s+|\s+$//g;
1679
if ( $line eq '--' ) {
1685
&& (my($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/)
1687
push @args, grep { defined $_ } ("$prefix$opt", $arg);
1689
elsif ( $line =~ m/./ ) {
1693
die "Syntax error in file $filename at line $INPUT_LINE_NUMBER";
1700
sub read_para_after {
1701
my ( $self, $file, $regex ) = @_;
1702
open my $fh, "<", $file or die "Can't open $file: $OS_ERROR";
1703
local $INPUT_RECORD_SEPARATOR = '';
1705
while ( $para = <$fh> ) {
1706
next unless $para =~ m/^=pod$/m;
1709
while ( $para = <$fh> ) {
1710
next unless $para =~ m/$regex/;
1715
close $fh or die "Can't close $file: $OS_ERROR";
1723
my $hashref = $self->{$_};
1725
foreach my $key ( keys %$hashref ) {
1726
my $ref = ref $hashref->{$key};
1727
$val_copy->{$key} = !$ref ? $hashref->{$key}
1728
: $ref eq 'HASH' ? { %{$hashref->{$key}} }
1729
: $ref eq 'ARRAY' ? [ @{$hashref->{$key}} ]
1733
} qw(opts short_opts defaults);
1735
foreach my $scalar ( qw(got_opts) ) {
1736
$clone{$scalar} = $self->{$scalar};
1739
return bless \%clone;
1743
my ( $self, $opt, $val ) = @_;
1745
if ( lc($val || '') eq 'null' ) {
1746
MKDEBUG && _d('NULL size for', $opt->{long});
1747
$opt->{value} = 'null';
1751
my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);
1752
my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
1753
if ( defined $num ) {
1755
$num *= $factor_for{$factor};
1756
MKDEBUG && _d('Setting option', $opt->{y},
1757
'to num', $num, '* factor', $factor);
1759
$opt->{value} = ($pre || '') . $num;
1762
$self->save_error("Invalid size for --$opt->{long}");
1767
sub _parse_attribs {
1768
my ( $self, $option, $attribs ) = @_;
1769
my $types = $self->{types};
1771
. ($attribs->{'short form'} ? '|' . $attribs->{'short form'} : '' )
1772
. ($attribs->{'negatable'} ? '!' : '' )
1773
. ($attribs->{'cumulative'} ? '+' : '' )
1774
. ($attribs->{'type'} ? '=' . $types->{$attribs->{type}} : '' );
1777
sub _parse_synopsis {
1778
my ( $self, $file ) = @_;
1779
$file ||= $self->{file} || __FILE__;
1780
MKDEBUG && _d("Parsing SYNOPSIS in", $file);
1782
local $INPUT_RECORD_SEPARATOR = ''; # read paragraphs
1783
open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
1785
1 while defined($para = <$fh>) && $para !~ m/^=head1 SYNOPSIS/;
1786
die "$file does not contain a SYNOPSIS section" unless $para;
1788
for ( 1..2 ) { # 1 for the usage, 2 for the description
1793
MKDEBUG && _d("Raw SYNOPSIS text:", @synop);
1794
my ($usage, $desc) = @synop;
1795
die "The SYNOPSIS section in $file is not formatted properly"
1796
unless $usage && $desc;
1798
$usage =~ s/^\s*Usage:\s+(.+)/$1/;
1802
$desc =~ s/\s{2,}/ /g;
1803
$desc =~ s/\. ([A-Z][a-z])/. $1/g;
1807
description => $desc,
1813
my ($package, undef, $line) = caller 0;
1814
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1815
map { defined $_ ? $_ : 'undef' }
1817
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1822
# ###########################################################################
1823
# End OptionParser package
1824
# ###########################################################################
1826
# ###########################################################################
1827
# DSNParser package 7388
1828
# This package is a copy without comments from the original. The original
1829
# with comments and its test file can be found in the SVN repository at,
1830
# trunk/common/DSNParser.pm
1831
# trunk/common/t/DSNParser.t
1832
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
1833
# ###########################################################################
1838
use warnings FATAL => 'all';
1839
use English qw(-no_match_vars);
1840
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
1843
$Data::Dumper::Indent = 0;
1844
$Data::Dumper::Quotekeys = 0;
1849
my $have_dbi = $EVAL_ERROR ? 0 : 1;
1853
my ( $class, %args ) = @_;
1854
foreach my $arg ( qw(opts) ) {
1855
die "I need a $arg argument" unless $args{$arg};
1858
opts => {} # h, P, u, etc. Should come from DSN OPTIONS section in POD.
1860
foreach my $opt ( @{$args{opts}} ) {
1861
if ( !$opt->{key} || !$opt->{desc} ) {
1862
die "Invalid DSN option: ", Dumper($opt);
1864
MKDEBUG && _d('DSN option:',
1866
map { "$_=" . (defined $opt->{$_} ? ($opt->{$_} || '') : 'undef') }
1870
$self->{opts}->{$opt->{key}} = {
1872
desc => $opt->{desc},
1873
copy => $opt->{copy} || 0,
1876
return bless $self, $class;
1880
my ( $self, $prop, $value ) = @_;
1882
MKDEBUG && _d('Setting', $prop, 'property');
1883
$self->{$prop} = $value;
1885
return $self->{$prop};
1889
my ( $self, $dsn, $prev, $defaults ) = @_;
1891
MKDEBUG && _d('No DSN to parse');
1894
MKDEBUG && _d('Parsing', $dsn);
1899
my $opts = $self->{opts};
1901
foreach my $dsn_part ( split(/,/, $dsn) ) {
1902
if ( my ($prop_key, $prop_val) = $dsn_part =~ m/^(.)=(.*)$/ ) {
1903
$given_props{$prop_key} = $prop_val;
1906
MKDEBUG && _d('Interpreting', $dsn_part, 'as h=', $dsn_part);
1907
$given_props{h} = $dsn_part;
1911
foreach my $key ( keys %$opts ) {
1912
MKDEBUG && _d('Finding value for', $key);
1913
$final_props{$key} = $given_props{$key};
1914
if ( !defined $final_props{$key}
1915
&& defined $prev->{$key} && $opts->{$key}->{copy} )
1917
$final_props{$key} = $prev->{$key};
1918
MKDEBUG && _d('Copying value for', $key, 'from previous DSN');
1920
if ( !defined $final_props{$key} ) {
1921
$final_props{$key} = $defaults->{$key};
1922
MKDEBUG && _d('Copying value for', $key, 'from defaults');
1926
foreach my $key ( keys %given_props ) {
1927
die "Unknown DSN option '$key' in '$dsn'. For more details, "
1928
. "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
1929
. "for complete documentation."
1930
unless exists $opts->{$key};
1932
if ( (my $required = $self->prop('required')) ) {
1933
foreach my $key ( keys %$required ) {
1934
die "Missing required DSN option '$key' in '$dsn'. For more details, "
1935
. "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
1936
. "for complete documentation."
1937
unless $final_props{$key};
1941
return \%final_props;
1945
my ( $self, $o ) = @_;
1946
die 'I need an OptionParser object' unless ref $o eq 'OptionParser';
1949
map { "$_=".$o->get($_); }
1950
grep { $o->has($_) && $o->get($_) }
1951
keys %{$self->{opts}}
1953
MKDEBUG && _d('DSN string made from options:', $dsn_string);
1954
return $self->parse($dsn_string);
1958
my ( $self, $dsn, $props ) = @_;
1959
return $dsn unless ref $dsn;
1960
my %allowed = $props ? map { $_=>1 } @$props : ();
1962
map { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) }
1963
grep { defined $dsn->{$_} && $self->{opts}->{$_} }
1964
grep { !$props || $allowed{$_} }
1971
= "DSN syntax is key=value[,key=value...] Allowable DSN keys:\n\n"
1972
. " KEY COPY MEANING\n"
1973
. " === ==== =============================================\n";
1974
my %opts = %{$self->{opts}};
1975
foreach my $key ( sort keys %opts ) {
1977
. ($opts{$key}->{copy} ? 'yes ' : 'no ')
1978
. ($opts{$key}->{desc} || '[No description]')
1981
$usage .= "\n If the DSN is a bareword, the word is treated as the 'h' key.\n";
1985
sub get_cxn_params {
1986
my ( $self, $info ) = @_;
1988
my %opts = %{$self->{opts}};
1989
my $driver = $self->prop('dbidriver') || '';
1990
if ( $driver eq 'Pg' ) {
1991
$dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';'
1992
. join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
1993
grep { defined $info->{$_} }
1997
$dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';'
1998
. join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
1999
grep { defined $info->{$_} }
2001
. ';mysql_read_default_group=client';
2003
MKDEBUG && _d($dsn);
2004
return ($dsn, $info->{u}, $info->{p});
2008
my ( $self, $dbh, $dsn ) = @_;
2009
my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name');
2010
my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()');
2012
$dsn->{h} ||= $vars->{hostname}->{Value};
2013
$dsn->{S} ||= $vars->{'socket'}->{Value};
2014
$dsn->{P} ||= $vars->{port}->{Value};
2015
$dsn->{u} ||= $user;
2020
my ( $self, $cxn_string, $user, $pass, $opts ) = @_;
2026
ShowErrorStatement => 1,
2027
mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0),
2029
@{$defaults}{ keys %$opts } = values %$opts;
2031
if ( $opts->{mysql_use_result} ) {
2032
$defaults->{mysql_use_result} = 1;
2036
die "Cannot connect to MySQL because the Perl DBI module is not "
2037
. "installed or not found. Run 'perl -MDBI' to see the directories "
2038
. "that Perl searches for DBI. If DBI is not installed, try:\n"
2039
. " Debian/Ubuntu apt-get install libdbi-perl\n"
2040
. " RHEL/CentOS yum install perl-DBI\n"
2041
. " OpenSolaris pgk install pkg:/SUNWpmdbi\n";
2047
while ( !$dbh && $tries-- ) {
2048
MKDEBUG && _d($cxn_string, ' ', $user, ' ', $pass, ' {',
2049
join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults ), '}');
2052
$dbh = DBI->connect($cxn_string, $user, $pass, $defaults);
2054
if ( $cxn_string =~ m/mysql/i ) {
2057
$sql = 'SELECT @@SQL_MODE';
2058
MKDEBUG && _d($dbh, $sql);
2059
my ($sql_mode) = $dbh->selectrow_array($sql);
2061
$sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1'
2062
. '/*!40101, @@SQL_MODE=\'NO_AUTO_VALUE_ON_ZERO'
2063
. ($sql_mode ? ",$sql_mode" : '')
2065
MKDEBUG && _d($dbh, $sql);
2068
if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) {
2069
$sql = "/*!40101 SET NAMES $charset*/";
2070
MKDEBUG && _d($dbh, ':', $sql);
2072
MKDEBUG && _d('Enabling charset for STDOUT');
2073
if ( $charset eq 'utf8' ) {
2074
binmode(STDOUT, ':utf8')
2075
or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
2078
binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR";
2082
if ( $self->prop('set-vars') ) {
2083
$sql = "SET " . $self->prop('set-vars');
2084
MKDEBUG && _d($dbh, ':', $sql);
2089
if ( !$dbh && $EVAL_ERROR ) {
2090
MKDEBUG && _d($EVAL_ERROR);
2091
if ( $EVAL_ERROR =~ m/not a compiled character set|character set utf8/ ) {
2092
MKDEBUG && _d('Going to try again without utf8 support');
2093
delete $defaults->{mysql_enable_utf8};
2095
elsif ( $EVAL_ERROR =~ m/locate DBD\/mysql/i ) {
2096
die "Cannot connect to MySQL because the Perl DBD::mysql module is "
2097
. "not installed or not found. Run 'perl -MDBD::mysql' to see "
2098
. "the directories that Perl searches for DBD::mysql. If "
2099
. "DBD::mysql is not installed, try:\n"
2100
. " Debian/Ubuntu apt-get install libdbd-mysql-perl\n"
2101
. " RHEL/CentOS yum install perl-DBD-MySQL\n"
2102
. " OpenSolaris pgk install pkg:/SUNWapu13dbd-mysql\n";
2110
MKDEBUG && _d('DBH info: ',
2112
Dumper($dbh->selectrow_hashref(
2113
'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')),
2114
'Connection info:', $dbh->{mysql_hostinfo},
2115
'Character set info:', Dumper($dbh->selectall_arrayref(
2116
'SHOW VARIABLES LIKE "character_set%"', { Slice => {}})),
2117
'$DBD::mysql::VERSION:', $DBD::mysql::VERSION,
2118
'$DBI::VERSION:', $DBI::VERSION,
2125
my ( $self, $dbh ) = @_;
2126
if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) {
2129
my ( $hostname, $one ) = $dbh->selectrow_array(
2130
'SELECT /*!50038 @@hostname, */ 1');
2135
my ( $self, $dbh ) = @_;
2136
MKDEBUG && $self->print_active_handles($dbh);
2140
sub print_active_handles {
2141
my ( $self, $thing, $level ) = @_;
2143
printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level,
2144
$thing, (($thing->{Type} || '') eq 'st' ? $thing->{Statement} || '' : ''))
2145
or die "Cannot print: $OS_ERROR";
2146
foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) {
2147
$self->print_active_handles( $handle, $level + 1 );
2152
my ( $self, $dsn_1, $dsn_2, %args ) = @_;
2153
die 'I need a dsn_1 argument' unless $dsn_1;
2154
die 'I need a dsn_2 argument' unless $dsn_2;
2158
if ( $args{overwrite} ) {
2159
$val = defined $dsn_1->{$key} ? $dsn_1->{$key} : $dsn_2->{$key};
2162
$val = defined $dsn_2->{$key} ? $dsn_2->{$key} : $dsn_1->{$key};
2165
} keys %{$self->{opts}};
2170
my ($package, undef, $line) = caller 0;
2171
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2172
map { defined $_ ? $_ : 'undef' }
2174
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2179
# ###########################################################################
2180
# End DSNParser package
2181
# ###########################################################################
2183
# ###########################################################################
2184
# VersionParser package 6667
2185
# This package is a copy without comments from the original. The original
2186
# with comments and its test file can be found in the SVN repository at,
2187
# trunk/common/VersionParser.pm
2188
# trunk/common/t/VersionParser.t
2189
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
2190
# ###########################################################################
2191
package VersionParser;
2194
use warnings FATAL => 'all';
2196
use English qw(-no_match_vars);
2198
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
2206
my ( $self, $str ) = @_;
2207
my $result = sprintf('%03d%03d%03d', $str =~ m/(\d+)/g);
2208
MKDEBUG && _d($str, 'parses to', $result);
2213
my ( $self, $dbh, $target ) = @_;
2214
if ( !$self->{$dbh} ) {
2215
$self->{$dbh} = $self->parse(
2216
$dbh->selectrow_array('SELECT VERSION()'));
2218
my $result = $self->{$dbh} ge $self->parse($target) ? 1 : 0;
2219
MKDEBUG && _d($self->{$dbh}, 'ge', $target, ':', $result);
2223
sub innodb_version {
2224
my ( $self, $dbh ) = @_;
2226
my $innodb_version = "NO";
2229
grep { $_->{engine} =~ m/InnoDB/i }
2232
@hash{ map { lc $_ } keys %$_ } = values %$_;
2235
@{ $dbh->selectall_arrayref("SHOW ENGINES", {Slice=>{}}) };
2237
MKDEBUG && _d("InnoDB support:", $innodb->{support});
2238
if ( $innodb->{support} =~ m/YES|DEFAULT/i ) {
2239
my $vars = $dbh->selectrow_hashref(
2240
"SHOW VARIABLES LIKE 'innodb_version'");
2241
$innodb_version = !$vars ? "BUILTIN"
2242
: ($vars->{Value} || $vars->{value});
2245
$innodb_version = $innodb->{support}; # probably DISABLED or NO
2249
MKDEBUG && _d("InnoDB version:", $innodb_version);
2250
return $innodb_version;
2254
my ($package, undef, $line) = caller 0;
2255
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2256
map { defined $_ ? $_ : 'undef' }
2258
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2263
# ###########################################################################
2264
# End VersionParser package
2265
# ###########################################################################
2267
# ###########################################################################
2268
# MySQLDump package 6345
2269
# This package is a copy without comments from the original. The original
2270
# with comments and its test file can be found in the SVN repository at,
2271
# trunk/common/MySQLDump.pm
2272
# trunk/common/t/MySQLDump.t
2273
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
2274
# ###########################################################################
2278
use warnings FATAL => 'all';
2280
use English qw(-no_match_vars);
2282
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
2284
( our $before = <<'EOF') =~ s/^ //gm;
2285
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
2286
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
2287
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
2288
/*!40101 SET NAMES utf8 */;
2289
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
2290
/*!40103 SET TIME_ZONE='+00:00' */;
2291
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
2292
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
2293
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
2294
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
2297
( our $after = <<'EOF') =~ s/^ //gm;
2298
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
2299
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
2300
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
2301
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
2302
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
2303
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
2304
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
2305
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
2309
my ( $class, %args ) = @_;
2311
cache => 0, # Afaik no script uses this cache any longer because
2313
return bless $self, $class;
2317
my ( $self, $dbh, $quoter, $db, $tbl, $what ) = @_;
2319
if ( $what eq 'table' ) {
2320
my $ddl = $self->get_create_table($dbh, $quoter, $db, $tbl);
2322
if ( $ddl->[0] eq 'table' ) {
2324
. 'DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . ";\n"
2325
. $ddl->[1] . ";\n";
2328
return 'DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . ";\n"
2329
. '/*!50001 DROP VIEW IF EXISTS '
2330
. $quoter->quote($tbl) . "*/;\n/*!50001 "
2331
. $self->get_tmp_table($dbh, $quoter, $db, $tbl) . "*/;\n";
2334
elsif ( $what eq 'triggers' ) {
2335
my $trgs = $self->get_triggers($dbh, $quoter, $db, $tbl);
2336
if ( $trgs && @$trgs ) {
2337
my $result = $before . "\nDELIMITER ;;\n";
2338
foreach my $trg ( @$trgs ) {
2339
if ( $trg->{sql_mode} ) {
2340
$result .= qq{/*!50003 SET SESSION SQL_MODE='$trg->{sql_mode}' */;;\n};
2342
$result .= "/*!50003 CREATE */ ";
2343
if ( $trg->{definer} ) {
2345
= map { s/'/''/g; "'$_'"; }
2346
split('@', $trg->{definer}, 2);
2347
$result .= "/*!50017 DEFINER=$user\@$host */ ";
2349
$result .= sprintf("/*!50003 TRIGGER %s %s %s ON %s\nFOR EACH ROW %s */;;\n\n",
2350
$quoter->quote($trg->{trigger}),
2351
@{$trg}{qw(timing event)},
2352
$quoter->quote($trg->{table}),
2355
$result .= "DELIMITER ;\n\n/*!50003 SET SESSION SQL_MODE=\@OLD_SQL_MODE */;\n\n";
2362
elsif ( $what eq 'view' ) {
2363
my $ddl = $self->get_create_table($dbh, $quoter, $db, $tbl);
2364
return '/*!50001 DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . "*/;\n"
2365
. '/*!50001 DROP VIEW IF EXISTS ' . $quoter->quote($tbl) . "*/;\n"
2366
. '/*!50001 ' . $ddl->[1] . "*/;\n";
2369
die "You didn't say what to dump.";
2374
my ( $self, $dbh, $quoter, $new ) = @_;
2376
MKDEBUG && _d('No new DB to use');
2379
my $sql = 'USE ' . $quoter->quote($new);
2380
MKDEBUG && _d($dbh, $sql);
2385
sub get_create_table {
2386
my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
2387
if ( !$self->{cache} || !$self->{tables}->{$db}->{$tbl} ) {
2388
my $sql = '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
2389
. q{@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, 'ANSI_QUOTES', ''), ',,', ','), }
2390
. '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
2391
. '@@SQL_QUOTE_SHOW_CREATE := 1 */';
2392
MKDEBUG && _d($sql);
2393
eval { $dbh->do($sql); };
2394
MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
2395
$self->_use_db($dbh, $quoter, $db);
2396
$sql = "SHOW CREATE TABLE " . $quoter->quote($db, $tbl);
2397
MKDEBUG && _d($sql);
2399
eval { $href = $dbh->selectrow_hashref($sql); };
2400
if ( $EVAL_ERROR ) {
2401
warn "Failed to $sql. The table may be damaged.\nError: $EVAL_ERROR";
2405
$sql = '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
2406
. '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */';
2407
MKDEBUG && _d($sql);
2409
my ($key) = grep { m/create table/i } keys %$href;
2411
MKDEBUG && _d('This table is a base table');
2412
$self->{tables}->{$db}->{$tbl} = [ 'table', $href->{$key} ];
2415
MKDEBUG && _d('This table is a view');
2416
($key) = grep { m/create view/i } keys %$href;
2417
$self->{tables}->{$db}->{$tbl} = [ 'view', $href->{$key} ];
2420
return $self->{tables}->{$db}->{$tbl};
2424
my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
2425
MKDEBUG && _d('Get columns for', $db, $tbl);
2426
if ( !$self->{cache} || !$self->{columns}->{$db}->{$tbl} ) {
2427
$self->_use_db($dbh, $quoter, $db);
2428
my $sql = "SHOW COLUMNS FROM " . $quoter->quote($db, $tbl);
2429
MKDEBUG && _d($sql);
2430
my $cols = $dbh->selectall_arrayref($sql, { Slice => {} });
2432
$self->{columns}->{$db}->{$tbl} = [
2435
@row{ map { lc $_ } keys %$_ } = values %$_;
2440
return $self->{columns}->{$db}->{$tbl};
2444
my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
2445
my $result = 'CREATE TABLE ' . $quoter->quote($tbl) . " (\n";
2446
$result .= join(",\n",
2447
map { ' ' . $quoter->quote($_->{field}) . ' ' . $_->{type} }
2448
@{$self->get_columns($dbh, $quoter, $db, $tbl)});
2450
MKDEBUG && _d($result);
2455
my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
2456
if ( !$self->{cache} || !$self->{triggers}->{$db} ) {
2457
$self->{triggers}->{$db} = {};
2458
my $sql = '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
2459
. q{@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, 'ANSI_QUOTES', ''), ',,', ','), }
2460
. '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
2461
. '@@SQL_QUOTE_SHOW_CREATE := 1 */';
2462
MKDEBUG && _d($sql);
2463
eval { $dbh->do($sql); };
2464
MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
2465
$sql = "SHOW TRIGGERS FROM " . $quoter->quote($db);
2466
MKDEBUG && _d($sql);
2467
my $sth = $dbh->prepare($sql);
2470
my $trgs = $sth->fetchall_arrayref({});
2471
foreach my $trg (@$trgs) {
2473
@trg{ map { lc $_ } keys %$trg } = values %$trg;
2474
push @{ $self->{triggers}->{$db}->{ $trg{table} } }, \%trg;
2477
$sql = '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
2478
. '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */';
2479
MKDEBUG && _d($sql);
2483
return $self->{triggers}->{$db}->{$tbl};
2485
return values %{$self->{triggers}->{$db}};
2489
my ( $self, $dbh, $quoter, $like ) = @_;
2490
if ( !$self->{cache} || !$self->{databases} || $like ) {
2491
my $sql = 'SHOW DATABASES';
2495
push @params, $like;
2497
my $sth = $dbh->prepare($sql);
2498
MKDEBUG && _d($sql, @params);
2499
$sth->execute( @params );
2500
my @dbs = map { $_->[0] } @{$sth->fetchall_arrayref()};
2501
$self->{databases} = \@dbs unless $like;
2504
return @{$self->{databases}};
2507
sub get_table_status {
2508
my ( $self, $dbh, $quoter, $db, $like ) = @_;
2509
if ( !$self->{cache} || !$self->{table_status}->{$db} || $like ) {
2510
my $sql = "SHOW TABLE STATUS FROM " . $quoter->quote($db);
2514
push @params, $like;
2516
MKDEBUG && _d($sql, @params);
2517
my $sth = $dbh->prepare($sql);
2518
$sth->execute(@params);
2519
my @tables = @{$sth->fetchall_arrayref({})};
2521
my %tbl; # Make a copy with lowercased keys
2522
@tbl{ map { lc $_ } keys %$_ } = values %$_;
2523
$tbl{engine} ||= $tbl{type} || $tbl{comment};
2527
$self->{table_status}->{$db} = \@tables unless $like;
2530
return @{$self->{table_status}->{$db}};
2533
sub get_table_list {
2534
my ( $self, $dbh, $quoter, $db, $like ) = @_;
2535
if ( !$self->{cache} || !$self->{table_list}->{$db} || $like ) {
2536
my $sql = "SHOW /*!50002 FULL*/ TABLES FROM " . $quoter->quote($db);
2540
push @params, $like;
2542
MKDEBUG && _d($sql, @params);
2543
my $sth = $dbh->prepare($sql);
2544
$sth->execute(@params);
2545
my @tables = @{$sth->fetchall_arrayref()};
2549
engine => ($_->[1] || '') eq 'VIEW' ? 'VIEW' : '',
2553
$self->{table_list}->{$db} = \@tables unless $like;
2556
return @{$self->{table_list}->{$db}};
2560
my ($package, undef, $line) = caller 0;
2561
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2562
map { defined $_ ? $_ : 'undef' }
2564
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2569
# ###########################################################################
2570
# End MySQLDump package
2571
# ###########################################################################
2573
# ###########################################################################
2574
# TableChunker package 7169
2575
# This package is a copy without comments from the original. The original
2576
# with comments and its test file can be found in the SVN repository at,
2577
# trunk/common/TableChunker.pm
2578
# trunk/common/t/TableChunker.t
2579
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
2580
# ###########################################################################
2582
package TableChunker;
2585
use warnings FATAL => 'all';
2586
use English qw(-no_match_vars);
2587
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
2589
use POSIX qw(floor ceil);
2590
use List::Util qw(min max);
2592
$Data::Dumper::Indent = 1;
2593
$Data::Dumper::Sortkeys = 1;
2594
$Data::Dumper::Quotekeys = 0;
2597
my ( $class, %args ) = @_;
2598
foreach my $arg ( qw(Quoter MySQLDump) ) {
2599
die "I need a $arg argument" unless $args{$arg};
2602
my %int_types = map { $_ => 1 } qw(bigint date datetime int mediumint smallint time timestamp tinyint year);
2603
my %real_types = map { $_ => 1 } qw(decimal double float);
2607
int_types => \%int_types,
2608
real_types => \%real_types,
2609
EPOCH => '1970-01-01',
2612
return bless $self, $class;
2615
sub find_chunk_columns {
2616
my ( $self, %args ) = @_;
2617
foreach my $arg ( qw(tbl_struct) ) {
2618
die "I need a $arg argument" unless $args{$arg};
2620
my $tbl_struct = $args{tbl_struct};
2622
my @possible_indexes;
2623
foreach my $index ( values %{ $tbl_struct->{keys} } ) {
2625
next unless $index->{type} eq 'BTREE';
2627
next if grep { defined } @{$index->{col_prefixes}};
2629
if ( $args{exact} ) {
2630
next unless $index->{is_unique} && @{$index->{cols}} == 1;
2633
push @possible_indexes, $index;
2635
MKDEBUG && _d('Possible chunk indexes in order:',
2636
join(', ', map { $_->{name} } @possible_indexes));
2638
my $can_chunk_exact = 0;
2640
foreach my $index ( @possible_indexes ) {
2641
my $col = $index->{cols}->[0];
2643
my $col_type = $tbl_struct->{type_for}->{$col};
2644
next unless $self->{int_types}->{$col_type}
2645
|| $self->{real_types}->{$col_type}
2646
|| $col_type =~ m/char/;
2648
push @candidate_cols, { column => $col, index => $index->{name} };
2651
$can_chunk_exact = 1 if $args{exact} && scalar @candidate_cols;
2654
my $chunk_type = $args{exact} ? 'Exact' : 'Inexact';
2655
_d($chunk_type, 'chunkable:',
2656
join(', ', map { "$_->{column} on $_->{index}" } @candidate_cols));
2660
MKDEBUG && _d('Ordering columns by order in tbl, PK first');
2661
if ( $tbl_struct->{keys}->{PRIMARY} ) {
2662
my $pk_first_col = $tbl_struct->{keys}->{PRIMARY}->{cols}->[0];
2663
@result = grep { $_->{column} eq $pk_first_col } @candidate_cols;
2664
@candidate_cols = grep { $_->{column} ne $pk_first_col } @candidate_cols;
2667
my %col_pos = map { $_ => $i++ } @{$tbl_struct->{cols}};
2668
push @result, sort { $col_pos{$a->{column}} <=> $col_pos{$b->{column}} }
2672
_d('Chunkable columns:',
2673
join(', ', map { "$_->{column} on $_->{index}" } @result));
2674
_d('Can chunk exactly:', $can_chunk_exact);
2677
return ($can_chunk_exact, @result);
2680
sub calculate_chunks {
2681
my ( $self, %args ) = @_;
2682
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
2683
foreach my $arg ( @required_args ) {
2684
die "I need a $arg argument" unless defined $args{$arg};
2686
MKDEBUG && _d('Calculate chunks for',
2687
join(", ", map {"$_=".(defined $args{$_} ? $args{$_} : "undef")}
2688
qw(db tbl chunk_col min max rows_in_range chunk_size zero_chunk exact)
2691
if ( !$args{rows_in_range} ) {
2692
MKDEBUG && _d("Empty table");
2696
if ( $args{rows_in_range} < $args{chunk_size} ) {
2697
MKDEBUG && _d("Chunk size larger than rows in range");
2701
my $q = $self->{Quoter};
2702
my $dbh = $args{dbh};
2703
my $chunk_col = $args{chunk_col};
2704
my $tbl_struct = $args{tbl_struct};
2705
my $col_type = $tbl_struct->{type_for}->{$chunk_col};
2706
MKDEBUG && _d('chunk col type:', $col_type);
2709
if ( $tbl_struct->{is_numeric}->{$chunk_col} || $col_type =~ /date|time/ ) {
2710
%chunker = $self->_chunk_numeric(%args);
2712
elsif ( $col_type =~ m/char/ ) {
2713
%chunker = $self->_chunk_char(%args);
2716
die "Cannot chunk $col_type columns";
2718
MKDEBUG && _d("Chunker:", Dumper(\%chunker));
2719
my ($col, $start_point, $end_point, $interval, $range_func)
2720
= @chunker{qw(col start_point end_point interval range_func)};
2723
if ( $start_point < $end_point ) {
2725
push @chunks, "$col = 0" if $chunker{have_zero_chunk};
2729
for ( my $i = $start_point; $i < $end_point; $i += $interval ) {
2730
($beg, $end) = $self->$range_func($dbh, $i, $interval, $end_point);
2732
if ( $iter++ == 0 ) {
2734
($chunker{have_zero_chunk} ? "$col > 0 AND " : "")
2735
."$col < " . $q->quote_val($end);
2738
push @chunks, "$col >= " . $q->quote_val($beg) . " AND $col < " . $q->quote_val($end);
2742
my $chunk_range = lc $args{chunk_range} || 'open';
2743
my $nullable = $args{tbl_struct}->{is_nullable}->{$args{chunk_col}};
2746
push @chunks, "$col >= " . $q->quote_val($beg)
2747
. ($chunk_range eq 'openclosed'
2748
? " AND $col <= " . $q->quote_val($args{max}) : "");
2751
push @chunks, $nullable ? "$col IS NOT NULL" : '1=1';
2754
push @chunks, "$col IS NULL";
2758
MKDEBUG && _d('No chunks; using single chunk 1=1');
2759
push @chunks, '1=1';
2765
sub _chunk_numeric {
2766
my ( $self, %args ) = @_;
2767
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
2768
foreach my $arg ( @required_args ) {
2769
die "I need a $arg argument" unless defined $args{$arg};
2771
my $q = $self->{Quoter};
2772
my $db_tbl = $q->quote($args{db}, $args{tbl});
2773
my $col_type = $args{tbl_struct}->{type_for}->{$args{chunk_col}};
2776
if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {
2777
$range_func = 'range_num';
2779
elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {
2780
$range_func = "range_$col_type";
2782
elsif ( $col_type eq 'datetime' ) {
2783
$range_func = 'range_datetime';
2786
my ($start_point, $end_point);
2788
$start_point = $self->value_to_number(
2789
value => $args{min},
2790
column_type => $col_type,
2793
$end_point = $self->value_to_number(
2794
value => $args{max},
2795
column_type => $col_type,
2799
if ( $EVAL_ERROR ) {
2800
if ( $EVAL_ERROR =~ m/don't know how to chunk/ ) {
2804
die "Error calculating chunk start and end points for table "
2805
. "`$args{tbl_struct}->{name}` on column `$args{chunk_col}` "
2806
. "with min/max values "
2808
map { defined $args{$_} ? $args{$_} : 'undef' } qw(min max))
2811
. "\nVerify that the min and max values are valid for the column. "
2812
. "If they are valid, this error could be caused by a bug in the "
2817
if ( !defined $start_point ) {
2818
MKDEBUG && _d('Start point is undefined');
2821
if ( !defined $end_point || $end_point < $start_point ) {
2822
MKDEBUG && _d('End point is undefined or before start point');
2825
MKDEBUG && _d("Actual chunk range:", $start_point, "to", $end_point);
2827
my $have_zero_chunk = 0;
2828
if ( $args{zero_chunk} ) {
2829
if ( $start_point != $end_point && $start_point >= 0 ) {
2830
MKDEBUG && _d('Zero chunking');
2831
my $nonzero_val = $self->get_nonzero_value(
2834
col => $args{chunk_col},
2835
col_type => $col_type,
2838
$start_point = $self->value_to_number(
2839
value => $nonzero_val,
2840
column_type => $col_type,
2843
$have_zero_chunk = 1;
2846
MKDEBUG && _d("Cannot zero chunk");
2849
MKDEBUG && _d("Using chunk range:", $start_point, "to", $end_point);
2851
my $interval = $args{chunk_size}
2852
* ($end_point - $start_point)
2853
/ $args{rows_in_range};
2854
if ( $self->{int_types}->{$col_type} ) {
2855
$interval = ceil($interval);
2857
$interval ||= $args{chunk_size};
2858
if ( $args{exact} ) {
2859
$interval = $args{chunk_size};
2861
MKDEBUG && _d('Chunk interval:', $interval, 'units');
2864
col => $q->quote($args{chunk_col}),
2865
start_point => $start_point,
2866
end_point => $end_point,
2867
interval => $interval,
2868
range_func => $range_func,
2869
have_zero_chunk => $have_zero_chunk,
2874
my ( $self, %args ) = @_;
2875
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
2876
foreach my $arg ( @required_args ) {
2877
die "I need a $arg argument" unless defined $args{$arg};
2879
my $q = $self->{Quoter};
2880
my $db_tbl = $q->quote($args{db}, $args{tbl});
2881
my $dbh = $args{dbh};
2882
my $chunk_col = $args{chunk_col};
2886
$sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl "
2887
. "ORDER BY `$chunk_col`";
2888
MKDEBUG && _d($dbh, $sql);
2889
$row = $dbh->selectrow_arrayref($sql);
2890
my ($min_col, $max_col) = ($row->[0], $row->[1]);
2892
$sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord";
2893
MKDEBUG && _d($dbh, $sql);
2894
my $ord_sth = $dbh->prepare($sql); # avoid quoting issues
2895
$ord_sth->execute($min_col, $max_col);
2896
$row = $ord_sth->fetchrow_arrayref();
2897
my ($min_col_ord, $max_col_ord) = ($row->[0], $row->[1]);
2898
MKDEBUG && _d("Min/max col char code:", $min_col_ord, $max_col_ord);
2902
MKDEBUG && _d("Table charset:", $args{tbl_struct}->{charset});
2903
if ( ($args{tbl_struct}->{charset} || "") eq "latin1" ) {
2904
my @sorted_latin1_chars = (
2905
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
2906
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
2907
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
2908
74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
2909
88, 89, 90, 91, 92, 93, 94, 95, 96, 123, 124, 125, 126, 161,
2910
162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
2911
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
2912
190, 191, 215, 216, 222, 223, 247, 255);
2914
my ($first_char, $last_char);
2915
for my $i ( 0..$#sorted_latin1_chars ) {
2916
$first_char = $i and last if $sorted_latin1_chars[$i] >= $min_col_ord;
2918
for my $i ( $first_char..$#sorted_latin1_chars ) {
2919
$last_char = $i and last if $sorted_latin1_chars[$i] >= $max_col_ord;
2922
@chars = map { chr $_; } @sorted_latin1_chars[$first_char..$last_char];
2923
$base = scalar @chars;
2927
my $tmp_tbl = '__maatkit_char_chunking_map';
2928
my $tmp_db_tbl = $q->quote($args{db}, $tmp_tbl);
2929
$sql = "DROP TABLE IF EXISTS $tmp_db_tbl";
2930
MKDEBUG && _d($dbh, $sql);
2932
my $col_def = $args{tbl_struct}->{defs}->{$chunk_col};
2933
$sql = "CREATE TEMPORARY TABLE $tmp_db_tbl ($col_def) "
2935
MKDEBUG && _d($dbh, $sql);
2938
$sql = "INSERT INTO $tmp_db_tbl VALUE (CHAR(?))";
2939
MKDEBUG && _d($dbh, $sql);
2940
my $ins_char_sth = $dbh->prepare($sql); # avoid quoting issues
2941
for my $char_code ( $min_col_ord..$max_col_ord ) {
2942
$ins_char_sth->execute($char_code);
2945
$sql = "SELECT `$chunk_col` FROM $tmp_db_tbl "
2946
. "WHERE `$chunk_col` BETWEEN ? AND ? "
2947
. "ORDER BY `$chunk_col`";
2948
MKDEBUG && _d($dbh, $sql);
2949
my $sel_char_sth = $dbh->prepare($sql);
2950
$sel_char_sth->execute($min_col, $max_col);
2952
@chars = map { $_->[0] } @{ $sel_char_sth->fetchall_arrayref() };
2953
$base = scalar @chars;
2955
$sql = "DROP TABLE $tmp_db_tbl";
2956
MKDEBUG && _d($dbh, $sql);
2959
MKDEBUG && _d("Base", $base, "chars:", @chars);
2962
$sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`";
2963
MKDEBUG && _d($dbh, $sql);
2964
$row = $dbh->selectrow_arrayref($sql);
2965
my $max_col_len = $row->[0];
2966
MKDEBUG && _d("Max column value:", $max_col, $max_col_len);
2968
for my $n_chars ( 1..$max_col_len ) {
2969
$n_values = $base**$n_chars;
2970
if ( $n_values >= $args{chunk_size} ) {
2971
MKDEBUG && _d($n_chars, "chars in base", $base, "expresses",
2972
$n_values, "values");
2977
my $n_chunks = $args{rows_in_range} / $args{chunk_size};
2978
my $interval = floor($n_values / $n_chunks) || 1;
2980
my $range_func = sub {
2981
my ( $self, $dbh, $start, $interval, $max ) = @_;
2982
my $start_char = $self->base_count(
2987
my $end_char = $self->base_count(
2988
count_to => min($max, $start + $interval),
2992
return $start_char, $end_char;
2996
col => $q->quote($chunk_col),
2998
end_point => $n_values,
2999
interval => $interval,
3000
range_func => $range_func,
3004
sub get_first_chunkable_column {
3005
my ( $self, %args ) = @_;
3006
foreach my $arg ( qw(tbl_struct) ) {
3007
die "I need a $arg argument" unless $args{$arg};
3010
my ($exact, @cols) = $self->find_chunk_columns(%args);
3011
my $col = $cols[0]->{column};
3012
my $idx = $cols[0]->{index};
3014
my $wanted_col = $args{chunk_column};
3015
my $wanted_idx = $args{chunk_index};
3016
MKDEBUG && _d("Preferred chunk col/idx:", $wanted_col, $wanted_idx);
3018
if ( $wanted_col && $wanted_idx ) {
3019
foreach my $chunkable_col ( @cols ) {
3020
if ( $wanted_col eq $chunkable_col->{column}
3021
&& $wanted_idx eq $chunkable_col->{index} ) {
3028
elsif ( $wanted_col ) {
3029
foreach my $chunkable_col ( @cols ) {
3030
if ( $wanted_col eq $chunkable_col->{column} ) {
3032
$idx = $chunkable_col->{index};
3037
elsif ( $wanted_idx ) {
3038
foreach my $chunkable_col ( @cols ) {
3039
if ( $wanted_idx eq $chunkable_col->{index} ) {
3040
$col = $chunkable_col->{column};
3047
MKDEBUG && _d('First chunkable col/index:', $col, $idx);
3052
my ( $self, %args ) = @_;
3053
my @required_args = qw(dbh db tbl chunk_size);
3054
foreach my $arg ( @required_args ) {
3055
die "I need a $arg argument" unless $args{$arg};
3057
my ($dbh, $db, $tbl, $chunk_size) = @args{@required_args};
3058
my $q = $self->{Quoter};
3059
my $du = $self->{MySQLDump};
3061
my ($n_rows, $avg_row_length);
3063
my ( $num, $suffix ) = $chunk_size =~ m/^(\d+)([MGk])?$/;
3064
if ( $suffix ) { # Convert to bytes.
3065
$chunk_size = $suffix eq 'k' ? $num * 1_024
3066
: $suffix eq 'M' ? $num * 1_024 * 1_024
3067
: $num * 1_024 * 1_024 * 1_024;
3073
die "Invalid chunk size $chunk_size; must be an integer "
3074
. "with optional suffix kMG";
3077
if ( $suffix || $args{avg_row_length} ) {
3078
my ($status) = $du->get_table_status($dbh, $q, $db, $tbl);
3079
$avg_row_length = $status->{avg_row_length};
3080
if ( !defined $n_rows ) {
3081
$n_rows = $avg_row_length ? ceil($chunk_size / $avg_row_length) : undef;
3085
return $n_rows, $avg_row_length;
3088
sub get_range_statistics {
3089
my ( $self, %args ) = @_;
3090
my @required_args = qw(dbh db tbl chunk_col tbl_struct);
3091
foreach my $arg ( @required_args ) {
3092
die "I need a $arg argument" unless $args{$arg};
3094
my ($dbh, $db, $tbl, $col) = @args{@required_args};
3095
my $where = $args{where};
3096
my $q = $self->{Quoter};
3098
my $col_type = $args{tbl_struct}->{type_for}->{$col};
3099
my $col_is_numeric = $args{tbl_struct}->{is_numeric}->{$col};
3101
my $db_tbl = $q->quote($db, $tbl);
3102
$col = $q->quote($col);
3106
my $sql = "SELECT MIN($col), MAX($col) FROM $db_tbl"
3107
. ($args{index_hint} ? " $args{index_hint}" : "")
3108
. ($where ? " WHERE ($where)" : '');
3109
MKDEBUG && _d($dbh, $sql);
3110
($min, $max) = $dbh->selectrow_array($sql);
3111
MKDEBUG && _d("Actual end points:", $min, $max);
3113
($min, $max) = $self->get_valid_end_points(
3118
col_type => $col_type,
3122
MKDEBUG && _d("Valid end points:", $min, $max);
3124
if ( $EVAL_ERROR ) {
3125
die "Error getting min and max values for table $db_tbl "
3126
. "on column $col: $EVAL_ERROR";
3129
my $sql = "EXPLAIN SELECT * FROM $db_tbl"
3130
. ($args{index_hint} ? " $args{index_hint}" : "")
3131
. ($where ? " WHERE $where" : '');
3132
MKDEBUG && _d($sql);
3133
my $expl = $dbh->selectrow_hashref($sql);
3138
rows_in_range => $expl->{rows},
3143
my ( $self, %args ) = @_;
3144
foreach my $arg ( qw(database table chunks chunk_num query) ) {
3145
die "I need a $arg argument" unless defined $args{$arg};
3147
MKDEBUG && _d('Injecting chunk', $args{chunk_num});
3148
my $query = $args{query};
3149
my $comment = sprintf("/*%s.%s:%d/%d*/",
3150
$args{database}, $args{table},
3151
$args{chunk_num} + 1, scalar @{$args{chunks}});
3152
$query =~ s!/\*PROGRESS_COMMENT\*/!$comment!;
3153
my $where = "WHERE (" . $args{chunks}->[$args{chunk_num}] . ')';
3154
if ( $args{where} && grep { $_ } @{$args{where}} ) {
3156
. join(" AND ", map { "($_)" } grep { $_ } @{$args{where}} )
3159
my $db_tbl = $self->{Quoter}->quote(@args{qw(database table)});
3160
my $index_hint = $args{index_hint} || '';
3162
MKDEBUG && _d('Parameters:',
3163
Dumper({WHERE => $where, DB_TBL => $db_tbl, INDEX_HINT => $index_hint}));
3164
$query =~ s!/\*WHERE\*/! $where!;
3165
$query =~ s!/\*DB_TBL\*/!$db_tbl!;
3166
$query =~ s!/\*INDEX_HINT\*/! $index_hint!;
3167
$query =~ s!/\*CHUNK_NUM\*/! $args{chunk_num} AS chunk_num,!;
3173
sub value_to_number {
3174
my ( $self, %args ) = @_;
3175
my @required_args = qw(column_type dbh);
3176
foreach my $arg ( @required_args ) {
3177
die "I need a $arg argument" unless defined $args{$arg};
3179
my $val = $args{value};
3180
my ($col_type, $dbh) = @args{@required_args};
3181
MKDEBUG && _d('Converting MySQL', $col_type, $val);
3183
return unless defined $val; # value is NULL
3185
my %mysql_conv_func_for = (
3186
timestamp => 'UNIX_TIMESTAMP',
3188
time => 'TIME_TO_SEC',
3189
datetime => 'TO_DAYS',
3193
if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {
3196
elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {
3197
my $func = $mysql_conv_func_for{$col_type};
3198
my $sql = "SELECT $func(?)";
3199
MKDEBUG && _d($dbh, $sql, $val);
3200
my $sth = $dbh->prepare($sql);
3201
$sth->execute($val);
3202
($num) = $sth->fetchrow_array();
3204
elsif ( $col_type eq 'datetime' ) {
3205
$num = $self->timestampdiff($dbh, $val);
3208
die "I don't know how to chunk $col_type\n";
3210
MKDEBUG && _d('Converts to', $num);
3215
my ( $self, $dbh, $start, $interval, $max ) = @_;
3216
my $end = min($max, $start + $interval);
3219
$start = sprintf('%.17f', $start) if $start =~ /e/;
3220
$end = sprintf('%.17f', $end) if $end =~ /e/;
3222
$start =~ s/\.(\d{5}).*$/.$1/;
3223
$end =~ s/\.(\d{5}).*$/.$1/;
3225
if ( $end > $start ) {
3226
return ( $start, $end );
3229
die "Chunk size is too small: $end !> $start\n";
3234
my ( $self, $dbh, $start, $interval, $max ) = @_;
3235
my $sql = "SELECT SEC_TO_TIME($start), SEC_TO_TIME(LEAST($max, $start + $interval))";
3236
MKDEBUG && _d($sql);
3237
return $dbh->selectrow_array($sql);
3241
my ( $self, $dbh, $start, $interval, $max ) = @_;
3242
my $sql = "SELECT FROM_DAYS($start), FROM_DAYS(LEAST($max, $start + $interval))";
3243
MKDEBUG && _d($sql);
3244
return $dbh->selectrow_array($sql);
3247
sub range_datetime {
3248
my ( $self, $dbh, $start, $interval, $max ) = @_;
3249
my $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $start SECOND), "
3250
. "DATE_ADD('$self->{EPOCH}', INTERVAL LEAST($max, $start + $interval) SECOND)";
3251
MKDEBUG && _d($sql);
3252
return $dbh->selectrow_array($sql);
3255
sub range_timestamp {
3256
my ( $self, $dbh, $start, $interval, $max ) = @_;
3257
my $sql = "SELECT FROM_UNIXTIME($start), FROM_UNIXTIME(LEAST($max, $start + $interval))";
3258
MKDEBUG && _d($sql);
3259
return $dbh->selectrow_array($sql);
3263
my ( $self, $dbh, $time ) = @_;
3264
my $sql = "SELECT (COALESCE(TO_DAYS('$time'), 0) * 86400 + TIME_TO_SEC('$time')) "
3265
. "- TO_DAYS('$self->{EPOCH} 00:00:00') * 86400";
3266
MKDEBUG && _d($sql);
3267
my ( $diff ) = $dbh->selectrow_array($sql);
3268
$sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $diff SECOND)";
3269
MKDEBUG && _d($sql);
3270
my ( $check ) = $dbh->selectrow_array($sql);
3272
Incorrect datetime math: given $time, calculated $diff but checked to $check.
3273
This could be due to a version of MySQL that overflows on large interval
3274
values to DATE_ADD(), or the given datetime is not a valid date. If not,
3275
please report this as a bug.
3277
unless $check eq $time;
3284
sub get_valid_end_points {
3285
my ( $self, %args ) = @_;
3286
my @required_args = qw(dbh db_tbl col col_type);
3287
foreach my $arg ( @required_args ) {
3288
die "I need a $arg argument" unless $args{$arg};
3290
my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3291
my ($real_min, $real_max) = @args{qw(min max)};
3293
my $err_fmt = "Error finding a valid %s value for table $db_tbl on "
3294
. "column $col. The real %s value %s is invalid and "
3295
. "no other valid values were found. Verify that the table "
3296
. "has at least one valid value for this column"
3297
. ($args{where} ? " where $args{where}." : ".");
3299
my $valid_min = $real_min;
3300
if ( defined $valid_min ) {
3301
MKDEBUG && _d("Validating min end point:", $real_min);
3302
$valid_min = $self->_get_valid_end_point(
3307
die sprintf($err_fmt, 'minimum', 'minimum',
3308
(defined $real_min ? $real_min : "NULL"))
3309
unless defined $valid_min;
3312
my $valid_max = $real_max;
3313
if ( defined $valid_max ) {
3314
MKDEBUG && _d("Validating max end point:", $real_min);
3315
$valid_max = $self->_get_valid_end_point(
3320
die sprintf($err_fmt, 'maximum', 'maximum',
3321
(defined $real_max ? $real_max : "NULL"))
3322
unless defined $valid_max;
3325
return $valid_min, $valid_max;
3328
sub _get_valid_end_point {
3329
my ( $self, %args ) = @_;
3330
my @required_args = qw(dbh db_tbl col col_type);
3331
foreach my $arg ( @required_args ) {
3332
die "I need a $arg argument" unless $args{$arg};
3334
my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3335
my $val = $args{val};
3337
return $val unless defined $val;
3339
my $validate = $col_type =~ m/time|date/ ? \&_validate_temporal_value
3343
MKDEBUG && _d("No validator for", $col_type, "values");
3347
return $val if defined $validate->($dbh, $val);
3349
MKDEBUG && _d("Value is invalid, getting first valid value");
3350
$val = $self->get_first_valid_value(
3353
validate => $validate,
3359
sub get_first_valid_value {
3360
my ( $self, %args ) = @_;
3361
my @required_args = qw(dbh db_tbl col validate endpoint);
3362
foreach my $arg ( @required_args ) {
3363
die "I need a $arg argument" unless $args{$arg};
3365
my ($dbh, $db_tbl, $col, $validate, $endpoint) = @args{@required_args};
3366
my $tries = defined $args{tries} ? $args{tries} : 5;
3367
my $val = $args{val};
3369
return unless defined $val;
3371
my $cmp = $endpoint =~ m/min/i ? '>'
3372
: $endpoint =~ m/max/i ? '<'
3373
: die "Invalid endpoint arg: $endpoint";
3374
my $sql = "SELECT $col FROM $db_tbl "
3375
. ($args{index_hint} ? "$args{index_hint} " : "")
3376
. "WHERE $col $cmp ? AND $col IS NOT NULL "
3377
. ($args{where} ? "AND ($args{where}) " : "")
3378
. "ORDER BY $col LIMIT 1";
3379
MKDEBUG && _d($dbh, $sql);
3380
my $sth = $dbh->prepare($sql);
3382
my $last_val = $val;
3383
while ( $tries-- ) {
3384
$sth->execute($last_val);
3385
my ($next_val) = $sth->fetchrow_array();
3386
MKDEBUG && _d('Next value:', $next_val, '; tries left:', $tries);
3387
if ( !defined $next_val ) {
3388
MKDEBUG && _d('No more rows in table');
3391
if ( defined $validate->($dbh, $next_val) ) {
3392
MKDEBUG && _d('First valid value:', $next_val);
3396
$last_val = $next_val;
3399
$val = undef; # no valid value found
3404
sub _validate_temporal_value {
3405
my ( $dbh, $val ) = @_;
3406
my $sql = "SELECT IF(TIME_FORMAT(?,'%H:%i:%s')=?, TIME_TO_SEC(?), TO_DAYS(?))";
3409
MKDEBUG && _d($dbh, $sql, $val);
3410
my $sth = $dbh->prepare($sql);
3411
$sth->execute($val, $val, $val, $val);
3412
($res) = $sth->fetchrow_array();
3415
if ( $EVAL_ERROR ) {
3416
MKDEBUG && _d($EVAL_ERROR);
3421
sub get_nonzero_value {
3422
my ( $self, %args ) = @_;
3423
my @required_args = qw(dbh db_tbl col col_type);
3424
foreach my $arg ( @required_args ) {
3425
die "I need a $arg argument" unless $args{$arg};
3427
my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3428
my $tries = defined $args{tries} ? $args{tries} : 5;
3429
my $val = $args{val};
3431
my $is_nonzero = $col_type =~ m/time|date/ ? \&_validate_temporal_value
3432
: sub { return $_[1]; };
3434
if ( !$is_nonzero->($dbh, $val) ) { # quasi-double-negative, sorry
3435
MKDEBUG && _d('Discarding zero value:', $val);
3436
my $sql = "SELECT $col FROM $db_tbl "
3437
. ($args{index_hint} ? "$args{index_hint} " : "")
3438
. "WHERE $col > ? AND $col IS NOT NULL "
3439
. ($args{where} ? "AND ($args{where}) " : '')
3440
. "ORDER BY $col LIMIT 1";
3441
MKDEBUG && _d($sql);
3442
my $sth = $dbh->prepare($sql);
3444
my $last_val = $val;
3445
while ( $tries-- ) {
3446
$sth->execute($last_val);
3447
my ($next_val) = $sth->fetchrow_array();
3448
if ( $is_nonzero->($dbh, $next_val) ) {
3449
MKDEBUG && _d('First non-zero value:', $next_val);
3453
$last_val = $next_val;
3456
$val = undef; # no non-zero value found
3463
my ( $self, %args ) = @_;
3464
my @required_args = qw(count_to base symbols);
3465
foreach my $arg ( @required_args ) {
3466
die "I need a $arg argument" unless defined $args{$arg};
3468
my ($n, $base, $symbols) = @args{@required_args};
3470
return $symbols->[0] if $n == 0;
3472
my $highest_power = floor(log($n)/log($base));
3473
if ( $highest_power == 0 ){
3474
return $symbols->[$n];
3478
for my $power ( 0..$highest_power ) {
3479
push @base_powers, ($base**$power) || 1;
3483
foreach my $base_power ( reverse @base_powers ) {
3484
my $multiples = floor($n / $base_power);
3485
push @base_multiples, $multiples;
3486
$n -= $multiples * $base_power;
3489
return join('', map { $symbols->[$_] } @base_multiples);
3493
my ($package, undef, $line) = caller 0;
3494
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
3495
map { defined $_ ? $_ : 'undef' }
3497
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
3502
# ###########################################################################
3503
# End TableChunker package
3504
# ###########################################################################
3506
# ###########################################################################
3507
# Quoter package 6850
3508
# This package is a copy without comments from the original. The original
3509
# with comments and its test file can be found in the SVN repository at,
3510
# trunk/common/Quoter.pm
3511
# trunk/common/t/Quoter.t
3512
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
3513
# ###########################################################################
3518
use warnings FATAL => 'all';
3519
use English qw(-no_match_vars);
3521
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
3524
my ( $class, %args ) = @_;
3525
return bless {}, $class;
3529
my ( $self, @vals ) = @_;
3530
foreach my $val ( @vals ) {
3533
return join('.', map { '`' . $_ . '`' } @vals);
3537
my ( $self, $val ) = @_;
3539
return 'NULL' unless defined $val; # undef = NULL
3540
return "''" if $val eq ''; # blank string = ''
3541
return $val if $val =~ m/^0x[0-9a-fA-F]+$/; # hex data
3543
$val =~ s/(['\\])/\\$1/g;
3548
my ( $self, $db_tbl, $default_db ) = @_;
3550
my ( $db, $tbl ) = split(/[.]/, $db_tbl);
3559
my ( $self, $like ) = @_;
3560
return unless $like;
3561
$like =~ s/([%_])/\\$1/g;
3566
my ( $self, $default_db, $db_tbl ) = @_;
3567
return unless $db_tbl;
3568
my ($db, $tbl) = split(/[.]/, $db_tbl);
3573
$db = "`$db`" if $db && $db !~ m/^`/;
3574
$tbl = "`$tbl`" if $tbl && $tbl !~ m/^`/;
3575
return $db ? "$db.$tbl" : $tbl;
3580
# ###########################################################################
3581
# End Quoter package
3582
# ###########################################################################
3584
# ###########################################################################
3585
# MasterSlave package 7525
3586
# This package is a copy without comments from the original. The original
3587
# with comments and its test file can be found in the SVN repository at,
3588
# trunk/common/MasterSlave.pm
3589
# trunk/common/t/MasterSlave.t
3590
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
3591
# ###########################################################################
3593
package MasterSlave;
3596
use warnings FATAL => 'all';
3597
use English qw(-no_match_vars);
3598
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
3600
use List::Util qw(min max);
3602
$Data::Dumper::Quotekeys = 0;
3603
$Data::Dumper::Indent = 0;
3606
my ( $class, %args ) = @_;
3609
replication_thread => {},
3611
return bless $self, $class;
3614
sub recurse_to_slaves {
3615
my ( $self, $args, $level ) = @_;
3617
my $dp = $args->{dsn_parser};
3618
my $dsn = $args->{dsn};
3622
$dbh = $args->{dbh} || $dp->get_dbh(
3623
$dp->get_cxn_params($dsn), { AutoCommit => 1 });
3624
MKDEBUG && _d('Connected to', $dp->as_string($dsn));
3626
if ( $EVAL_ERROR ) {
3627
print STDERR "Cannot connect to ", $dp->as_string($dsn), "\n"
3628
or die "Cannot print: $OS_ERROR";
3632
my $sql = 'SELECT @@SERVER_ID';
3633
MKDEBUG && _d($sql);
3634
my ($id) = $dbh->selectrow_array($sql);
3635
MKDEBUG && _d('Working on server ID', $id);
3636
my $master_thinks_i_am = $dsn->{server_id};
3638
|| ( defined $master_thinks_i_am && $master_thinks_i_am != $id )
3639
|| $args->{server_ids_seen}->{$id}++
3641
MKDEBUG && _d('Server ID seen, or not what master said');
3642
if ( $args->{skip_callback} ) {
3643
$args->{skip_callback}->($dsn, $dbh, $level, $args->{parent});
3648
$args->{callback}->($dsn, $dbh, $level, $args->{parent});
3650
if ( !defined $args->{recurse} || $level < $args->{recurse} ) {
3653
grep { !$_->{master_id} || $_->{master_id} == $id } # Only my slaves.
3654
$self->find_slave_hosts($dp, $dbh, $dsn, $args->{method});
3656
foreach my $slave ( @slaves ) {
3657
MKDEBUG && _d('Recursing from',
3658
$dp->as_string($dsn), 'to', $dp->as_string($slave));
3659
$self->recurse_to_slaves(
3660
{ %$args, dsn => $slave, dbh => undef, parent => $dsn }, $level + 1 );
3665
sub find_slave_hosts {
3666
my ( $self, $dsn_parser, $dbh, $dsn, $method ) = @_;
3668
my @methods = qw(processlist hosts);
3670
@methods = grep { $_ ne $method } @methods;
3671
unshift @methods, $method;
3674
if ( ($dsn->{P} || 3306) != 3306 ) {
3675
MKDEBUG && _d('Port number is non-standard; using only hosts method');
3676
@methods = qw(hosts);
3679
MKDEBUG && _d('Looking for slaves on', $dsn_parser->as_string($dsn),
3680
'using methods', @methods);
3684
foreach my $method ( @methods ) {
3685
my $find_slaves = "_find_slaves_by_$method";
3686
MKDEBUG && _d('Finding slaves with', $find_slaves);
3687
@slaves = $self->$find_slaves($dsn_parser, $dbh, $dsn);
3688
last METHOD if @slaves;
3691
MKDEBUG && _d('Found', scalar(@slaves), 'slaves');
3695
sub _find_slaves_by_processlist {
3696
my ( $self, $dsn_parser, $dbh, $dsn ) = @_;
3699
my $slave = $dsn_parser->parse("h=$_", $dsn);
3700
$slave->{source} = 'processlist';
3705
my ( $host ) = $_->{host} =~ m/^([^:]+):/;
3706
if ( $host eq 'localhost' ) {
3707
$host = '127.0.0.1'; # Replication never uses sockets.
3710
} $self->get_connected_slaves($dbh);
3715
sub _find_slaves_by_hosts {
3716
my ( $self, $dsn_parser, $dbh, $dsn ) = @_;
3719
my $sql = 'SHOW SLAVE HOSTS';
3720
MKDEBUG && _d($dbh, $sql);
3721
@slaves = @{$dbh->selectall_arrayref($sql, { Slice => {} })};
3724
MKDEBUG && _d('Found some SHOW SLAVE HOSTS info');
3727
@hash{ map { lc $_ } keys %$_ } = values %$_;
3728
my $spec = "h=$hash{host},P=$hash{port}"
3729
. ( $hash{user} ? ",u=$hash{user}" : '')
3730
. ( $hash{password} ? ",p=$hash{password}" : '');
3731
my $dsn = $dsn_parser->parse($spec, $dsn);
3732
$dsn->{server_id} = $hash{server_id};
3733
$dsn->{master_id} = $hash{master_id};
3734
$dsn->{source} = 'hosts';
3742
sub get_connected_slaves {
3743
my ( $self, $dbh ) = @_;
3745
my $show = "SHOW GRANTS FOR ";
3746
my $user = 'CURRENT_USER()';
3747
my $vp = $self->{VersionParser};
3748
if ( $vp && !$vp->version_ge($dbh, '4.1.2') ) {
3749
$user = $dbh->selectrow_arrayref('SELECT USER()')->[0];
3750
$user =~ s/([^@]+)@(.+)/'$1'\@'$2'/;
3752
my $sql = $show . $user;
3753
MKDEBUG && _d($dbh, $sql);
3758
m/ALL PRIVILEGES.*?\*\.\*|PROCESS/
3759
} @{$dbh->selectcol_arrayref($sql)};
3761
if ( $EVAL_ERROR ) {
3763
if ( $EVAL_ERROR =~ m/no such grant defined for user/ ) {
3764
MKDEBUG && _d('Retrying SHOW GRANTS without host; error:',
3766
($user) = split('@', $user);
3767
$sql = $show . $user;
3768
MKDEBUG && _d($sql);
3771
m/ALL PRIVILEGES.*?\*\.\*|PROCESS/
3772
} @{$dbh->selectcol_arrayref($sql)};
3776
die "Failed to $sql: $EVAL_ERROR" if $EVAL_ERROR;
3779
die "You do not have the PROCESS privilege";
3782
$sql = 'SHOW PROCESSLIST';
3783
MKDEBUG && _d($dbh, $sql);
3784
grep { $_->{command} =~ m/Binlog Dump/i }
3785
map { # Lowercase the column names
3787
@hash{ map { lc $_ } keys %$_ } = values %$_;
3790
@{$dbh->selectall_arrayref($sql, { Slice => {} })};
3794
my ( $self, $master, $slave ) = @_;
3795
my $master_status = $self->get_master_status($master)
3796
or die "The server specified as a master is not a master";
3797
my $slave_status = $self->get_slave_status($slave)
3798
or die "The server specified as a slave is not a slave";
3799
my @connected = $self->get_connected_slaves($master)
3800
or die "The server specified as a master has no connected slaves";
3801
my (undef, $port) = $master->selectrow_array('SHOW VARIABLES LIKE "port"');
3803
if ( $port != $slave_status->{master_port} ) {
3804
die "The slave is connected to $slave_status->{master_port} "
3805
. "but the master's port is $port";
3808
if ( !grep { $slave_status->{master_user} eq $_->{user} } @connected ) {
3809
die "I don't see any slave I/O thread connected with user "
3810
. $slave_status->{master_user};
3813
if ( ($slave_status->{slave_io_state} || '')
3814
eq 'Waiting for master to send event' )
3816
my ( $master_log_name, $master_log_num )
3817
= $master_status->{file} =~ m/^(.*?)\.0*([1-9][0-9]*)$/;
3818
my ( $slave_log_name, $slave_log_num )
3819
= $slave_status->{master_log_file} =~ m/^(.*?)\.0*([1-9][0-9]*)$/;
3820
if ( $master_log_name ne $slave_log_name
3821
|| abs($master_log_num - $slave_log_num) > 1 )
3823
die "The slave thinks it is reading from "
3824
. "$slave_status->{master_log_file}, but the "
3825
. "master is writing to $master_status->{file}";
3831
sub get_master_dsn {
3832
my ( $self, $dbh, $dsn, $dsn_parser ) = @_;
3833
my $master = $self->get_slave_status($dbh) or return undef;
3834
my $spec = "h=$master->{master_host},P=$master->{master_port}";
3835
return $dsn_parser->parse($spec, $dsn);
3838
sub get_slave_status {
3839
my ( $self, $dbh ) = @_;
3840
if ( !$self->{not_a_slave}->{$dbh} ) {
3841
my $sth = $self->{sths}->{$dbh}->{SLAVE_STATUS}
3842
||= $dbh->prepare('SHOW SLAVE STATUS');
3843
MKDEBUG && _d($dbh, 'SHOW SLAVE STATUS');
3845
my ($ss) = @{$sth->fetchall_arrayref({})};
3847
if ( $ss && %$ss ) {
3848
$ss = { map { lc($_) => $ss->{$_} } keys %$ss }; # lowercase the keys
3852
MKDEBUG && _d('This server returns nothing for SHOW SLAVE STATUS');
3853
$self->{not_a_slave}->{$dbh}++;
3857
sub get_master_status {
3858
my ( $self, $dbh ) = @_;
3860
if ( $self->{not_a_master}->{$dbh} ) {
3861
MKDEBUG && _d('Server on dbh', $dbh, 'is not a master');
3865
my $sth = $self->{sths}->{$dbh}->{MASTER_STATUS}
3866
||= $dbh->prepare('SHOW MASTER STATUS');
3867
MKDEBUG && _d($dbh, 'SHOW MASTER STATUS');
3869
my ($ms) = @{$sth->fetchall_arrayref({})};
3870
MKDEBUG && _d(Dumper($ms));
3872
if ( !$ms || scalar keys %$ms < 2 ) {
3873
MKDEBUG && _d('Server on dbh', $dbh, 'does not seem to be a master');
3874
$self->{not_a_master}->{$dbh}++;
3877
return { map { lc($_) => $ms->{$_} } keys %$ms }; # lowercase the keys
3880
sub wait_for_master {
3881
my ( $self, %args ) = @_;
3882
my @required_args = qw(master_status slave_dbh);
3883
foreach my $arg ( @required_args ) {
3884
die "I need a $arg argument" unless $args{$arg};
3886
my ($master_status, $slave_dbh) = @args{@required_args};
3887
my $timeout = $args{timeout} || 60;
3891
if ( $master_status ) {
3892
my $sql = "SELECT MASTER_POS_WAIT('$master_status->{file}', "
3893
. "$master_status->{position}, $timeout)";
3894
MKDEBUG && _d($slave_dbh, $sql);
3896
($result) = $slave_dbh->selectrow_array($sql);
3898
$waited = time - $start;
3900
MKDEBUG && _d('Result of waiting:', $result);
3901
MKDEBUG && _d("Waited", $waited, "seconds");
3904
MKDEBUG && _d('Not waiting: this server is not a master');
3914
my ( $self, $dbh ) = @_;
3915
my $sth = $self->{sths}->{$dbh}->{STOP_SLAVE}
3916
||= $dbh->prepare('STOP SLAVE');
3917
MKDEBUG && _d($dbh, $sth->{Statement});
3922
my ( $self, $dbh, $pos ) = @_;
3924
my $sql = "START SLAVE UNTIL MASTER_LOG_FILE='$pos->{file}', "
3925
. "MASTER_LOG_POS=$pos->{position}";
3926
MKDEBUG && _d($dbh, $sql);
3930
my $sth = $self->{sths}->{$dbh}->{START_SLAVE}
3931
||= $dbh->prepare('START SLAVE');
3932
MKDEBUG && _d($dbh, $sth->{Statement});
3937
sub catchup_to_master {
3938
my ( $self, $slave, $master, $timeout ) = @_;
3939
$self->stop_slave($master);
3940
$self->stop_slave($slave);
3941
my $slave_status = $self->get_slave_status($slave);
3942
my $slave_pos = $self->repl_posn($slave_status);
3943
my $master_status = $self->get_master_status($master);
3944
my $master_pos = $self->repl_posn($master_status);
3945
MKDEBUG && _d('Master position:', $self->pos_to_string($master_pos),
3946
'Slave position:', $self->pos_to_string($slave_pos));
3949
if ( $self->pos_cmp($slave_pos, $master_pos) < 0 ) {
3950
MKDEBUG && _d('Waiting for slave to catch up to master');
3951
$self->start_slave($slave, $master_pos);
3953
$result = $self->wait_for_master(
3954
master_status => $master_status,
3955
slave_dbh => $slave,
3956
timeout => $timeout,
3957
master_status => $master_status
3959
if ( !defined $result->{result} ) {
3960
$slave_status = $self->get_slave_status($slave);
3961
if ( !$self->slave_is_running($slave_status) ) {
3962
MKDEBUG && _d('Master position:',
3963
$self->pos_to_string($master_pos),
3964
'Slave position:', $self->pos_to_string($slave_pos));
3965
$slave_pos = $self->repl_posn($slave_status);
3966
if ( $self->pos_cmp($slave_pos, $master_pos) != 0 ) {
3967
die "MASTER_POS_WAIT() returned NULL but slave has not "
3968
. "caught up to master";
3970
MKDEBUG && _d('Slave is caught up to master and stopped');
3973
die "Slave has not caught up to master and it is still running";
3978
MKDEBUG && _d("Slave is already caught up to master");
3984
sub catchup_to_same_pos {
3985
my ( $self, $s1_dbh, $s2_dbh ) = @_;
3986
$self->stop_slave($s1_dbh);
3987
$self->stop_slave($s2_dbh);
3988
my $s1_status = $self->get_slave_status($s1_dbh);
3989
my $s2_status = $self->get_slave_status($s2_dbh);
3990
my $s1_pos = $self->repl_posn($s1_status);
3991
my $s2_pos = $self->repl_posn($s2_status);
3992
if ( $self->pos_cmp($s1_pos, $s2_pos) < 0 ) {
3993
$self->start_slave($s1_dbh, $s2_pos);
3995
elsif ( $self->pos_cmp($s2_pos, $s1_pos) < 0 ) {
3996
$self->start_slave($s2_dbh, $s1_pos);
3999
$s1_status = $self->get_slave_status($s1_dbh);
4000
$s2_status = $self->get_slave_status($s2_dbh);
4001
$s1_pos = $self->repl_posn($s1_status);
4002
$s2_pos = $self->repl_posn($s2_status);
4004
if ( $self->slave_is_running($s1_status)
4005
|| $self->slave_is_running($s2_status)
4006
|| $self->pos_cmp($s1_pos, $s2_pos) != 0)
4008
die "The servers aren't both stopped at the same position";
4013
sub change_master_to {
4014
my ( $self, $dbh, $master_dsn, $master_pos ) = @_;
4015
$self->stop_slave($dbh);
4016
MKDEBUG && _d(Dumper($master_dsn), Dumper($master_pos));
4017
my $sql = "CHANGE MASTER TO MASTER_HOST='$master_dsn->{h}', "
4018
. "MASTER_PORT= $master_dsn->{P}, MASTER_LOG_FILE='$master_pos->{file}', "
4019
. "MASTER_LOG_POS=$master_pos->{position}";
4020
MKDEBUG && _d($dbh, $sql);
4024
sub make_sibling_of_master {
4025
my ( $self, $slave_dbh, $slave_dsn, $dsn_parser, $timeout) = @_;
4027
my $master_dsn = $self->get_master_dsn($slave_dbh, $slave_dsn, $dsn_parser)
4028
or die "This server is not a slave";
4029
my $master_dbh = $dsn_parser->get_dbh(
4030
$dsn_parser->get_cxn_params($master_dsn), { AutoCommit => 1 });
4032
= $self->get_master_dsn($master_dbh, $master_dsn, $dsn_parser)
4033
or die "This server's master is not a slave";
4034
my $gmaster_dbh = $dsn_parser->get_dbh(
4035
$dsn_parser->get_cxn_params($gmaster_dsn), { AutoCommit => 1 });
4036
if ( $self->short_host($slave_dsn) eq $self->short_host($gmaster_dsn) ) {
4037
die "The slave's master's master is the slave: master-master replication";
4040
$self->stop_slave($master_dbh);
4041
$self->catchup_to_master($slave_dbh, $master_dbh, $timeout);
4042
$self->stop_slave($slave_dbh);
4044
my $master_status = $self->get_master_status($master_dbh);
4045
my $mslave_status = $self->get_slave_status($master_dbh);
4046
my $slave_status = $self->get_slave_status($slave_dbh);
4047
my $master_pos = $self->repl_posn($master_status);
4048
my $slave_pos = $self->repl_posn($slave_status);
4050
if ( !$self->slave_is_running($mslave_status)
4051
&& !$self->slave_is_running($slave_status)
4052
&& $self->pos_cmp($master_pos, $slave_pos) == 0)
4054
$self->change_master_to($slave_dbh, $gmaster_dsn,
4055
$self->repl_posn($mslave_status)); # Note it's not $master_pos!
4058
die "The servers aren't both stopped at the same position";
4061
$mslave_status = $self->get_slave_status($master_dbh);
4062
$slave_status = $self->get_slave_status($slave_dbh);
4063
my $mslave_pos = $self->repl_posn($mslave_status);
4064
$slave_pos = $self->repl_posn($slave_status);
4065
if ( $self->short_host($mslave_status) ne $self->short_host($slave_status)
4066
|| $self->pos_cmp($mslave_pos, $slave_pos) != 0)
4068
die "The servers don't have the same master/position after the change";
4072
sub make_slave_of_sibling {
4073
my ( $self, $slave_dbh, $slave_dsn, $sib_dbh, $sib_dsn,
4074
$dsn_parser, $timeout) = @_;
4076
if ( $self->short_host($slave_dsn) eq $self->short_host($sib_dsn) ) {
4077
die "You are trying to make the slave a slave of itself";
4080
my $master_dsn1 = $self->get_master_dsn($slave_dbh, $slave_dsn, $dsn_parser)
4081
or die "This server is not a slave";
4082
my $master_dbh1 = $dsn_parser->get_dbh(
4083
$dsn_parser->get_cxn_params($master_dsn1), { AutoCommit => 1 });
4084
my $master_dsn2 = $self->get_master_dsn($slave_dbh, $slave_dsn, $dsn_parser)
4085
or die "The sibling is not a slave";
4086
if ( $self->short_host($master_dsn1) ne $self->short_host($master_dsn2) ) {
4087
die "This server isn't a sibling of the slave";
4089
my $sib_master_stat = $self->get_master_status($sib_dbh)
4090
or die "Binary logging is not enabled on the sibling";
4091
die "The log_slave_updates option is not enabled on the sibling"
4092
unless $self->has_slave_updates($sib_dbh);
4094
$self->catchup_to_same_pos($slave_dbh, $sib_dbh);
4096
$sib_master_stat = $self->get_master_status($sib_dbh);
4097
$self->change_master_to($slave_dbh, $sib_dsn,
4098
$self->repl_posn($sib_master_stat));
4100
my $slave_status = $self->get_slave_status($slave_dbh);
4101
my $slave_pos = $self->repl_posn($slave_status);
4102
$sib_master_stat = $self->get_master_status($sib_dbh);
4103
if ( $self->short_host($slave_status) ne $self->short_host($sib_dsn)
4104
|| $self->pos_cmp($self->repl_posn($sib_master_stat), $slave_pos) != 0)
4106
die "After changing the slave's master, it isn't a slave of the sibling, "
4107
. "or it has a different replication position than the sibling";
4111
sub make_slave_of_uncle {
4112
my ( $self, $slave_dbh, $slave_dsn, $unc_dbh, $unc_dsn,
4113
$dsn_parser, $timeout) = @_;
4115
if ( $self->short_host($slave_dsn) eq $self->short_host($unc_dsn) ) {
4116
die "You are trying to make the slave a slave of itself";
4119
my $master_dsn = $self->get_master_dsn($slave_dbh, $slave_dsn, $dsn_parser)
4120
or die "This server is not a slave";
4121
my $master_dbh = $dsn_parser->get_dbh(
4122
$dsn_parser->get_cxn_params($master_dsn), { AutoCommit => 1 });
4124
= $self->get_master_dsn($master_dbh, $master_dsn, $dsn_parser)
4125
or die "The master is not a slave";
4127
= $self->get_master_dsn($unc_dbh, $unc_dsn, $dsn_parser)
4128
or die "The uncle is not a slave";
4129
if ($self->short_host($gmaster_dsn) ne $self->short_host($unc_master_dsn)) {
4130
die "The uncle isn't really the slave's uncle";
4133
my $unc_master_stat = $self->get_master_status($unc_dbh)
4134
or die "Binary logging is not enabled on the uncle";
4135
die "The log_slave_updates option is not enabled on the uncle"
4136
unless $self->has_slave_updates($unc_dbh);
4138
$self->catchup_to_same_pos($master_dbh, $unc_dbh);
4139
$self->catchup_to_master($slave_dbh, $master_dbh, $timeout);
4141
my $slave_status = $self->get_slave_status($slave_dbh);
4142
my $master_status = $self->get_master_status($master_dbh);
4143
if ( $self->pos_cmp(
4144
$self->repl_posn($slave_status),
4145
$self->repl_posn($master_status)) != 0 )
4147
die "The slave is not caught up to its master";
4150
$unc_master_stat = $self->get_master_status($unc_dbh);
4151
$self->change_master_to($slave_dbh, $unc_dsn,
4152
$self->repl_posn($unc_master_stat));
4155
$slave_status = $self->get_slave_status($slave_dbh);
4156
my $slave_pos = $self->repl_posn($slave_status);
4157
if ( $self->short_host($slave_status) ne $self->short_host($unc_dsn)
4158
|| $self->pos_cmp($self->repl_posn($unc_master_stat), $slave_pos) != 0)
4160
die "After changing the slave's master, it isn't a slave of the uncle, "
4161
. "or it has a different replication position than the uncle";
4166
my ( $self, $dbh ) = @_;
4167
$self->stop_slave($dbh);
4168
my $stat = $self->get_slave_status($dbh)
4169
or die "This server is not a slave";
4170
$dbh->do('CHANGE MASTER TO MASTER_HOST=""');
4171
$dbh->do('RESET SLAVE'); # Wipes out master.info, etc etc
4175
sub slave_is_running {
4176
my ( $self, $slave_status ) = @_;
4177
return ($slave_status->{slave_sql_running} || 'No') eq 'Yes';
4180
sub has_slave_updates {
4181
my ( $self, $dbh ) = @_;
4182
my $sql = q{SHOW VARIABLES LIKE 'log_slave_updates'};
4183
MKDEBUG && _d($dbh, $sql);
4184
my ($name, $value) = $dbh->selectrow_array($sql);
4185
return $value && $value =~ m/^(1|ON)$/;
4189
my ( $self, $status ) = @_;
4190
if ( exists $status->{file} && exists $status->{position} ) {
4192
file => $status->{file},
4193
position => $status->{position},
4198
file => $status->{relay_master_log_file},
4199
position => $status->{exec_master_log_pos},
4205
my ( $self, $dbh ) = @_;
4206
my $stat = $self->get_slave_status($dbh);
4207
return $stat->{seconds_behind_master};
4211
my ( $self, $a, $b ) = @_;
4212
return $self->pos_to_string($a) cmp $self->pos_to_string($b);
4216
my ( $self, $dsn ) = @_;
4218
if ( $dsn->{master_host} ) {
4219
$host = $dsn->{master_host};
4220
$port = $dsn->{master_port};
4226
return ($host || '[default]') . ( ($port || 3306) == 3306 ? '' : ":$port" );
4229
sub is_replication_thread {
4230
my ( $self, $query, %args ) = @_;
4231
return unless $query;
4233
my $type = lc $args{type} || 'all';
4234
die "Invalid type: $type"
4235
unless $type =~ m/^binlog_dump|slave_io|slave_sql|all$/i;
4238
if ( $type =~ m/binlog_dump|all/i ) {
4240
if ($query->{Command} || $query->{command} || '') eq "Binlog Dump";
4243
if ( ($query->{User} || $query->{user} || '') eq "system user" ) {
4244
MKDEBUG && _d("Slave replication thread");
4245
if ( $type ne 'all' ) {
4246
my $state = $query->{State} || $query->{state} || '';
4248
if ( $state =~ m/^init|end$/ ) {
4249
MKDEBUG && _d("Special state:", $state);
4253
my ($slave_sql) = $state =~ m/
4254
^(Waiting\sfor\sthe\snext\sevent
4255
|Reading\sevent\sfrom\sthe\srelay\slog
4256
|Has\sread\sall\srelay\slog;\swaiting
4258
|Waiting\sfor\sslave\smutex\son\sexit)/xi;
4260
$match = $type eq 'slave_sql' && $slave_sql ? 1
4261
: $type eq 'slave_io' && !$slave_sql ? 1
4270
MKDEBUG && _d('Not system user');
4273
if ( !defined $args{check_known_ids} || $args{check_known_ids} ) {
4274
my $id = $query->{Id} || $query->{id};
4276
$self->{replication_thread}->{$id} = 1;
4279
if ( $self->{replication_thread}->{$id} ) {
4280
MKDEBUG && _d("Thread ID is a known replication thread ID");
4287
MKDEBUG && _d('Matches', $type, 'replication thread:',
4288
($match ? 'yes' : 'no'), '; match:', $match);
4294
sub get_replication_filters {
4295
my ( $self, %args ) = @_;
4296
my @required_args = qw(dbh);
4297
foreach my $arg ( @required_args ) {
4298
die "I need a $arg argument" unless $args{$arg};
4300
my ($dbh) = @args{@required_args};
4304
my $status = $self->get_master_status($dbh);
4306
map { $filters{$_} = $status->{$_} }
4307
grep { defined $status->{$_} && $status->{$_} ne '' }
4314
$status = $self->get_slave_status($dbh);
4316
map { $filters{$_} = $status->{$_} }
4317
grep { defined $status->{$_} && $status->{$_} ne '' }
4322
replicate_ignore_table
4323
replicate_wild_do_table
4324
replicate_wild_ignore_table
4327
my $sql = "SHOW VARIABLES LIKE 'slave_skip_errors'";
4328
MKDEBUG && _d($dbh, $sql);
4329
my $row = $dbh->selectrow_arrayref($sql);
4330
$filters{slave_skip_errors} = $row->[1] if $row->[1] && $row->[1] ne 'OFF';
4338
my ( $self, $pos ) = @_;
4339
my $fmt = '%s/%020d';
4340
return sprintf($fmt, @{$pos}{qw(file position)});
4343
sub reset_known_replication_threads {
4345
$self->{replication_thread} = {};
4350
my ($package, undef, $line) = caller 0;
4351
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4352
map { defined $_ ? $_ : 'undef' }
4354
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4359
# ###########################################################################
4360
# End MasterSlave package
4361
# ###########################################################################
4363
# ###########################################################################
4364
# Daemon package 6255
4365
# This package is a copy without comments from the original. The original
4366
# with comments and its test file can be found in the SVN repository at,
4367
# trunk/common/Daemon.pm
4368
# trunk/common/t/Daemon.t
4369
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
4370
# ###########################################################################
4375
use warnings FATAL => 'all';
4377
use POSIX qw(setsid);
4378
use English qw(-no_match_vars);
4380
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
4383
my ( $class, %args ) = @_;
4384
foreach my $arg ( qw(o) ) {
4385
die "I need a $arg argument" unless $args{$arg};
4390
log_file => $o->has('log') ? $o->get('log') : undef,
4391
PID_file => $o->has('pid') ? $o->get('pid') : undef,
4394
check_PID_file(undef, $self->{PID_file});
4396
MKDEBUG && _d('Daemonized child will log to', $self->{log_file});
4397
return bless $self, $class;
4403
MKDEBUG && _d('About to fork and daemonize');
4404
defined (my $pid = fork()) or die "Cannot fork: $OS_ERROR";
4406
MKDEBUG && _d('I am the parent and now I die');
4410
$self->{PID_owner} = $PID;
4413
POSIX::setsid() or die "Cannot start a new session: $OS_ERROR";
4414
chdir '/' or die "Cannot chdir to /: $OS_ERROR";
4416
$self->_make_PID_file();
4418
$OUTPUT_AUTOFLUSH = 1;
4422
open STDIN, '/dev/null'
4423
or die "Cannot reopen STDIN to /dev/null: $OS_ERROR";
4426
if ( $self->{log_file} ) {
4428
open STDOUT, '>>', $self->{log_file}
4429
or die "Cannot open log file $self->{log_file}: $OS_ERROR";
4432
open STDERR, ">&STDOUT"
4433
or die "Cannot dupe STDERR to STDOUT: $OS_ERROR";
4438
open STDOUT, '>', '/dev/null'
4439
or die "Cannot reopen STDOUT to /dev/null: $OS_ERROR";
4443
open STDERR, '>', '/dev/null'
4444
or die "Cannot reopen STDERR to /dev/null: $OS_ERROR";
4448
MKDEBUG && _d('I am the child and now I live daemonized');
4452
sub check_PID_file {
4453
my ( $self, $file ) = @_;
4454
my $PID_file = $self ? $self->{PID_file} : $file;
4455
MKDEBUG && _d('Checking PID file', $PID_file);
4456
if ( $PID_file && -f $PID_file ) {
4458
eval { chomp($pid = `cat $PID_file`); };
4459
die "Cannot cat $PID_file: $OS_ERROR" if $EVAL_ERROR;
4460
MKDEBUG && _d('PID file exists; it contains PID', $pid);
4462
my $pid_is_alive = kill 0, $pid;
4463
if ( $pid_is_alive ) {
4464
die "The PID file $PID_file already exists "
4465
. " and the PID that it contains, $pid, is running";
4468
warn "Overwriting PID file $PID_file because the PID that it "
4469
. "contains, $pid, is not running";
4473
die "The PID file $PID_file already exists but it does not "
4478
MKDEBUG && _d('No PID file');
4485
if ( exists $self->{child} ) {
4486
die "Do not call Daemon::make_PID_file() for daemonized scripts";
4488
$self->_make_PID_file();
4489
$self->{PID_owner} = $PID;
4493
sub _make_PID_file {
4496
my $PID_file = $self->{PID_file};
4498
MKDEBUG && _d('No PID file to create');
4502
$self->check_PID_file();
4504
open my $PID_FH, '>', $PID_file
4505
or die "Cannot open PID file $PID_file: $OS_ERROR";
4507
or die "Cannot print to PID file $PID_file: $OS_ERROR";
4509
or die "Cannot close PID file $PID_file: $OS_ERROR";
4511
MKDEBUG && _d('Created PID file:', $self->{PID_file});
4515
sub _remove_PID_file {
4517
if ( $self->{PID_file} && -f $self->{PID_file} ) {
4518
unlink $self->{PID_file}
4519
or warn "Cannot remove PID file $self->{PID_file}: $OS_ERROR";
4520
MKDEBUG && _d('Removed PID file');
4523
MKDEBUG && _d('No PID to remove');
4531
$self->_remove_PID_file() if ($self->{PID_owner} || 0) == $PID;
4537
my ($package, undef, $line) = caller 0;
4538
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4539
map { defined $_ ? $_ : 'undef' }
4541
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4546
# ###########################################################################
4547
# End Daemon package
4548
# ###########################################################################
4550
# ###########################################################################
4551
# SchemaIterator package 7512
4552
# This package is a copy without comments from the original. The original
4553
# with comments and its test file can be found in the SVN repository at,
4554
# trunk/common/SchemaIterator.pm
4555
# trunk/common/t/SchemaIterator.t
4556
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
4557
# ###########################################################################
4558
package SchemaIterator;
4562
use warnings FATAL => 'all';
4563
use English qw(-no_match_vars);
4564
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
4567
$Data::Dumper::Indent = 1;
4568
$Data::Dumper::Sortkeys = 1;
4569
$Data::Dumper::Quotekeys = 0;
4571
my $open_comment = qr{/\*!\d{5} };
4576
(?:IF NOT EXISTS\s+)?
4582
my ( $class, %args ) = @_;
4583
my @required_args = qw(OptionParser Quoter);
4584
foreach my $arg ( @required_args ) {
4585
die "I need a $arg argument" unless $args{$arg};
4588
my ($file_itr, $dbh) = @args{qw(file_itr dbh)};
4589
die "I need either a dbh or file_itr argument"
4590
if (!$dbh && !$file_itr) || ($dbh && $file_itr);
4594
filters => _make_filters(%args),
4597
return bless $self, $class;
4602
my @required_args = qw(OptionParser Quoter);
4603
foreach my $arg ( @required_args ) {
4604
die "I need a $arg argument" unless $args{$arg};
4606
my ($o, $q) = @args{@required_args};
4611
my @simple_filters = qw(
4612
databases tables engines
4613
ignore-databases ignore-tables ignore-engines);
4615
foreach my $filter ( @simple_filters ) {
4616
if ( $o->has($filter) ) {
4617
my $objs = $o->get($filter);
4618
next FILTER unless $objs && scalar keys %$objs;
4619
my $is_table = $filter =~ m/table/ ? 1 : 0;
4620
foreach my $obj ( keys %$objs ) {
4621
die "Undefined value for --$filter" unless $obj;
4624
my ($db, $tbl) = $q->split_unquote($obj);
4626
MKDEBUG && _d('Filter', $filter, 'value:', $db, $tbl);
4627
$filters{$filter}->{$tbl} = $db;
4630
MKDEBUG && _d('Filter', $filter, 'value:', $obj);
4631
$filters{$filter}->{$obj} = 1;
4637
my @regex_filters = qw(
4638
databases-regex tables-regex
4639
ignore-databases-regex ignore-tables-regex);
4641
foreach my $filter ( @regex_filters ) {
4642
if ( $o->has($filter) ) {
4643
my $pat = $o->get($filter);
4644
next REGEX_FILTER unless $pat;
4645
$filters{$filter} = qr/$pat/;
4646
MKDEBUG && _d('Filter', $filter, 'value:', $filters{$filter});
4650
MKDEBUG && _d('Schema object filters:', Dumper(\%filters));
4654
sub next_schema_object {
4658
if ( $self->{file_itr} ) {
4659
%schema_object = $self->_iterate_files();
4662
%schema_object = $self->_iterate_dbh();
4665
MKDEBUG && _d('Next schema object:', Dumper(\%schema_object));
4666
return %schema_object;
4669
sub _iterate_files {
4672
if ( !$self->{fh} ) {
4673
my ($fh, $file) = $self->{file_itr}->();
4675
MKDEBUG && _d('No more files to iterate');
4679
$self->{file} = $file;
4681
my $fh = $self->{fh};
4682
MKDEBUG && _d('Getting next schema object from', $self->{file});
4684
local $INPUT_RECORD_SEPARATOR = '';
4686
while (defined(my $chunk = <$fh>)) {
4687
if ($chunk =~ m/Database: (\S+)/) {
4689
$db =~ s/^`//; # strip leading `
4690
$db =~ s/`$//; # and trailing `
4691
if ( $self->database_is_allowed($db) ) {
4695
elsif ($self->{db} && $chunk =~ m/CREATE TABLE/) {
4696
if ($chunk =~ m/DROP VIEW IF EXISTS/) {
4697
MKDEBUG && _d('Table is a VIEW, skipping');
4701
my ($tbl) = $chunk =~ m/$tbl_name/;
4704
if ( $self->table_is_allowed($self->{db}, $tbl) ) {
4705
my ($ddl) = $chunk =~ m/^(?:$open_comment)?(CREATE TABLE.+?;)$/ms;
4707
warn "Failed to parse CREATE TABLE from\n" . $chunk;
4710
$ddl =~ s/ \*\/;\Z/;/; # remove end of version comment
4712
my ($engine) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
4714
if ( !$engine || $self->engine_is_allowed($engine) ) {
4725
MKDEBUG && _d('No more schema objects in', $self->{file});
4727
$self->{fh} = undef;
4729
return $self->_iterate_files();
4734
my $q = $self->{Quoter};
4735
my $dbh = $self->{dbh};
4736
MKDEBUG && _d('Getting next schema object from dbh', $dbh);
4738
if ( !defined $self->{dbs} ) {
4739
my $sql = 'SHOW DATABASES';
4740
MKDEBUG && _d($sql);
4741
my @dbs = grep { $self->database_is_allowed($_) }
4742
@{$dbh->selectcol_arrayref($sql)};
4743
MKDEBUG && _d('Found', scalar @dbs, 'databases');
4744
$self->{dbs} = \@dbs;
4747
if ( !$self->{db} ) {
4748
$self->{db} = shift @{$self->{dbs}};
4749
MKDEBUG && _d('Next database:', $self->{db});
4750
return unless $self->{db};
4753
if ( !defined $self->{tbls} ) {
4754
my $sql = 'SHOW /*!50002 FULL*/ TABLES FROM ' . $q->quote($self->{db});
4755
MKDEBUG && _d($sql);
4757
$_->[0]; # (tbl, type)
4760
my ($tbl, $type) = @$_;
4761
$self->table_is_allowed($self->{db}, $tbl)
4762
&& (!$type || ($type ne 'VIEW'));
4764
@{$dbh->selectall_arrayref($sql)};
4765
MKDEBUG && _d('Found', scalar @tbls, 'tables in database', $self->{db});
4766
$self->{tbls} = \@tbls;
4769
while ( my $tbl = shift @{$self->{tbls}} ) {
4771
if ( $self->{filters}->{'engines'}
4772
|| $self->{filters}->{'ignore-engines'} ) {
4773
my $sql = "SHOW TABLE STATUS FROM " . $q->quote($self->{db})
4775
MKDEBUG && _d($sql);
4776
$engine = $dbh->selectrow_hashref($sql)->{engine};
4777
MKDEBUG && _d($tbl, 'uses', $engine, 'engine');
4781
if ( !$engine || $self->engine_is_allowed($engine) ) {
4783
if ( my $du = $self->{MySQLDump} ) {
4784
$ddl = $du->get_create_table($dbh, $q, $self->{db}, $tbl)->[1];
4795
MKDEBUG && _d('No more tables in database', $self->{db});
4796
$self->{db} = undef;
4797
$self->{tbls} = undef;
4799
return $self->_iterate_dbh();
4802
sub database_is_allowed {
4803
my ( $self, $db ) = @_;
4804
die "I need a db argument" unless $db;
4808
my $filter = $self->{filters};
4810
if ( $db =~ m/information_schema|performance_schema|lost\+found/ ) {
4811
MKDEBUG && _d('Database', $db, 'is a system database, ignoring');
4815
if ( $self->{filters}->{'ignore-databases'}->{$db} ) {
4816
MKDEBUG && _d('Database', $db, 'is in --ignore-databases list');
4820
if ( $filter->{'ignore-databases-regex'}
4821
&& $db =~ $filter->{'ignore-databases-regex'} ) {
4822
MKDEBUG && _d('Database', $db, 'matches --ignore-databases-regex');
4826
if ( $filter->{'databases'}
4827
&& !$filter->{'databases'}->{$db} ) {
4828
MKDEBUG && _d('Database', $db, 'is not in --databases list, ignoring');
4832
if ( $filter->{'databases-regex'}
4833
&& $db !~ $filter->{'databases-regex'} ) {
4834
MKDEBUG && _d('Database', $db, 'does not match --databases-regex, ignoring');
4841
sub table_is_allowed {
4842
my ( $self, $db, $tbl ) = @_;
4843
die "I need a db argument" unless $db;
4844
die "I need a tbl argument" unless $tbl;
4849
my $filter = $self->{filters};
4851
if ( $filter->{'ignore-tables'}->{$tbl}
4852
&& ($filter->{'ignore-tables'}->{$tbl} eq '*'
4853
|| $filter->{'ignore-tables'}->{$tbl} eq $db) ) {
4854
MKDEBUG && _d('Table', $tbl, 'is in --ignore-tables list');
4858
if ( $filter->{'ignore-tables-regex'}
4859
&& $tbl =~ $filter->{'ignore-tables-regex'} ) {
4860
MKDEBUG && _d('Table', $tbl, 'matches --ignore-tables-regex');
4864
if ( $filter->{'tables'}
4865
&& !$filter->{'tables'}->{$tbl} ) {
4866
MKDEBUG && _d('Table', $tbl, 'is not in --tables list, ignoring');
4870
if ( $filter->{'tables-regex'}
4871
&& $tbl !~ $filter->{'tables-regex'} ) {
4872
MKDEBUG && _d('Table', $tbl, 'does not match --tables-regex, ignoring');
4876
if ( $filter->{'tables'}
4877
&& $filter->{'tables'}->{$tbl}
4878
&& $filter->{'tables'}->{$tbl} ne '*'
4879
&& $filter->{'tables'}->{$tbl} ne $db ) {
4880
MKDEBUG && _d('Table', $tbl, 'is only allowed in database',
4881
$filter->{'tables'}->{$tbl});
4888
sub engine_is_allowed {
4889
my ( $self, $engine ) = @_;
4890
die "I need an engine argument" unless $engine;
4892
$engine = lc $engine;
4894
my $filter = $self->{filters};
4896
if ( $filter->{'ignore-engines'}->{$engine} ) {
4897
MKDEBUG && _d('Engine', $engine, 'is in --ignore-databases list');
4901
if ( $filter->{'engines'}
4902
&& !$filter->{'engines'}->{$engine} ) {
4903
MKDEBUG && _d('Engine', $engine, 'is not in --engines list, ignoring');
4911
my ($package, undef, $line) = caller 0;
4912
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4913
map { defined $_ ? $_ : 'undef' }
4915
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4921
# ###########################################################################
4922
# End SchemaIterator package
4923
# ###########################################################################
4925
# ###########################################################################
4926
# Retry package 7473
4927
# This package is a copy without comments from the original. The original
4928
# with comments and its test file can be found in the SVN repository at,
4929
# trunk/common/Retry.pm
4930
# trunk/common/t/Retry.t
4931
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
4932
# ###########################################################################
4936
use warnings FATAL => 'all';
4937
use English qw(-no_match_vars);
4938
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
4941
my ( $class, %args ) = @_;
4945
return bless $self, $class;
4949
my ( $self, %args ) = @_;
4950
my @required_args = qw(try wait);
4951
foreach my $arg ( @required_args ) {
4952
die "I need a $arg argument" unless $args{$arg};
4954
my ($try, $wait) = @args{@required_args};
4955
my $tries = $args{tries} || 3;
4958
while ( ++$tryno <= $tries ) {
4959
MKDEBUG && _d("Retry", $tryno, "of", $tries);
4962
$result = $try->(tryno=>$tryno);
4965
if ( defined $result ) {
4966
MKDEBUG && _d("Try code succeeded");
4967
if ( my $on_success = $args{on_success} ) {
4968
MKDEBUG && _d("Calling on_success code");
4969
$on_success->(tryno=>$tryno, result=>$result);
4974
if ( $EVAL_ERROR ) {
4975
MKDEBUG && _d("Try code died:", $EVAL_ERROR);
4976
die $EVAL_ERROR unless $args{retry_on_die};
4979
if ( $tryno < $tries ) {
4980
MKDEBUG && _d("Try code failed, calling wait code");
4981
$wait->(tryno=>$tryno);
4985
MKDEBUG && _d("Try code did not succeed");
4986
if ( my $on_failure = $args{on_failure} ) {
4987
MKDEBUG && _d("Calling on_failure code");
4995
my ($package, undef, $line) = caller 0;
4996
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4997
map { defined $_ ? $_ : 'undef' }
4999
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
5004
# ###########################################################################
5006
# ###########################################################################
5008
# ###########################################################################
5009
# Progress package 7096
5010
# This package is a copy without comments from the original. The original
5011
# with comments and its test file can be found in the SVN repository at,
5012
# trunk/common/Progress.pm
5013
# trunk/common/t/Progress.t
5014
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
5015
# ###########################################################################
5019
use warnings FATAL => 'all';
5021
use English qw(-no_match_vars);
5023
$Data::Dumper::Indent = 1;
5024
$Data::Dumper::Sortkeys = 1;
5025
$Data::Dumper::Quotekeys = 0;
5027
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
5030
my ( $class, %args ) = @_;
5031
foreach my $arg (qw(jobsize)) {
5032
die "I need a $arg argument" unless defined $args{$arg};
5034
if ( (!$args{report} || !$args{interval}) ) {
5035
if ( $args{spec} && @{$args{spec}} == 2 ) {
5036
@args{qw(report interval)} = @{$args{spec}};
5039
die "I need either report and interval arguments, or a spec";
5043
my $name = $args{name} || "Progress";
5044
$args{start} ||= time();
5047
last_reported => $args{start},
5048
fraction => 0, # How complete the job is
5050
my ($fraction, $elapsed, $remaining, $eta) = @_;
5051
printf STDERR "$name: %3d%% %s remain\n",
5053
Transformers::secs_to_time($remaining),
5054
Transformers::ts($eta);
5058
return bless $self, $class;
5062
shift @_ if $_[0] eq 'Progress'; # Permit calling as Progress-> or Progress::
5064
if ( @$spec != 2 ) {
5065
die "spec array requires a two-part argument\n";
5067
if ( $spec->[0] !~ m/^(?:percentage|time|iterations)$/ ) {
5068
die "spec array's first element must be one of "
5069
. "percentage,time,iterations\n";
5071
if ( $spec->[1] !~ m/^\d+$/ ) {
5072
die "spec array's second element must be an integer\n";
5077
my ( $self, $callback ) = @_;
5078
$self->{callback} = $callback;
5082
my ( $self, $start ) = @_;
5083
$self->{start} = $self->{last_reported} = $start || time();
5087
my ( $self, $callback, $now ) = @_;
5088
my $jobsize = $self->{jobsize};
5090
$self->{iterations}++; # How many updates have happened;
5092
if ( $self->{report} eq 'time'
5093
&& $self->{interval} > $now - $self->{last_reported}
5097
elsif ( $self->{report} eq 'iterations'
5098
&& ($self->{iterations} - 1) % $self->{interval} > 0
5102
$self->{last_reported} = $now;
5104
my $completed = $callback->();
5105
$self->{updates}++; # How many times we have run the update callback
5107
return if $completed > $jobsize;
5109
my $fraction = $completed > 0 ? $completed / $jobsize : 0;
5111
if ( $self->{report} eq 'percentage'
5112
&& $self->fraction_modulo($self->{fraction})
5113
>= $self->fraction_modulo($fraction)
5115
$self->{fraction} = $fraction;
5118
$self->{fraction} = $fraction;
5120
my $elapsed = $now - $self->{start};
5123
if ( $completed > 0 && $completed <= $jobsize && $elapsed > 0 ) {
5124
my $rate = $completed / $elapsed;
5126
$remaining = ($jobsize - $completed) / $rate;
5127
$eta = $now + int($remaining);
5130
$self->{callback}->($fraction, $elapsed, $remaining, $eta, $completed);
5133
sub fraction_modulo {
5134
my ( $self, $num ) = @_;
5135
$num *= 100; # Convert from fraction to percentage
5136
return sprintf('%d',
5137
sprintf('%d', $num / $self->{interval}) * $self->{interval});
5141
my ($package, undef, $line) = caller 0;
5142
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
5143
map { defined $_ ? $_ : 'undef' }
5145
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
5150
# ###########################################################################
5151
# End Progress package
5152
# ###########################################################################
5154
# ###########################################################################
5155
# This is a combination of modules and programs in one -- a runnable module.
5156
# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
5157
# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition.
5159
# Check at the end of this package for the call to main() which actually runs
5161
# ###########################################################################
5162
package mk_table_checksum;
5164
use English qw(-no_match_vars);
5165
use List::Util qw(max maxstr);
5166
use Time::HiRes qw(gettimeofday sleep);
5168
$Data::Dumper::Indent = 0;
5169
$Data::Dumper::Quotekeys = 0;
5171
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
5173
$OUTPUT_AUTOFLUSH = 1;
5176
my $checksum_table_data;
5177
my ( $fetch_sth, $update_sth, $savesince_sth );
5178
my ( $crc_wid, $md5sum_fmt );
5179
my $already_checksummed;
5180
# %tables_to_checksum has the following structure:
5186
my %tables_to_checksum;
5189
@ARGV = @_; # set global ARGV for this package
5191
# Reset global vars else tests which run this tool as a module
5192
# will have strange, overlapping results.
5193
$checksum_table_data = undef;
5194
( $fetch_sth, $update_sth, $savesince_sth ) = (undef, undef, undef);
5195
( $crc_wid, $md5sum_fmt ) = (undef, undef);
5196
$already_checksummed = undef;
5197
%tables_to_checksum = ();
5199
my $q = new Quoter();
5200
my $exit_status = 0;
5202
# ########################################################################
5203
# Get configuration information.
5204
# ########################################################################
5205
# Because of --arg-table, $final_o is the OptionParser obj used to get
5206
# most options (see my $final_o below).
5207
my $o = new OptionParser();
5211
my $dp = $o->DSNParser();
5212
$dp->prop('set-vars', $o->get('set-vars'));
5214
# This list contains all the command-line arguments that can be overridden
5215
# by a table that contains arguments for each table to be checksummed.
5216
# The long form of each argument is given. The values are read from the
5217
# POD by finding the magical token.
5218
my %overridable_args;
5220
my $para = $o->read_para_after(
5221
__FILE__, qr/MAGIC_overridable_args/);
5222
foreach my $arg ( $para =~ m/([\w-]+)/g ) {
5223
die "Magical argument $arg mentioned in POD is not a "
5224
. "command-line argument" unless $o->has($arg);
5225
$overridable_args{$arg} = 1;
5229
# Post-process command-line options and arguments.
5230
if ( $o->get('replicate') ) {
5231
# --replicate says that it disables these options. We don't
5232
# check got() because these opts aren't used in do_tbl_replicate()
5233
# or its caller so they're completely useless with --replicate.
5234
$o->set('lock', undef);
5235
$o->set('wait', undef);
5236
$o->set('slave-lag', undef);
5239
$o->set('lock', 1) if $o->get('wait');
5240
$o->set('slave-lag', 1) if $o->get('lock');
5244
$o->save_error("No hosts specified.");
5248
my $dsn_defaults = $dp->parse_options($o);
5250
foreach my $arg ( unique(@ARGV) ) {
5251
push @hosts, $dp->parse($arg, $hosts[0], $dsn_defaults);
5255
if ( $o->get('explain-hosts') ) {
5256
foreach my $host ( @hosts ) {
5257
print "Server $host->{h}:\n ", $dp->as_string($host), "\n";
5262
# Checksumming table data is the normal operation. But if we're only to
5263
# compare schemas, then we can skip a lot of work, like selecting an algo,
5264
# replication stuff, etc.
5265
$checksum_table_data = $o->get('schema') ? 0 : 1;
5267
if ( $o->get('checksum') ) {
5268
$o->set('count', 0);
5271
if ( $o->get('explain') ) {
5275
# --replicate auto-enables --throttle-method slavelag unless user
5276
# set --throttle-method explicitly.
5277
$o->set('throttle-method', 'slavelag')
5278
if $o->get('replicate') && !$o->got('throttle-method');
5280
# These options are only needed if a --chunk-size is specified.
5281
if ( !$o->get('chunk-size') ) {
5282
$o->set('chunk-size-limit', undef);
5283
$o->set('unchunkable-tables', 1);
5286
if ( !$o->get('help') ) {
5287
if ( $o->get('replicate-check') && !$o->get('replicate') ) {
5288
$o->save_error("--replicate-check requires --replicate.");
5290
if ( $o->get('save-since') && !$o->get('arg-table') ) {
5291
$o->save_error("--save-since requires --arg-table.");
5293
elsif ( $o->get('replicate') && @hosts > 1 ) {
5294
$o->save_error("You can only specify one host with --replicate.");
5297
if ( $o->get('resume-replicate') && !$o->get('replicate') ) {
5298
$o->save_error("--resume-replicate requires --replicate.");
5300
if ( $o->get('resume') && $o->get('replicate') ) {
5301
$o->save_error('--resume does not work with --replicate. '
5302
. 'Use --resume-replicate instead.');
5305
if ( my $throttle_method = $o->get('throttle-method') ) {
5306
$throttle_method = lc $throttle_method;
5307
if ( !grep { $throttle_method eq $_ } qw(none slavelag) ) {
5308
$o->save_error("Invalid --throttle-method: $throttle_method");
5312
if ( $o->get('check-slave-lag') && $o->get('throttle-method') eq 'none') {
5313
# User specified --check-slave-lag DSN and --throttle-method none.
5314
# They probably meant just --check-slave-lag DSN.
5315
$o->save_error('-throttle-method=none contradicts --check-slave-lag '
5316
. 'because --check-slave-lag implies --throttle-method=slavelag');
5318
if ( $o->get('throttle-method') ne 'none' && !$o->get('replicate') ) {
5319
# User did --throttle-method (explicitly) without --replicate.
5320
$o->save_error('--throttle-method ', $o->get('throttle-method'),
5321
' requires --replicate');
5324
# Make sure --replicate has a db.
5325
if ( my $replicate_table = $o->get('replicate') ) {
5326
my ($db, $tbl) = $q->split_unquote($replicate_table);
5328
$o->save_error('The --replicate table must be database-qualified');
5332
if ( $o->get('chunk-size-limit') ) {
5333
my $factor = $o->get('chunk-size-limit');
5334
if ( $factor < 0 # can't be negative
5335
|| ($factor > 0 && $factor < 1) ) # can't be less than 1
5337
$o->save_error('--chunk-size-limit must be >= 1 or 0 to disable');
5341
if ( $o->get('progress') ) {
5342
eval { Progress->validate_spec($o->get('progress')) };
5343
if ( $EVAL_ERROR ) {
5345
$o->save_error("--progress $EVAL_ERROR");
5349
if ( my $chunk_range = $o->get('chunk-range') ) {
5350
$chunk_range = lc $chunk_range;
5351
my $para = $o->read_para_after(__FILE__, qr/MAGIC_chunk_range/);
5352
my @vals = $para =~ m/\s+([a-z]+)\s+[A-Z]+/g;
5353
if ( !grep { $chunk_range eq $_} @vals ) {
5354
$o->save_error("Invalid value for --chunk-range. "
5355
. "Valid values are: " . join(", ", @vals));
5360
$o->usage_or_errors();
5362
# ########################################################################
5363
# If --pid, check it first since we'll die if it already exits.
5364
# ########################################################################
5366
if ( $o->get('pid') ) {
5367
# We're not daemoninzing, it just handles PID stuff. Keep $daemon
5368
# in the the scope of main() because when it's destroyed it automatically
5369
# removes the PID file.
5370
$daemon = new Daemon(o=>$o);
5371
$daemon->make_PID_file();
5374
# ########################################################################
5375
# Ready to work now.
5376
# ########################################################################
5377
my $vp = new VersionParser();
5378
my $tp = new TableParser(Quoter => $q);
5379
my $tc = new TableChecksum(Quoter=> $q, VersionParser => $vp);
5380
my $ms = new MasterSlave(VersionParser => $vp);
5381
my $du = new MySQLDump();
5382
my $ch = new TableChunker(Quoter => $q, MySQLDump => $du);
5383
my %common_modules = (
5395
my $main_dbh = get_cxn($hosts[0], %common_modules);
5397
# #########################################################################
5398
# Prepare --throttle-method.
5399
# #########################################################################
5400
my $throttle_method = $o->get('throttle-method');
5402
if ( lc($throttle_method) eq 'slavelag' ) {
5403
if ( $o->get('check-slave-lag') ) {
5404
MKDEBUG && _d('Using --check-slave-lag DSN for throttle');
5405
# OptionParser can't auto-copy DSN vals from a cmd line DSN
5406
# to an opt DSN, so we copy them manually.
5407
my $dsn = $dp->copy($hosts[0], $o->get('check-slave-lag'));
5408
push @slaves, { dsn=>$dsn, dbh=>get_cxn($dsn, %common_modules) };
5411
MKDEBUG && _d('Recursing to slaves for throttle');
5412
$ms->recurse_to_slaves(
5416
recurse => $o->get('recurse'),
5417
method => $o->get('recursion-method'),
5419
my ( $dsn, $dbh, $level, $parent ) = @_;
5420
return unless $level;
5421
MKDEBUG && _d('throttle slave:', $dp->as_string($dsn));
5422
$dbh->{InactiveDestroy} = 1; # Prevent destroying on fork.
5423
$dbh->{FetchHashKeyName} = 'NAME_lc';
5424
push @slaves, { dsn=>$dsn, dbh=>$dbh };
5432
# ########################################################################
5433
# Load --arg-table information.
5434
# ########################################################################
5436
if ( my $arg_tbl = $o->get('arg-table') ) {
5437
my %col_in_argtable;
5438
my $rows = $main_dbh->selectall_arrayref(
5439
"SELECT * FROM $arg_tbl", { Slice => {} });
5440
foreach my $row ( @$rows ) {
5441
die "Invalid entry in --arg-table: db and tbl must be set"
5442
unless $row->{db} && $row->{tbl};
5443
$args_for{$row->{db}}->{$row->{tbl}} = {
5444
map { $_ => $row->{$_} }
5445
grep { $overridable_args{$_} && defined $row->{$_} }
5448
if ( !%col_in_argtable ) { # do only once
5449
foreach my $key ( keys %$row ) {
5450
next if $key =~ m/^(db|tbl|ts)$/;
5451
die "Column $key (from $arg_tbl given by --arg-table) is not "
5452
. "an overridable argument" unless $overridable_args{$key};
5453
$col_in_argtable{$key} = 1;
5457
if ( $col_in_argtable{since} ) {
5458
$savesince_sth = $main_dbh->prepare(
5459
"UPDATE $arg_tbl SET since=COALESCE(?, NOW()) WHERE db=? AND tbl=?");
5463
# ########################################################################
5464
# Check for replication filters.
5465
# ########################################################################
5466
if ( $o->get('replicate') && $o->get('check-replication-filters') ) {
5467
MKDEBUG && _d("Recursing to slaves to check for replication filters");
5468
my @all_repl_filters;
5469
$ms->recurse_to_slaves(
5473
recurse => undef, # check for filters anywhere
5474
method => $o->get('recursion-method'),
5476
my ( $dsn, $dbh, $level, $parent ) = @_;
5477
my $repl_filters = $ms->get_replication_filters(dbh=>$dbh);
5478
if ( keys %$repl_filters ) {
5479
my $host = $dp->as_string($dsn);
5480
push @all_repl_filters,
5482
filters => $repl_filters,
5489
if ( @all_repl_filters ) {
5490
my $msg = "Cannot checksum with --replicate because replication "
5491
. "filters are set on these hosts:\n";
5492
foreach my $host ( @all_repl_filters ) {
5493
my $filters = $host->{filters};
5494
$msg .= " $host->{name}\n"
5495
. join("\n", map { " $_ = $host->{filters}->{$_}" }
5496
keys %{$host->{filters}})
5499
$msg .= "Please read the --check-replication-filters documentation "
5500
. "to learn how to solve this problem.";
5506
# ########################################################################
5507
# Check replication slaves if desired. If only --replicate-check is given,
5508
# then we will exit here. If --recheck is also given, then we'll continue
5509
# through the entire script but checksum only the inconsistent tables found
5511
# ########################################################################
5512
if ( defined $o->get('replicate-check') ) {
5513
MKDEBUG && _d("Recursing to slaves for replicate check, depth",
5514
$o->get('replicate-check'));
5515
my $callback = $o->get('recheck')
5516
? \&save_inconsistent_tbls
5517
: \&print_inconsistent_tbls;
5518
$ms->recurse_to_slaves(
5522
recurse => $o->get('replicate-check'),
5523
method => $o->get('recursion-method'),
5525
my ( $dsn, $dbh, $level, $parent ) = @_;
5526
my @tbls = $tc->find_replication_differences(
5527
$dbh, $o->get('replicate'));
5528
return unless @tbls;
5530
# Call the callback that does something useful with
5531
# the inconsistent tables.
5532
# o dbh db tbl args_for
5539
args_for => \%args_for,
5545
return $exit_status unless $o->get('recheck');
5548
# ########################################################################
5549
# Otherwise get ready to checksum table data, unless we have only to check
5550
# schemas in which case we can skip all such work, knowing already that we
5552
# ########################################################################
5553
if ( $checksum_table_data ) {
5554
# Verify that CONCAT_WS is compatible across all servers. On older
5555
# versions of MySQL it skips both empty strings and NULL; on newer
5557
if ( $o->get('verify') && @hosts > 1 ) {
5558
verify_checksum_compat(hosts=>\@hosts, %common_modules);
5561
($fetch_sth, $update_sth)
5562
= check_repl_table(dbh=>$main_dbh, %common_modules);
5565
$crc_wid = 16; # Wider than the widest CRC32.
5568
# ########################################################################
5569
# If resuming a previous run, figure out what the previous run finished.
5570
# ########################################################################
5571
if ( $o->get('replicate') && $o->get('resume-replicate') ) {
5572
$already_checksummed = read_repl_table(
5574
host => $hosts[0]->{h},
5578
elsif ( $o->get('resume') ) {
5579
$already_checksummed = parse_resume_file($o->get('resume'));
5582
# ########################################################################
5583
# Set transaction isolation level.
5584
# http://code.google.com/p/maatkit/issues/detail?id=720
5585
# ########################################################################
5586
if ( $o->get('replicate') ) {
5587
my $sql = "SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ";
5589
MKDEBUG && _d($main_dbh, $sql);
5590
$main_dbh->do($sql);
5592
if ( $EVAL_ERROR ) {
5593
die "Failed to $sql: $EVAL_ERROR\n"
5594
. "If the --replicate table is InnoDB and the default server "
5595
. "transaction isolation level is not REPEATABLE-READ then "
5596
. "checksumming may fail with errors like \"Binary logging not "
5597
. "possible. Message: Transaction level 'READ-COMMITTED' in "
5598
. "InnoDB is not safe for binlog mode 'STATEMENT'\". In that "
5599
. "case you will need to manually set the transaction isolation "
5600
. "level to REPEATABLE-READ.";
5604
# ########################################################################
5605
# Iterate through databases and tables and do the checksums.
5606
# ########################################################################
5608
# Get table info for all hosts, all slaves, unless we're in the special
5609
# "repl-re-check" mode in which case %tables_to_checksum has already the
5610
# inconsistent tables that we need to re-checksum.
5613
args_for => \%args_for,
5615
) unless ($o->get('replicate-check') && $o->get('recheck'));
5617
# Finally, checksum the tables.
5618
foreach my $database ( keys %tables_to_checksum ) {
5619
my $tables = $tables_to_checksum{$database};
5620
$exit_status |= checksum_tables(
5630
return $exit_status;
5633
# ############################################################################
5635
# ############################################################################
5637
sub get_all_tbls_info {
5639
foreach my $arg ( qw(o dbh q tp du ch args_for) ) {
5640
die "I need a $arg argument" unless $args{$arg};
5642
my $dbh = $args{dbh};
5643
MKDEBUG && _d('Getting all schema objects');
5645
my $si = new SchemaIterator(
5647
OptionParser => $args{o},
5650
while ( my %schema_obj = $si->next_schema_object() ) {
5651
my $final_o = get_final_opts(
5655
save_tbl_to_checksum(
5658
final_o => $final_o,
5665
sub save_tbl_to_checksum {
5667
foreach my $arg ( qw(q ch du final_o tp dbh db tbl du tp ch vp) ) {
5668
die "I need a $arg argument" unless $args{$arg};
5673
my $final_o = $args{final_o};
5674
my $dbh = $args{dbh};
5676
my $tbl = $args{tbl};
5680
# Skip the table in which checksums are stored.
5681
return if ($final_o->get('replicate')
5682
&& $final_o->get('replicate') eq "$db.$tbl");
5684
eval { # Catch errors caused by tables being dropped during work.
5686
# Parse the table and determine a column that's chunkable. This is
5687
# used not only for chunking, but also for --since.
5688
my $create = $du->get_create_table($dbh, $q, $db, $tbl);
5689
my $struct = $tp->parse($create);
5691
# If there's a --where clause and the user didn't specify a chunk index
5692
# a chunk they want, then get MySQL's chosen index for the where clause
5693
# and make it the preferred index.
5694
# http://code.google.com/p/maatkit/issues/detail?id=378
5695
if ( $final_o->get('where')
5696
&& !$final_o->get('chunk-column')
5697
&& !$final_o->get('chunk-index') )
5699
my ($mysql_chosen_index) = $tp->find_possible_keys(
5700
$dbh, $db, $tbl, $q, $final_o->get('where'));
5701
MKDEBUG && _d("Index chosen by MySQL for --where:",
5702
$mysql_chosen_index);
5703
$final_o->set('chunk-index', $mysql_chosen_index)
5704
if $mysql_chosen_index;
5708
# Get the first chunkable column and index, taking into account
5709
# --chunk-column and --chunk-index. If either of those options
5710
# is specified, get_first_chunkable_column() will try to satisfy
5711
# the request but there's no guarantee either will be selected.
5712
# http://code.google.com/p/maatkit/issues/detail?id=519
5713
my ($chunk_col, $chunk_index) = $ch->get_first_chunkable_column(
5715
chunk_column => $final_o->get('chunk-column'),
5716
chunk_index => $final_o->get('chunk-index'),
5717
tbl_struct => $struct,
5721
if ( $final_o->get('use-index') && $chunk_col ) {
5722
my $hint = $vp->version_ge($dbh, '4.0.9') ? 'FORCE' : 'USE';
5723
$index_hint = "$hint INDEX (" . $q->quote($chunk_index) . ")";
5725
MKDEBUG && _d('Index hint:', $index_hint);
5727
my @chunks = '1=1'; # Default.
5728
my $rows_per_chunk = undef;
5730
if ( $final_o->get('chunk-size') ) {
5731
($rows_per_chunk) = $ch->size_to_rows(
5735
chunk_size => $final_o->get('chunk-size'),
5739
# Calculate chunks for this table.
5740
my %params = $ch->get_range_statistics(
5744
chunk_col => $chunk_col,
5745
tbl_struct => $struct,
5747
if ( !grep { !defined $params{$_} } qw(min max rows_in_range) ) {
5748
@chunks = $ch->calculate_chunks(
5752
tbl_struct => $struct,
5753
chunk_col => $chunk_col,
5754
chunk_size => $rows_per_chunk,
5755
zero_chunk => $final_o->get('zero-chunk'),
5756
chunk_range => $final_o->get('chunk-range'),
5759
$maxval = $params{max};
5764
push @{ $tables_to_checksum{$db} }, {
5769
column => $chunk_col,
5770
chunk_index => $chunk_index,
5771
chunk_size => $rows_per_chunk,
5773
index => $index_hint,
5775
final_o => $final_o,
5778
if ( $EVAL_ERROR ) {
5779
print_err($final_o, $EVAL_ERROR, $db, $tbl);
5785
# Checksum the tables in the given database.
5786
# A separate report for each database and its tables is printed.
5787
sub checksum_tables {
5789
foreach my $arg ( qw(tc du o q db dbh hosts tbls) ) {
5790
die "I need a $arg argument" unless $args{$arg};
5796
my $dbh = $args{dbh};
5797
my $hosts = $args{hosts};
5798
my $tbls = $args{tbls};
5801
my ($hdr, $explain);
5802
my $exit_status = 0;
5804
# NOTE: remember, you can't 'next TABLE' inside the eval{}.
5805
# NOTE: remember to use the final_o embedded within each $table, not $o
5806
foreach my $table ( @$tbls ) {
5807
MKDEBUG && _d("Doing", $db, '.', $table->{table});
5808
MKDEBUG && _d("Table:", Dumper($table));
5809
my $final_o = $table->{final_o};
5811
my $is_chunkable_table = 1; # table should be chunkable unless...
5813
# If there's a chunk size but no chunk index and unchunkable tables
5814
# aren't allowed (they're not by default), then table may still be
5815
# chunkable if it's small, i.e. total rows in table <= chunk size.
5816
if ( $table->{chunk_size}
5817
&& !$table->{chunk_index}
5818
&& !$final_o->get('unchunkable-tables') )
5820
$is_chunkable_table = is_chunkable_table(
5823
tbl => $table->{table},
5824
chunk_size => $table->{chunk_size},
5825
where => $final_o->{where},
5828
MKDEBUG && _d("Unchunkable table small enough to chunk:",
5829
$is_chunkable_table ? 'yes' : 'no');
5832
if ( !$is_chunkable_table ) {
5834
print "# cannot chunk $table->{database} $table->{table}\n";
5840
# Determine the checksum strategy for every table because it
5841
# might change given various --arg-table opts for each table.
5843
my ( $strat, $crc_type, $func, $opt_slice );
5844
if ( $checksum_table_data && $do_table ) {
5845
$strat_ref = determine_checksum_strat(
5850
( $strat, $crc_wid, $crc_type, $func, $opt_slice )
5851
= @$strat_ref{ qw(strat crc_wid crc_type func opt_slice) };
5852
MKDEBUG && _d("Checksum strat:", Dumper($strat_ref));
5855
# --schema doesn't use a checksum strategy, but do_tbl()
5856
# requires a strat arg.
5857
$strat = '--schema';
5859
$md5sum_fmt = "%-${crc_wid}s %s.%s.%s.%d\n";
5861
# Design and print header unless we are resuming in which case
5862
# we should have already re-printed the partial output of the
5863
# resume file in parse_resume_file(). This only has to be done
5864
# once and done here because we need $crc_wid which is determined
5865
# by the checksum strat above.
5867
if ( $o->get('tab') ) {
5868
$hdr = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n";
5869
$explain = "%s\t%s\t%s\n";
5872
my $max_tbl = max(5, map { length($_->{table}) } @$tbls);
5873
my $max_db = max(8, length($db));
5874
my $max_host = max(4, map { length($_->{h}) } @$hosts);
5875
$hdr = "%-${max_db}s %-${max_tbl}s %5s "
5876
. "%-${max_host}s %-6s %10s %${crc_wid}s %4s %4s %4s %4s\n";
5877
$explain = "%-${max_db}s %-${max_tbl}s %s\n";
5879
my @hdr_args = qw(DATABASE TABLE CHUNK HOST ENGINE
5880
COUNT CHECKSUM TIME WAIT STAT LAG);
5881
unless ( $o->get('quiet')
5882
|| $o->get('explain')
5883
|| $o->get('checksum')
5884
|| $o->get('resume') )
5886
printf($hdr, @hdr_args)
5887
or die "Cannot print: $OS_ERROR";
5891
# Clean out the replication table entry for this table.
5892
# http://code.google.com/p/maatkit/issues/detail?id=304
5893
if ( (my $replicate_table = $final_o->get('replicate'))
5894
&& !$final_o->get('explain') ) {
5895
use_repl_db(%args); # USE the proper replicate db
5896
my $max_chunkno = scalar @{$table->{chunks}} - 1;
5897
my $del_sql = "DELETE FROM $replicate_table "
5898
. "WHERE db=? AND tbl=? AND chunk > ?";
5899
MKDEBUG && _d($dbh, $del_sql, $db, $table->{table},$max_chunkno);
5900
$dbh->do($del_sql, {}, $db, $table->{table}, $max_chunkno);
5903
# If --since is given, figure out either
5904
# 1) for temporal sinces, if the table has an update time and that
5905
# time is newer than --since, then checksum the whole table,
5906
# otherwise skip it; or
5907
# 2) for "numerical" sinces, which column to use: either the
5908
# specified column (--sincecolumn) or the auto-discovered one,
5909
# whichever exists in the table, in that order.
5910
# Then, if --savesince is given, save either 1) the current timestamp
5911
# or 2) the resulting WHERE clause.
5912
if ( $final_o->get('since') ) {
5913
if ( is_temporal($final_o->get('since')) ) {
5914
MKDEBUG && _d('--since is temporal');
5916
= $du->get_table_status($dbh, $q, $db, $table->{table});
5917
my $time = $stat->{update_time};
5918
if ( $time && $time lt $final_o->get('since') ) {
5919
MKDEBUG && _d("Skipping table because --since value",
5920
$final_o->get('since'), "is newer than", $time);
5922
$table->{chunks} = [];
5926
MKDEBUG && _d('--since is numerical');
5927
# For numerical sinces, choose the column to apply --since to.
5928
# It may not be the column the user said to use! If the user
5929
# didn't specify a column that's good to chunk on, we'll use
5930
# something else instead.
5932
# $table->{column} is the first chunkable column returned from
5933
# the call to get_first_chunkable_column() in
5934
# save_tbl_to_checksum().
5936
grep { $_ && $table->{struct}->{is_col}->{$_} }
5937
( $table->{column}, $final_o->get('since-column') );
5940
MKDEBUG && _d('Column for numerical --since:',
5941
$db, '.', $table->{table}, '.', $sincecol);
5942
# This ends up being an additional WHERE clause.
5943
$table->{since} = $q->quote($sincecol)
5944
. '>=' . $q->quote_val($final_o->get('since'));
5947
MKDEBUG && _d('No column for numerical --since for',
5948
$db, '.', $table->{table});
5953
# ##################################################################
5954
# The query is independent of the chunk, so I make it once for every
5956
# ##################################################################
5958
if ( $checksum_table_data && $do_table ) {
5959
$query = $tc->make_checksum_query(
5961
tbl => $table->{table},
5962
tbl_struct => $table->{struct},
5963
algorithm => $strat,
5965
crc_wid => $crc_wid,
5966
crc_type => $crc_type,
5967
opt_slice => $opt_slice,
5968
cols => $final_o->get('columns'),
5969
sep => $final_o->get('separator'),
5970
replicate => $final_o->get('replicate'),
5971
float_precision => $final_o->get('float-precision'),
5972
trim => $final_o->get('trim'),
5973
ignorecols => $final_o->get('ignore-columns'),
5980
$exit_status |= checksum_chunks(
5985
explain => $explain,
5986
final_o => $final_o,
5990
# Save the --since value if
5991
# 1) it's temporal and the tbl had changed since --since; or
5992
# 2) it's "numerical" and it had a chunkable or nibble-able
5993
# column and it wasn't empty
5994
# See issues 121 and 122.
5995
if ( $final_o->get('save-since') && $savesince_sth ) {
5996
if ( is_temporal($final_o->get('since')) ) {
5998
"Saving temporal --since value: current timestamp for",
5999
$db, '.', $table->{table});
6000
$savesince_sth->execute(undef,
6001
$db, $table->{table});
6003
elsif ( defined $table->{maxval} ) {
6004
MKDEBUG && _d("Saving numerical --since value:",
6005
$table->{maxval}, "for", $db, '.', $table->{table});
6006
$savesince_sth->execute($table->{maxval},
6007
$db, $table->{table});
6010
MKDEBUG && _d("Cannot save --since value:",
6011
$table->{maxval}, "for", $db, '.', $table->{table});
6015
if ( $EVAL_ERROR ) {
6016
print_err($o, $EVAL_ERROR, $db, $table->{table});
6021
return $exit_status;
6024
sub checksum_chunks {
6026
foreach my $arg ( qw(dp final_o ms o q db tbl hosts hdr explain) ) {
6027
die "I need a $arg argument" unless $args{$arg};
6031
my $final_o = $args{final_o};
6036
my $dbh = $args{dbh};
6037
my @hosts = @{$args{hosts}};
6038
my $tbl = $args{tbl};
6040
my $retry = new Retry();
6042
# ##################################################################
6043
# This loop may seem suboptimal, because it causes a new child to be
6044
# forked for each table, for each host, for each chunk. It also
6045
# causes the program to parallelize only within the chunk; that is,
6046
# no two child processes are running on different chunks at a time.
6047
# This is by design. It lets me unlock the table on the master
6049
# ##################################################################
6050
my $exit_status = 0;
6051
my $num_chunks = scalar(@{$tbl->{chunks}});
6052
my $throttle_method = $o->get('throttle-method');
6053
MKDEBUG && _d('Checksumming', $num_chunks, 'chunks');
6055
foreach my $chunk_num ( 0 .. $num_chunks - 1 ) {
6057
if ( $final_o->get('chunk-size-limit')
6058
&& $final_o->get('chunk-size')
6059
&& $tbl->{chunk_size}
6060
&& !$final_o->get('explain') )
6062
my $is_oversize_chunk = is_oversize_chunk(
6064
db => $tbl->{database},
6065
tbl => $tbl->{table},
6066
chunk => $tbl->{chunks}->[$chunk_num],
6067
chunk_size => $tbl->{chunk_size},
6068
index_hint => $tbl->{index},
6069
where => [$final_o->get('where'), $tbl->{since}],
6070
limit => $final_o->get('chunk-size-limit'),
6073
if ( $is_oversize_chunk ) {
6075
if ( !$final_o->get('quiet') ) {
6076
if ( $final_o->get('checksum') ) {
6077
printf($md5sum_fmt, 'NULL', '',
6078
@{$tbl}{qw(database table)}, $chunk_num)
6079
or die "Cannot print: $OS_ERROR";
6083
@{$tbl}{qw(database table)}, $chunk_num,
6084
$hosts[0]->{h}, $tbl->{struct}->{engine}, 'OVERSIZE',
6085
'NULL', 'NULL', 'NULL', 'NULL', 'NULL')
6086
or die "Cannot print: $OS_ERROR";
6093
if ( $throttle_method eq 'slavelag' ) {
6095
if ( $o->get('progress') ) {
6097
jobsize => scalar @{$args{slaves}},
6098
spec => $o->get('progress'),
6099
name => "Wait for slave(s) to catch up",
6103
slaves => $args{slaves},
6104
max_lag => $o->get('max-lag'),
6105
check_interval => $o->get('check-interval'),
6112
if ( ($num_chunks > 1 || $final_o->get('single-chunk'))
6113
&& $checksum_table_data
6114
&& defined $final_o->get('probability')
6115
&& rand(100) >= $final_o->get('probability') ) {
6116
MKDEBUG && _d('Skipping chunk because of --probability');
6120
if ( $num_chunks > 1
6121
&& $checksum_table_data
6122
&& $final_o->get('modulo')
6123
&& ($chunk_num % $final_o->get('modulo') != $final_o->get('offset')))
6125
MKDEBUG && _d('Skipping chunk', $chunk_num, 'because of --modulo');
6129
my $chunk_start_time = gettimeofday();
6130
MKDEBUG && _d('Starting chunk', $chunk_num, 'at', $chunk_start_time);
6132
if ( $final_o->get('replicate') ) {
6133
# We're in --replicate mode.
6135
# If resuming, check if this db.tbl.chunk.host can be skipped.
6136
if ( $o->get('resume-replicate') ) {
6137
if ( already_checksummed($tbl->{database},
6141
print "# already checksummed:"
6142
. " $tbl->{database}"
6147
unless $o->get('quiet');
6152
$hosts[0]->{dbh} ||= $dbh;
6162
# We're in "normal" mode. Lock table and get position on the master.
6164
if ( !$final_o->get('explain') ) {
6165
if ( $final_o->get('lock') ) {
6166
my $sql = "LOCK TABLES "
6167
. $q->quote($db, $tbl->{table}) . " READ";
6168
MKDEBUG && _d($sql);
6171
if ( $final_o->get('wait') ) {
6172
$tbl->{master_status} = $ms->get_master_status($dbh);
6178
foreach my $i ( 0 .. $#hosts ) {
6179
my $is_master = $i == 0; # First host is assumed to be master.
6180
my $host = $hosts[$i];
6182
# Open a single connection for each host. Re-use the
6183
# connection for the master/single host.
6185
$dbh->{InactiveDestroy} = 1; # Ensure that this is set.
6186
$host->{dbh} ||= $dbh;
6189
$host->{dbh} ||= get_cxn($host, %args);
6192
# If resuming, check if this db.tbl.chunk.host can be skipped.
6193
if ( $final_o->get('resume') ) {
6194
next HOST if already_checksummed($tbl->{database},
6200
# Fork, but only if there's more than one host.
6201
my $pid = @hosts > 1 ? fork() : undef;
6203
if ( @hosts == 1 || (defined($pid) && $pid == 0) ) {
6204
# Do the work (I'm a child, or there's only one host)
6211
dbh => $host->{dbh},
6215
if ( $EVAL_ERROR ) {
6216
print_err($o, $EVAL_ERROR, $db, $tbl->{table},
6217
$dp->as_string($host));
6218
exit(1) if @hosts > 1; # exit only if I'm a child
6221
exit(0) if @hosts > 1; # exit only if I'm a child
6223
elsif ( @hosts > 1 && !defined($pid) ) {
6224
die("Unable to fork!");
6227
# I already exited if I'm a child, so I'm the parent.
6228
$children{$host->{h}} = $pid if @hosts > 1;
6231
# Wait for the children to exit.
6232
foreach my $host ( keys %children ) {
6233
my $pid = waitpid($children{$host}, 0);
6234
MKDEBUG && _d("Child", $pid, "exited with", $CHILD_ERROR);
6235
$exit_status ||= $CHILD_ERROR >> 8;
6237
if ( ($final_o->get('lock') && !$final_o->get('explain')) ) {
6238
my $sql = "UNLOCK TABLES";
6239
MKDEBUG && _d($dbh, $sql);
6244
my $chunk_stop_time = gettimeofday();
6245
MKDEBUG && _d('Finished chunk at', $chunk_stop_time);
6247
# --sleep between chunks. Don't sleep if this is the last/only chunk.
6248
if ( $chunk_num < $num_chunks - 1 ) {
6249
if ( $final_o->get('sleep') && !$final_o->get('explain') ) {
6250
MKDEBUG && _d('Sleeping', $final_o->get('sleep'));
6251
sleep($final_o->get('sleep'));
6253
elsif ( $final_o->get('sleep-coef') && !$final_o->get('explain') ) {
6255
= ($chunk_stop_time - $chunk_start_time)
6256
* $final_o->get('sleep-coef');
6257
MKDEBUG && _d('Sleeping', $sleep_time);
6258
if ( $sleep_time < 0 ) {
6259
warn "Calculated invalid sleep time: "
6260
. "$sleep_time = ($chunk_stop_time - $chunk_start_time) * "
6261
. $final_o->get('sleep-coef')
6262
. ". Sleep time set to 1 second instead.";
6268
} # End foreach CHUNK
6270
return $exit_status;
6273
# Override the command-line arguments with those from --arg-table
6274
# if necessary. Returns a cloned OptionParser object ($final_o).
6275
# This clone is only a partial OptionParser object.
6276
sub get_final_opts {
6278
foreach my $arg ( qw(o dbh db tbl args_for) ) {
6279
die "I need a $arg argument" unless $args{$arg};
6282
my $dbh = $args{dbh};
6284
my $tbl = $args{tbl};
6285
my $args_for = $args{args_for};
6287
my $final_o = $o->clone();
6288
if ( my $override = $args_for->{$db}->{$tbl} ) {
6289
map { $final_o->set($_, $override->{$_}); } keys %$override;
6292
# --since and --offset are potentially expressions that should be
6293
# evaluated by the DB server. This has to be done after the override
6294
# from the --arg-table table.
6295
foreach my $opt ( qw(since offset) ) {
6296
# Don't get MySQL to evaluate if it's temporal, as 2008-08-01 --> 1999
6297
my $val = $final_o->get($opt);
6298
if ( $val && !is_temporal($val) ) {
6299
$final_o->set($opt, eval_expr($opt, $val, $dbh));
6308
return $val && $val =~ m/^\d{4}-\d{2}-\d{2}(?:.[0-9:]+)?/;
6311
sub print_inconsistent_tbls {
6313
foreach my $arg ( qw(o dp dsn tbls) ) {
6314
die "I need a $arg argument" unless $args{$arg};
6318
my $dsn = $args{dsn};
6319
my $tbls = $args{tbls};
6321
return if $o->get('quiet');
6323
my @headers = qw(db tbl chunk cnt_diff crc_diff boundaries);
6324
print "Differences on " . $dp->as_string($dsn, [qw(h P F)]) . "\n";
6325
my $max_db = max(5, map { length($_->{db}) } @$tbls);
6326
my $max_tbl = max(5, map { length($_->{tbl}) } @$tbls);
6327
my $fmt = "%-${max_db}s %-${max_tbl}s %5s %8s %8s %s\n";
6328
printf($fmt, map { uc } @headers) or die "Cannot print: $OS_ERROR";
6329
foreach my $tbl ( @$tbls ) {
6330
printf($fmt, @{$tbl}{@headers}) or die "Cannot print: $OS_ERROR";
6332
print "\n" or die "Cannot print: $OS_ERROR";
6337
sub save_inconsistent_tbls {
6339
foreach my $arg ( qw(dbh tbls) ) {
6340
die "I need a $arg argument" unless $args{$arg};
6342
my $dbh = $args{dbh};
6343
my $tbls = $args{tbls};
6345
foreach my $tbl ( @$tbls ) {
6346
MKDEBUG && _d("Will recheck", $tbl->{db}, '.', $tbl->{tbl},
6347
"(chunk:", $tbl->{boundaries}, ')');
6348
my $final_o = get_final_opts(
6353
my $chunks = [ $tbl->{boundaries} ];
6354
save_tbl_to_checksum(
6358
final_o => $final_o,
6364
# The value may be an expression like 'NOW() - INTERVAL 7 DAY'
6365
# and we should evaluate it.
6367
my ( $name, $val, $dbh ) = @_;
6370
($result) = $dbh->selectrow_array("SELECT $val");
6371
MKDEBUG && _d("option", $name, "evaluates to:", $result);
6373
if ( $EVAL_ERROR && MKDEBUG ) {
6375
_d("Error evaluating option", $name, $EVAL_ERROR);
6380
sub determine_checksum_strat {
6382
foreach my $arg ( qw(o dbh tc) ) {
6383
die "I need a $arg argument" unless $args{$arg};
6386
my $dbh = $args{dbh};
6389
my $ret = { # return vals in easy-to-swallow hash form
6391
crc_type => 'varchar',
6397
$ret->{strat} = $tc->best_algorithm(
6398
algorithm => $o->get('algorithm'),
6400
where => $o->get('where') || $o->get('since'),
6401
chunk => $o->get('chunk-size'),
6402
replicate => $o->get('replicate'),
6403
count => $o->get('count'),
6406
if ( $o->get('algorithm') && $o->get('algorithm') ne $ret->{strat} ) {
6407
warn "--algorithm=".$o->get('algorithm')." can't be used; "
6408
. "falling back to $ret->{strat}\n";
6411
# If using a cryptographic hash strategy, decide what hash function to use,
6412
# and if using BIT_XOR whether and which slice to place the user variable in.
6413
if ( $tc->is_hash_algorithm( $ret->{strat} ) ) {
6414
$ret->{func} = $tc->choose_hash_func(
6415
function => $o->get('function'),
6418
if ( $o->get('function') && $o->get('function') ne $ret->{func} ) {
6419
warn "Checksum function ".$o->get('function')." cannot be used; "
6420
. "using $ret->{func}\n";
6422
$ret->{crc_wid} = $tc->get_crc_wid($dbh, $ret->{func});
6423
($ret->{crc_type}) = $tc->get_crc_type($dbh, $ret->{func});
6425
if ( $o->get('optimize-xor') && $ret->{strat} eq 'BIT_XOR' ) {
6426
if ( $ret->{crc_type} !~ m/int$/ ) {
6428
= $tc->optimize_xor(dbh => $dbh, function => $ret->{func});
6429
if ( !defined $ret->{opt_slice} ) {
6430
warn "Cannot use --optimize-xor, disabling";
6431
$o->set('optimize-xor', 0);
6435
# FNV_64 doesn't need the optimize_xor gizmo.
6436
$o->get('optimize-xor', 0);
6444
sub verify_checksum_compat {
6446
foreach my $arg ( qw(o hosts) ) {
6447
die "I need a $arg argument" unless $args{$arg};
6450
my $hosts = $args{hosts};
6453
foreach my $host ( @$hosts ) {
6454
my $dbh = get_cxn($host, %args);
6455
my $sql = "SELECT MD5(CONCAT_WS(',', '1', ''))";
6456
MKDEBUG && _d($dbh, $sql);
6457
my $cks = $dbh->selectall_arrayref($sql)->[0]->[0];
6458
push @verify_sums, {
6460
ver => $dbh->{mysql_serverinfo},
6464
if ( unique(map { $_->{sum} } @verify_sums ) > 1 ) {
6465
my $max = max(map { length($_->{h}) } @$hosts);
6466
die "Not all servers have compatible versions. Some return different\n"
6467
. "checksum values for the same query, and cannot be compared. This\n"
6468
. "behavior changed in MySQL 4.0.14. Here is info on each host:\n\n"
6470
map { sprintf("%-${max}s %-32s %s", @{$_}{qw(host sum ver)}) }
6471
{ host => 'HOST', sum => 'CHECKSUM', ver => 'VERSION'},
6474
. "\n\nYou can disable this check with --no-verify.\n";
6479
# Check for existence and privileges on the replication table before
6480
# starting, and prepare the statements that will be used to update it.
6481
# Also clean out the checksum table. And create it if needed.
6482
# Returns fetch and update statement handles.
6483
sub check_repl_table {
6485
foreach my $arg ( qw(o dbh tp q) ) {
6486
die "I need a $arg argument" unless $args{$arg};
6489
my $dbh = $args{dbh};
6493
my $replicate_table = $o->get('replicate');
6494
return unless $replicate_table;
6496
use_repl_db(%args); # USE the proper replicate db
6498
my ($db, $tbl) = $q->split_unquote($replicate_table);
6499
my $tbl_exists = $tp->check_table(
6504
if ( !$tbl_exists ) {
6505
if ( $o->get('create-replicate-table') ) {
6506
create_repl_table(%args)
6507
or die "--create-replicate-table failed to create "
6511
die "--replicate table $replicate_table does not exist; "
6512
. "read the documentation or use --create-replicate-table "
6517
MKDEBUG && _d('--replicate table', $replicate_table, 'already exists');
6518
# Check it again but this time check the privs.
6519
my $have_tbl_privs = $tp->check_table(
6525
die "User does not have all necessary privileges on $replicate_table"
6526
unless $have_tbl_privs;
6529
# Clean out the replicate table globally.
6530
if ( $o->get('empty-replicate-table') ) {
6531
my $del_sql = "DELETE FROM $replicate_table";
6532
MKDEBUG && _d($dbh, $del_sql);
6536
my $fetch_sth = $dbh->prepare(
6537
"SELECT this_crc, this_cnt FROM $replicate_table "
6538
. "WHERE db = ? AND tbl = ? AND chunk = ?");
6539
my $update_sth = $dbh->prepare(
6540
"UPDATE $replicate_table SET master_crc = ?, master_cnt = ? "
6541
. "WHERE db = ? AND tbl = ? AND chunk = ?");
6543
return ($fetch_sth, $update_sth);
6546
# This sub should be called before any work is done with the
6547
# --replicate table. It will USE the correct replicate db.
6548
# If there's a tbl arg then its db will be used unless --replicate-database
6549
# was specified. A tbl arg means we're checksumming that table,
6550
# so we've been called from do_tbl_replicate(). Other callers
6551
# won't pass a tbl arg because they're just doing something to
6552
# the --replicate table.
6553
# See http://code.google.com/p/maatkit/issues/detail?id=982
6556
my @required_args = qw(dbh o q);
6557
foreach my $arg ( @required_args ) {
6558
die "I need a $arg argument" unless $args{$arg};
6560
my ($dbh, $o, $q) = @args{@required_args};
6562
my $replicate_table = $o->get('replicate');
6563
return unless $replicate_table;
6565
# db and tbl from --replicate
6566
my ($db, $tbl) = $q->split_unquote($replicate_table);
6568
if ( my $tbl = $args{tbl} ) {
6569
# Caller is checksumming this table, USE its db unless
6570
# --replicate-database is in effect.
6571
$db = $o->get('replicate-database') ? $o->get('replicate-database')
6575
# Caller is doing something just to the --replicate table.
6576
# Use the db from --replicate db.tbl (gotten earlier) unless
6577
# --replicate-database is in effect.
6578
$db = $o->get('replicate-database') if $o->get('replicate-database');
6582
my $sql = "USE " . $q->quote($db);
6583
MKDEBUG && _d($dbh, $sql);
6586
if ( $EVAL_ERROR ) {
6587
# Report which option db really came from.
6588
my $opt = $o->get('replicate-database') ? "--replicate-database"
6589
: "--replicate database";
6590
if ( $EVAL_ERROR =~ m/unknown database/i ) {
6591
die "$opt `$db` does not exist: $EVAL_ERROR";
6594
die "Error using $opt `$db`: $EVAL_ERROR";
6601
# Returns 1 on successful creation of the replicate table,
6603
sub create_repl_table {
6605
foreach my $arg ( qw(o dbh) ) {
6606
die "I need a $arg argument" unless $args{$arg};
6609
my $dbh = $args{dbh};
6611
my $replicate_table = $o->get('replicate');
6613
my $sql = $o->read_para_after(
6614
__FILE__, qr/MAGIC_create_replicate/);
6615
$sql =~ s/CREATE TABLE checksum/CREATE TABLE $replicate_table/;
6617
MKDEBUG && _d($dbh, $sql);
6621
if ( $EVAL_ERROR ) {
6622
MKDEBUG && _d('--create-replicate-table failed:', $EVAL_ERROR);
6629
sub read_repl_table {
6631
foreach my $arg ( qw(o dbh host) ) {
6632
die "I need a $arg argument" unless $args{$arg};
6635
my $dbh = $args{dbh};
6636
my $host = $args{host};
6638
my $replicate_table = $o->get('replicate');
6639
die "Cannot read replicate table because --replicate was not specified"
6640
unless $replicate_table;
6642
# Read checksums from replicate table.
6643
my $already_checksummed;
6645
= $dbh->selectall_arrayref("SELECT db, tbl, chunk FROM $replicate_table");
6647
# Save each finished checksum.
6648
foreach my $checksum ( @$checksums ) {
6649
my ( $db, $tbl, $chunk ) = @$checksum[0..2];
6650
$already_checksummed->{$db}->{$tbl}->{$chunk}->{$host} = 1;
6653
return $already_checksummed;
6656
sub parse_resume_file {
6657
my ( $resume_file ) = @_;
6659
open my $resume_fh, '<', $resume_file
6660
or die "Cannot open resume file $resume_file: $OS_ERROR";
6662
# The resume file, being the output from a previous run, should
6663
# have the columns DATABASE TABLE CHUNK HOST ... (in that order).
6664
# We only need those first 4 columns. We re-print every line of
6665
# the resume file so the end result will be the whole, finished
6666
# output: what the previous run got done plus what we are about
6667
# to resume and finish.
6668
my $already_checksummed;
6669
while ( my $line = <$resume_fh> ) {
6670
# Re-print every line.
6673
# If the line is a checksum line, parse from it the db, tbl,
6674
# checksum and host.
6675
if ( $line =~ m/^\S+\s+\S+\s+\d+\s+/ ) {
6676
my ( $db, $tbl, $chunk, $host ) = $line =~ m/(\S+)/g;
6677
$already_checksummed->{$db}->{$tbl}->{$chunk}->{$host} = 1;
6682
MKDEBUG && _d("Already checksummed:", Dumper($already_checksummed));
6684
return $already_checksummed;
6687
sub already_checksummed {
6688
my ( $d, $t, $c, $h ) = @_; # db, tbl, chunk num, host
6689
if ( exists $already_checksummed->{$d}->{$t}->{$c}->{$h} ) {
6690
MKDEBUG && _d("Skipping chunk because of --resume:", $d, $t, $c, $h);
6696
sub do_tbl_replicate {
6697
my ( $chunk_num, %args ) = @_;
6698
foreach my $arg ( qw(q host query tbl hdr explain final_o ch retry) ) {
6699
die "I need a $arg argument" unless $args{$arg};
6702
my $final_o = $args{final_o};
6704
my $host = $args{host};
6705
my $hdr = $args{hdr};
6706
my $explain = $args{explain};
6707
my $tbl = $args{tbl};
6708
my $retry = $args{retry};
6710
MKDEBUG && _d('Replicating chunk', $chunk_num,
6711
'of table', $tbl->{database}, '.', $tbl->{table},
6712
'on', $host->{h}, ':', $host->{P});
6714
my $dbh = $host->{dbh};
6717
use_repl_db(%args); # USE the proper replicate db
6722
$sql = $ch->inject_chunks(
6723
query => $args{query},
6724
database => $tbl->{database},
6725
table => $tbl->{table},
6726
chunks => $tbl->{chunks},
6727
chunk_num => $chunk_num,
6728
where => [$final_o->get('where'), $tbl->{since}],
6729
index_hint => $tbl->{index},
6732
if ( MKDEBUG && $chunk_num == 0 ) {
6733
_d("SQL for inject chunk 0:", $sql);
6736
my $where = $tbl->{chunks}->[$chunk_num];
6737
if ( $final_o->get('explain') ) {
6738
if ( $chunk_num == 0 ) {
6739
printf($explain, @{$tbl}{qw(database table)}, $sql)
6740
or die "Cannot print: $OS_ERROR";
6742
printf($explain, @{$tbl}{qw(database table)}, $where)
6743
or die "Cannot print: $OS_ERROR";
6747
# Actually run the checksum query
6750
wait => sub { return; },
6753
$dbh->do('SET @crc := "", @cnt := 0 /*!50108 , '
6754
. '@@binlog_format := "STATEMENT"*/');
6755
$dbh->do($sql, {}, @{$tbl}{qw(database table)}, $where);
6759
die $EVAL_ERROR; # caught in checksum_tables()
6763
# Catch any warnings thrown....
6764
my $sql_warn = 'SHOW WARNINGS';
6765
MKDEBUG && _d($sql_warn);
6766
my $warnings = $dbh->selectall_arrayref($sql_warn, { Slice => {} } );
6767
foreach my $warning ( @$warnings ) {
6768
if ( $warning->{message} =~ m/Data truncated for column 'boundaries'/ ) {
6769
_d("Warning: WHERE clause too large for boundaries column; ",
6770
"mk-table-sync may fail; value:", $where);
6772
elsif ( ($warning->{code} || 0) == 1592 ) {
6773
# Error: 1592 SQLSTATE: HY000 (ER_BINLOG_UNSAFE_STATEMENT)
6774
# Message: Statement may not be safe to log in statement format.
6775
# Ignore this warning because we have purposely set statement-based
6777
MKDEBUG && _d('Ignoring warning:', $warning->{message});
6780
# die doesn't permit extra line breaks so warn then die.
6781
warn "\nChecksum query caused a warning:\n"
6783
map { "\t$_: " . $warning->{$_} || '' } qw(level code message)
6785
. "\n\tquery: $sql\n\n";
6790
# Update the master_crc etc columns
6791
$fetch_sth->execute(@{$tbl}{qw(database table)}, $chunk_num);
6792
( $crc, $cnt ) = $fetch_sth->fetchrow_array();
6793
$update_sth->execute($crc, $cnt, @{$tbl}{qw(database table)}, $chunk_num);
6797
if ( !$final_o->get('quiet') && !$final_o->get('explain') ) {
6798
if ( $final_o->get('checksum') ) {
6799
printf($md5sum_fmt, $crc, $host->{h},
6800
@{$tbl}{qw(database table)}, $chunk_num)
6801
or die "Cannot print: $OS_ERROR";
6805
@{$tbl}{qw(database table)}, $chunk_num,
6806
$host->{h}, $tbl->{struct}->{engine}, $cnt, $crc,
6807
$end - $beg, 'NULL', 'NULL', 'NULL')
6808
or die "Cannot print: $OS_ERROR";
6816
my ( $chunk_num, $is_master, %args ) = @_;
6817
foreach my $arg ( qw(du final_o ms q tc dbh host tbl hdr explain strat) ) {
6818
die "I need a $arg argument" unless $args{$arg};
6821
my $final_o = $args{final_o};
6826
my $host = $args{host};
6827
my $tbl = $args{tbl};
6828
my $explain = $args{explain};
6829
my $hdr = $args{hdr};
6830
my $strat = $args{strat};
6832
MKDEBUG && _d('Checksumming chunk', $chunk_num,
6833
'of table', $tbl->{database}, '.', $tbl->{table},
6834
'on', $host->{h}, ':', $host->{P},
6835
'using algorithm', $strat);
6837
my $dbh = $host->{dbh};
6838
$dbh->do("USE " . $q->quote($tbl->{database}));
6845
# Begin timing the checksum operation.
6848
# I'm a slave. Wait to catch up to the master. Calculate slave lag.
6849
if ( !$is_master && !$final_o->get('explain') ) {
6850
if ( $final_o->get('wait') ) {
6851
MKDEBUG && _d('Waiting to catch up to master for --wait');
6852
my $result = $ms->wait_for_master(
6853
master_status => $tbl->{master_status},
6855
timeout => $final_o->get('wait'),
6857
$sta = $result && defined $result->{result}
6862
if ( $final_o->get('slave-lag') ) {
6863
MKDEBUG && _d('Getting slave lag for --slave-lag');
6864
my $res = $ms->get_slave_status($dbh);
6865
$lag = $res && defined $res->{seconds_behind_master}
6866
? $res->{seconds_behind_master}
6871
# Time the checksum operation and the wait-for-master operation separately.
6874
# Check that table exists on slave.
6876
if ( !$is_master || !$checksum_table_data ) {
6877
$have_table = $tp->check_table(
6879
db => $tbl->{database},
6880
tbl => $tbl->{table},
6882
warn "$tbl->{database}.$tbl->{table} does not exist on slave"
6883
. ($host->{h} ? " $host->{h}" : '')
6884
. ($host->{P} ? ":$host->{P}" : '')
6888
if ( $have_table ) {
6889
# Do the checksum operation.
6890
if ( $checksum_table_data ) {
6891
if ( $strat eq 'CHECKSUM' ) {
6892
if ( $final_o->get('crc') ) {
6893
$crc = do_checksum(%args);
6895
if ( $final_o->get('count') ) {
6896
$cnt = do_count($chunk_num, %args);
6899
elsif ( $final_o->get('crc') ) {
6900
( $cnt, $crc ) = do_var_crc($chunk_num, %args);
6904
$cnt = do_count($chunk_num, %args);
6907
else { # Checksum SHOW CREATE TABLE for --schema.
6909
= $du->get_create_table($dbh, $q, $tbl->{database}, $tbl->{table});
6910
$create = $create->[1];
6911
$create = $tp->remove_auto_increment($create);
6912
$crc = $tc->crc32($create);
6918
if ( !$final_o->get('quiet') && !$final_o->get('explain') ) {
6919
if ( $final_o->get('checksum') ) {
6920
printf($md5sum_fmt, $crc, $host->{h},
6921
@{$tbl}{qw(database table)}, $chunk_num)
6922
or die "Cannot print: $OS_ERROR";
6926
@{$tbl}{qw(database table)}, $chunk_num,
6927
$host->{h}, $tbl->{struct}->{engine}, $cnt, $crc,
6928
$end - $mid, $mid - $beg, $sta, $lag)
6929
or die "Cannot print: $OS_ERROR";
6937
my ( $dsn, %args ) = @_;
6938
foreach my $arg ( qw(o dp) ) {
6939
die "I need a $arg argument" unless $args{$arg};
6944
if ( $o->get('ask-pass') && !defined $dsn->{p} ) {
6945
$dsn->{p} = OptionParser::prompt_noecho("Enter password for $dsn->{h}: ");
6948
my $ac = $o->get('lock') ? 0 : 1;
6949
my $dbh = $dp->get_dbh(
6950
$dp->get_cxn_params($dsn), { AutoCommit => $ac });
6951
$dp->fill_in_dsn($dbh, $dsn);
6952
$dbh->{InactiveDestroy} = 1; # Prevent destroying on fork.
6953
$dbh->{FetchHashKeyName} = 'NAME_lc';
6958
my ( $chunk_num, %args ) = @_;
6959
foreach my $arg ( qw(ch dbh query tbl explain final_o) ) {
6960
die "I need a $arg argument" unless $args{$arg};
6962
my $final_o = $args{final_o};
6964
my $tbl = $args{tbl};
6965
my $explain = $args{explain};
6966
my $dbh = $args{dbh};
6968
MKDEBUG && _d("do_var_crc for", $tbl->{table});
6970
my $sql = $ch->inject_chunks(
6971
query => $args{query},
6972
database => $tbl->{database},
6973
table => $tbl->{table},
6974
chunks => $tbl->{chunks},
6975
chunk_num => $chunk_num,
6976
where => [$final_o->get('where'), $tbl->{since}],
6977
index_hint => $tbl->{index},
6980
if ( MKDEBUG && $chunk_num == 0 ) {
6981
_d("SQL for chunk 0:", $sql);
6984
if ( $final_o->get('explain') ) {
6985
if ( $chunk_num == 0 ) {
6986
printf($explain, @{$tbl}{qw(database table)}, $sql)
6987
or die "Cannot print: $OS_ERROR";
6989
printf($explain, @{$tbl}{qw(database table)},$tbl->{chunks}->[$chunk_num])
6990
or die "Cannot print: $OS_ERROR";
6994
$dbh->do('set @crc := "", @cnt := 0');
6995
my $res = $dbh->selectall_arrayref($sql, { Slice => {} })->[0];
6996
return ($res->{cnt}, $res->{crc});
7001
foreach my $arg ( qw(dbh query tbl explain final_o) ) {
7002
die "I need a $arg argument" unless $args{$arg};
7004
my $dbh = $args{dbh};
7005
my $final_o = $args{final_o};
7006
my $tbl = $args{tbl};
7007
my $query = $args{query};
7008
my $explain = $args{explain};
7010
MKDEBUG && _d("do_checksum for", $tbl->{table});
7012
if ( $final_o->get('explain') ) {
7013
printf($explain, @{$tbl}{qw(database table)}, $query)
7014
or die "Cannot print: $OS_ERROR";
7017
my $res = $dbh->selectrow_hashref($query);
7019
my ($key) = grep { m/checksum/i } keys %$res;
7020
return defined $res->{$key} ? $res->{$key} : 'NULL';
7028
my ( $chunk_num, %args ) = @_;
7029
foreach my $arg ( qw(q dbh tbl explain final_o) ) {
7030
die "I need a $arg argument" unless $args{$arg};
7032
my $final_o = $args{final_o};
7033
my $tbl = $args{tbl};
7034
my $explain = $args{explain};
7035
my $dbh = $args{dbh};
7038
MKDEBUG && _d("do_count for", $tbl->{table});
7040
my $sql = "SELECT COUNT(*) FROM "
7041
. $q->quote(@{$tbl}{qw(database table)});
7042
if ( $final_o->get('where') || $final_o->get('since') ) {
7043
my $where_since = ($final_o->get('where'), $final_o->get('since'));
7045
. join(" AND ", map { "($_)" } grep { $_ } @$where_since )
7048
if ( $final_o->get('explain') ) {
7049
printf($explain, @{$tbl}{qw(database table)}, $sql)
7050
or die "Cannot print: $OS_ERROR";
7053
return $dbh->selectall_arrayref($sql)->[0]->[0];
7061
grep { !$seen{$_}++ } @_;
7064
# Tries to extract the MySQL error message and print it
7066
my ( $o, $msg, $db, $tbl, $host ) = @_;
7067
return if !defined $msg
7068
# Honor --quiet in the (common?) event of dropped tables or deadlocks
7069
or ($o->get('quiet')
7070
&& $EVAL_ERROR =~ m/: Table .*? doesn't exist|Deadlock found/);
7071
$msg =~ s/^.*?failed: (.*?) at \S+ line (\d+).*$/$1 at line $2/s;
7073
if ( $db && $tbl ) {
7074
$msg .= " while doing $db.$tbl";
7077
$msg .= " on $host";
7079
print STDERR $msg, "\n";
7082
# Returns when Seconds_Behind_Master on all the given slaves
7083
# is < max_lag, waits check_interval seconds between checks
7084
# if a slave is lagging too much.
7085
sub wait_for_slaves {
7087
my $slaves = $args{slaves};
7088
my $max_lag = $args{max_lag};
7089
my $check_interval = $args{check_interval};
7090
my $dp = $args{DSNParser};
7091
my $ms = $args{MasterSlave};
7092
my $pr = $args{progress};
7094
return unless scalar @$slaves;
7095
my $n_slaves = @$slaves;
7099
# If you use the default Progress report callback, you'll need to
7100
# to add Transformers.pm to this tool.
7102
$pr_callback = sub {
7103
my ($fraction, $elapsed, $remaining, $eta, $slave_no) = @_;
7105
print STDERR "Waiting for slave(s) to catchup...\n";
7109
print STDERR "Still waiting ($elapsed seconds)...\n";
7113
$pr->set_callback($pr_callback);
7116
for my $slave_no ( 0..($n_slaves-1) ) {
7117
my $slave = $slaves->[$slave_no];
7118
MKDEBUG && _d('Checking slave', $dp->as_string($slave->{dsn}),
7119
'lag for throttle');
7120
my $lag = $ms->get_slave_lag($slave->{dbh});
7121
while ( !defined $lag || $lag > $max_lag ) {
7122
MKDEBUG && _d('Slave lag', $lag, '>', $max_lag,
7123
'; sleeping', $check_interval);
7125
# Report what we're waiting for before we wait.
7126
$pr->update(sub { return $slave_no; }) if $pr;
7128
sleep $check_interval;
7129
$lag = $ms->get_slave_lag($slave->{dbh});
7131
MKDEBUG && _d('Slave ready, lag', $lag, '<=', $max_lag);
7137
# Sub: is_oversize_chunk
7138
# Determine if the chunk is oversize.
7143
# Required Arguments:
7145
# * db - db name, not quoted
7146
# * tbl - tbl name, not quoted
7147
# * chunk_size - chunk size in number of rows
7148
# * chunk - chunk, e.g. "`a` > 10"
7149
# * limit - oversize if rows > factor * chunk_size
7150
# * Quoter - <Quoter> object
7152
# Optional Arguments:
7153
# * where - Arrayref of WHERE clauses added to chunk
7154
# * index_hint - FORCE INDEX clause
7157
# True if EXPLAIN rows is >= chunk_size * limit, else false
7158
sub is_oversize_chunk {
7160
my @required_args = qw(dbh db tbl chunk_size chunk limit Quoter);
7161
foreach my $arg ( @required_args ) {
7162
die "I need a $arg argument" unless $args{$arg};
7165
my $where = [$args{chunk}, $args{where} ? @{$args{where}} : ()];
7168
$expl = _explain(%args, where => $where);
7170
if ( $EVAL_ERROR ) {
7171
# This shouldn't happen in production but happens in testing because
7172
# we chunk tables that don't actually exist.
7173
MKDEBUG && _d("Failed to EXPLAIN chunk:", $EVAL_ERROR);
7174
return $args{chunk};
7176
MKDEBUG && _d("Chunk", $args{chunk}, "covers", ($expl->{rows} || 0), "rows");
7178
return ($expl->{rows} || 0) >= $args{chunk_size} * $args{limit} ? 1 : 0;
7181
# Sub: is_chunkable_table
7182
# Determine if the table is chunkable.
7187
# Required Arguments:
7189
# * db - db name, not quoted
7190
# * tbl - tbl name, not quoted
7191
# * chunk_size - chunk size in number of rows
7192
# * Quoter - <Quoter> object
7194
# Optional Arguments:
7195
# * where - Arrayref of WHERE clauses added to chunk
7196
# * index_hint - FORCE INDEX clause
7199
# True if EXPLAIN rows is <= chunk_size, else false
7200
sub is_chunkable_table {
7202
my @required_args = qw(dbh db tbl chunk_size Quoter);
7203
foreach my $arg ( @required_args ) {
7204
die "I need a $arg argument" unless $args{$arg};
7209
$expl = _explain(%args);
7211
if ( $EVAL_ERROR ) {
7212
# This shouldn't happen in production but happens in testing because
7213
# we chunk tables that don't actually exist.
7214
MKDEBUG && _d("Failed to EXPLAIN table:", $EVAL_ERROR);
7215
return; # errr on the side of caution: not chunkable if not explainable
7217
MKDEBUG && _d("Table has", ($expl->{rows} || 0), "rows");
7219
return ($expl->{rows} || 0) <= $args{chunk_size} ? 1 : 0;
7223
# EXPLAIN a chunk or table.
7228
# Required Arguments:
7230
# * db - db name, not quoted
7231
# * tbl - tbl name, not quoted
7232
# * Quoter - <Quoter> object
7234
# Optional Arguments:
7235
# * where - Arrayref of WHERE clauses added to chunk
7236
# * index_hint - FORCE INDEX clause
7239
# Hashref of first EXPLAIN row
7242
my @required_args = qw(dbh db tbl Quoter);
7243
foreach my $arg ( @required_args ) {
7244
die "I need a $arg argument" unless $args{$arg};
7246
my ($dbh, $db, $tbl, $q) = @args{@required_args};
7248
my $db_tbl = $q->quote($db, $tbl);
7250
if ( $args{where} && @{$args{where}} ) {
7251
$where = join(" AND ", map { "($_)" } grep { defined } @{$args{where}});
7253
my $sql = "EXPLAIN SELECT * FROM $db_tbl"
7254
. ($args{index_hint} ? " $args{index_hint}" : "")
7255
. ($args{where} ? " WHERE $where" : "");
7256
MKDEBUG && _d($dbh, $sql);
7258
my $expl = $dbh->selectrow_hashref($sql);
7263
my ($package, undef, $line) = caller 0;
7264
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
7265
map { defined $_ ? $_ : 'undef' }
7267
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
7270
# ############################################################################
7272
# ############################################################################
7273
if ( !caller ) { exit main(@ARGV); }
7275
1; # Because this is a module as well as a script.
7277
# ############################################################################
7279
# ############################################################################
7284
mk-table-checksum - Perform an online replication consistency check, or
7285
checksum MySQL tables efficiently on one or many servers.
7289
Usage: mk-table-checksum [OPTION...] DSN [DSN...]
7291
mk-table-checksum checksums MySQL tables efficiently on one or more hosts.
7292
Each host is specified as a DSN and missing values are inherited from the
7293
first host. If you specify multiple hosts, the first is assumed to be the
7296
STOP! Are you checksumming a slave(s) against its master? Then be sure to learn
7297
what L<"--replicate"> does. It is probably the option you want to use.
7299
mk-table-checksum --replicate=mydb.checksum master-host
7300
... time passses, replication catches up ...
7301
mk-table-checksum --replicate=mydb.checksum --replicate-check 2 \
7306
mk-table-checksum h=host1,u=user,p=password h=host2 ...
7310
mk-table-checksum host1 host2 ... hostN | mk-checksum-filter
7312
See L<"SPECIFYING HOSTS"> for more on the syntax of the host arguments.
7316
The following section is included to inform users about the potential risks,
7317
whether known or unknown, of using this tool. The two main categories of risks
7318
are those created by the nature of the tool (e.g. read-only tools vs. read-write
7319
tools) and those created by bugs.
7321
mk-table-checksum executes queries that cause the MySQL server to checksum its
7322
data. This can cause significant server load. It is read-only unless you use
7323
the L<"--replicate"> option, in which case it inserts a small amount of data
7324
into the specified table.
7326
At the time of this release, we know of no bugs that could cause serious harm to
7327
users. There are miscellaneous bugs that might be annoying.
7329
The authoritative source for updated information is always the online issue
7330
tracking system. Issues that affect this tool will be marked as such. You can
7331
see a list of such issues at the following URL:
7332
L<http://www.maatkit.org/bugs/mk-table-checksum>.
7334
See also L<"BUGS"> for more information on filing bugs and getting help.
7338
mk-table-checksum generates table checksums for MySQL tables, typically
7339
useful for verifying your slaves are in sync with the master. The checksums
7340
are generated by a query on the server, and there is very little network
7341
traffic as a result.
7343
Checksums typically take about twice as long as COUNT(*) on very large InnoDB
7344
tables in my tests. For smaller tables, COUNT(*) is a good bit faster than
7345
the checksums. See L<"--algorithm"> for more details on performance.
7347
If you specify more than one server, mk-table-checksum assumes the first
7348
server is the master and others are slaves. Checksums are parallelized for
7349
speed, forking off a child process for each table. Duplicate server names are
7350
ignored, but if you want to checksum a server against itself you can use two
7351
different forms of the hostname (for example, "localhost 127.0.0.1", or
7352
"h=localhost,P=3306 h=localhost,P=3307").
7354
If you want to compare the tables in one database to those in another database
7355
on the same server, just checksum both databases:
7357
mk-table-checksum --databases db1,db2
7359
You can then use L<mk-checksum-filter> to compare the results in both databases
7362
mk-table-checksum examines table structure only on the first host specified,
7363
so if anything differs on the others, it won't notice. It ignores views.
7365
The checksums work on MySQL version 3.23.58 through 6.0-alpha. They will not
7366
necessarily produce the same values on all versions. Differences in
7367
formatting and/or space-padding between 4.1 and 5.0, for example, will cause
7368
the checksums to be different.
7370
=head1 SPECIFYING HOSTS
7372
mk-table-checksum connects to a theoretically unlimited number of MySQL
7373
servers. You specify a list of one or more host definitions on the command
7374
line, such as "host1 host2". Each host definition can be just a hostname, or it
7375
can be a complex string that specifies connection options as well. You can
7376
specify connection options two ways:
7382
Format a host definition in a key=value,key=value form. If an argument on the
7383
command line contains the letter '=', mk-table-checksum will parse it into
7384
its component parts. Examine the L<"--help"> output for details on the allowed
7387
Specifying a list of simple host definitions "host1 host2" is equivalent to the
7388
more complicated "h=host1 h=host2" format.
7392
With the command-line options such as L<"--user"> and L<"--password">. These
7393
options, if given, apply globally to all host definitions.
7397
In addition to specifying connection options this way, mk-table-checksum
7398
allows shortcuts. Any options specified for the first host definition on the
7399
command line fill in missing values in subsequent ones. Any options that are
7400
still missing after this are filled in from the command-line options if
7403
In other words, the places you specify connection options have precedence:
7404
highest precedence is the option specified directly in the host definition, next
7405
is the option specified in the first host definition, and lowest is the
7406
command-line option.
7408
You can mix simple and complex host definitions and/or command-line arguments.
7409
For example, if all your servers except one of your slaves uses a non-standard
7412
mk-table-checksum --port 4500 master h=slave1,P=3306 slave2 slave3
7414
If you are confused about how mk-table-checksum will connect to your servers,
7415
give the L<"--explain-hosts"> option and it will tell you.
7417
=head1 HOW FAST IS IT?
7419
Speed and efficiency are important, because the typical use case is checksumming
7420
large amounts of data.
7422
C<mk-table-checksum> is designed to do very little work itself, and generates
7423
very little network traffic aside from inspecting table structures with C<SHOW
7424
CREATE TABLE>. The results of checksum queries are typically 40-character or
7427
The MySQL server does the bulk of the work, in the form of the checksum queries.
7428
The following benchmarks show the checksum query times for various checksum
7429
algorithms. The first two results are simply running C<COUNT(col8)> and
7430
C<CHECKSUM TABLE> on the table. C<CHECKSUM TABLE> is just C<CRC32> under the
7431
hood, but it's implemented inside the storage engine layer instead of at the
7434
ALGORITHM HASH FUNCTION EXTRA TIME
7435
============== ============= ============== =====
7440
BIT_XOR MD5 --optimize-xor 80.0
7442
BIT_XOR SHA1 --optimize-xor 90.1
7447
The tests are entirely CPU-bound. The sample data is an InnoDB table with the
7448
following structure:
7457
col7 smallint unsigned NOT NULL,
7458
col8 timestamp NOT NULL,
7459
PRIMARY KEY (col2, col1),
7464
The table has 4303585 rows, 365969408 bytes of data and 173457408 bytes of
7465
indexes. The server is a Dell PowerEdge 1800 with dual 32-bit Xeon 2.8GHz
7466
processors and 2GB of RAM. The tests are fully CPU-bound, and the server is
7467
otherwise idle. The results are generally consistent to within a tenth of a
7468
second on repeated runs.
7470
C<CRC32> is the default checksum function to use, and should be enough for most
7471
cases. If you need stronger guarantees that your data is identical, you should
7472
use one of the other functions.
7474
=head1 ALGORITHM SELECTION
7476
The L<"--algorithm"> option allows you to specify which algorithm you would
7477
like to use, but it does not guarantee that mk-table-checksum will use this
7478
algorithm. mk-table-checksum will ultimately select the best algorithm possible
7479
given various factors such as the MySQL version and other command line options.
7481
The three basic algorithms in descending order of preference are CHECKSUM,
7482
BIT_XOR and ACCUM. CHECKSUM cannot be used if any one of these criteria
7485
* L<"--where"> is used.
7486
* L<"--since"> is used.
7487
* L<"--chunk-size"> is used.
7488
* L<"--replicate"> is used.
7489
* L<"--count"> is used.
7490
* MySQL version less than 4.1.1.
7492
The BIT_XOR algorithm also requires MySQL version 4.1.1 or later.
7494
After checking these criteria, if the requested L<"--algorithm"> remains then it
7495
is used, otherwise the first remaining algorithm with the highest preference
7498
=head1 CONSISTENT CHECKSUMS
7500
If you are using this tool to verify your slaves still have the same data as the
7501
master, which is why I wrote it, you should read this section.
7503
The best way to do this with replication is to use the L<"--replicate"> option.
7504
When the queries are finished running on the master and its slaves, you can go
7505
to the slaves and issue SQL queries to see if any tables are different from the
7506
master. Try the following:
7508
SELECT db, tbl, chunk, this_cnt-master_cnt AS cnt_diff,
7509
this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc)
7512
WHERE master_cnt <> this_cnt OR master_crc <> this_crc
7513
OR ISNULL(master_crc) <> ISNULL(this_crc);
7515
The L<"--replicate-check"> option can do this query for you. If you can't use
7516
this method, try the following:
7522
If your servers are not being written to, you can just run the tool with no
7525
mk-table-checksum server1 server2 ... serverN
7529
If the servers are being written to, you need some way to make sure they are
7530
consistent at the moment you run the checksums. For situations other than
7531
master-slave replication, you will have to figure this out yourself. You may be
7532
able to use the L<"--where"> option with a date or time column to only checksum
7533
data that's not recent.
7537
If you are checksumming a master and slaves, you can do a fast parallel
7538
checksum and assume the slaves are caught up to the master. In practice, this
7539
tends to work well except for tables which are constantly updated. You can
7540
use the L<"--slave-lag"> option to see how far behind each slave was when it
7541
checksummed a given table. This can help you decide whether to investigate
7546
The next most disruptive technique is to lock the table on the master, then take
7547
checksums. This should prevent changes from propagating to the slaves. You can
7548
just lock on the master (with L<"--lock">), or you can both lock on the master
7549
and wait on the slaves till they reach that point in the master's binlog
7550
(L<"--wait">). Which is better depends on your workload; only you know that.
7554
If you decide to make the checksums on the slaves wait until they're guaranteed
7555
to be caught up to the master, the algorithm looks like this:
7562
Slave(s): wait for pos, then checksum
7564
Master: unlock table
7569
What I typically do when I'm not using the L<"--replicate"> option is simply run
7570
the tool on all servers with no further options. This runs fast, parallel,
7571
non-blocking checksums simultaneously. If there are tables that look different,
7572
I re-run with L<"--wait">=600 on the tables in question. This makes the tool
7573
lock on the master as explained above.
7577
Output is to STDOUT, one line per server and table, with header lines for each
7578
database. I tried to make the output easy to process with awk. For this reason
7579
columns are always present. If there's no value, mk-table-checksum prints
7582
The default is column-aligned output for human readability, but you can change
7583
it to tab-separated if you want. Use the L<"--tab"> option for this.
7585
Output is unsorted, though all lines for one table should be output together.
7586
For speed, all checksums are done in parallel (as much as possible) and may
7587
complete out of the order in which they were started. You might want to run
7588
them through another script or command-line utility to make sure they are in the
7589
order you want. If you pipe the output through L<mk-checksum-filter>, you
7590
can sort the output and/or avoid seeing output about tables that have no
7593
The columns in the output are as follows. The database, table, and chunk come
7594
first so you can sort by them easily (they are the "primary key").
7596
Output from L<"--replicate-check"> and L<"--checksum"> are different.
7602
The database the table is in.
7610
The chunk (see L<"--chunk-size">). Zero if you are not doing chunked checksums.
7614
The server's hostname.
7618
The table's storage engine.
7622
The table's row count, unless you specified to skip it. If C<OVERSIZE> is
7623
printed, the chunk was skipped because the actual number of rows was greater
7624
than L<"--chunk-size"> times L<"--chunk-size-limit">.
7628
The table's checksum, unless you specified to skip it or the table has no rows.
7629
some types of checksums will be 0 if there are no rows; others will print NULL.
7633
How long it took to checksum the C<CHUNK>, not including C<WAIT> time.
7634
Total checksum time is C<WAIT + TIME>.
7638
How long the slave waited to catch up to its master before beginning to
7639
checksum. C<WAIT> is always 0 for the master. See L<"--wait">.
7643
The return value of MASTER_POS_WAIT(). C<STAT> is always C<NULL> for the
7648
How far the slave lags the master, as reported by SHOW SLAVE STATUS.
7649
C<LAG> is always C<NULL> for the master.
7653
=head1 REPLICATE TABLE MAINTENANCE
7655
If you use L<"--replicate"> to store and replicate checksums, you may need to
7656
perform maintenance on the replicate table from time to time to remove old
7657
checksums. This section describes when checksums in the replicate table are
7658
deleted automatically by mk-table-checksum and when you must manually delete
7661
Before starting, mk-table-checksum calculates chunks for each table, even
7662
if L<"--chunk-size"> is not specified (in that case there is one chunk: "1=1").
7663
Then, before checksumming each table, the tool deletes checksum chunks in the
7664
replicate table greater than the current number of chunks. For example,
7665
if a table is chunked into 100 chunks, 0-99, then mk-table-checksum does:
7667
DELETE FROM replicate table WHERE db=? AND tbl=? AND chunk > 99
7669
That removes any high-end chunks from previous runs which no longer exist.
7670
Currently, this operation cannot be disabled.
7672
If you use L<"--resume">, L<"--resume-replicate">, or L<"--modulo">, then
7673
you need to be careful that the number of rows in a table does not decrease
7674
so much that the number of chunks decreases too, else some checksum chunks may
7675
be deleted. The one exception is if only rows at the high end of the range
7676
are deleted. In that case, the high-end chunks are deleted and lower chunks
7677
remain unchanged. An increasing number of rows or chunks should not cause
7678
any adverse affects.
7680
Changing the L<"--chunk-size"> between runs with L<"--resume">,
7681
L<"--resume-replicate">, or L<"--modulo"> can cause odd or invalid checksums.
7682
You should not do this. It won't work with the resume options. With
7683
L<"--modulo">, the safest thing to do is manually delete all the rows in
7684
the replicate table for the table in question and start over.
7686
If the replicate table becomes cluttered with old or invalid checksums
7687
and the auto-delete operation is not deleting them, then you will need to
7688
manually clean up the replicate table. Alternatively, if you specify
7689
L<"--empty-replicate-table">, then the tool deletes every row in the
7694
An exit status of 0 (sometimes also called a return value or return code)
7695
indicates success. If there is an error checksumming any table, the exit status
7698
When running L<"--replicate-check">, if any slave has chunks that differ from
7699
the master, the exit status is 1.
7703
If you are using innotop (see L<http://code.google.com/p/innotop>),
7704
mytop, or another tool to watch currently running MySQL queries, you may see
7705
the checksum queries. They look similar to this:
7707
REPLACE /*test.test_tbl:'2'/'5'*/ INTO test.checksum(db, ...
7709
Since mk-table-checksum's queries run for a long time and tend to be
7710
textually very long, and thus won't fit on one screen of these monitoring
7711
tools, I've been careful to place a comment at the beginning of the query so
7712
you can see what it is and what it's doing. The comment contains the name of
7713
the table that's being checksummed, the chunk it is currently checksumming,
7714
and how many chunks will be checksummed. In the case above, it is
7715
checksumming chunk 2 of 5 in table test.test_tbl.
7719
L<"--schema"> is restricted to option groups Connection, Filter, Output, Help, Config, Safety.
7721
L<"--empty-replicate-table">, L<"--resume"> and L<"--resume-replicate"> are mutually exclusive.
7723
This tool accepts additional command-line arguments. Refer to the
7724
L<"SYNOPSIS"> and usage information for details.
7732
Checksum algorithm (ACCUM|CHECKSUM|BIT_XOR).
7734
Specifies which checksum algorithm to use. Valid arguments are CHECKSUM,
7735
BIT_XOR and ACCUM. The latter two do cryptographic hash checksums.
7736
See also L<"ALGORITHM SELECTION">.
7738
CHECKSUM is built into MySQL, but has some disadvantages. BIT_XOR and ACCUM are
7739
implemented by SQL queries. They use a cryptographic hash of all columns
7740
concatenated together with a separator, followed by a bitmap of each nullable
7741
column that is NULL (necessary because CONCAT_WS() skips NULL columns).
7743
CHECKSUM is the default. This method uses MySQL's built-in CHECKSUM TABLE
7744
command, which is a CRC32 behind the scenes. It cannot be used before MySQL
7745
4.1.1, and various options disable it as well. It does not simultaneously count
7746
rows; that requires an extra COUNT(*) query. This is a good option when you are
7747
using MyISAM tables with live checksums enabled; in this case both the COUNT(*)
7748
and CHECKSUM queries will run very quickly.
7750
The BIT_XOR algorithm is available for MySQL 4.1.1 and newer. It uses
7751
BIT_XOR(), which is order-independent, to reduce all the rows to a single
7754
ACCUM uses a user variable as an accumulator. It reduces each row to a single
7755
checksum, which is concatenated with the accumulator and re-checksummed. This
7756
technique is order-dependent. If the table has a primary key, it will be used
7757
to order the results for consistency; otherwise it's up to chance.
7759
The pathological worst case is where identical rows will cancel each other out
7760
in the BIT_XOR. In this case you will not be able to distinguish a table full
7761
of one value from a table full of another value. The ACCUM algorithm will
7764
However, the ACCUM algorithm is order-dependent, so if you have two tables
7765
with identical data but the rows are out of order, you'll get different
7766
checksums with ACCUM.
7768
If a given algorithm won't work for some reason, mk-table-checksum falls back to
7769
another. The least common denominator is ACCUM, which works on MySQL 3.23.2 and
7776
The database.table with arguments for each table to checksum.
7778
This table may be named anything you wish. It must contain at least the
7781
CREATE TABLE checksum_args (
7782
db char(64) NOT NULL,
7783
tbl char(64) NOT NULL,
7784
-- other columns as desired
7785
PRIMARY KEY (db, tbl)
7788
In addition to the columns shown, it may contain any of the other columns listed
7789
here (Note: this list is used by the code, MAGIC_overridable_args):
7791
algorithm chunk-column chunk-index chunk-size columns count crc function lock
7792
modulo use-index offset optimize-xor chunk-size-limit probability separator
7793
save-since single-chunk since since-column sleep sleep-coef trim wait where
7795
Each of these columns corresponds to the long form of a command-line option.
7796
Each column should be NULL-able. Column names with hyphens should be enclosed
7797
in backticks (e.g. `chunk-size`) when the table is created. The data type does
7798
not matter, but it's suggested you use a sensible data type to prevent garbage
7801
When C<mk-table-checksum> checksums a table, it will look for a matching entry
7802
in this table. Any column that has a defined value will override the
7803
corresponding command-line argument for the table being currently processed.
7804
In this way it is possible to specify custom command-line arguments for any
7807
If you add columns to the table that aren't in the above list of allowable
7808
columns, it's an error. The exceptions are C<db>, C<tbl>, and C<ts>. The C<ts>
7809
column can be used as a timestamp for easy visibility into the last time the
7810
C<since> column was updated with L<"--save-since">.
7812
This table is assumed to be located on the first server given on the
7819
Prompt for a password when connecting to MySQL.
7821
=item --check-interval
7823
type: time; group: Throttle; default: 1s
7825
How often to check for slave lag if L<"--check-slave-lag"> is given.
7827
=item --[no]check-replication-filters
7829
default: yes; group: Safety
7831
Do not L<"--replicate"> if any replication filters are set. When
7832
--replicate is specified, mk-table-checksum tries to detect slaves and look
7833
for options that filter replication, such as binlog_ignore_db and
7834
replicate_do_db. If it finds any such filters, it aborts with an error.
7835
Replication filtering makes it impossible to be sure that the checksum
7836
queries won't break replication or simply fail to replicate. If you are sure
7837
that it's OK to run the checksum queries, you can negate this option to
7838
disable the checks. See also L<"--replicate-database">.
7840
=item --check-slave-lag
7842
type: DSN; group: Throttle
7844
Pause checksumming until the specified slave's lag is less than L<"--max-lag">.
7846
If this option is specified and L<"--throttle-method"> is set to C<slavelag>
7847
then L<"--throttle-method"> only checks this slave.
7853
Print checksums and table names in the style of md5sum (disables
7856
Makes the output behave more like the output of C<md5sum>. The checksum is
7857
first on the line, followed by the host, database, table, and chunk number,
7858
concatenated with dots.
7860
=item --chunk-column
7864
Prefer this column for dividing tables into chunks. By default,
7865
mk-table-checksum chooses the first suitable column for each table, preferring
7866
to use the primary key. This option lets you specify a preferred column, which
7867
mk-table-checksum uses if it exists in the table and is chunkable. If not, then
7868
mk-table-checksum will revert to its default behavior. Be careful when using
7869
this option; a poor choice could cause bad performance. This is probably best
7870
to use when you are checksumming only a single table, not an entire server. See
7871
also L<"--chunk-index">.
7877
Prefer this index for chunking tables. By default, mk-table-checksum chooses an
7878
appropriate index for the L<"--chunk-column"> (even if it chooses the chunk
7879
column automatically). This option lets you specify the index you prefer. If
7880
the index doesn't exist, then mk-table-checksum will fall back to its default
7881
behavior. mk-table-checksum adds the index to the checksum SQL statements in a
7882
C<FORCE INDEX> clause. Be careful when using this option; a poor choice of
7883
index could cause bad performance. This is probably best to use when you are
7884
checksumming only a single table, not an entire server.
7888
type: string; default: open
7890
Set which ends of the chunk range are open or closed. Possible values are
7891
one of MAGIC_chunk_range:
7894
========== ======================
7895
open Both ends are open
7896
openclosed Low end open, high end closed
7898
By default mk-table-checksum uses an open range of chunks like:
7901
`id` >= '10' AND < '20'
7904
That range is open because the last chunk selects any row with id greater than
7905
(or equal to) 20. An open range can be a problem in cases where a lot of new
7906
rows are inserted with IDs greater than 20 while mk-table-checksumming is
7907
running because the final open-ended chunk will select all the newly inserted
7908
rows. (The less common case of inserting rows with IDs less than 10 would
7909
require a C<closedopen> range but that is not currently implemented.)
7910
Specifying C<openclosed> will cause the final chunk to be closed like:
7912
`id` >= '20' AND `id` <= N
7914
N is the C<MAX(`id`)> that mk-table-checksum used when it first chunked
7915
the rows. Therefore, it will only chunk the range of rows that existed when
7916
the tool started and not any newly inserted rows (unless those rows happen
7917
to be inserted with IDs less than N).
7919
See also L<"--chunk-size-limit">.
7925
Approximate number of rows or size of data to checksum at a time. Allowable
7926
suffixes are k, M, G. Disallows C<--algorithm CHECKSUM>.
7928
If you specify a chunk size, mk-table-checksum will try to find an index that
7929
will let it split the table into ranges of approximately L<"--chunk-size">
7930
rows, based on the table's index statistics. Currently only numeric and date
7931
types can be chunked.
7933
If the table is chunkable, mk-table-checksum will checksum each range separately
7934
with parameters in the checksum query's WHERE clause. If mk-table-checksum
7935
cannot find a suitable index, it will do the entire table in one chunk as though
7936
you had not specified L<"--chunk-size"> at all. Each table is handled
7937
individually, so some tables may be chunked and others not.
7939
The chunks will be approximately sized, and depending on the distribution of
7940
values in the indexed column, some chunks may be larger than the value you
7943
If you specify a suffix (one of k, M or G), the parameter is treated as a data
7944
size rather than a number of rows. The output of SHOW TABLE STATUS is then used
7945
to estimate the amount of data the table contains, and convert that to a number
7948
=item --chunk-size-limit
7950
type: float; default: 2.0; group: Safety
7952
Do not checksum chunks with this many times more rows than L<"--chunk-size">.
7954
When L<"--chunk-size"> is given it specifies an ideal size for each chunk
7955
of a chunkable table (in rows; size values are converted to rows). Before
7956
checksumming each chunk, mk-table-checksum checks how many rows are in the
7957
chunk with EXPLAIN. If the number of rows reported by EXPLAIN is this many
7958
times greater than L<"--chunk-size">, then the chunk is skipped and C<OVERSIZE>
7959
is printed for the C<COUNT> column of the L<"OUTPUT">.
7961
For example, if you specify L<"--chunk-size"> 100 and a chunk has 150 rows,
7962
then it is checksummed with the default L<"--chunk-size-limit"> value 2.0
7963
because 150 is less than 100 * 2.0. But if the chunk has 205 rows, then it
7964
is not checksummed because 205 is greater than 100 * 2.0.
7966
The minimum value for this option is 1 which means that no chunk can be any
7967
larger than L<"--chunk-size">. You probably don't want to specify 1 because
7968
rows reported by EXPLAIN are estimates which can be greater than or less than
7969
the real number of rows in the chunk. If too many chunks are skipped because
7970
they are oversize, you might want to specify a value larger than 2.
7972
You can disable oversize chunk checking by specifying L<"--chunk-size-limit"> 0.
7974
See also L<"--unchunkable-tables">.
7978
short form: -c; type: array; group: Filter
7980
Checksum only this comma-separated list of columns.
7984
type: Array; group: Config
7986
Read this comma-separated list of config files; if specified, this must be the
7987
first option on the command line.
7991
Count rows in tables. This is built into ACCUM and BIT_XOR, but requires an
7992
extra query for CHECKSUM.
7994
This is disabled by default to avoid an extra COUNT(*) query when
7995
L<"--algorithm"> is CHECKSUM. If you have only MyISAM tables and live checksums
7996
are enabled, both CHECKSUM and COUNT will be very fast, but otherwise you may
7997
want to use one of the other algorithms.
8003
Do a CRC (checksum) of tables.
8005
Take the checksum of the rows as well as their count. This is enabled by
8006
default. If you disable it, you'll just get COUNT(*) queries.
8008
=item --create-replicate-table
8010
Create the replicate table given by L<"--replicate"> if it does not exist.
8012
Normally, if the replicate table given by L<"--replicate"> does not exist,
8013
C<mk-table-checksum> will die. With this option, however, C<mk-table-checksum>
8014
will create the replicate table for you, using the database.table name given to
8017
The structure of the replicate table is the same as the suggested table
8018
mentioned in L<"--replicate">. Note that since ENGINE is not specified, the
8019
replicate table will use the server's default storage engine. If you want to
8020
use a different engine, you need to create the table yourself.
8024
short form: -d; type: hash; group: Filter
8026
Only checksum this comma-separated list of databases.
8028
=item --databases-regex
8032
Only checksum databases whose names match this Perl regex.
8034
=item --defaults-file
8036
short form: -F; type: string; group: Connection
8038
Only read mysql options from the given file. You must give an absolute
8041
=item --empty-replicate-table
8043
DELETE all rows in the L<"--replicate"> table before starting.
8045
Issues a DELETE against the table given by L<"--replicate"> before beginning
8046
work. Ignored if L<"--replicate"> is not specified. This can be useful to
8047
remove entries related to tables that no longer exist, or just to clean out the
8048
results of a previous run.
8050
If you want to delete entries for specific databases or tables you must
8055
short form: -e; type: hash; group: Filter
8057
Do only this comma-separated list of storage engines.
8063
Show, but do not execute, checksum queries (disables L<"--empty-replicate-table">).
8065
=item --explain-hosts
8069
Print connection information and exit.
8071
Print out a list of hosts to which mk-table-checksum will connect, with all
8072
the various connection options, and exit. See L<"SPECIFYING HOSTS">.
8074
=item --float-precision
8078
Precision for C<FLOAT> and C<DOUBLE> number-to-string conversion. Causes FLOAT
8079
and DOUBLE values to be rounded to the specified number of digits after the
8080
decimal point, with the ROUND() function in MySQL. This can help avoid
8081
checksum mismatches due to different floating-point representations of the same
8082
values on different MySQL versions and hardware. The default is no rounding;
8083
the values are converted to strings by the CONCAT() function, and MySQL chooses
8084
the string representation. If you specify a value of 2, for example, then the
8085
values 1.008 and 1.009 will be rounded to 1.01, and will checksum as equal.
8091
Hash function for checksums (FNV1A_64, MURMUR_HASH, SHA1, MD5, CRC32, etc).
8093
You can use this option to choose the cryptographic hash function used for
8094
L<"--algorithm">=ACCUM or L<"--algorithm">=BIT_XOR. The default is to use
8095
C<CRC32>, but C<MD5> and C<SHA1> also work, and you can use your own function,
8096
such as a compiled UDF, if you wish. Whatever function you specify is run in
8097
SQL, not in Perl, so it must be available to MySQL.
8099
The C<FNV1A_64> UDF mentioned in the benchmarks is much faster than C<MD5>. The
8100
C++ source code is distributed with Maatkit. It is very simple to compile and
8101
install; look at the header in the source code for instructions. If it is
8102
installed, it is preferred over C<MD5>. You can also use the MURMUR_HASH
8103
function if you compile and install that as a UDF; the source is also
8104
distributed with Maatkit, and it is faster and has better distribution
8113
=item --ignore-columns
8115
type: Hash; group: Filter
8117
Ignore this comma-separated list of columns when calculating the checksum.
8119
This option only affects the checksum when using the ACCUM or BIT_XOR
8122
=item --ignore-databases
8124
type: Hash; group: Filter
8126
Ignore this comma-separated list of databases.
8128
=item --ignore-databases-regex
8132
Ignore databases whose names match this Perl regex.
8134
=item --ignore-engines
8136
type: Hash; default: FEDERATED,MRG_MyISAM; group: Filter
8138
Ignore this comma-separated list of storage engines.
8140
=item --ignore-tables
8142
type: Hash; group: Filter
8144
Ignore this comma-separated list of tables.
8146
Table names may be qualified with the database name.
8148
=item --ignore-tables-regex
8152
Ignore tables whose names match the Perl regex.
8156
Lock on master until done on slaves (implies L<"--slave-lag">).
8158
This option can help you to get a consistent read on a master and many slaves.
8159
If you specify this option, mk-table-checksum will lock the table on the
8160
first server on the command line, which it assumes to be the master. It will
8161
keep this lock until the checksums complete on the other servers.
8163
This option isn't very useful by itself, so you probably want to use L<"--wait">
8166
Note: if you're checksumming a slave against its master, you should use
8167
L<"--replicate">. In that case, there's no need for locking, waiting, or any of
8172
type: time; group: Throttle; default: 1s
8174
Suspend checksumming if the slave given by L<"--check-slave-lag"> lags.
8176
This option causes mk-table-checksum to look at the slave every time it's about
8177
to checksum a chunk. If the slave's lag is greater than the option's value, or
8178
if the slave isn't running (so its lag is NULL), mk-table-checksum sleeps for
8179
L<"--check-interval"> seconds and then looks at the lag again. It repeats until
8180
the slave is caught up, then proceeds to checksum the chunk.
8182
This option is useful to let you checksum data as fast as the slaves can handle
8183
it, assuming the slave you directed mk-table-checksum to monitor is
8184
representative of all the slaves that may be replicating from this server. It
8185
should eliminate the need for L<"--sleep"> or L<"--sleep-coef">.
8191
Do only every Nth chunk on chunked tables.
8193
This option lets you checksum only some chunks of the table. This is a useful
8194
alternative to L<"--probability"> when you want to be sure you get full coverage
8195
in some specified number of runs; for example, you can do only every 7th chunk,
8196
and then use L<"--offset"> to rotate the modulo every day of the week.
8198
Just like with L<"--probability">, a table that cannot be chunked is done every
8203
type: string; default: 0
8205
Modulo offset expression for use with L<"--modulo">.
8207
The argument may be an SQL expression, such as C<WEEKDAY(NOW())> (which returns
8208
a number from 0 through 6). The argument is evaluated by MySQL. The result is
8209
used as follows: if chunk_num % L<"--modulo"> == L<"--offset">, the chunk will
8212
=item --[no]optimize-xor
8216
Optimize BIT_XOR with user variables.
8218
This option specifies to use user variables to reduce the number of times each
8219
row must be passed through the cryptographic hash function when you are using
8220
the BIT_XOR algorithm.
8222
With the optimization, the queries look like this in pseudo-code:
8225
BIT_XOR(SLICE_OF(@user_variable)),
8226
BIT_XOR(SLICE_OF(@user_variable)),
8228
BIT_XOR(SLICE_OF(@user_variable := HASH(col1, col2... colN))));
8230
The exact positioning of user variables and calls to the hash function is
8231
determined dynamically, and will vary between MySQL versions. Without the
8232
optimization, it looks like this:
8235
BIT_XOR(SLICE_OF(MD5(col1, col2... colN))),
8236
BIT_XOR(SLICE_OF(MD5(col1, col2... colN))),
8238
BIT_XOR(SLICE_OF(MD5(col1, col2... colN))));
8240
The difference is the number of times all the columns must be mashed together
8241
and fed through the hash function. If you are checksumming really large
8242
columns, such as BLOB or TEXT columns, this might make a big difference.
8246
short form: -p; type: string; group: Connection
8248
Password to use when connecting.
8254
Create the given PID file. The file contains the process ID of the script.
8255
The PID file is removed when the script exits. Before starting, the script
8256
checks if the PID file already exists. If it does not, then the script creates
8257
and writes its own PID to it. If it does, then the script checks the following:
8258
if the file contains a PID and a process is running with that PID, then
8259
the script dies; or, if there is no process running with that PID, then the
8260
script overwrites the file with its own PID and starts; else, if the file
8261
contains no PID, then the script dies.
8265
short form: -P; type: int; group: Connection
8267
Port number to use for connection.
8271
type: int; default: 100
8273
Checksums will be run with this percent probability.
8275
This is an integer between 1 and 100. If 100, every chunk of every table will
8276
certainly be checksummed. If less than that, there is a chance that some chunks
8277
of some tables will be skipped. This is useful for routine jobs designed to
8278
randomly sample bits of tables without checksumming the whole server. By
8279
default, if a table is not chunkable, it will be checksummed every time even
8280
when the probability is less than 100. You can override this with
8281
L<"--single-chunk">.
8283
See also L<"--modulo">.
8287
type: array; default: time,30
8289
Print progress reports to STDERR. Currently, this feature is only for when
8290
L<"--throttle-method"> waits for slaves to catch up.
8292
The value is a comma-separated list with two parts. The first part can be
8293
percentage, time, or iterations; the second part specifies how often an update
8294
should be printed, in percentage, seconds, or number of iterations.
8298
short form: -q; group: Output
8300
Do not print checksum results.
8304
Re-checksum chunks that L<"--replicate-check"> found to be different.
8308
type: int; group: Throttle
8310
Number of levels to recurse in the hierarchy when discovering slaves.
8311
Default is infinite.
8313
See L<"--recursion-method">.
8315
=item --recursion-method
8319
Preferred recursion method for discovering slaves.
8321
Possible methods are:
8324
=========== ================
8325
processlist SHOW PROCESSLIST
8326
hosts SHOW SLAVE HOSTS
8328
The processlist method is preferred because SHOW SLAVE HOSTS is not reliable.
8329
However, the hosts method is required if the server uses a non-standard
8330
port (not 3306). Usually mk-table-checksum does the right thing and finds
8331
the slaves, but you may give a preferred method and it will be used first.
8332
If it doesn't find any slaves, the other methods will be tried.
8338
Replicate checksums to slaves (disallows --algorithm CHECKSUM).
8340
This option enables a completely different checksum strategy for a consistent,
8341
lock-free checksum across a master and its slaves. Instead of running the
8342
checksum queries on each server, you run them only on the master. You specify a
8343
table, fully qualified in db.table format, to insert the results into. The
8344
checksum queries will insert directly into the table, so they will be replicated
8345
through the binlog to the slaves.
8347
When the queries are finished replicating, you can run a simple query on each
8348
slave to see which tables have differences from the master. With the
8349
L<"--replicate-check"> option, mk-table-checksum can run the query for you to
8350
make it even easier. See L<"CONSISTENT CHECKSUMS"> for details.
8352
If you find tables that have differences, you can use the chunk boundaries in a
8353
WHERE clause with L<mk-table-sync> to help repair them more efficiently. See
8354
L<mk-table-sync> for details.
8356
The table must have at least these columns: db, tbl, chunk, boundaries,
8357
this_crc, master_crc, this_cnt, master_cnt. The table may be named anything you
8358
wish. Here is a suggested table structure, which is automatically used for
8359
L<"--create-replicate-table"> (MAGIC_create_replicate):
8361
CREATE TABLE checksum (
8362
db char(64) NOT NULL,
8363
tbl char(64) NOT NULL,
8365
boundaries char(100) NOT NULL,
8366
this_crc char(40) NOT NULL,
8367
this_cnt int NOT NULL,
8368
master_crc char(40) NULL,
8369
master_cnt int NULL,
8370
ts timestamp NOT NULL,
8371
PRIMARY KEY (db, tbl, chunk)
8374
Be sure to choose an appropriate storage engine for the checksum table. If you
8375
are checksumming InnoDB tables, for instance, a deadlock will break replication
8376
if the checksum table is non-transactional, because the transaction will still
8377
be written to the binlog. It will then replay without a deadlock on the
8378
slave and break replication with "different error on master and slave." This
8379
is not a problem with mk-table-checksum, it's a problem with MySQL
8380
replication, and you can read more about it in the MySQL manual.
8382
This works only with statement-based replication (mk-table-checksum will switch
8383
the binlog format to STATEMENT for the duration of the session if your server
8384
uses row-based replication).
8386
In contrast to running the tool against multiple servers at once, using this
8387
option eliminates the complexities of synchronizing checksum queries across
8388
multiple servers, which normally requires locking and unlocking, waiting for
8389
master binlog positions, and so on. Thus, it disables L<"--lock">, L<"--wait">,
8390
and L<"--slave-lag"> (but not L<"--check-slave-lag">, which is a way to throttle
8391
the execution speed).
8393
The checksum queries actually do a REPLACE into this table, so existing rows
8394
need not be removed before running. However, you may wish to do this anyway to
8395
remove rows related to tables that don't exist anymore. The
8396
L<"--empty-replicate-table"> option does this for you.
8398
Since the table must be qualified with a database (e.g. C<db.checksums>),
8399
mk-table-checksum will only USE this database. This may be important if any
8400
replication options are set because it could affect whether or not changes
8401
to the table are replicated.
8403
If the slaves have any --replicate-do-X or --replicate-ignore-X options, you
8404
should be careful not to checksum any databases or tables that exist on the
8405
master and not the slaves. Changes to such tables may not normally be executed
8406
on the slaves because of the --replicate options, but the checksum queries
8407
modify the contents of the table that stores the checksums, not the tables whose
8408
data you are checksumming. Therefore, these queries will be executed on the
8409
slave, and if the table or database you're checksumming does not exist, the
8410
queries will cause replication to fail. For more information on replication
8411
rules, see L<http://dev.mysql.com/doc/en/replication-rules.html>.
8413
The table specified by L<"--replicate"> will never be checksummed itself.
8415
=item --replicate-check
8419
Check results in L<"--replicate"> table, to the specified depth. You must use
8420
this after you run the tool normally; it skips the checksum step and only checks
8423
It recursively finds differences recorded in the table given by
8424
L<"--replicate">. It recurses to the depth you specify: 0 is no recursion
8425
(check only the server you specify), 1 is check the server and its slaves, 2 is
8426
check the slaves of its slaves, and so on.
8428
It finds differences by running the query shown in L<"CONSISTENT CHECKSUMS">,
8429
and prints results, then exits after printing. This is just a convenient way of
8430
running the query so you don't have to do it manually.
8432
The output is one informational line per slave host, followed by the results
8433
of the query, if any. If L<"--quiet"> is specified, there is no output. If
8434
there are no differences between the master and any slave, there is no output.
8435
If any slave has chunks that differ from the master, mk-table-checksum's
8436
exit status is 1; otherwise it is 0.
8438
This option makes C<mk-table-checksum> look for slaves by running C<SHOW
8439
PROCESSLIST>. If it finds connections that appear to be from slaves, it derives
8440
connection information for each slave with the same default-and-override method
8441
described in L<"SPECIFYING HOSTS">.
8443
If C<SHOW PROCESSLIST> doesn't return any rows, C<mk-table-checksum> looks at
8444
C<SHOW SLAVE HOSTS> instead. The host and port, and user and password if
8445
available, from C<SHOW SLAVE HOSTS> are combined into a DSN and used as the
8446
argument. This requires slaves to be configured with C<report-host>,
8447
C<report-port> and so on.
8449
This requires the @@SERVER_ID system variable, so it works only on MySQL
8452
=item --replicate-database
8456
C<USE> only this database with L<"--replicate">. By default, mk-table-checksum
8457
executes USE to set its default database to the database that contains the table
8458
it's currently working on. It changes its default database as it works on
8459
different tables. This is is a best effort to avoid problems with replication
8460
filters such as binlog_ignore_db and replicate_ignore_db. However, replication
8461
filters can create a situation where there simply is no one right way to do
8462
things. Some statements might not be replicated, and others might cause
8463
replication to fail on the slaves. In such cases, it is up to the user to
8464
specify a safe default database. This option specifies a default database that
8465
mk-table-checksum selects with USE, and never changes afterwards. See also
8466
<L"--[no]check-replication-filters">.
8472
Resume checksum using given output file from a previously interrupted run.
8474
The given output file should be the literal output from a previous run of
8475
C<mk-table-checksum>. For example:
8477
mk-table-checksum host1 host2 -C 100 > checksum_results.txt
8478
mk-table-checksum host1 host2 -C 100 --resume checksum_results.txt
8480
The command line options given to the first run and the resumed run must
8481
be identical (except, of course, for --resume). If they are not, the result
8482
will be unpredictable and probably wrong.
8484
L<"--resume"> does not work with L<"--replicate">; for that, use
8485
L<"--resume-replicate">.
8487
=item --resume-replicate
8489
Resume L<"--replicate">.
8491
This option resumes a previous checksum operation using L<"--replicate">.
8492
It is like L<"--resume"> but does not require an output file. Instead,
8493
it uses the checksum table given to L<"--replicate"> to determine where to
8494
resume the checksum operation.
8498
When L<"--arg-table"> and L<"--since"> are given, save the current L<"--since">
8499
value into that table's C<since> column after checksumming. In this way you can
8500
incrementally checksum tables by starting where the last one finished.
8502
The value to be saved could be the current timestamp, or it could be the maximum
8503
existing value of the column given by L<"--since-column">. It depends on what
8504
options are in effect. See the description of L<"--since"> to see how
8505
timestamps are different from ordinary values.
8509
Checksum C<SHOW CREATE TABLE> instead of table data.
8513
type: string; default: #
8515
The separator character used for CONCAT_WS().
8517
This character is used to join the values of columns when checksumming with
8518
L<"--algorithm"> of BIT_XOR or ACCUM.
8522
type: string; default: wait_timeout=10000; group: Connection
8524
Set these MySQL variables. Immediately after connecting to MySQL, this
8525
string will be appended to SET and executed.
8531
Checksum only data newer than this value.
8533
If the table is chunk-able or nibble-able, this value will apply to the first
8534
column of the chunked or nibbled index.
8536
This is not too different to L<"--where">, but instead of universally applying a
8537
WHERE clause to every table, it selectively finds the right column to use and
8538
applies it only if such a column is found. See also L<"--since-column">.
8540
The argument may be an expression, which is evaluated by MySQL. For example,
8541
you can specify C<CURRENT_DATE - INTERVAL 7 DAY> to get the date of one week
8544
A special bit of extra magic: if the value is temporal (looks like a date or
8545
datetime), then the table is checksummed only if the create time (or last
8546
modified time, for tables that report the last modified time, such as MyISAM
8547
tables) is newer than the value. In this sense it's not applied as a WHERE
8550
=item --since-column
8554
The column name to be used for L<"--since">.
8556
The default is for the tool to choose the best one automatically. If you
8557
specify a value, that will be used if possible; otherwise the best
8558
auto-determined one; otherwise none. If the column doesn't exist in the table,
8561
=item --single-chunk
8563
Permit skipping with L<"--probability"> if there is only one chunk.
8565
Normally, if a table isn't split into many chunks, it will always be
8566
checksummed regardless of L<"--probability">. This setting lets the
8567
probabilistic behavior apply to tables that aren't divided into chunks.
8573
Report replication delay on the slaves.
8575
If this option is enabled, the output will show how many seconds behind the
8576
master each slave is. This can be useful when you want a fast, parallel,
8577
non-blocking checksum, and you know your slaves might be delayed relative to the
8578
master. You can inspect the results and make an educated guess whether any
8579
discrepancies on the slave are due to replication delay instead of corrupt data.
8581
If you're using L<"--replicate">, a slave that is delayed relative to the master
8582
does not invalidate the correctness of the results, so this option is disabled.
8586
type: int; group: Throttle
8588
Sleep time between checksums.
8590
If this option is specified, mk-table-checksum will sleep the specified
8591
number of seconds between checksums. That is, it will sleep between every
8592
table, and if you specify L<"--chunk-size">, it will also sleep between chunks.
8594
This is a very crude way to throttle checksumming; see L<"--sleep-coef"> and
8595
L<"--check-slave-lag"> for techniques that permit greater control.
8599
type: float; group: Throttle
8601
Calculate L<"--sleep"> as a multiple of the last checksum time.
8603
If this option is specified, mk-table-checksum will sleep the amount of
8604
time elapsed during the previous checksum, multiplied by the specified
8605
coefficient. This option is ignored if L<"--sleep"> is specified.
8607
This is a slightly more sophisticated way to throttle checksum speed: sleep a
8608
varying amount of time between chunks, depending on how long the chunks are
8609
taking. Even better is to use L<"--check-slave-lag"> if you're checksumming
8610
master/slave replication.
8614
short form: -S; type: string; group: Connection
8616
Socket file to use for connection.
8622
Print tab-separated output, not column-aligned output.
8626
short form: -t; type: hash; group: Filter
8628
Do only this comma-separated list of tables.
8630
Table names may be qualified with the database name.
8632
=item --tables-regex
8636
Only checksum tables whose names match this Perl regex.
8638
=item --throttle-method
8640
type: string; default: none; group: Throttle
8642
Throttle checksumming when doing L<"--replicate">.
8644
At present there is only one method: C<slavelag>. When L<"--replicate"> is
8645
used, mk-table-checksum automatically sets L<"--throttle-method"> to
8646
C<slavelag> and discovers every slave and throttles checksumming if any slave
8647
lags more than L<"--max-lag">. Specify C<-throttle-method none> to disable
8648
this behavior completely, or specify L<"--check-slave-lag"> and
8649
mk-table-checksum will only check that slave.
8651
See also L<"--recurse"> and L<"--recursion-method">.
8655
Trim C<VARCHAR> columns (helps when comparing 4.1 to >= 5.0).
8657
This option adds a C<TRIM()> to C<VARCHAR> columns in C<BIT_XOR> and C<ACCUM>
8660
This is useful when you don't care about the trailing space differences between
8661
MySQL versions which vary in their handling of trailing spaces. MySQL 5.0 and
8662
later all retain trailing spaces in C<VARCHAR>, while previous versions would
8665
=item --unchunkable-tables
8669
Checksum tables that cannot be chunked when L<"--chunk-size"> is specified.
8671
By default mk-table-checksum will not checksum a table that cannot be chunked
8672
when L<"--chunk-size"> is specified because this might result in a huge,
8673
non-chunkable table being checksummed in one huge, memory-intensive chunk.
8675
Specifying this option allows checksumming tables that cannot be chunked.
8676
Be careful when using this option! Make sure any non-chunkable tables
8677
are not so large that they will cause the tool to consume too much memory
8680
See also L<"--chunk-size-limit">.
8682
=item --[no]use-index
8686
Add FORCE INDEX hints to SQL statements.
8688
By default C<mk-table-checksum> adds an index hint (C<FORCE INDEX> for MySQL
8689
v4.0.9 and newer, C<USE INDEX> for older MySQL versions) to each SQL statement
8690
to coerce MySQL into using the L<"--chunk-index"> (whether the index is
8691
specified by the option or auto-detected). Specifying C<--no-use-index> causes
8692
C<mk-table-checksum> to omit index hints.
8696
short form: -u; type: string; group: Connection
8698
User for login if not current user.
8704
Verify checksum compatibility across servers.
8706
This option runs a trivial checksum on all servers to ensure they have
8707
compatible CONCAT_WS() and cryptographic hash functions.
8709
Versions of MySQL before 4.0.14 will skip empty strings and NULLs in
8710
CONCAT_WS, and others will only skip NULLs. The two kinds of behavior will
8711
produce different results if you have any columns containing the empty string
8712
in your table. If you know you don't (for instance, all columns are
8713
integers), you can safely disable this check and you will get a reliable
8714
checksum even on servers with different behavior.
8720
Show version and exit.
8724
short form: -w; type: time
8726
Wait this long for slaves to catch up to their master (implies L<"--lock">
8729
Note: the best way to verify that a slave is in sync with its master is to use
8730
L<"--replicate"> instead. The L<"--wait"> option is really only useful if
8731
you're trying to compare masters and slaves without using L<"--replicate">,
8732
which is possible but complex and less efficient in some ways.
8734
This option helps you get a consistent checksum across a master server and its
8735
slaves. It combines locking and waiting to accomplish this. First it locks the
8736
table on the master (the first server on the command line). Then it finds the
8737
master's binlog position. Checksums on slaves will be deferred until they reach
8738
the same binlog position.
8740
The argument to the option is the number of seconds to wait for the slaves to
8741
catch up to the master. It is actually the argument to MASTER_POS_WAIT(). If
8742
the slaves don't catch up to the master within this time, they will unblock
8743
and go ahead with the checksum. You can tell whether this happened by
8744
examining the STAT column in the output, which is the return value of
8751
Do only rows matching this C<WHERE> clause (disallows L<"--algorithm"> CHECKSUM).
8753
You can use this option to limit the checksum to only part of the table. This
8754
is particularly useful if you have append-only tables and don't want to
8755
constantly re-check all rows; you could run a daily job to just check
8756
yesterday's rows, for instance.
8758
This option is much like the -w option to mysqldump. Do not specify the WHERE
8759
keyword. You may need to quote the value. Here is an example:
8761
mk-table-checksum --where "foo=bar"
8763
=item --[no]zero-chunk
8767
Add a chunk for rows with zero or zero-equivalent values. The only has an
8768
effect when L<"--chunk-size"> is specified. The purpose of the zero chunk
8769
is to capture a potentially large number of zero values that would imbalance
8770
the size of the first chunk. For example, if a lot of negative numbers were
8771
inserted into an unsigned integer column causing them to be stored as zeros,
8772
then these zero values are captured by the zero chunk instead of the first
8773
chunk and all its non-zero values.
8779
These DSN options are used to create a DSN. Each option is given like
8780
C<option=value>. The options are case-sensitive, so P and p are not the
8781
same option. There cannot be whitespace before or after the C<=> and
8782
if the value contains whitespace it must be quoted. DSN options are
8783
comma-separated. See the L<maatkit> manpage for full details.
8789
dsn: charset; copy: yes
8791
Default character set.
8795
dsn: database; copy: yes
8801
dsn: mysql_read_default_file; copy: yes
8803
Only read default options from the given file
8807
dsn: host; copy: yes
8813
dsn: password; copy: yes
8815
Password to use when connecting.
8819
dsn: port; copy: yes
8821
Port number to use for connection.
8825
dsn: mysql_socket; copy: yes
8827
Socket file to use for connection.
8831
dsn: user; copy: yes
8833
User for login if not current user.
8839
You can download Maatkit from Google Code at
8840
L<http://code.google.com/p/maatkit/>, or you can get any of the tools
8841
easily with a command like the following:
8843
wget http://www.maatkit.org/get/toolname
8845
wget http://www.maatkit.org/trunk/toolname
8847
Where C<toolname> can be replaced with the name (or fragment of a name) of any
8848
of the Maatkit tools. Once downloaded, they're ready to run; no installation is
8849
needed. The first URL gets the latest released version of the tool, and the
8850
second gets the latest trunk code from Subversion.
8854
The environment variable C<MKDEBUG> enables verbose debugging output in all of
8859
=head1 SYSTEM REQUIREMENTS
8861
You need Perl, DBI, DBD::mysql, and some core packages that ought to be
8862
installed in any reasonably new version of Perl.
8866
For a list of known bugs see L<http://www.maatkit.org/bugs/mk-table-checksum>.
8868
Please use Google Code Issues and Groups to report bugs or request support:
8869
L<http://code.google.com/p/maatkit/>. You can also join #maatkit on Freenode to
8872
Please include the complete command-line used to reproduce the problem you are
8873
seeing, the version of all MySQL servers involved, the complete output of the
8874
tool when run with L<"--version">, and if possible, debugging output produced by
8875
running with the C<MKDEBUG=1> environment variable.
8877
=head1 COPYRIGHT, LICENSE AND WARRANTY
8879
This program is copyright 2007-@CURRENTYEAR@ Baron Schwartz.
8880
Feedback and improvements are welcome.
8882
THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
8883
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
8884
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
8886
This program is free software; you can redistribute it and/or modify it under
8887
the terms of the GNU General Public License as published by the Free Software
8888
Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
8889
systems, you can issue `man perlgpl' or `man perlartistic' to read these
8892
You should have received a copy of the GNU General Public License along with
8893
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
8894
Place, Suite 330, Boston, MA 02111-1307 USA.
8898
See also L<mk-checksum-filter> and L<mk-table-sync>.
8902
Baron "Xaprb" Schwartz
8904
=head1 ABOUT MAATKIT
8906
This tool is part of Maatkit, a toolkit for power users of MySQL. Maatkit
8907
was created by Baron Schwartz; Baron and Daniel Nichter are the primary
8908
code contributors. Both are employed by Percona. Financial support for
8909
Maatkit development is primarily provided by Percona and its clients.
8911
=head1 ACKNOWLEDGMENTS
8913
This is an incomplete list. My apologies for omissions or misspellings.
8916
Francois Saint-Jacques,
8925
This manual page documents Ver @VERSION@ Distrib @DISTRIB@ $Revision: 7527 $.