8
8
use warnings FATAL => 'all';
9
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
9
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
11
11
# ###########################################################################
13
13
# This package is a copy without comments from the original. The original
14
14
# with comments and its test file can be found in the Bazaar repository at,
17
17
# See https://launchpad.net/percona-toolkit for more information.
18
18
# ###########################################################################
23
23
use warnings FATAL => 'all';
24
24
use English qw(-no_match_vars);
25
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
25
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
28
$Data::Dumper::Indent = 1;
29
$Data::Dumper::Sortkeys = 1;
28
$Data::Dumper::Indent = 0;
30
29
$Data::Dumper::Quotekeys = 0;
34
my $have_dbi = $EVAL_ERROR ? 0 : 1;
33
37
my ( $class, %args ) = @_;
34
my @required_args = qw(Quoter);
35
foreach my $arg ( @required_args ) {
38
foreach my $arg ( qw(opts) ) {
36
39
die "I need a $arg argument" unless $args{$arg};
42
opts => {} # h, P, u, etc. Should come from DSN OPTIONS section in POD.
44
foreach my $opt ( @{$args{opts}} ) {
45
if ( !$opt->{key} || !$opt->{desc} ) {
46
die "Invalid DSN option: ", Dumper($opt);
48
PTDEBUG && _d('DSN option:',
50
map { "$_=" . (defined $opt->{$_} ? ($opt->{$_} || '') : 'undef') }
54
$self->{opts}->{$opt->{key}} = {
57
copy => $opt->{copy} || 0,
39
60
return bless $self, $class;
64
my ( $self, $prop, $value ) = @_;
66
PTDEBUG && _d('Setting', $prop, 'property');
67
$self->{$prop} = $value;
69
return $self->{$prop};
43
my ( $self, $ddl, $opts ) = @_;
45
if ( ref $ddl eq 'ARRAY' ) {
46
if ( lc $ddl->[0] eq 'table' ) {
56
if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
57
die "Cannot parse table definition; is ANSI quoting "
58
. "enabled or SQL_QUOTE_SHOW_CREATE disabled?";
61
my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/;
62
(undef, $name) = $self->{Quoter}->split_unquote($name) if $name;
64
$ddl =~ s/(`[^`]+`)/\L$1/g;
66
my $engine = $self->get_engine($ddl);
68
my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
69
my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
70
MKDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
73
@def_for{@cols} = @defs;
76
my (%type_for, %is_nullable, %is_numeric, %is_autoinc);
77
foreach my $col ( @cols ) {
78
my $def = $def_for{$col};
79
my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
80
die "Can't determine column type for $def" unless $type;
81
$type_for{$col} = $type;
82
if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
84
$is_numeric{$col} = 1;
86
if ( $def !~ m/NOT NULL/ ) {
88
$is_nullable{$col} = 1;
90
$is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
93
my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable);
95
my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/;
100
col_posn => { map { $cols[$_] => $_ } 0..$#cols },
101
is_col => { map { $_ => 1 } @cols },
103
is_nullable => \%is_nullable,
104
is_autoinc => \%is_autoinc,
105
clustered_key => $clustered_key,
108
numeric_cols => \@nums,
109
is_numeric => \%is_numeric,
111
type_for => \%type_for,
117
my ( $self, $tbl ) = @_;
121
(($a ne 'PRIMARY') <=> ($b ne 'PRIMARY'))
122
|| ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} )
123
|| ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} )
124
|| ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) )
127
$tbl->{keys}->{$_}->{type} eq 'BTREE'
129
sort keys %{$tbl->{keys}};
131
MKDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes));
135
sub find_best_index {
136
my ( $self, $tbl, $index ) = @_;
139
($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
143
die "Index '$index' does not exist in table";
146
($best) = $self->sort_indexes($tbl);
149
MKDEBUG && _d('Best index found is', $best);
153
sub find_possible_keys {
154
my ( $self, $dbh, $database, $table, $quoter, $where ) = @_;
155
return () unless $where;
156
my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table)
157
. ' WHERE ' . $where;
159
my $expl = $dbh->selectrow_hashref($sql);
160
$expl = { map { lc($_) => $expl->{$_} } keys %$expl };
161
if ( $expl->{possible_keys} ) {
162
MKDEBUG && _d('possible_keys =', $expl->{possible_keys});
163
my @candidates = split(',', $expl->{possible_keys});
164
my %possible = map { $_ => 1 } @candidates;
165
if ( $expl->{key} ) {
166
MKDEBUG && _d('MySQL chose', $expl->{key});
167
unshift @candidates, grep { $possible{$_} } split(',', $expl->{key});
168
MKDEBUG && _d('Before deduping:', join(', ', @candidates));
170
@candidates = grep { !$seen{$_}++ } @candidates;
172
MKDEBUG && _d('Final list:', join(', ', @candidates));
176
MKDEBUG && _d('No keys in possible_keys');
182
my ( $self, %args ) = @_;
183
my @required_args = qw(dbh db tbl);
184
foreach my $arg ( @required_args ) {
185
die "I need a $arg argument" unless $args{$arg};
187
my ($dbh, $db, $tbl) = @args{@required_args};
188
my $q = $self->{Quoter};
189
my $db_tbl = $q->quote($db, $tbl);
190
MKDEBUG && _d('Checking', $db_tbl);
192
my $sql = "SHOW TABLES FROM " . $q->quote($db)
193
. ' LIKE ' . $q->literal_like($tbl);
197
$row = $dbh->selectrow_arrayref($sql);
200
MKDEBUG && _d($EVAL_ERROR);
203
if ( !$row->[0] || $row->[0] ne $tbl ) {
204
MKDEBUG && _d('Table does not exist');
208
MKDEBUG && _d('Table exists; no privs to check');
209
return 1 unless $args{all_privs};
211
$sql = "SHOW FULL COLUMNS FROM $db_tbl";
214
$row = $dbh->selectrow_hashref($sql);
217
MKDEBUG && _d($EVAL_ERROR);
220
if ( !scalar keys %$row ) {
221
MKDEBUG && _d('Table has no columns:', Dumper($row));
224
my $privs = $row->{privileges} || $row->{Privileges};
226
$sql = "DELETE FROM $db_tbl LIMIT 0";
231
my $can_delete = $EVAL_ERROR ? 0 : 1;
233
MKDEBUG && _d('User privs on', $db_tbl, ':', $privs,
234
($can_delete ? 'delete' : ''));
236
if ( !($privs =~ m/select/ && $privs =~ m/insert/ && $privs =~ m/update/
238
MKDEBUG && _d('User does not have all privs');
242
MKDEBUG && _d('User has all privs');
247
my ( $self, $ddl, $opts ) = @_;
248
my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
249
MKDEBUG && _d('Storage engine:', $engine);
250
return $engine || undef;
254
my ( $self, $ddl, $opts, $is_nullable ) = @_;
255
my $engine = $self->get_engine($ddl);
257
my $clustered_key = undef;
260
foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) {
262
next KEY if $key =~ m/FOREIGN/;
265
MKDEBUG && _d('Parsed key:', $key_ddl);
267
if ( $engine !~ m/MEMORY|HEAP/ ) {
268
$key =~ s/USING HASH/USING BTREE/;
271
my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
272
my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
273
$type = $type || $special || 'BTREE';
274
if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000'
275
&& $engine =~ m/HEAP|MEMORY/i )
73
my ( $self, $dsn, $prev, $defaults ) = @_;
75
PTDEBUG && _d('No DSN to parse');
78
PTDEBUG && _d('Parsing', $dsn);
83
my $opts = $self->{opts};
85
foreach my $dsn_part ( split(/,/, $dsn) ) {
86
if ( my ($prop_key, $prop_val) = $dsn_part =~ m/^(.)=(.*)$/ ) {
87
$given_props{$prop_key} = $prop_val;
90
PTDEBUG && _d('Interpreting', $dsn_part, 'as h=', $dsn_part);
91
$given_props{h} = $dsn_part;
95
foreach my $key ( keys %$opts ) {
96
PTDEBUG && _d('Finding value for', $key);
97
$final_props{$key} = $given_props{$key};
98
if ( !defined $final_props{$key}
99
&& defined $prev->{$key} && $opts->{$key}->{copy} )
277
$type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP
280
my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
281
my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
284
foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) {
285
my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/;
287
push @col_prefixes, $prefix;
291
MKDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols));
298
col_prefixes => \@col_prefixes,
299
is_unique => $unique,
300
is_nullable => scalar(grep { $is_nullable->{$_} } @cols),
301
is_col => { map { $_ => 1 } @cols },
305
if ( $engine =~ m/InnoDB/i && !$clustered_key ) {
306
my $this_key = $keys->{$name};
307
if ( $this_key->{name} eq 'PRIMARY' ) {
308
$clustered_key = 'PRIMARY';
310
elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) {
311
$clustered_key = $this_key->{name};
313
MKDEBUG && $clustered_key && _d('This key is the clustered key');
317
return $keys, $clustered_key;
321
my ( $self, $ddl, $opts ) = @_;
322
my $q = $self->{Quoter};
326
$ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg )
328
my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/;
329
my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/;
330
my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/;
332
my ($db, $tbl) = $q->split_unquote($parent, $opts->{database});
333
my %parent_tbl = (tbl => $tbl);
334
$parent_tbl{db} = $db if $db;
336
if ( $parent !~ m/\./ && $opts->{database} ) {
337
$parent = $q->quote($opts->{database}) . ".$parent";
343
cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ],
344
parent_tbl => \%parent_tbl,
345
parent_tblname => $parent,
346
parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ],
347
parent_colnames=> $parent_cols,
355
sub remove_auto_increment {
356
my ( $self, $ddl ) = @_;
357
$ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m;
361
sub remove_secondary_indexes {
362
my ( $self, $ddl ) = @_;
364
my $tbl_struct = $self->parse($ddl);
366
if ( ($tbl_struct->{engine} || '') =~ m/InnoDB/i ) {
367
my $clustered_key = $tbl_struct->{clustered_key};
368
$clustered_key ||= '';
370
my @sec_indexes = map {
371
my $key_def = $_->{ddl};
372
$key_def =~ s/([\(\)])/\\$1/g;
373
$ddl =~ s/\s+$key_def//i;
375
my $key_ddl = "ADD $_->{ddl}";
376
$key_ddl .= ',' unless $key_ddl =~ m/,$/;
379
grep { $_->{name} ne $clustered_key }
380
values %{$tbl_struct->{keys}};
381
MKDEBUG && _d('Secondary indexes:', Dumper(\@sec_indexes));
383
if ( @sec_indexes ) {
384
$sec_indexes_ddl = join(' ', @sec_indexes);
385
$sec_indexes_ddl =~ s/,$//;
388
$ddl =~ s/,(\n\) )/$1/s;
101
$final_props{$key} = $prev->{$key};
102
PTDEBUG && _d('Copying value for', $key, 'from previous DSN');
104
if ( !defined $final_props{$key} ) {
105
$final_props{$key} = $defaults->{$key};
106
PTDEBUG && _d('Copying value for', $key, 'from defaults');
110
foreach my $key ( keys %given_props ) {
111
die "Unknown DSN option '$key' in '$dsn'. For more details, "
112
. "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
113
. "for complete documentation."
114
unless exists $opts->{$key};
116
if ( (my $required = $self->prop('required')) ) {
117
foreach my $key ( keys %$required ) {
118
die "Missing required DSN option '$key' in '$dsn'. For more details, "
119
. "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
120
. "for complete documentation."
121
unless $final_props{$key};
125
return \%final_props;
129
my ( $self, $o ) = @_;
130
die 'I need an OptionParser object' unless ref $o eq 'OptionParser';
133
map { "$_=".$o->get($_); }
134
grep { $o->has($_) && $o->get($_) }
135
keys %{$self->{opts}}
137
PTDEBUG && _d('DSN string made from options:', $dsn_string);
138
return $self->parse($dsn_string);
142
my ( $self, $dsn, $props ) = @_;
143
return $dsn unless ref $dsn;
144
my @keys = $props ? @$props : sort keys %$dsn;
146
map { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) }
148
exists $self->{opts}->{$_}
150
&& defined $dsn->{$_}
157
= "DSN syntax is key=value[,key=value...] Allowable DSN keys:\n\n"
158
. " KEY COPY MEANING\n"
159
. " === ==== =============================================\n";
160
my %opts = %{$self->{opts}};
161
foreach my $key ( sort keys %opts ) {
163
. ($opts{$key}->{copy} ? 'yes ' : 'no ')
164
. ($opts{$key}->{desc} || '[No description]')
167
$usage .= "\n If the DSN is a bareword, the word is treated as the 'h' key.\n";
172
my ( $self, $info ) = @_;
174
my %opts = %{$self->{opts}};
175
my $driver = $self->prop('dbidriver') || '';
176
if ( $driver eq 'Pg' ) {
177
$dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';'
178
. join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
179
grep { defined $info->{$_} }
391
MKDEBUG && _d('Not removing secondary indexes from',
392
$tbl_struct->{engine}, 'table');
395
return $ddl, $sec_indexes_ddl, $tbl_struct;
399
my ($package, undef, $line) = caller 0;
400
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
401
map { defined $_ ? $_ : 'undef' }
403
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
408
# ###########################################################################
409
# End TableParser package
410
# ###########################################################################
412
# ###########################################################################
413
# TableChecksum package
414
# This package is a copy without comments from the original. The original
415
# with comments and its test file can be found in the Bazaar repository at,
416
# lib/TableChecksum.pm
417
# t/lib/TableChecksum.t
418
# See https://launchpad.net/percona-toolkit for more information.
419
# ###########################################################################
421
package TableChecksum;
424
use warnings FATAL => 'all';
425
use English qw(-no_match_vars);
426
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
428
use List::Util qw(max);
431
CHECKSUM => { pref => 0, hash => 0 },
432
BIT_XOR => { pref => 2, hash => 1 },
433
ACCUM => { pref => 3, hash => 1 },
437
my ( $class, %args ) = @_;
438
foreach my $arg ( qw(Quoter VersionParser) ) {
439
die "I need a $arg argument" unless defined $args{$arg};
441
my $self = { %args };
442
return bless $self, $class;
446
my ( $self, $string ) = @_;
447
my $poly = 0xEDB88320;
448
my $crc = 0xFFFFFFFF;
449
foreach my $char ( split(//, $string) ) {
450
my $comp = ($crc ^ ord($char)) & 0xFF;
452
$comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1;
454
$crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp;
456
return $crc ^ 0xFFFFFFFF;
460
my ( $self, $dbh, $func ) = @_;
462
if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
464
my ($val) = $dbh->selectrow_array("SELECT $func('a')");
465
$crc_wid = max(16, length($val));
472
my ( $self, $dbh, $func ) = @_;
475
my $sql = "SELECT $func('a')";
476
my $sth = $dbh->prepare($sql);
479
$type = $sth->{mysql_type_name}->[0];
480
$length = $sth->{mysql_length}->[0];
481
MKDEBUG && _d($sql, $type, $length);
482
if ( $type eq 'bigint' && $length < 20 ) {
183
$dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';'
184
. join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
185
grep { defined $info->{$_} }
187
. ';mysql_read_default_group=client';
190
return ($dsn, $info->{u}, $info->{p});
194
my ( $self, $dbh, $dsn ) = @_;
195
my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name');
196
my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()');
198
$dsn->{h} ||= $vars->{hostname}->{Value};
199
$dsn->{S} ||= $vars->{'socket'}->{Value};
200
$dsn->{P} ||= $vars->{port}->{Value};
206
my ( $self, $cxn_string, $user, $pass, $opts ) = @_;
212
ShowErrorStatement => 1,
213
mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0),
487
MKDEBUG && _d('crc_type:', $type, 'length:', $length);
488
return ($type, $length);
492
my ( $self, %args ) = @_;
493
my ( $alg, $dbh ) = @args{ qw(algorithm dbh) };
494
my $vp = $self->{VersionParser};
495
my @choices = sort { $ALGOS{$a}->{pref} <=> $ALGOS{$b}->{pref} } keys %ALGOS;
496
die "Invalid checksum algorithm $alg"
497
if $alg && !$ALGOS{$alg};
500
$args{where} || $args{chunk} # CHECKSUM does whole table
501
|| $args{replicate} # CHECKSUM can't do INSERT.. SELECT
502
|| !$vp->version_ge($dbh, '4.1.1')) # CHECKSUM doesn't exist
504
MKDEBUG && _d('Cannot use CHECKSUM algorithm');
505
@choices = grep { $_ ne 'CHECKSUM' } @choices;
508
if ( !$vp->version_ge($dbh, '4.1.1') ) {
509
MKDEBUG && _d('Cannot use BIT_XOR algorithm because MySQL < 4.1.1');
510
@choices = grep { $_ ne 'BIT_XOR' } @choices;
513
if ( $alg && grep { $_ eq $alg } @choices ) {
514
MKDEBUG && _d('User requested', $alg, 'algorithm');
518
if ( $args{count} && grep { $_ ne 'CHECKSUM' } @choices ) {
519
MKDEBUG && _d('Not using CHECKSUM algorithm because COUNT desired');
520
@choices = grep { $_ ne 'CHECKSUM' } @choices;
523
MKDEBUG && _d('Algorithms, in order:', @choices);
527
sub is_hash_algorithm {
528
my ( $self, $algorithm ) = @_;
529
return $ALGOS{$algorithm} && $ALGOS{$algorithm}->{hash};
532
sub choose_hash_func {
533
my ( $self, %args ) = @_;
534
my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);
535
if ( $args{function} ) {
536
unshift @funcs, $args{function};
538
my ($result, $error);
215
@{$defaults}{ keys %$opts } = values %$opts;
217
if ( $opts->{mysql_use_result} ) {
218
$defaults->{mysql_use_result} = 1;
222
die "Cannot connect to MySQL because the Perl DBI module is not "
223
. "installed or not found. Run 'perl -MDBI' to see the directories "
224
. "that Perl searches for DBI. If DBI is not installed, try:\n"
225
. " Debian/Ubuntu apt-get install libdbi-perl\n"
226
. " RHEL/CentOS yum install perl-DBI\n"
227
. " OpenSolaris pgk install pkg:/SUNWpmdbi\n";
233
while ( !$dbh && $tries-- ) {
234
PTDEBUG && _d($cxn_string, ' ', $user, ' ', $pass,
235
join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults ));
542
$func = shift(@funcs);
543
my $sql = "SELECT $func('test-string')";
545
$args{dbh}->do($sql);
238
$dbh = DBI->connect($cxn_string, $user, $pass, $defaults);
240
if ( $cxn_string =~ m/mysql/i ) {
243
$sql = 'SELECT @@SQL_MODE';
244
PTDEBUG && _d($dbh, $sql);
245
my ($sql_mode) = $dbh->selectrow_array($sql);
247
$sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1'
248
. '/*!40101, @@SQL_MODE=\'NO_AUTO_VALUE_ON_ZERO'
249
. ($sql_mode ? ",$sql_mode" : '')
251
PTDEBUG && _d($dbh, $sql);
254
if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) {
255
$sql = "/*!40101 SET NAMES $charset*/";
256
PTDEBUG && _d($dbh, ':', $sql);
258
PTDEBUG && _d('Enabling charset for STDOUT');
259
if ( $charset eq 'utf8' ) {
260
binmode(STDOUT, ':utf8')
261
or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
264
binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR";
268
if ( $self->prop('set-vars') ) {
269
$sql = "SET " . $self->prop('set-vars');
270
PTDEBUG && _d($dbh, ':', $sql);
548
if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) {
549
$error .= qq{$func cannot be used because "$1"\n};
550
MKDEBUG && _d($func, 'cannot be used because', $1);
552
} while ( @funcs && !$result );
554
die $error unless $result;
555
MKDEBUG && _d('Chosen hash func:', $result);
560
my ( $self, %args ) = @_;
561
my ($dbh, $func) = @args{qw(dbh function)};
563
die "$func never needs the BIT_XOR optimization"
564
if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;
567
my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
570
my $crc_wid = length($unsliced) < 16 ? 16 : length($unsliced);
572
do { # Try different positions till sliced result equals non-sliced.
573
MKDEBUG && _d('Trying slice', $opt_slice);
574
$dbh->do('SET @crc := "", @cnt := 0');
575
my $slices = $self->make_xor_slices(
576
query => "\@crc := $func('a')",
578
opt_slice => $opt_slice,
581
my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
582
$sliced = ($dbh->selectrow_array($sql))[0];
583
if ( $sliced ne $unsliced ) {
584
MKDEBUG && _d('Slice', $opt_slice, 'does not work');
588
} while ( $start < $crc_wid && $sliced ne $unsliced );
590
if ( $sliced eq $unsliced ) {
591
MKDEBUG && _d('Slice', $opt_slice, 'works');
595
MKDEBUG && _d('No slice works');
600
sub make_xor_slices {
601
my ( $self, %args ) = @_;
602
foreach my $arg ( qw(query crc_wid) ) {
603
die "I need a $arg argument" unless defined $args{$arg};
605
my ( $query, $crc_wid, $opt_slice ) = @args{qw(query crc_wid opt_slice)};
608
for ( my $start = 1; $start <= $crc_wid; $start += 16 ) {
609
my $len = $crc_wid - $start + 1;
615
. "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
616
. ", 10, 16), $len, '0')";
619
if ( defined $opt_slice && $opt_slice < @slices ) {
620
$slices[$opt_slice] =~ s/\@crc/\@crc := $query/;
623
map { s/\@crc/$query/ } @slices;
626
return join(', ', @slices);
629
sub make_row_checksum {
630
my ( $self, %args ) = @_;
631
my ( $tbl_struct, $func ) = @args{ qw(tbl_struct function) };
632
my $q = $self->{Quoter};
634
my $sep = $args{sep} || '#';
638
my $ignorecols = $args{ignorecols} || {};
640
my %cols = map { lc($_) => 1 }
641
grep { !exists $ignorecols->{$_} }
642
($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}});
646
my $type = $tbl_struct->{type_for}->{$_};
647
my $result = $q->quote($_);
648
if ( $type eq 'timestamp' ) {
651
elsif ( $args{float_precision} && $type =~ m/float|double/ ) {
652
$result = "ROUND($result, $args{float_precision})";
654
elsif ( $args{trim} && $type =~ m/varchar/ ) {
655
$result = "TRIM($result)";
660
$cols{$_} && !$seen{$_}++
662
@{$tbl_struct->{cols}};
665
if ( !$args{no_cols} ) {
669
if ( $col =~ m/\+ 0/ ) {
670
my ($real_col) = /^(\S+)/;
671
$col .= " AS $real_col";
673
elsif ( $col =~ m/TRIM/ ) {
674
my ($real_col) = m/TRIM\(([^\)]+)\)/;
675
$col .= " AS $real_col";
682
if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
683
my @nulls = grep { $cols{$_} } @{$tbl_struct->{null_cols}};
685
my $bitmap = "CONCAT("
686
. join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls)
692
? "$func(CONCAT_WS('$sep', " . join(', ', @cols) . '))'
696
my $fnv_func = uc $func;
697
$query .= "$fnv_func(" . join(', ', @cols) . ')';
703
sub make_checksum_query {
704
my ( $self, %args ) = @_;
705
my @required_args = qw(db tbl tbl_struct algorithm crc_wid crc_type);
706
foreach my $arg( @required_args ) {
707
die "I need a $arg argument" unless $args{$arg};
709
my ( $db, $tbl, $tbl_struct, $algorithm,
710
$crc_wid, $crc_type) = @args{@required_args};
711
my $func = $args{function};
712
my $q = $self->{Quoter};
715
die "Invalid or missing checksum algorithm"
716
unless $algorithm && $ALGOS{$algorithm};
718
if ( $algorithm eq 'CHECKSUM' ) {
719
return "CHECKSUM TABLE " . $q->quote($db, $tbl);
722
my $expr = $self->make_row_checksum(%args, no_cols=>1);
724
if ( $algorithm eq 'BIT_XOR' ) {
725
if ( $crc_type =~ m/int$/ ) {
726
$result = "COALESCE(LOWER(CONV(BIT_XOR(CAST($expr AS UNSIGNED)), 10, 16)), 0) AS crc ";
729
my $slices = $self->make_xor_slices( query => $expr, %args );
730
$result = "COALESCE(LOWER(CONCAT($slices)), 0) AS crc ";
734
if ( $crc_type =~ m/int$/ ) {
735
$result = "COALESCE(RIGHT(MAX("
736
. "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), "
737
. "CONV(CAST($func(CONCAT(\@crc, $expr)) AS UNSIGNED), 10, 16))"
738
. "), $crc_wid), 0) AS crc ";
741
$result = "COALESCE(RIGHT(MAX("
742
. "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), "
743
. "$func(CONCAT(\@crc, $expr)))"
744
. "), $crc_wid), 0) AS crc ";
747
if ( $args{replicate} ) {
748
$result = "REPLACE /*PROGRESS_COMMENT*/ INTO $args{replicate} "
749
. "(db, tbl, chunk, boundaries, this_cnt, this_crc) "
750
. "SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, $result";
754
. ($args{buffer} ? 'SQL_BUFFER_RESULT ' : '')
755
. "/*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, $result";
757
return $result . "FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/";
760
sub find_replication_differences {
761
my ( $self, $dbh, $table ) = @_;
763
(my $sql = <<" EOF") =~ s/\s+/ /gm;
764
SELECT db, tbl, chunk, boundaries,
765
COALESCE(this_cnt-master_cnt, 0) AS cnt_diff,
767
this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc),
770
this_cnt, master_cnt, this_crc, master_crc
772
WHERE master_cnt <> this_cnt OR master_crc <> this_crc
773
OR ISNULL(master_crc) <> ISNULL(this_crc)
777
my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} });
275
if ( !$dbh && $EVAL_ERROR ) {
276
PTDEBUG && _d($EVAL_ERROR);
277
if ( $EVAL_ERROR =~ m/not a compiled character set|character set utf8/ ) {
278
PTDEBUG && _d('Going to try again without utf8 support');
279
delete $defaults->{mysql_enable_utf8};
281
elsif ( $EVAL_ERROR =~ m/locate DBD\/mysql/i ) {
282
die "Cannot connect to MySQL because the Perl DBD::mysql module is "
283
. "not installed or not found. Run 'perl -MDBD::mysql' to see "
284
. "the directories that Perl searches for DBD::mysql. If "
285
. "DBD::mysql is not installed, try:\n"
286
. " Debian/Ubuntu apt-get install libdbd-mysql-perl\n"
287
. " RHEL/CentOS yum install perl-DBD-MySQL\n"
288
. " OpenSolaris pgk install pkg:/SUNWapu13dbd-mysql\n";
296
PTDEBUG && _d('DBH info: ',
298
Dumper($dbh->selectrow_hashref(
299
'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')),
300
'Connection info:', $dbh->{mysql_hostinfo},
301
'Character set info:', Dumper($dbh->selectall_arrayref(
302
'SHOW VARIABLES LIKE "character_set%"', { Slice => {}})),
303
'$DBD::mysql::VERSION:', $DBD::mysql::VERSION,
304
'$DBI::VERSION:', $DBI::VERSION,
311
my ( $self, $dbh ) = @_;
312
if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) {
315
my ( $hostname, $one ) = $dbh->selectrow_array(
316
'SELECT /*!50038 @@hostname, */ 1');
321
my ( $self, $dbh ) = @_;
322
PTDEBUG && $self->print_active_handles($dbh);
326
sub print_active_handles {
327
my ( $self, $thing, $level ) = @_;
329
printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level,
330
$thing, (($thing->{Type} || '') eq 'st' ? $thing->{Statement} || '' : ''))
331
or die "Cannot print: $OS_ERROR";
332
foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) {
333
$self->print_active_handles( $handle, $level + 1 );
338
my ( $self, $dsn_1, $dsn_2, %args ) = @_;
339
die 'I need a dsn_1 argument' unless $dsn_1;
340
die 'I need a dsn_2 argument' unless $dsn_2;
344
if ( $args{overwrite} ) {
345
$val = defined $dsn_1->{$key} ? $dsn_1->{$key} : $dsn_2->{$key};
348
$val = defined $dsn_2->{$key} ? $dsn_2->{$key} : $dsn_1->{$key};
351
} keys %{$self->{opts}};
1818
1392
# ###########################################################################
1820
1394
# ###########################################################################
1822
1396
# This package is a copy without comments from the original. The original
1823
1397
# with comments and its test file can be found in the Bazaar repository at,
1826
1400
# See https://launchpad.net/percona-toolkit for more information.
1827
1401
# ###########################################################################
1832
1406
use warnings FATAL => 'all';
1833
1407
use English qw(-no_match_vars);
1834
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
1837
$Data::Dumper::Indent = 0;
1838
$Data::Dumper::Quotekeys = 0;
1843
my $have_dbi = $EVAL_ERROR ? 0 : 1;
1408
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1410
use constant PERCONA_TOOLKIT_TEST_USE_DSN_NAMES => $ENV{PERCONA_TOOLKIT_TEST_USE_DSN_NAMES} || 0;
1846
1413
my ( $class, %args ) = @_;
1847
foreach my $arg ( qw(opts) ) {
1414
my @required_args = qw(DSNParser OptionParser);
1415
foreach my $arg ( @required_args ) {
1848
1416
die "I need a $arg argument" unless $args{$arg};
1851
opts => {} # h, P, u, etc. Should come from DSN OPTIONS section in POD.
1853
foreach my $opt ( @{$args{opts}} ) {
1854
if ( !$opt->{key} || !$opt->{desc} ) {
1855
die "Invalid DSN option: ", Dumper($opt);
1857
MKDEBUG && _d('DSN option:',
1859
map { "$_=" . (defined $opt->{$_} ? ($opt->{$_} || '') : 'undef') }
1863
$self->{opts}->{$opt->{key}} = {
1865
desc => $opt->{desc},
1866
copy => $opt->{copy} || 0,
1869
return bless $self, $class;
1873
my ( $self, $prop, $value ) = @_;
1875
MKDEBUG && _d('Setting', $prop, 'property');
1876
$self->{$prop} = $value;
1878
return $self->{$prop};
1882
my ( $self, $dsn, $prev, $defaults ) = @_;
1418
my ($dp, $o) = @args{@required_args};
1420
my $dsn_defaults = $dp->parse_options($o);
1421
my $prev_dsn = $args{prev_dsn};
1422
my $dsn = $args{dsn};
1884
MKDEBUG && _d('No DSN to parse');
1887
MKDEBUG && _d('Parsing', $dsn);
1892
my $opts = $self->{opts};
1894
foreach my $dsn_part ( split(/,/, $dsn) ) {
1895
if ( my ($prop_key, $prop_val) = $dsn_part =~ m/^(.)=(.*)$/ ) {
1896
$given_props{$prop_key} = $prop_val;
1899
MKDEBUG && _d('Interpreting', $dsn_part, 'as h=', $dsn_part);
1900
$given_props{h} = $dsn_part;
1904
foreach my $key ( keys %$opts ) {
1905
MKDEBUG && _d('Finding value for', $key);
1906
$final_props{$key} = $given_props{$key};
1907
if ( !defined $final_props{$key}
1908
&& defined $prev->{$key} && $opts->{$key}->{copy} )
1910
$final_props{$key} = $prev->{$key};
1911
MKDEBUG && _d('Copying value for', $key, 'from previous DSN');
1913
if ( !defined $final_props{$key} ) {
1914
$final_props{$key} = $defaults->{$key};
1915
MKDEBUG && _d('Copying value for', $key, 'from defaults');
1919
foreach my $key ( keys %given_props ) {
1920
die "Unknown DSN option '$key' in '$dsn'. For more details, "
1921
. "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
1922
. "for complete documentation."
1923
unless exists $opts->{$key};
1925
if ( (my $required = $self->prop('required')) ) {
1926
foreach my $key ( keys %$required ) {
1927
die "Missing required DSN option '$key' in '$dsn'. For more details, "
1928
. "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
1929
. "for complete documentation."
1930
unless $final_props{$key};
1934
return \%final_props;
1938
my ( $self, $o ) = @_;
1939
die 'I need an OptionParser object' unless ref $o eq 'OptionParser';
1942
map { "$_=".$o->get($_); }
1943
grep { $o->has($_) && $o->get($_) }
1944
keys %{$self->{opts}}
1946
MKDEBUG && _d('DSN string made from options:', $dsn_string);
1947
return $self->parse($dsn_string);
1951
my ( $self, $dsn, $props ) = @_;
1952
return $dsn unless ref $dsn;
1953
my %allowed = $props ? map { $_=>1 } @$props : ();
1955
map { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) }
1956
grep { defined $dsn->{$_} && $self->{opts}->{$_} }
1957
grep { !$props || $allowed{$_} }
1424
$args{dsn_string} ||= 'h=' . ($dsn_defaults->{h} || 'localhost');
1427
$args{dsn_string}, $prev_dsn, $dsn_defaults);
1429
elsif ( $prev_dsn ) {
1430
$dsn = $dp->copy($prev_dsn, $dsn);
1436
dsn_name => $dp->as_string($dsn, [qw(h P S)]),
1444
return bless $self, $class;
1962
1448
my ( $self ) = @_;
1964
= "DSN syntax is key=value[,key=value...] Allowable DSN keys:\n\n"
1965
. " KEY COPY MEANING\n"
1966
. " === ==== =============================================\n";
1967
my %opts = %{$self->{opts}};
1968
foreach my $key ( sort keys %opts ) {
1970
. ($opts{$key}->{copy} ? 'yes ' : 'no ')
1971
. ($opts{$key}->{desc} || '[No description]')
1974
$usage .= "\n If the DSN is a bareword, the word is treated as the 'h' key.\n";
1978
sub get_cxn_params {
1979
my ( $self, $info ) = @_;
1981
my %opts = %{$self->{opts}};
1982
my $driver = $self->prop('dbidriver') || '';
1983
if ( $driver eq 'Pg' ) {
1984
$dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';'
1985
. join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
1986
grep { defined $info->{$_} }
1990
$dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';'
1991
. join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
1992
grep { defined $info->{$_} }
1994
. ';mysql_read_default_group=client';
1996
MKDEBUG && _d($dsn);
1997
return ($dsn, $info->{u}, $info->{p});
2001
my ( $self, $dbh, $dsn ) = @_;
2002
my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name');
2003
my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()');
2005
$dsn->{h} ||= $vars->{hostname}->{Value};
2006
$dsn->{S} ||= $vars->{'socket'}->{Value};
2007
$dsn->{P} ||= $vars->{port}->{Value};
2008
$dsn->{u} ||= $user;
2013
my ( $self, $cxn_string, $user, $pass, $opts ) = @_;
2019
ShowErrorStatement => 1,
2020
mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0),
2022
@{$defaults}{ keys %$opts } = values %$opts;
2024
if ( $opts->{mysql_use_result} ) {
2025
$defaults->{mysql_use_result} = 1;
2029
die "Cannot connect to MySQL because the Perl DBI module is not "
2030
. "installed or not found. Run 'perl -MDBI' to see the directories "
2031
. "that Perl searches for DBI. If DBI is not installed, try:\n"
2032
. " Debian/Ubuntu apt-get install libdbi-perl\n"
2033
. " RHEL/CentOS yum install perl-DBI\n"
2034
. " OpenSolaris pgk install pkg:/SUNWpmdbi\n";
2040
while ( !$dbh && $tries-- ) {
2041
MKDEBUG && _d($cxn_string, ' ', $user, ' ', $pass,
2042
join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults ));
2045
$dbh = DBI->connect($cxn_string, $user, $pass, $defaults);
2047
if ( $cxn_string =~ m/mysql/i ) {
2050
$sql = 'SELECT @@SQL_MODE';
2051
MKDEBUG && _d($dbh, $sql);
2052
my ($sql_mode) = $dbh->selectrow_array($sql);
2054
$sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1'
2055
. '/*!40101, @@SQL_MODE=\'NO_AUTO_VALUE_ON_ZERO'
2056
. ($sql_mode ? ",$sql_mode" : '')
2058
MKDEBUG && _d($dbh, $sql);
2061
if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) {
2062
$sql = "/*!40101 SET NAMES $charset*/";
2063
MKDEBUG && _d($dbh, ':', $sql);
2065
MKDEBUG && _d('Enabling charset for STDOUT');
2066
if ( $charset eq 'utf8' ) {
2067
binmode(STDOUT, ':utf8')
2068
or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
2071
binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR";
2075
if ( $self->prop('set-vars') ) {
2076
$sql = "SET " . $self->prop('set-vars');
2077
MKDEBUG && _d($dbh, ':', $sql);
2082
if ( !$dbh && $EVAL_ERROR ) {
2083
MKDEBUG && _d($EVAL_ERROR);
2084
if ( $EVAL_ERROR =~ m/not a compiled character set|character set utf8/ ) {
2085
MKDEBUG && _d('Going to try again without utf8 support');
2086
delete $defaults->{mysql_enable_utf8};
2088
elsif ( $EVAL_ERROR =~ m/locate DBD\/mysql/i ) {
2089
die "Cannot connect to MySQL because the Perl DBD::mysql module is "
2090
. "not installed or not found. Run 'perl -MDBD::mysql' to see "
2091
. "the directories that Perl searches for DBD::mysql. If "
2092
. "DBD::mysql is not installed, try:\n"
2093
. " Debian/Ubuntu apt-get install libdbd-mysql-perl\n"
2094
. " RHEL/CentOS yum install perl-DBD-MySQL\n"
2095
. " OpenSolaris pgk install pkg:/SUNWapu13dbd-mysql\n";
1449
my $dsn = $self->{dsn};
1450
my $dp = $self->{DSNParser};
1451
my $o = $self->{OptionParser};
1453
my $dbh = $self->{dbh};
1454
if ( !$dbh || !$dbh->ping() ) {
1455
if ( $o->get('ask-pass') && !$self->{asked_for_pass} ) {
1456
$dsn->{p} = OptionParser::prompt_noecho("Enter MySQL password: ");
1457
$self->{asked_for_pass} = 1;
2103
MKDEBUG && _d('DBH info: ',
2105
Dumper($dbh->selectrow_hashref(
2106
'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')),
2107
'Connection info:', $dbh->{mysql_hostinfo},
2108
'Character set info:', Dumper($dbh->selectall_arrayref(
2109
'SHOW VARIABLES LIKE "character_set%"', { Slice => {}})),
2110
'$DBD::mysql::VERSION:', $DBD::mysql::VERSION,
2111
'$DBI::VERSION:', $DBI::VERSION,
1459
$dbh = $dp->get_dbh($dp->get_cxn_params($dsn), { AutoCommit => 1 });
1461
PTDEBUG && _d($dbh, 'Connected dbh to', $self->{name});
1463
return $self->set_dbh($dbh);
1467
my ($self, $dbh) = @_;
1469
if ( $self->{dbh} && $self->{dbh} == $dbh && $self->{dbh_set} ) {
1470
PTDEBUG && _d($dbh, 'Already set dbh');
1474
PTDEBUG && _d($dbh, 'Setting dbh');
1476
$dbh->{FetchHashKeyName} = 'NAME_lc';
1478
my $sql = 'SELECT @@hostname, @@server_id';
1479
PTDEBUG && _d($dbh, $sql);
1480
my ($hostname, $server_id) = $dbh->selectrow_array($sql);
1481
PTDEBUG && _d($dbh, 'hostname:', $hostname, $server_id);
1483
$self->{hostname} = $hostname;
1486
if ( my $set = $self->{set}) {
1490
$self->{dbh} = $dbh;
1491
$self->{dbh_set} = 1;
2118
my ( $self, $dbh ) = @_;
2119
if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) {
2122
my ( $hostname, $one ) = $dbh->selectrow_array(
2123
'SELECT /*!50038 @@hostname, */ 1');
2128
my ( $self, $dbh ) = @_;
2129
MKDEBUG && $self->print_active_handles($dbh);
2133
sub print_active_handles {
2134
my ( $self, $thing, $level ) = @_;
2136
printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level,
2137
$thing, (($thing->{Type} || '') eq 'st' ? $thing->{Statement} || '' : ''))
2138
or die "Cannot print: $OS_ERROR";
2139
foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) {
2140
$self->print_active_handles( $handle, $level + 1 );
2145
my ( $self, $dsn_1, $dsn_2, %args ) = @_;
2146
die 'I need a dsn_1 argument' unless $dsn_1;
2147
die 'I need a dsn_2 argument' unless $dsn_2;
2151
if ( $args{overwrite} ) {
2152
$val = defined $dsn_1->{$key} ? $dsn_1->{$key} : $dsn_2->{$key};
2155
$val = defined $dsn_2->{$key} ? $dsn_2->{$key} : $dsn_1->{$key};
2158
} keys %{$self->{opts}};
2163
my ($package, undef, $line) = caller 0;
2164
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2165
map { defined $_ ? $_ : 'undef' }
2167
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2172
# ###########################################################################
2173
# End DSNParser package
2174
# ###########################################################################
2176
# ###########################################################################
2177
# VersionParser package
2178
# This package is a copy without comments from the original. The original
2179
# with comments and its test file can be found in the Bazaar repository at,
2180
# lib/VersionParser.pm
2181
# t/lib/VersionParser.t
2182
# See https://launchpad.net/percona-toolkit for more information.
2183
# ###########################################################################
2185
package VersionParser;
2188
use warnings FATAL => 'all';
2189
use English qw(-no_match_vars);
2190
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
2198
my ( $self, $str ) = @_;
2199
my $result = sprintf('%03d%03d%03d', $str =~ m/(\d+)/g);
2200
MKDEBUG && _d($str, 'parses to', $result);
2205
my ( $self, $dbh, $target ) = @_;
2206
if ( !$self->{$dbh} ) {
2207
$self->{$dbh} = $self->parse(
2208
$dbh->selectrow_array('SELECT VERSION()'));
2210
my $result = $self->{$dbh} ge $self->parse($target) ? 1 : 0;
2211
MKDEBUG && _d($self->{$dbh}, 'ge', $target, ':', $result);
2215
sub innodb_version {
2216
my ( $self, $dbh ) = @_;
2218
my $innodb_version = "NO";
2221
grep { $_->{engine} =~ m/InnoDB/i }
2224
@hash{ map { lc $_ } keys %$_ } = values %$_;
2227
@{ $dbh->selectall_arrayref("SHOW ENGINES", {Slice=>{}}) };
2229
MKDEBUG && _d("InnoDB support:", $innodb->{support});
2230
if ( $innodb->{support} =~ m/YES|DEFAULT/i ) {
2231
my $vars = $dbh->selectrow_hashref(
2232
"SHOW VARIABLES LIKE 'innodb_version'");
2233
$innodb_version = !$vars ? "BUILTIN"
2234
: ($vars->{Value} || $vars->{value});
2237
$innodb_version = $innodb->{support}; # probably DISABLED or NO
2241
MKDEBUG && _d("InnoDB version:", $innodb_version);
2242
return $innodb_version;
2246
my ($package, undef, $line) = caller 0;
2247
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2248
map { defined $_ ? $_ : 'undef' }
2250
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2255
# ###########################################################################
2256
# End VersionParser package
2257
# ###########################################################################
2259
# ###########################################################################
2261
# This package is a copy without comments from the original. The original
2262
# with comments and its test file can be found in the Bazaar repository at,
2265
# See https://launchpad.net/percona-toolkit for more information.
2266
# ###########################################################################
2271
use warnings FATAL => 'all';
2272
use English qw(-no_match_vars);
2273
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
2275
( our $before = <<'EOF') =~ s/^ //gm;
2276
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
2277
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
2278
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
2279
/*!40101 SET NAMES utf8 */;
2280
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
2281
/*!40103 SET TIME_ZONE='+00:00' */;
2282
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
2283
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
2284
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
2285
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
2288
( our $after = <<'EOF') =~ s/^ //gm;
2289
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
2290
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
2291
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
2292
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
2293
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
2294
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
2295
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
2296
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
2300
my ( $class, %args ) = @_;
2302
cache => 0, # Afaik no script uses this cache any longer because
2304
return bless $self, $class;
2308
my ( $self, $dbh, $quoter, $db, $tbl, $what ) = @_;
2310
if ( $what eq 'table' ) {
2311
my $ddl = $self->get_create_table($dbh, $quoter, $db, $tbl);
2313
if ( $ddl->[0] eq 'table' ) {
2315
. 'DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . ";\n"
2316
. $ddl->[1] . ";\n";
2319
return 'DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . ";\n"
2320
. '/*!50001 DROP VIEW IF EXISTS '
2321
. $quoter->quote($tbl) . "*/;\n/*!50001 "
2322
. $self->get_tmp_table($dbh, $quoter, $db, $tbl) . "*/;\n";
2325
elsif ( $what eq 'triggers' ) {
2326
my $trgs = $self->get_triggers($dbh, $quoter, $db, $tbl);
2327
if ( $trgs && @$trgs ) {
2328
my $result = $before . "\nDELIMITER ;;\n";
2329
foreach my $trg ( @$trgs ) {
2330
if ( $trg->{sql_mode} ) {
2331
$result .= qq{/*!50003 SET SESSION SQL_MODE='$trg->{sql_mode}' */;;\n};
2333
$result .= "/*!50003 CREATE */ ";
2334
if ( $trg->{definer} ) {
2336
= map { s/'/''/g; "'$_'"; }
2337
split('@', $trg->{definer}, 2);
2338
$result .= "/*!50017 DEFINER=$user\@$host */ ";
2340
$result .= sprintf("/*!50003 TRIGGER %s %s %s ON %s\nFOR EACH ROW %s */;;\n\n",
2341
$quoter->quote($trg->{trigger}),
2342
@{$trg}{qw(timing event)},
2343
$quoter->quote($trg->{table}),
2346
$result .= "DELIMITER ;\n\n/*!50003 SET SESSION SQL_MODE=\@OLD_SQL_MODE */;\n\n";
2353
elsif ( $what eq 'view' ) {
2354
my $ddl = $self->get_create_table($dbh, $quoter, $db, $tbl);
2355
return '/*!50001 DROP TABLE IF EXISTS ' . $quoter->quote($tbl) . "*/;\n"
2356
. '/*!50001 DROP VIEW IF EXISTS ' . $quoter->quote($tbl) . "*/;\n"
2357
. '/*!50001 ' . $ddl->[1] . "*/;\n";
2360
die "You didn't say what to dump.";
2365
my ( $self, $dbh, $quoter, $new ) = @_;
2367
MKDEBUG && _d('No new DB to use');
2370
my $sql = 'USE ' . $quoter->quote($new);
2371
MKDEBUG && _d($dbh, $sql);
1497
return $self->{dbh};
1502
return $self->{dsn};
1507
return $self->{dsn_name} if PERCONA_TOOLKIT_TEST_USE_DSN_NAMES;
1508
return $self->{hostname} || $self->{dsn_name} || 'unknown host';
1513
if ( $self->{dbh} ) {
1514
PTDEBUG && _d('Disconnecting dbh', $self->{dbh}, $self->{name});
1515
$self->{dbh}->disconnect();
2376
sub get_create_table {
2377
my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
2378
if ( !$self->{cache} || !$self->{tables}->{$db}->{$tbl} ) {
2379
my $sql = '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
2380
. q{@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, 'ANSI_QUOTES', ''), ',,', ','), }
2381
. '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
2382
. '@@SQL_QUOTE_SHOW_CREATE := 1 */';
2383
MKDEBUG && _d($sql);
2384
eval { $dbh->do($sql); };
2385
MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
2386
$self->_use_db($dbh, $quoter, $db);
2387
$sql = "SHOW CREATE TABLE " . $quoter->quote($db, $tbl);
2388
MKDEBUG && _d($sql);
2390
eval { $href = $dbh->selectrow_hashref($sql); };
2391
if ( $EVAL_ERROR ) {
2392
warn "Failed to $sql. The table may be damaged.\nError: $EVAL_ERROR";
2396
$sql = '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
2397
. '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */';
2398
MKDEBUG && _d($sql);
2400
my ($key) = grep { m/create table/i } keys %$href;
2402
MKDEBUG && _d('This table is a base table');
2403
$self->{tables}->{$db}->{$tbl} = [ 'table', $href->{$key} ];
2406
MKDEBUG && _d('This table is a view');
2407
($key) = grep { m/create view/i } keys %$href;
2408
$self->{tables}->{$db}->{$tbl} = [ 'view', $href->{$key} ];
2411
return $self->{tables}->{$db}->{$tbl};
2415
my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
2416
MKDEBUG && _d('Get columns for', $db, $tbl);
2417
if ( !$self->{cache} || !$self->{columns}->{$db}->{$tbl} ) {
2418
$self->_use_db($dbh, $quoter, $db);
2419
my $sql = "SHOW COLUMNS FROM " . $quoter->quote($db, $tbl);
2420
MKDEBUG && _d($sql);
2421
my $cols = $dbh->selectall_arrayref($sql, { Slice => {} });
2423
$self->{columns}->{$db}->{$tbl} = [
2426
@row{ map { lc $_ } keys %$_ } = values %$_;
2431
return $self->{columns}->{$db}->{$tbl};
2435
my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
2436
my $result = 'CREATE TABLE ' . $quoter->quote($tbl) . " (\n";
2437
$result .= join(",\n",
2438
map { ' ' . $quoter->quote($_->{field}) . ' ' . $_->{type} }
2439
@{$self->get_columns($dbh, $quoter, $db, $tbl)});
2441
MKDEBUG && _d($result);
2446
my ( $self, $dbh, $quoter, $db, $tbl ) = @_;
2447
if ( !$self->{cache} || !$self->{triggers}->{$db} ) {
2448
$self->{triggers}->{$db} = {};
2449
my $sql = '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
2450
. q{@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, 'ANSI_QUOTES', ''), ',,', ','), }
2451
. '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
2452
. '@@SQL_QUOTE_SHOW_CREATE := 1 */';
2453
MKDEBUG && _d($sql);
2454
eval { $dbh->do($sql); };
2455
MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
2456
$sql = "SHOW TRIGGERS FROM " . $quoter->quote($db);
2457
MKDEBUG && _d($sql);
2458
my $sth = $dbh->prepare($sql);
2461
my $trgs = $sth->fetchall_arrayref({});
2462
foreach my $trg (@$trgs) {
2464
@trg{ map { lc $_ } keys %$trg } = values %$trg;
2465
push @{ $self->{triggers}->{$db}->{ $trg{table} } }, \%trg;
2468
$sql = '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
2469
. '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */';
2470
MKDEBUG && _d($sql);
2474
return $self->{triggers}->{$db}->{$tbl};
2476
return values %{$self->{triggers}->{$db}};
2480
my ( $self, $dbh, $quoter, $like ) = @_;
2481
if ( !$self->{cache} || !$self->{databases} || $like ) {
2482
my $sql = 'SHOW DATABASES';
2486
push @params, $like;
2488
my $sth = $dbh->prepare($sql);
2489
MKDEBUG && _d($sql, @params);
2490
$sth->execute( @params );
2491
my @dbs = map { $_->[0] } @{$sth->fetchall_arrayref()};
2492
$self->{databases} = \@dbs unless $like;
2495
return @{$self->{databases}};
2498
sub get_table_status {
2499
my ( $self, $dbh, $quoter, $db, $like ) = @_;
2500
if ( !$self->{cache} || !$self->{table_status}->{$db} || $like ) {
2501
my $sql = "SHOW TABLE STATUS FROM " . $quoter->quote($db);
2505
push @params, $like;
2507
MKDEBUG && _d($sql, @params);
2508
my $sth = $dbh->prepare($sql);
2509
$sth->execute(@params);
2510
my @tables = @{$sth->fetchall_arrayref({})};
2512
my %tbl; # Make a copy with lowercased keys
2513
@tbl{ map { lc $_ } keys %$_ } = values %$_;
2514
$tbl{engine} ||= $tbl{type} || $tbl{comment};
2518
$self->{table_status}->{$db} = \@tables unless $like;
2521
return @{$self->{table_status}->{$db}};
2524
sub get_table_list {
2525
my ( $self, $dbh, $quoter, $db, $like ) = @_;
2526
if ( !$self->{cache} || !$self->{table_list}->{$db} || $like ) {
2527
my $sql = "SHOW /*!50002 FULL*/ TABLES FROM " . $quoter->quote($db);
2531
push @params, $like;
2533
MKDEBUG && _d($sql, @params);
2534
my $sth = $dbh->prepare($sql);
2535
$sth->execute(@params);
2536
my @tables = @{$sth->fetchall_arrayref()};
2540
engine => ($_->[1] || '') eq 'VIEW' ? 'VIEW' : '',
2544
$self->{table_list}->{$db} = \@tables unless $like;
2547
return @{$self->{table_list}->{$db}};
2551
my ($package, undef, $line) = caller 0;
2552
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2553
map { defined $_ ? $_ : 'undef' }
2555
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2560
# ###########################################################################
2561
# End MySQLDump package
2562
# ###########################################################################
2564
# ###########################################################################
2565
# TableChunker package
2566
# This package is a copy without comments from the original. The original
2567
# with comments and its test file can be found in the Bazaar repository at,
2568
# lib/TableChunker.pm
2569
# t/lib/TableChunker.t
2570
# See https://launchpad.net/percona-toolkit for more information.
2571
# ###########################################################################
2573
package TableChunker;
2576
use warnings FATAL => 'all';
2577
use English qw(-no_match_vars);
2578
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
2580
use POSIX qw(floor ceil);
2581
use List::Util qw(min max);
2583
$Data::Dumper::Indent = 1;
2584
$Data::Dumper::Sortkeys = 1;
2585
$Data::Dumper::Quotekeys = 0;
2588
my ( $class, %args ) = @_;
2589
foreach my $arg ( qw(Quoter MySQLDump) ) {
2590
die "I need a $arg argument" unless $args{$arg};
2593
my %int_types = map { $_ => 1 } qw(bigint date datetime int mediumint smallint time timestamp tinyint year);
2594
my %real_types = map { $_ => 1 } qw(decimal double float);
2598
int_types => \%int_types,
2599
real_types => \%real_types,
2600
EPOCH => '1970-01-01',
2603
return bless $self, $class;
2606
sub find_chunk_columns {
2607
my ( $self, %args ) = @_;
2608
foreach my $arg ( qw(tbl_struct) ) {
2609
die "I need a $arg argument" unless $args{$arg};
2611
my $tbl_struct = $args{tbl_struct};
2613
my @possible_indexes;
2614
foreach my $index ( values %{ $tbl_struct->{keys} } ) {
2616
next unless $index->{type} eq 'BTREE';
2618
next if grep { defined } @{$index->{col_prefixes}};
2620
if ( $args{exact} ) {
2621
next unless $index->{is_unique} && @{$index->{cols}} == 1;
2624
push @possible_indexes, $index;
2626
MKDEBUG && _d('Possible chunk indexes in order:',
2627
join(', ', map { $_->{name} } @possible_indexes));
2629
my $can_chunk_exact = 0;
2631
foreach my $index ( @possible_indexes ) {
2632
my $col = $index->{cols}->[0];
2634
my $col_type = $tbl_struct->{type_for}->{$col};
2635
next unless $self->{int_types}->{$col_type}
2636
|| $self->{real_types}->{$col_type}
2637
|| $col_type =~ m/char/;
2639
push @candidate_cols, { column => $col, index => $index->{name} };
2642
$can_chunk_exact = 1 if $args{exact} && scalar @candidate_cols;
2645
my $chunk_type = $args{exact} ? 'Exact' : 'Inexact';
2646
_d($chunk_type, 'chunkable:',
2647
join(', ', map { "$_->{column} on $_->{index}" } @candidate_cols));
2651
MKDEBUG && _d('Ordering columns by order in tbl, PK first');
2652
if ( $tbl_struct->{keys}->{PRIMARY} ) {
2653
my $pk_first_col = $tbl_struct->{keys}->{PRIMARY}->{cols}->[0];
2654
@result = grep { $_->{column} eq $pk_first_col } @candidate_cols;
2655
@candidate_cols = grep { $_->{column} ne $pk_first_col } @candidate_cols;
2658
my %col_pos = map { $_ => $i++ } @{$tbl_struct->{cols}};
2659
push @result, sort { $col_pos{$a->{column}} <=> $col_pos{$b->{column}} }
2663
_d('Chunkable columns:',
2664
join(', ', map { "$_->{column} on $_->{index}" } @result));
2665
_d('Can chunk exactly:', $can_chunk_exact);
2668
return ($can_chunk_exact, @result);
2671
sub calculate_chunks {
2672
my ( $self, %args ) = @_;
2673
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
2674
foreach my $arg ( @required_args ) {
2675
die "I need a $arg argument" unless defined $args{$arg};
2677
MKDEBUG && _d('Calculate chunks for',
2678
join(", ", map {"$_=".(defined $args{$_} ? $args{$_} : "undef")}
2679
qw(db tbl chunk_col min max rows_in_range chunk_size zero_chunk exact)
2682
if ( !$args{rows_in_range} ) {
2683
MKDEBUG && _d("Empty table");
2687
if ( $args{rows_in_range} < $args{chunk_size} ) {
2688
MKDEBUG && _d("Chunk size larger than rows in range");
2692
my $q = $self->{Quoter};
2693
my $dbh = $args{dbh};
2694
my $chunk_col = $args{chunk_col};
2695
my $tbl_struct = $args{tbl_struct};
2696
my $col_type = $tbl_struct->{type_for}->{$chunk_col};
2697
MKDEBUG && _d('chunk col type:', $col_type);
2700
if ( $tbl_struct->{is_numeric}->{$chunk_col} || $col_type =~ /date|time/ ) {
2701
%chunker = $self->_chunk_numeric(%args);
2703
elsif ( $col_type =~ m/char/ ) {
2704
%chunker = $self->_chunk_char(%args);
2707
die "Cannot chunk $col_type columns";
2709
MKDEBUG && _d("Chunker:", Dumper(\%chunker));
2710
my ($col, $start_point, $end_point, $interval, $range_func)
2711
= @chunker{qw(col start_point end_point interval range_func)};
2714
if ( $start_point < $end_point ) {
2716
push @chunks, "$col = 0" if $chunker{have_zero_chunk};
2720
for ( my $i = $start_point; $i < $end_point; $i += $interval ) {
2721
($beg, $end) = $self->$range_func($dbh, $i, $interval, $end_point);
2723
if ( $iter++ == 0 ) {
2725
($chunker{have_zero_chunk} ? "$col > 0 AND " : "")
2726
."$col < " . $q->quote_val($end);
2729
push @chunks, "$col >= " . $q->quote_val($beg) . " AND $col < " . $q->quote_val($end);
2733
my $chunk_range = lc $args{chunk_range} || 'open';
2734
my $nullable = $args{tbl_struct}->{is_nullable}->{$args{chunk_col}};
2737
push @chunks, "$col >= " . $q->quote_val($beg)
2738
. ($chunk_range eq 'openclosed'
2739
? " AND $col <= " . $q->quote_val($args{max}) : "");
2742
push @chunks, $nullable ? "$col IS NOT NULL" : '1=1';
2745
push @chunks, "$col IS NULL";
2749
MKDEBUG && _d('No chunks; using single chunk 1=1');
2750
push @chunks, '1=1';
2756
sub _chunk_numeric {
2757
my ( $self, %args ) = @_;
2758
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
2759
foreach my $arg ( @required_args ) {
2760
die "I need a $arg argument" unless defined $args{$arg};
2762
my $q = $self->{Quoter};
2763
my $db_tbl = $q->quote($args{db}, $args{tbl});
2764
my $col_type = $args{tbl_struct}->{type_for}->{$args{chunk_col}};
2767
if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {
2768
$range_func = 'range_num';
2770
elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {
2771
$range_func = "range_$col_type";
2773
elsif ( $col_type eq 'datetime' ) {
2774
$range_func = 'range_datetime';
2777
my ($start_point, $end_point);
2779
$start_point = $self->value_to_number(
2780
value => $args{min},
2781
column_type => $col_type,
2784
$end_point = $self->value_to_number(
2785
value => $args{max},
2786
column_type => $col_type,
2790
if ( $EVAL_ERROR ) {
2791
if ( $EVAL_ERROR =~ m/don't know how to chunk/ ) {
2795
die "Error calculating chunk start and end points for table "
2796
. "`$args{tbl_struct}->{name}` on column `$args{chunk_col}` "
2797
. "with min/max values "
2799
map { defined $args{$_} ? $args{$_} : 'undef' } qw(min max))
2802
. "\nVerify that the min and max values are valid for the column. "
2803
. "If they are valid, this error could be caused by a bug in the "
2808
if ( !defined $start_point ) {
2809
MKDEBUG && _d('Start point is undefined');
2812
if ( !defined $end_point || $end_point < $start_point ) {
2813
MKDEBUG && _d('End point is undefined or before start point');
2816
MKDEBUG && _d("Actual chunk range:", $start_point, "to", $end_point);
2818
my $have_zero_chunk = 0;
2819
if ( $args{zero_chunk} ) {
2820
if ( $start_point != $end_point && $start_point >= 0 ) {
2821
MKDEBUG && _d('Zero chunking');
2822
my $nonzero_val = $self->get_nonzero_value(
2825
col => $args{chunk_col},
2826
col_type => $col_type,
2829
$start_point = $self->value_to_number(
2830
value => $nonzero_val,
2831
column_type => $col_type,
2834
$have_zero_chunk = 1;
2837
MKDEBUG && _d("Cannot zero chunk");
2840
MKDEBUG && _d("Using chunk range:", $start_point, "to", $end_point);
2842
my $interval = $args{chunk_size}
2843
* ($end_point - $start_point)
2844
/ $args{rows_in_range};
2845
if ( $self->{int_types}->{$col_type} ) {
2846
$interval = ceil($interval);
2848
$interval ||= $args{chunk_size};
2849
if ( $args{exact} ) {
2850
$interval = $args{chunk_size};
2852
MKDEBUG && _d('Chunk interval:', $interval, 'units');
2855
col => $q->quote($args{chunk_col}),
2856
start_point => $start_point,
2857
end_point => $end_point,
2858
interval => $interval,
2859
range_func => $range_func,
2860
have_zero_chunk => $have_zero_chunk,
2865
my ( $self, %args ) = @_;
2866
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
2867
foreach my $arg ( @required_args ) {
2868
die "I need a $arg argument" unless defined $args{$arg};
2870
my $q = $self->{Quoter};
2871
my $db_tbl = $q->quote($args{db}, $args{tbl});
2872
my $dbh = $args{dbh};
2873
my $chunk_col = $args{chunk_col};
2877
$sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl "
2878
. "ORDER BY `$chunk_col`";
2879
MKDEBUG && _d($dbh, $sql);
2880
$row = $dbh->selectrow_arrayref($sql);
2881
my ($min_col, $max_col) = ($row->[0], $row->[1]);
2883
$sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord";
2884
MKDEBUG && _d($dbh, $sql);
2885
my $ord_sth = $dbh->prepare($sql); # avoid quoting issues
2886
$ord_sth->execute($min_col, $max_col);
2887
$row = $ord_sth->fetchrow_arrayref();
2888
my ($min_col_ord, $max_col_ord) = ($row->[0], $row->[1]);
2889
MKDEBUG && _d("Min/max col char code:", $min_col_ord, $max_col_ord);
2893
MKDEBUG && _d("Table charset:", $args{tbl_struct}->{charset});
2894
if ( ($args{tbl_struct}->{charset} || "") eq "latin1" ) {
2895
my @sorted_latin1_chars = (
2896
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
2897
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
2898
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
2899
74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
2900
88, 89, 90, 91, 92, 93, 94, 95, 96, 123, 124, 125, 126, 161,
2901
162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
2902
176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
2903
190, 191, 215, 216, 222, 223, 247, 255);
2905
my ($first_char, $last_char);
2906
for my $i ( 0..$#sorted_latin1_chars ) {
2907
$first_char = $i and last if $sorted_latin1_chars[$i] >= $min_col_ord;
2909
for my $i ( $first_char..$#sorted_latin1_chars ) {
2910
$last_char = $i and last if $sorted_latin1_chars[$i] >= $max_col_ord;
2913
@chars = map { chr $_; } @sorted_latin1_chars[$first_char..$last_char];
2914
$base = scalar @chars;
2918
my $tmp_tbl = '__maatkit_char_chunking_map';
2919
my $tmp_db_tbl = $q->quote($args{db}, $tmp_tbl);
2920
$sql = "DROP TABLE IF EXISTS $tmp_db_tbl";
2921
MKDEBUG && _d($dbh, $sql);
2923
my $col_def = $args{tbl_struct}->{defs}->{$chunk_col};
2924
$sql = "CREATE TEMPORARY TABLE $tmp_db_tbl ($col_def) "
2926
MKDEBUG && _d($dbh, $sql);
2929
$sql = "INSERT INTO $tmp_db_tbl VALUE (CHAR(?))";
2930
MKDEBUG && _d($dbh, $sql);
2931
my $ins_char_sth = $dbh->prepare($sql); # avoid quoting issues
2932
for my $char_code ( $min_col_ord..$max_col_ord ) {
2933
$ins_char_sth->execute($char_code);
2936
$sql = "SELECT `$chunk_col` FROM $tmp_db_tbl "
2937
. "WHERE `$chunk_col` BETWEEN ? AND ? "
2938
. "ORDER BY `$chunk_col`";
2939
MKDEBUG && _d($dbh, $sql);
2940
my $sel_char_sth = $dbh->prepare($sql);
2941
$sel_char_sth->execute($min_col, $max_col);
2943
@chars = map { $_->[0] } @{ $sel_char_sth->fetchall_arrayref() };
2944
$base = scalar @chars;
2946
$sql = "DROP TABLE $tmp_db_tbl";
2947
MKDEBUG && _d($dbh, $sql);
2950
MKDEBUG && _d("Base", $base, "chars:", @chars);
2953
$sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`";
2954
MKDEBUG && _d($dbh, $sql);
2955
$row = $dbh->selectrow_arrayref($sql);
2956
my $max_col_len = $row->[0];
2957
MKDEBUG && _d("Max column value:", $max_col, $max_col_len);
2959
for my $n_chars ( 1..$max_col_len ) {
2960
$n_values = $base**$n_chars;
2961
if ( $n_values >= $args{chunk_size} ) {
2962
MKDEBUG && _d($n_chars, "chars in base", $base, "expresses",
2963
$n_values, "values");
2968
my $n_chunks = $args{rows_in_range} / $args{chunk_size};
2969
my $interval = floor($n_values / $n_chunks) || 1;
2971
my $range_func = sub {
2972
my ( $self, $dbh, $start, $interval, $max ) = @_;
2973
my $start_char = $self->base_count(
2978
my $end_char = $self->base_count(
2979
count_to => min($max, $start + $interval),
2983
return $start_char, $end_char;
2987
col => $q->quote($chunk_col),
2989
end_point => $n_values,
2990
interval => $interval,
2991
range_func => $range_func,
2995
sub get_first_chunkable_column {
2996
my ( $self, %args ) = @_;
2997
foreach my $arg ( qw(tbl_struct) ) {
2998
die "I need a $arg argument" unless $args{$arg};
3001
my ($exact, @cols) = $self->find_chunk_columns(%args);
3002
my $col = $cols[0]->{column};
3003
my $idx = $cols[0]->{index};
3005
my $wanted_col = $args{chunk_column};
3006
my $wanted_idx = $args{chunk_index};
3007
MKDEBUG && _d("Preferred chunk col/idx:", $wanted_col, $wanted_idx);
3009
if ( $wanted_col && $wanted_idx ) {
3010
foreach my $chunkable_col ( @cols ) {
3011
if ( $wanted_col eq $chunkable_col->{column}
3012
&& $wanted_idx eq $chunkable_col->{index} ) {
3019
elsif ( $wanted_col ) {
3020
foreach my $chunkable_col ( @cols ) {
3021
if ( $wanted_col eq $chunkable_col->{column} ) {
3023
$idx = $chunkable_col->{index};
3028
elsif ( $wanted_idx ) {
3029
foreach my $chunkable_col ( @cols ) {
3030
if ( $wanted_idx eq $chunkable_col->{index} ) {
3031
$col = $chunkable_col->{column};
3038
MKDEBUG && _d('First chunkable col/index:', $col, $idx);
3043
my ( $self, %args ) = @_;
3044
my @required_args = qw(dbh db tbl chunk_size);
3045
foreach my $arg ( @required_args ) {
3046
die "I need a $arg argument" unless $args{$arg};
3048
my ($dbh, $db, $tbl, $chunk_size) = @args{@required_args};
3049
my $q = $self->{Quoter};
3050
my $du = $self->{MySQLDump};
3052
my ($n_rows, $avg_row_length);
3054
my ( $num, $suffix ) = $chunk_size =~ m/^(\d+)([MGk])?$/;
3055
if ( $suffix ) { # Convert to bytes.
3056
$chunk_size = $suffix eq 'k' ? $num * 1_024
3057
: $suffix eq 'M' ? $num * 1_024 * 1_024
3058
: $num * 1_024 * 1_024 * 1_024;
3064
die "Invalid chunk size $chunk_size; must be an integer "
3065
. "with optional suffix kMG";
3068
if ( $suffix || $args{avg_row_length} ) {
3069
my ($status) = $du->get_table_status($dbh, $q, $db, $tbl);
3070
$avg_row_length = $status->{avg_row_length};
3071
if ( !defined $n_rows ) {
3072
$n_rows = $avg_row_length ? ceil($chunk_size / $avg_row_length) : undef;
3076
return $n_rows, $avg_row_length;
3079
sub get_range_statistics {
3080
my ( $self, %args ) = @_;
3081
my @required_args = qw(dbh db tbl chunk_col tbl_struct);
3082
foreach my $arg ( @required_args ) {
3083
die "I need a $arg argument" unless $args{$arg};
3085
my ($dbh, $db, $tbl, $col) = @args{@required_args};
3086
my $where = $args{where};
3087
my $q = $self->{Quoter};
3089
my $col_type = $args{tbl_struct}->{type_for}->{$col};
3090
my $col_is_numeric = $args{tbl_struct}->{is_numeric}->{$col};
3092
my $db_tbl = $q->quote($db, $tbl);
3093
$col = $q->quote($col);
3097
my $sql = "SELECT MIN($col), MAX($col) FROM $db_tbl"
3098
. ($args{index_hint} ? " $args{index_hint}" : "")
3099
. ($where ? " WHERE ($where)" : '');
3100
MKDEBUG && _d($dbh, $sql);
3101
($min, $max) = $dbh->selectrow_array($sql);
3102
MKDEBUG && _d("Actual end points:", $min, $max);
3104
($min, $max) = $self->get_valid_end_points(
3109
col_type => $col_type,
3113
MKDEBUG && _d("Valid end points:", $min, $max);
3115
if ( $EVAL_ERROR ) {
3116
die "Error getting min and max values for table $db_tbl "
3117
. "on column $col: $EVAL_ERROR";
3120
my $sql = "EXPLAIN SELECT * FROM $db_tbl"
3121
. ($args{index_hint} ? " $args{index_hint}" : "")
3122
. ($where ? " WHERE $where" : '');
3123
MKDEBUG && _d($sql);
3124
my $expl = $dbh->selectrow_hashref($sql);
3129
rows_in_range => $expl->{rows},
3134
my ( $self, %args ) = @_;
3135
foreach my $arg ( qw(database table chunks chunk_num query) ) {
3136
die "I need a $arg argument" unless defined $args{$arg};
3138
MKDEBUG && _d('Injecting chunk', $args{chunk_num});
3139
my $query = $args{query};
3140
my $comment = sprintf("/*%s.%s:%d/%d*/",
3141
$args{database}, $args{table},
3142
$args{chunk_num} + 1, scalar @{$args{chunks}});
3143
$query =~ s!/\*PROGRESS_COMMENT\*/!$comment!;
3144
my $where = "WHERE (" . $args{chunks}->[$args{chunk_num}] . ')';
3145
if ( $args{where} && grep { $_ } @{$args{where}} ) {
3147
. join(" AND ", map { "($_)" } grep { $_ } @{$args{where}} )
3150
my $db_tbl = $self->{Quoter}->quote(@args{qw(database table)});
3151
my $index_hint = $args{index_hint} || '';
3153
MKDEBUG && _d('Parameters:',
3154
Dumper({WHERE => $where, DB_TBL => $db_tbl, INDEX_HINT => $index_hint}));
3155
$query =~ s!/\*WHERE\*/! $where!;
3156
$query =~ s!/\*DB_TBL\*/!$db_tbl!;
3157
$query =~ s!/\*INDEX_HINT\*/! $index_hint!;
3158
$query =~ s!/\*CHUNK_NUM\*/! $args{chunk_num} AS chunk_num,!;
3164
sub value_to_number {
3165
my ( $self, %args ) = @_;
3166
my @required_args = qw(column_type dbh);
3167
foreach my $arg ( @required_args ) {
3168
die "I need a $arg argument" unless defined $args{$arg};
3170
my $val = $args{value};
3171
my ($col_type, $dbh) = @args{@required_args};
3172
MKDEBUG && _d('Converting MySQL', $col_type, $val);
3174
return unless defined $val; # value is NULL
3176
my %mysql_conv_func_for = (
3177
timestamp => 'UNIX_TIMESTAMP',
3179
time => 'TIME_TO_SEC',
3180
datetime => 'TO_DAYS',
3184
if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) {
3187
elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) {
3188
my $func = $mysql_conv_func_for{$col_type};
3189
my $sql = "SELECT $func(?)";
3190
MKDEBUG && _d($dbh, $sql, $val);
3191
my $sth = $dbh->prepare($sql);
3192
$sth->execute($val);
3193
($num) = $sth->fetchrow_array();
3195
elsif ( $col_type eq 'datetime' ) {
3196
$num = $self->timestampdiff($dbh, $val);
3199
die "I don't know how to chunk $col_type\n";
3201
MKDEBUG && _d('Converts to', $num);
3206
my ( $self, $dbh, $start, $interval, $max ) = @_;
3207
my $end = min($max, $start + $interval);
3210
$start = sprintf('%.17f', $start) if $start =~ /e/;
3211
$end = sprintf('%.17f', $end) if $end =~ /e/;
3213
$start =~ s/\.(\d{5}).*$/.$1/;
3214
$end =~ s/\.(\d{5}).*$/.$1/;
3216
if ( $end > $start ) {
3217
return ( $start, $end );
3220
die "Chunk size is too small: $end !> $start\n";
3225
my ( $self, $dbh, $start, $interval, $max ) = @_;
3226
my $sql = "SELECT SEC_TO_TIME($start), SEC_TO_TIME(LEAST($max, $start + $interval))";
3227
MKDEBUG && _d($sql);
3228
return $dbh->selectrow_array($sql);
3232
my ( $self, $dbh, $start, $interval, $max ) = @_;
3233
my $sql = "SELECT FROM_DAYS($start), FROM_DAYS(LEAST($max, $start + $interval))";
3234
MKDEBUG && _d($sql);
3235
return $dbh->selectrow_array($sql);
3238
sub range_datetime {
3239
my ( $self, $dbh, $start, $interval, $max ) = @_;
3240
my $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $start SECOND), "
3241
. "DATE_ADD('$self->{EPOCH}', INTERVAL LEAST($max, $start + $interval) SECOND)";
3242
MKDEBUG && _d($sql);
3243
return $dbh->selectrow_array($sql);
3246
sub range_timestamp {
3247
my ( $self, $dbh, $start, $interval, $max ) = @_;
3248
my $sql = "SELECT FROM_UNIXTIME($start), FROM_UNIXTIME(LEAST($max, $start + $interval))";
3249
MKDEBUG && _d($sql);
3250
return $dbh->selectrow_array($sql);
3254
my ( $self, $dbh, $time ) = @_;
3255
my $sql = "SELECT (COALESCE(TO_DAYS('$time'), 0) * 86400 + TIME_TO_SEC('$time')) "
3256
. "- TO_DAYS('$self->{EPOCH} 00:00:00') * 86400";
3257
MKDEBUG && _d($sql);
3258
my ( $diff ) = $dbh->selectrow_array($sql);
3259
$sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $diff SECOND)";
3260
MKDEBUG && _d($sql);
3261
my ( $check ) = $dbh->selectrow_array($sql);
3263
Incorrect datetime math: given $time, calculated $diff but checked to $check.
3264
This could be due to a version of MySQL that overflows on large interval
3265
values to DATE_ADD(), or the given datetime is not a valid date. If not,
3266
please report this as a bug.
3268
unless $check eq $time;
3275
sub get_valid_end_points {
3276
my ( $self, %args ) = @_;
3277
my @required_args = qw(dbh db_tbl col col_type);
3278
foreach my $arg ( @required_args ) {
3279
die "I need a $arg argument" unless $args{$arg};
3281
my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3282
my ($real_min, $real_max) = @args{qw(min max)};
3284
my $err_fmt = "Error finding a valid %s value for table $db_tbl on "
3285
. "column $col. The real %s value %s is invalid and "
3286
. "no other valid values were found. Verify that the table "
3287
. "has at least one valid value for this column"
3288
. ($args{where} ? " where $args{where}." : ".");
3290
my $valid_min = $real_min;
3291
if ( defined $valid_min ) {
3292
MKDEBUG && _d("Validating min end point:", $real_min);
3293
$valid_min = $self->_get_valid_end_point(
3298
die sprintf($err_fmt, 'minimum', 'minimum',
3299
(defined $real_min ? $real_min : "NULL"))
3300
unless defined $valid_min;
3303
my $valid_max = $real_max;
3304
if ( defined $valid_max ) {
3305
MKDEBUG && _d("Validating max end point:", $real_min);
3306
$valid_max = $self->_get_valid_end_point(
3311
die sprintf($err_fmt, 'maximum', 'maximum',
3312
(defined $real_max ? $real_max : "NULL"))
3313
unless defined $valid_max;
3316
return $valid_min, $valid_max;
3319
sub _get_valid_end_point {
3320
my ( $self, %args ) = @_;
3321
my @required_args = qw(dbh db_tbl col col_type);
3322
foreach my $arg ( @required_args ) {
3323
die "I need a $arg argument" unless $args{$arg};
3325
my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3326
my $val = $args{val};
3328
return $val unless defined $val;
3330
my $validate = $col_type =~ m/time|date/ ? \&_validate_temporal_value
3334
MKDEBUG && _d("No validator for", $col_type, "values");
3338
return $val if defined $validate->($dbh, $val);
3340
MKDEBUG && _d("Value is invalid, getting first valid value");
3341
$val = $self->get_first_valid_value(
3344
validate => $validate,
3350
sub get_first_valid_value {
3351
my ( $self, %args ) = @_;
3352
my @required_args = qw(dbh db_tbl col validate endpoint);
3353
foreach my $arg ( @required_args ) {
3354
die "I need a $arg argument" unless $args{$arg};
3356
my ($dbh, $db_tbl, $col, $validate, $endpoint) = @args{@required_args};
3357
my $tries = defined $args{tries} ? $args{tries} : 5;
3358
my $val = $args{val};
3360
return unless defined $val;
3362
my $cmp = $endpoint =~ m/min/i ? '>'
3363
: $endpoint =~ m/max/i ? '<'
3364
: die "Invalid endpoint arg: $endpoint";
3365
my $sql = "SELECT $col FROM $db_tbl "
3366
. ($args{index_hint} ? "$args{index_hint} " : "")
3367
. "WHERE $col $cmp ? AND $col IS NOT NULL "
3368
. ($args{where} ? "AND ($args{where}) " : "")
3369
. "ORDER BY $col LIMIT 1";
3370
MKDEBUG && _d($dbh, $sql);
3371
my $sth = $dbh->prepare($sql);
3373
my $last_val = $val;
3374
while ( $tries-- ) {
3375
$sth->execute($last_val);
3376
my ($next_val) = $sth->fetchrow_array();
3377
MKDEBUG && _d('Next value:', $next_val, '; tries left:', $tries);
3378
if ( !defined $next_val ) {
3379
MKDEBUG && _d('No more rows in table');
3382
if ( defined $validate->($dbh, $next_val) ) {
3383
MKDEBUG && _d('First valid value:', $next_val);
3387
$last_val = $next_val;
3390
$val = undef; # no valid value found
3395
sub _validate_temporal_value {
3396
my ( $dbh, $val ) = @_;
3397
my $sql = "SELECT IF(TIME_FORMAT(?,'%H:%i:%s')=?, TIME_TO_SEC(?), TO_DAYS(?))";
3400
MKDEBUG && _d($dbh, $sql, $val);
3401
my $sth = $dbh->prepare($sql);
3402
$sth->execute($val, $val, $val, $val);
3403
($res) = $sth->fetchrow_array();
3406
if ( $EVAL_ERROR ) {
3407
MKDEBUG && _d($EVAL_ERROR);
3412
sub get_nonzero_value {
3413
my ( $self, %args ) = @_;
3414
my @required_args = qw(dbh db_tbl col col_type);
3415
foreach my $arg ( @required_args ) {
3416
die "I need a $arg argument" unless $args{$arg};
3418
my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args};
3419
my $tries = defined $args{tries} ? $args{tries} : 5;
3420
my $val = $args{val};
3422
my $is_nonzero = $col_type =~ m/time|date/ ? \&_validate_temporal_value
3423
: sub { return $_[1]; };
3425
if ( !$is_nonzero->($dbh, $val) ) { # quasi-double-negative, sorry
3426
MKDEBUG && _d('Discarding zero value:', $val);
3427
my $sql = "SELECT $col FROM $db_tbl "
3428
. ($args{index_hint} ? "$args{index_hint} " : "")
3429
. "WHERE $col > ? AND $col IS NOT NULL "
3430
. ($args{where} ? "AND ($args{where}) " : '')
3431
. "ORDER BY $col LIMIT 1";
3432
MKDEBUG && _d($sql);
3433
my $sth = $dbh->prepare($sql);
3435
my $last_val = $val;
3436
while ( $tries-- ) {
3437
$sth->execute($last_val);
3438
my ($next_val) = $sth->fetchrow_array();
3439
if ( $is_nonzero->($dbh, $next_val) ) {
3440
MKDEBUG && _d('First non-zero value:', $next_val);
3444
$last_val = $next_val;
3447
$val = undef; # no non-zero value found
3454
my ( $self, %args ) = @_;
3455
my @required_args = qw(count_to base symbols);
3456
foreach my $arg ( @required_args ) {
3457
die "I need a $arg argument" unless defined $args{$arg};
3459
my ($n, $base, $symbols) = @args{@required_args};
3461
return $symbols->[0] if $n == 0;
3463
my $highest_power = floor(log($n)/log($base));
3464
if ( $highest_power == 0 ){
3465
return $symbols->[$n];
3469
for my $power ( 0..$highest_power ) {
3470
push @base_powers, ($base**$power) || 1;
3474
foreach my $base_power ( reverse @base_powers ) {
3475
my $multiples = floor($n / $base_power);
3476
push @base_multiples, $multiples;
3477
$n -= $multiples * $base_power;
3480
return join('', map { $symbols->[$_] } @base_multiples);
3484
my ($package, undef, $line) = caller 0;
3485
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
3486
map { defined $_ ? $_ : 'undef' }
3488
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
3493
# ###########################################################################
3494
# End TableChunker package
1521
my ($package, undef, $line) = caller 0;
1522
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1523
map { defined $_ ? $_ : 'undef' }
1525
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1530
# ###########################################################################
3495
1532
# ###########################################################################
3497
1534
# ###########################################################################
3572
1651
# ###########################################################################
3574
1653
# ###########################################################################
1654
# VersionParser package
1655
# This package is a copy without comments from the original. The original
1656
# with comments and its test file can be found in the Bazaar repository at,
1657
# lib/VersionParser.pm
1658
# t/lib/VersionParser.t
1659
# See https://launchpad.net/percona-toolkit for more information.
1660
# ###########################################################################
1662
package VersionParser;
1665
use warnings FATAL => 'all';
1666
use English qw(-no_match_vars);
1667
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1675
my ( $self, $str ) = @_;
1676
my $result = sprintf('%03d%03d%03d', $str =~ m/(\d+)/g);
1677
PTDEBUG && _d($str, 'parses to', $result);
1682
my ( $self, $dbh, $target ) = @_;
1683
if ( !$self->{$dbh} ) {
1684
$self->{$dbh} = $self->parse(
1685
$dbh->selectrow_array('SELECT VERSION()'));
1687
my $result = $self->{$dbh} ge $self->parse($target) ? 1 : 0;
1688
PTDEBUG && _d($self->{$dbh}, 'ge', $target, ':', $result);
1692
sub innodb_version {
1693
my ( $self, $dbh ) = @_;
1695
my $innodb_version = "NO";
1698
grep { $_->{engine} =~ m/InnoDB/i }
1701
@hash{ map { lc $_ } keys %$_ } = values %$_;
1704
@{ $dbh->selectall_arrayref("SHOW ENGINES", {Slice=>{}}) };
1706
PTDEBUG && _d("InnoDB support:", $innodb->{support});
1707
if ( $innodb->{support} =~ m/YES|DEFAULT/i ) {
1708
my $vars = $dbh->selectrow_hashref(
1709
"SHOW VARIABLES LIKE 'innodb_version'");
1710
$innodb_version = !$vars ? "BUILTIN"
1711
: ($vars->{Value} || $vars->{value});
1714
$innodb_version = $innodb->{support}; # probably DISABLED or NO
1718
PTDEBUG && _d("InnoDB version:", $innodb_version);
1719
return $innodb_version;
1723
my ($package, undef, $line) = caller 0;
1724
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1725
map { defined $_ ? $_ : 'undef' }
1727
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1732
# ###########################################################################
1733
# End VersionParser package
1734
# ###########################################################################
1736
# ###########################################################################
1737
# TableParser package
1738
# This package is a copy without comments from the original. The original
1739
# with comments and its test file can be found in the Bazaar repository at,
1740
# lib/TableParser.pm
1741
# t/lib/TableParser.t
1742
# See https://launchpad.net/percona-toolkit for more information.
1743
# ###########################################################################
1745
package TableParser;
1748
use warnings FATAL => 'all';
1749
use English qw(-no_match_vars);
1750
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1753
$Data::Dumper::Indent = 1;
1754
$Data::Dumper::Sortkeys = 1;
1755
$Data::Dumper::Quotekeys = 0;
1758
my ( $class, %args ) = @_;
1759
my @required_args = qw(Quoter);
1760
foreach my $arg ( @required_args ) {
1761
die "I need a $arg argument" unless $args{$arg};
1763
my $self = { %args };
1764
return bless $self, $class;
1767
sub get_create_table {
1768
my ( $self, $dbh, $db, $tbl ) = @_;
1769
die "I need a dbh parameter" unless $dbh;
1770
die "I need a db parameter" unless $db;
1771
die "I need a tbl parameter" unless $tbl;
1772
my $q = $self->{Quoter};
1774
my $sql = '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
1775
. q{@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, 'ANSI_QUOTES', ''), ',,', ','), }
1776
. '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
1777
. '@@SQL_QUOTE_SHOW_CREATE := 1 */';
1778
PTDEBUG && _d($sql);
1779
eval { $dbh->do($sql); };
1780
PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
1782
$sql = 'USE ' . $q->quote($db);
1783
PTDEBUG && _d($dbh, $sql);
1786
$sql = "SHOW CREATE TABLE " . $q->quote($db, $tbl);
1787
PTDEBUG && _d($sql);
1789
eval { $href = $dbh->selectrow_hashref($sql); };
1790
if ( $EVAL_ERROR ) {
1791
PTDEBUG && _d($EVAL_ERROR);
1795
$sql = '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
1796
. '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */';
1797
PTDEBUG && _d($sql);
1800
my ($key) = grep { m/create table/i } keys %$href;
1802
PTDEBUG && _d('This table is a base table');
1803
$href->{$key} =~ s/\b[ ]{2,}/ /g;
1804
$href->{$key} .= "\n";
1807
PTDEBUG && _d('This table is a view');
1808
($key) = grep { m/create view/i } keys %$href;
1811
return $href->{$key};
1815
my ( $self, $ddl, $opts ) = @_;
1818
if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
1819
die "Cannot parse table definition; is ANSI quoting "
1820
. "enabled or SQL_QUOTE_SHOW_CREATE disabled?";
1823
my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/;
1824
(undef, $name) = $self->{Quoter}->split_unquote($name) if $name;
1826
$ddl =~ s/(`[^`]+`)/\L$1/g;
1828
my $engine = $self->get_engine($ddl);
1830
my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
1831
my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
1832
PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
1835
@def_for{@cols} = @defs;
1838
my (%type_for, %is_nullable, %is_numeric, %is_autoinc);
1839
foreach my $col ( @cols ) {
1840
my $def = $def_for{$col};
1841
my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
1842
die "Can't determine column type for $def" unless $type;
1843
$type_for{$col} = $type;
1844
if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
1846
$is_numeric{$col} = 1;
1848
if ( $def !~ m/NOT NULL/ ) {
1850
$is_nullable{$col} = 1;
1852
$is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
1855
my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable);
1857
my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/;
1862
col_posn => { map { $cols[$_] => $_ } 0..$#cols },
1863
is_col => { map { $_ => 1 } @cols },
1864
null_cols => \@null,
1865
is_nullable => \%is_nullable,
1866
is_autoinc => \%is_autoinc,
1867
clustered_key => $clustered_key,
1870
numeric_cols => \@nums,
1871
is_numeric => \%is_numeric,
1873
type_for => \%type_for,
1874
charset => $charset,
1879
my ( $self, $tbl ) = @_;
1883
(($a ne 'PRIMARY') <=> ($b ne 'PRIMARY'))
1884
|| ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} )
1885
|| ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} )
1886
|| ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) )
1889
$tbl->{keys}->{$_}->{type} eq 'BTREE'
1891
sort keys %{$tbl->{keys}};
1893
PTDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes));
1897
sub find_best_index {
1898
my ( $self, $tbl, $index ) = @_;
1901
($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
1905
die "Index '$index' does not exist in table";
1908
($best) = $self->sort_indexes($tbl);
1911
PTDEBUG && _d('Best index found is', $best);
1915
sub find_possible_keys {
1916
my ( $self, $dbh, $database, $table, $quoter, $where ) = @_;
1917
return () unless $where;
1918
my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table)
1919
. ' WHERE ' . $where;
1920
PTDEBUG && _d($sql);
1921
my $expl = $dbh->selectrow_hashref($sql);
1922
$expl = { map { lc($_) => $expl->{$_} } keys %$expl };
1923
if ( $expl->{possible_keys} ) {
1924
PTDEBUG && _d('possible_keys =', $expl->{possible_keys});
1925
my @candidates = split(',', $expl->{possible_keys});
1926
my %possible = map { $_ => 1 } @candidates;
1927
if ( $expl->{key} ) {
1928
PTDEBUG && _d('MySQL chose', $expl->{key});
1929
unshift @candidates, grep { $possible{$_} } split(',', $expl->{key});
1930
PTDEBUG && _d('Before deduping:', join(', ', @candidates));
1932
@candidates = grep { !$seen{$_}++ } @candidates;
1934
PTDEBUG && _d('Final list:', join(', ', @candidates));
1938
PTDEBUG && _d('No keys in possible_keys');
1944
my ( $self, %args ) = @_;
1945
my @required_args = qw(dbh db tbl);
1946
foreach my $arg ( @required_args ) {
1947
die "I need a $arg argument" unless $args{$arg};
1949
my ($dbh, $db, $tbl) = @args{@required_args};
1950
my $q = $self->{Quoter};
1951
my $db_tbl = $q->quote($db, $tbl);
1952
PTDEBUG && _d('Checking', $db_tbl);
1954
my $sql = "SHOW TABLES FROM " . $q->quote($db)
1955
. ' LIKE ' . $q->literal_like($tbl);
1956
PTDEBUG && _d($sql);
1959
$row = $dbh->selectrow_arrayref($sql);
1961
if ( $EVAL_ERROR ) {
1962
PTDEBUG && _d($EVAL_ERROR);
1965
if ( !$row->[0] || $row->[0] ne $tbl ) {
1966
PTDEBUG && _d('Table does not exist');
1970
PTDEBUG && _d('Table exists; no privs to check');
1971
return 1 unless $args{all_privs};
1973
$sql = "SHOW FULL COLUMNS FROM $db_tbl";
1974
PTDEBUG && _d($sql);
1976
$row = $dbh->selectrow_hashref($sql);
1978
if ( $EVAL_ERROR ) {
1979
PTDEBUG && _d($EVAL_ERROR);
1982
if ( !scalar keys %$row ) {
1983
PTDEBUG && _d('Table has no columns:', Dumper($row));
1986
my $privs = $row->{privileges} || $row->{Privileges};
1988
$sql = "DELETE FROM $db_tbl LIMIT 0";
1989
PTDEBUG && _d($sql);
1993
my $can_delete = $EVAL_ERROR ? 0 : 1;
1995
PTDEBUG && _d('User privs on', $db_tbl, ':', $privs,
1996
($can_delete ? 'delete' : ''));
1998
if ( !($privs =~ m/select/ && $privs =~ m/insert/ && $privs =~ m/update/
2000
PTDEBUG && _d('User does not have all privs');
2004
PTDEBUG && _d('User has all privs');
2009
my ( $self, $ddl, $opts ) = @_;
2010
my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
2011
PTDEBUG && _d('Storage engine:', $engine);
2012
return $engine || undef;
2016
my ( $self, $ddl, $opts, $is_nullable ) = @_;
2017
my $engine = $self->get_engine($ddl);
2019
my $clustered_key = undef;
2022
foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) {
2024
next KEY if $key =~ m/FOREIGN/;
2027
PTDEBUG && _d('Parsed key:', $key_ddl);
2029
if ( $engine !~ m/MEMORY|HEAP/ ) {
2030
$key =~ s/USING HASH/USING BTREE/;
2033
my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
2034
my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
2035
$type = $type || $special || 'BTREE';
2036
if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000'
2037
&& $engine =~ m/HEAP|MEMORY/i )
2039
$type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP
2042
my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
2043
my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
2046
foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) {
2047
my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/;
2049
push @col_prefixes, $prefix;
2053
PTDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols));
2060
col_prefixes => \@col_prefixes,
2061
is_unique => $unique,
2062
is_nullable => scalar(grep { $is_nullable->{$_} } @cols),
2063
is_col => { map { $_ => 1 } @cols },
2067
if ( $engine =~ m/InnoDB/i && !$clustered_key ) {
2068
my $this_key = $keys->{$name};
2069
if ( $this_key->{name} eq 'PRIMARY' ) {
2070
$clustered_key = 'PRIMARY';
2072
elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) {
2073
$clustered_key = $this_key->{name};
2075
PTDEBUG && $clustered_key && _d('This key is the clustered key');
2079
return $keys, $clustered_key;
2083
my ( $self, $ddl, $opts ) = @_;
2084
my $q = $self->{Quoter};
2088
$ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg )
2090
my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/;
2091
my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/;
2092
my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/;
2094
my ($db, $tbl) = $q->split_unquote($parent, $opts->{database});
2095
my %parent_tbl = (tbl => $tbl);
2096
$parent_tbl{db} = $db if $db;
2098
if ( $parent !~ m/\./ && $opts->{database} ) {
2099
$parent = $q->quote($opts->{database}) . ".$parent";
2105
cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ],
2106
parent_tbl => \%parent_tbl,
2107
parent_tblname => $parent,
2108
parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ],
2109
parent_colnames=> $parent_cols,
2117
sub remove_auto_increment {
2118
my ( $self, $ddl ) = @_;
2119
$ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m;
2123
sub get_table_status {
2124
my ( $self, $dbh, $db, $like ) = @_;
2125
my $q = $self->{Quoter};
2126
my $sql = "SHOW TABLE STATUS FROM " . $q->quote($db);
2130
push @params, $like;
2132
PTDEBUG && _d($sql, @params);
2133
my $sth = $dbh->prepare($sql);
2134
eval { $sth->execute(@params); };
2136
PTDEBUG && _d($EVAL_ERROR);
2139
my @tables = @{$sth->fetchall_arrayref({})};
2141
my %tbl; # Make a copy with lowercased keys
2142
@tbl{ map { lc $_ } keys %$_ } = values %$_;
2143
$tbl{engine} ||= $tbl{type} || $tbl{comment};
2151
my ($package, undef, $line) = caller 0;
2152
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2153
map { defined $_ ? $_ : 'undef' }
2155
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2160
# ###########################################################################
2161
# End TableParser package
2162
# ###########################################################################
2164
# ###########################################################################
2165
# TableNibbler package
2166
# This package is a copy without comments from the original. The original
2167
# with comments and its test file can be found in the Bazaar repository at,
2168
# lib/TableNibbler.pm
2169
# t/lib/TableNibbler.t
2170
# See https://launchpad.net/percona-toolkit for more information.
2171
# ###########################################################################
2173
package TableNibbler;
2176
use warnings FATAL => 'all';
2177
use English qw(-no_match_vars);
2178
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
2181
my ( $class, %args ) = @_;
2182
my @required_args = qw(TableParser Quoter);
2183
foreach my $arg ( @required_args ) {
2184
die "I need a $arg argument" unless $args{$arg};
2186
my $self = { %args };
2187
return bless $self, $class;
2190
sub generate_asc_stmt {
2191
my ( $self, %args ) = @_;
2192
my @required_args = qw(tbl_struct index);
2193
foreach my $arg ( @required_args ) {
2194
die "I need a $arg argument" unless defined $args{$arg};
2196
my ($tbl_struct, $index) = @args{@required_args};
2197
my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}};
2198
my $q = $self->{Quoter};
2200
die "Index '$index' does not exist in table"
2201
unless exists $tbl_struct->{keys}->{$index};
2202
PTDEBUG && _d('Will ascend index', $index);
2204
my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
2205
if ( $args{asc_first} ) {
2206
@asc_cols = $asc_cols[0];
2207
PTDEBUG && _d('Ascending only first column');
2209
PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
2212
my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
2213
foreach my $col ( @asc_cols ) {
2214
if ( !exists $col_posn{$col} ) {
2216
$col_posn{$col} = $#cols;
2218
push @asc_slice, $col_posn{$col};
2220
PTDEBUG && _d('Will ascend, in ordinal position:', join(', ', @asc_slice));
2232
foreach my $cmp ( qw(< <= >= >) ) {
2233
$cmp_where = $self->generate_cmp_where(
2235
slice => \@asc_slice,
2238
is_nullable => $tbl_struct->{is_nullable},
2240
$asc_stmt->{boundaries}->{$cmp} = $cmp_where->{where};
2242
my $cmp = $args{asc_only} ? '>' : '>=';
2243
$asc_stmt->{where} = $asc_stmt->{boundaries}->{$cmp};
2244
$asc_stmt->{slice} = $cmp_where->{slice};
2245
$asc_stmt->{scols} = $cmp_where->{scols};
2251
sub generate_cmp_where {
2252
my ( $self, %args ) = @_;
2253
foreach my $arg ( qw(type slice cols is_nullable) ) {
2254
die "I need a $arg arg" unless defined $args{$arg};
2256
my @slice = @{$args{slice}};
2257
my @cols = @{$args{cols}};
2258
my $is_nullable = $args{is_nullable};
2259
my $type = $args{type};
2260
my $q = $self->{Quoter};
2262
(my $cmp = $type) =~ s/=//;
2264
my @r_slice; # Resulting slice columns, by ordinal
2265
my @r_scols; # Ditto, by name
2268
foreach my $i ( 0 .. $#slice ) {
2271
foreach my $j ( 0 .. $i - 1 ) {
2272
my $ord = $slice[$j];
2273
my $col = $cols[$ord];
2274
my $quo = $q->quote($col);
2275
if ( $is_nullable->{$col} ) {
2276
push @clause, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))";
2277
push @r_slice, $ord, $ord;
2278
push @r_scols, $col, $col;
2281
push @clause, "$quo = ?";
2282
push @r_slice, $ord;
2283
push @r_scols, $col;
2287
my $ord = $slice[$i];
2288
my $col = $cols[$ord];
2289
my $quo = $q->quote($col);
2290
my $end = $i == $#slice; # Last clause of the whole group.
2291
if ( $is_nullable->{$col} ) {
2292
if ( $type =~ m/=/ && $end ) {
2293
push @clause, "(? IS NULL OR $quo $type ?)";
2295
elsif ( $type =~ m/>/ ) {
2296
push @clause, "((? IS NULL AND $quo IS NOT NULL) OR ($quo $cmp ?))";
2298
else { # If $type =~ m/</ ) {
2299
push @clause, "((? IS NOT NULL AND $quo IS NULL) OR ($quo $cmp ?))";
2301
push @r_slice, $ord, $ord;
2302
push @r_scols, $col, $col;
2305
push @r_slice, $ord;
2306
push @r_scols, $col;
2307
push @clause, ($type =~ m/=/ && $end ? "$quo $type ?" : "$quo $cmp ?");
2310
push @clauses, '(' . join(' AND ', @clause) . ')';
2312
my $result = '(' . join(' OR ', @clauses) . ')';
2321
sub generate_del_stmt {
2322
my ( $self, %args ) = @_;
2324
my $tbl = $args{tbl_struct};
2325
my @cols = $args{cols} ? @{$args{cols}} : ();
2326
my $tp = $self->{TableParser};
2327
my $q = $self->{Quoter};
2332
my $index = $tp->find_best_index($tbl, $args{index});
2333
die "Cannot find an ascendable index in table" unless $index;
2336
@del_cols = @{$tbl->{keys}->{$index}->{cols}};
2339
@del_cols = @{$tbl->{cols}};
2341
PTDEBUG && _d('Columns needed for DELETE:', join(', ', @del_cols));
2343
my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
2344
foreach my $col ( @del_cols ) {
2345
if ( !exists $col_posn{$col} ) {
2347
$col_posn{$col} = $#cols;
2349
push @del_slice, $col_posn{$col};
2351
PTDEBUG && _d('Ordinals needed for DELETE:', join(', ', @del_slice));
2362
foreach my $i ( 0 .. $#del_slice ) {
2363
my $ord = $del_slice[$i];
2364
my $col = $cols[$ord];
2365
my $quo = $q->quote($col);
2366
if ( $tbl->{is_nullable}->{$col} ) {
2367
push @clauses, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))";
2368
push @{$del_stmt->{slice}}, $ord, $ord;
2369
push @{$del_stmt->{scols}}, $col, $col;
2372
push @clauses, "$quo = ?";
2373
push @{$del_stmt->{slice}}, $ord;
2374
push @{$del_stmt->{scols}}, $col;
2378
$del_stmt->{where} = '(' . join(' AND ', @clauses) . ')';
2383
sub generate_ins_stmt {
2384
my ( $self, %args ) = @_;
2385
foreach my $arg ( qw(ins_tbl sel_cols) ) {
2386
die "I need a $arg argument" unless $args{$arg};
2388
my $ins_tbl = $args{ins_tbl};
2389
my @sel_cols = @{$args{sel_cols}};
2391
die "You didn't specify any SELECT columns" unless @sel_cols;
2395
for my $i ( 0..$#sel_cols ) {
2396
next unless $ins_tbl->{is_col}->{$sel_cols[$i]};
2397
push @ins_cols, $sel_cols[$i];
2398
push @ins_slice, $i;
2403
slice => \@ins_slice,
2408
my ($package, undef, $line) = caller 0;
2409
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2410
map { defined $_ ? $_ : 'undef' }
2412
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2417
# ###########################################################################
2418
# End TableNibbler package
2419
# ###########################################################################
2421
# ###########################################################################
3575
2422
# MasterSlave package
3576
2423
# This package is a copy without comments from the original. The original
3577
2424
# with comments and its test file can be found in the Bazaar repository at,
4187
3119
# ###########################################################################
4189
3121
# ###########################################################################
3122
# RowChecksum package
3123
# This package is a copy without comments from the original. The original
3124
# with comments and its test file can be found in the Bazaar repository at,
3125
# lib/RowChecksum.pm
3126
# t/lib/RowChecksum.t
3127
# See https://launchpad.net/percona-toolkit for more information.
3128
# ###########################################################################
3130
package RowChecksum;
3133
use warnings FATAL => 'all';
3134
use English qw(-no_match_vars);
3135
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
3137
use List::Util qw(max);
3139
$Data::Dumper::Indent = 1;
3140
$Data::Dumper::Sortkeys = 1;
3141
$Data::Dumper::Quotekeys = 0;
3144
my ( $class, %args ) = @_;
3145
foreach my $arg ( qw(OptionParser Quoter) ) {
3146
die "I need a $arg argument" unless defined $args{$arg};
3148
my $self = { %args };
3149
return bless $self, $class;
3152
sub make_row_checksum {
3153
my ( $self, %args ) = @_;
3154
my @required_args = qw(tbl);
3155
foreach my $arg( @required_args ) {
3156
die "I need a $arg argument" unless $args{$arg};
3158
my ($tbl) = @args{@required_args};
3160
my $o = $self->{OptionParser};
3161
my $q = $self->{Quoter};
3162
my $tbl_struct = $tbl->{tbl_struct};
3163
my $func = $args{func} || uc($o->get('function'));
3164
my $cols = $self->get_checksum_columns(%args);
3167
if ( !$args{no_cols} ) {
3171
if ( $col =~ m/\+ 0/ ) {
3172
my ($real_col) = /^(\S+)/;
3173
$col .= " AS $real_col";
3175
elsif ( $col =~ m/TRIM/ ) {
3176
my ($real_col) = m/TRIM\(([^\)]+)\)/;
3177
$col .= " AS $real_col";
3180
} @{$cols->{select}})
3184
if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
3185
my $sep = $o->get('separator') || '#';
3189
my @nulls = grep { $cols->{allowed}->{$_} } @{$tbl_struct->{null_cols}};
3191
my $bitmap = "CONCAT("
3192
. join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls)
3194
push @{$cols->{select}}, $bitmap;
3197
$query .= @{$cols->{select}} > 1
3198
? "$func(CONCAT_WS('$sep', " . join(', ', @{$cols->{select}}) . '))'
3199
: "$func($cols->{select}->[0])";
3202
my $fnv_func = uc $func;
3203
$query .= "$fnv_func(" . join(', ', @{$cols->{select}}) . ')';
3206
PTDEBUG && _d('Row checksum:', $query);
3210
sub make_chunk_checksum {
3211
my ( $self, %args ) = @_;
3212
my @required_args = qw(tbl);
3213
foreach my $arg( @required_args ) {
3214
die "I need a $arg argument" unless $args{$arg};
3216
if ( !$args{dbh} && !($args{func} && $args{crc_width} && $args{crc_type}) ) {
3217
die "I need a dbh argument"
3219
my ($tbl) = @args{@required_args};
3220
my $o = $self->{OptionParser};
3221
my $q = $self->{Quoter};
3223
my %crc_args = $self->get_crc_args(%args);
3224
PTDEBUG && _d("Checksum strat:", Dumper(\%crc_args));
3226
my $row_checksum = $self->make_row_checksum(
3232
if ( $crc_args{crc_type} =~ m/int$/ ) {
3233
$crc = "COALESCE(LOWER(CONV(BIT_XOR(CAST($row_checksum AS UNSIGNED)), "
3237
my $slices = $self->_make_xor_slices(
3238
row_checksum => $row_checksum,
3241
$crc = "COALESCE(LOWER(CONCAT($slices)), 0)";
3244
my $select = "COUNT(*) AS cnt, $crc AS crc";
3245
PTDEBUG && _d('Chunk checksum:', $select);
3249
sub get_checksum_columns {
3250
my ($self, %args) = @_;
3251
my @required_args = qw(tbl);
3252
foreach my $arg( @required_args ) {
3253
die "I need a $arg argument" unless $args{$arg};
3255
my ($tbl) = @args{@required_args};
3256
my $o = $self->{OptionParser};
3257
my $q = $self->{Quoter};
3259
my $trim = $o->get('trim');
3260
my $float_precision = $o->get('float-precision');
3262
my $tbl_struct = $tbl->{tbl_struct};
3263
my $ignore_col = $o->get('ignore-columns') || {};
3264
my $all_cols = $o->get('columns') || $tbl_struct->{cols};
3265
my %cols = map { lc($_) => 1 } grep { !$ignore_col->{$_} } @$all_cols;
3269
my $type = $tbl_struct->{type_for}->{$_};
3270
my $result = $q->quote($_);
3271
if ( $type eq 'timestamp' ) {
3274
elsif ( $float_precision && $type =~ m/float|double/ ) {
3275
$result = "ROUND($result, $float_precision)";
3277
elsif ( $trim && $type =~ m/varchar/ ) {
3278
$result = "TRIM($result)";
3283
$cols{$_} && !$seen{$_}++
3285
@{$tbl_struct->{cols}};
3294
my ($self, %args) = @_;
3295
my $func = $args{func} || $self->_get_hash_func(%args);
3296
my $crc_width = $args{crc_width}|| $self->_get_crc_width(%args, func=>$func);
3297
my $crc_type = $args{crc_type} || $self->_get_crc_type(%args, func=>$func);
3299
if ( $args{dbh} && $crc_type !~ m/int$/ ) {
3300
$opt_slice = $self->_optimize_xor(%args, func=>$func);
3305
crc_width => $crc_width,
3306
crc_type => $crc_type,
3307
opt_slice => $opt_slice,
3311
# Picks the first hash function that the server accepts.
#
# Required args:
#   dbh - database handle used to probe each candidate function.
#
# Candidates are tried in order: the OptionParser 'function' value (if set),
# then CRC32, FNV1A_64, FNV_64, MD5, SHA1.  Each is probed by running
# "SELECT <func>('test-string')"; a probe that throws disqualifies the
# candidate.
#
# Returns the name of the first function whose probe does not throw.
# Dies with the accumulated per-function reasons if every probe throws.
# NOTE(review): a probe only "fails" by throwing, so this presumably relies
# on the handle having RaiseError enabled -- confirm against the caller.
sub _get_hash_func {
   my ( $self, %args ) = @_;
   my @required_args = qw(dbh);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($dbh) = @args{@required_args};
   my $o     = $self->{OptionParser};
   my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);

   # A user-specified function takes priority over the built-in candidates.
   if ( my $func = $o->get('function') ) {
      unshift @funcs, $func;
   }

   my $error = '';
   foreach my $func ( @funcs ) {
      eval {
         my $sql = "SELECT $func('test-string')";
         PTDEBUG && _d($sql);
         $dbh->do($sql);
      };
      if ( my $e = $EVAL_ERROR ) {
         # Prefer the short server message; fall back to the whole error so
         # that an unexpected error format still disqualifies the function
         # instead of silently selecting it.
         my ($reason) = $e =~ m/failed: (.*?) at \S+ line/;
         if ( !defined $reason ) {
            ($reason = "$e") =~ s/\s+\z//;
         }
         $error .= qq{$func cannot be used because "$reason"\n};
         PTDEBUG && _d($func, 'cannot be used because', $reason);
         next;
      }
      PTDEBUG && _d('Chosen hash func:', $func);
      return $func;
   }
   die $error || 'No hash functions (CRC32, MD5, etc.) are available';
}
3342
sub _get_crc_width {
3343
my ( $self, %args ) = @_;
3344
my @required_args = qw(dbh func);
3345
foreach my $arg( @required_args ) {
3346
die "I need a $arg argument" unless $args{$arg};
3348
my ($dbh, $func) = @args{@required_args};
3351
if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
3353
my ($val) = $dbh->selectrow_array("SELECT $func('a')");
3354
$crc_width = max(16, length($val));
3361
my ( $self, %args ) = @_;
3362
my @required_args = qw(dbh func);
3363
foreach my $arg( @required_args ) {
3364
die "I need a $arg argument" unless $args{$arg};
3366
my ($dbh, $func) = @args{@required_args};
3370
my $sql = "SELECT $func('a')";
3371
my $sth = $dbh->prepare($sql);
3374
$type = $sth->{mysql_type_name}->[0];
3375
$length = $sth->{mysql_length}->[0];
3376
PTDEBUG && _d($sql, $type, $length);
3377
if ( $type eq 'bigint' && $length < 20 ) {
3382
PTDEBUG && _d('crc_type:', $type, 'length:', $length);
3387
my ( $self, %args ) = @_;
3388
my @required_args = qw(dbh func);
3389
foreach my $arg( @required_args ) {
3390
die "I need a $arg argument" unless $args{$arg};
3392
my ($dbh, $func) = @args{@required_args};
3394
die "$func never needs BIT_XOR optimization"
3395
if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;
3398
my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
3401
my $crc_width = length($unsliced) < 16 ? 16 : length($unsliced);
3403
do { # Try different positions till sliced result equals non-sliced.
3404
PTDEBUG && _d('Trying slice', $opt_slice);
3405
$dbh->do('SET @crc := "", @cnt := 0');
3406
my $slices = $self->_make_xor_slices(
3407
row_checksum => "\@crc := $func('a')",
3408
crc_width => $crc_width,
3409
opt_slice => $opt_slice,
3412
my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
3413
$sliced = ($dbh->selectrow_array($sql))[0];
3414
if ( $sliced ne $unsliced ) {
3415
PTDEBUG && _d('Slice', $opt_slice, 'does not work');
3419
} while ( $start < $crc_width && $sliced ne $unsliced );
3421
if ( $sliced eq $unsliced ) {
3422
PTDEBUG && _d('Slice', $opt_slice, 'works');
3426
PTDEBUG && _d('No slice works');
3431
# Builds the comma-separated list of BIT_XOR slice expressions that together
# cover a hex checksum of crc_width characters, 16 hex characters per slice.
#
# Required args:
#   row_checksum - SQL expression producing the row checksum.
#   crc_width    - width of the checksum in hex characters.
# Optional args:
#   opt_slice    - index of the slice that should assign the checksum to the
#                  @crc user variable; the other slices then read @crc.
#
# When opt_slice is undefined or not a valid slice index, every slice embeds
# row_checksum directly.
#
# Returns the slice expressions joined with ', '.
sub _make_xor_slices {
   my ( $self, %args ) = @_;
   my @required_args = qw(row_checksum crc_width);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($row_checksum, $crc_width) = @args{@required_args};
   my ($opt_slice) = $args{opt_slice};

   my @slices;
   for ( my $start = 1; $start <= $crc_width; $start += 16 ) {
      # The last slice may be shorter than 16 characters.
      my $len = $crc_width - $start + 1;
      if ( $len > 16 ) {
         $len = 16;
      }
      push @slices,
         "LPAD(CONV(BIT_XOR("
         . "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
         . ", 10, 16), $len, '0')";
   }

   if ( defined $opt_slice && $opt_slice < @slices ) {
      # Only the chosen slice computes the checksum; the rest reuse @crc.
      $slices[$opt_slice] =~ s/\@crc/\@crc := $row_checksum/;
   }
   else {
      # No usable optimization: every slice computes the checksum itself.
      s/\@crc/$row_checksum/ for @slices;
   }

   return join(', ', @slices);
}
3462
sub find_replication_differences {
3463
my ($self, %args) = @_;
3464
my @required_args = qw(dbh repl_table);
3465
foreach my $arg( @required_args ) {
3466
die "I need a $arg argument" unless $args{$arg};
3468
my ($dbh, $repl_table) = @args{@required_args};
3471
= "SELECT CONCAT(db, '.', tbl) AS `table`, "
3472
. "chunk, chunk_index, lower_boundary, upper_boundary, "
3473
. "COALESCE(this_cnt-master_cnt, 0) AS cnt_diff, "
3475
. "this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc), 0"
3476
. ") AS crc_diff, this_cnt, master_cnt, this_crc, master_crc "
3477
. "FROM $repl_table "
3478
. "WHERE (master_cnt <> this_cnt OR master_crc <> this_crc "
3479
. "OR ISNULL(master_crc) <> ISNULL(this_crc))"
3480
. ($args{where} ? " AND ($args{where})" : "");
3481
PTDEBUG && _d($sql);
3482
my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} });
3487
my ($package, undef, $line) = caller 0;
3488
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
3489
map { defined $_ ? $_ : 'undef' }
3491
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
3496
# ###########################################################################
3497
# End RowChecksum package
3498
# ###########################################################################
3500
# ###########################################################################
3501
# NibbleIterator package
3502
# This package is a copy without comments from the original. The original
3503
# with comments and its test file can be found in the Bazaar repository at,
3504
# lib/NibbleIterator.pm
3505
# t/lib/NibbleIterator.t
3506
# See https://launchpad.net/percona-toolkit for more information.
3507
# ###########################################################################
3509
package NibbleIterator;
3512
use warnings FATAL => 'all';
3513
use English qw(-no_match_vars);
3514
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
3517
$Data::Dumper::Indent = 1;
3518
$Data::Dumper::Sortkeys = 1;
3519
$Data::Dumper::Quotekeys = 0;
3522
my ( $class, %args ) = @_;
3523
my @required_args = qw(Cxn tbl chunk_size OptionParser Quoter TableNibbler TableParser);
3524
foreach my $arg ( @required_args ) {
3525
die "I need a $arg argument" unless $args{$arg};
3527
my ($cxn, $tbl, $chunk_size, $o, $q) = @args{@required_args};
3529
my $where = $o->get('where');
3530
my ($row_est, $mysql_index) = get_row_estimate(%args, where => $where);
3531
my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
3532
? $row_est <= $chunk_size * $o->get('chunk-size-limit')
3534
PTDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no');
3537
&& !defined $args{resume}->{lower_boundary}
3538
&& !defined $args{resume}->{upper_boundary} ) {
3539
PTDEBUG && _d('Resuming from one nibble table');
3543
my $index = _find_best_index(%args, mysql_index => $mysql_index);
3544
if ( !$index && !$one_nibble ) {
3545
die "There is no good index and the table is oversized.";
3548
my $tbl_struct = $tbl->{tbl_struct};
3549
my $ignore_col = $o->get('ignore-columns') || {};
3550
my $all_cols = $o->get('columns') || $tbl_struct->{cols};
3551
my @cols = grep { !$ignore_col->{$_} } @$all_cols;
3553
if ( $one_nibble ) {
3555
= ($args{dml} ? "$args{dml} " : "SELECT ")
3556
. ($args{select} ? $args{select}
3557
: join(', ', map { $q->quote($_) } @cols))
3558
. " FROM " . $q->quote(@{$tbl}{qw(db tbl)})
3559
. ($where ? " WHERE $where" : '')
3560
. " /*checksum table*/";
3561
PTDEBUG && _d('One nibble statement:', $nibble_sql);
3563
my $explain_nibble_sql
3565
. ($args{select} ? $args{select}
3566
: join(', ', map { $q->quote($_) } @cols))
3567
. " FROM " . $q->quote(@{$tbl}{qw(db tbl)})
3568
. ($where ? " WHERE $where" : '')
3569
. " /*explain checksum table*/";
3570
PTDEBUG && _d('Explain one nibble statement:', $explain_nibble_sql);
3576
nibble_sql => $nibble_sql,
3577
explain_nibble_sql => $explain_nibble_sql,
3581
my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
3583
my $asc = $args{TableNibbler}->generate_asc_stmt(
3585
tbl_struct => $tbl->{tbl_struct},
3590
PTDEBUG && _d('Ascend params:', Dumper($asc));
3592
my $from = $q->quote(@{$tbl}{qw(db tbl)}) . " FORCE INDEX(`$index`)";
3593
my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
3596
= "SELECT /*!40001 SQL_NO_CACHE */ "
3597
. join(', ', map { $q->quote($_) } @{$asc->{scols}})
3599
. ($where ? " WHERE $where" : '')
3600
. " ORDER BY $order_by"
3602
. " /*first lower boundary*/";
3603
PTDEBUG && _d('First lower boundary statement:', $first_lb_sql);
3606
if ( $args{resume} ) {
3608
= "SELECT /*!40001 SQL_NO_CACHE */ "
3609
. join(', ', map { $q->quote($_) } @{$asc->{scols}})
3611
. " WHERE " . $asc->{boundaries}->{'>'}
3612
. ($where ? " AND ($where)" : '')
3613
. " ORDER BY $order_by"
3615
. " /*resume lower boundary*/";
3616
PTDEBUG && _d('Resume lower boundary statement:', $resume_lb_sql);
3620
= "SELECT /*!40001 SQL_NO_CACHE */ "
3621
. join(', ', map { $q->quote($_) } @{$asc->{scols}})
3623
. ($where ? " WHERE $where" : '')
3625
. join(' DESC, ', map {$q->quote($_)} @{$index_cols}) . ' DESC'
3627
. " /*last upper boundary*/";
3628
PTDEBUG && _d('Last upper boundary statement:', $last_ub_sql);
3631
= "SELECT /*!40001 SQL_NO_CACHE */ "
3632
. join(', ', map { $q->quote($_) } @{$asc->{scols}})
3634
. " WHERE " . $asc->{boundaries}->{'>='}
3635
. ($where ? " AND ($where)" : '')
3636
. " ORDER BY $order_by"
3638
. " /*next chunk boundary*/";
3639
PTDEBUG && _d('Upper boundary statement:', $ub_sql);
3642
= ($args{dml} ? "$args{dml} " : "SELECT ")
3643
. ($args{select} ? $args{select}
3644
: join(', ', map { $q->quote($_) } @{$asc->{cols}}))
3646
. " WHERE " . $asc->{boundaries}->{'>='} # lower boundary
3647
. " AND " . $asc->{boundaries}->{'<='} # upper boundary
3648
. ($where ? " AND ($where)" : '')
3649
. ($args{order_by} ? " ORDER BY $order_by" : "")
3650
. " /*checksum chunk*/";
3651
PTDEBUG && _d('Nibble statement:', $nibble_sql);
3653
my $explain_nibble_sql
3655
. ($args{select} ? $args{select}
3656
: join(', ', map { $q->quote($_) } @{$asc->{cols}}))
3658
. " WHERE " . $asc->{boundaries}->{'>='} # lower boundary
3659
. " AND " . $asc->{boundaries}->{'<='} # upper boundary
3660
. ($where ? " AND ($where)" : '')
3661
. ($args{order_by} ? " ORDER BY $order_by" : "")
3662
. " /*explain checksum chunk*/";
3663
PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
3665
my $limit = $chunk_size - 1;
3666
PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
3672
first_lb_sql => $first_lb_sql,
3673
last_ub_sql => $last_ub_sql,
3675
nibble_sql => $nibble_sql,
3676
explain_ub_sql => "EXPLAIN $ub_sql",
3677
explain_nibble_sql => $explain_nibble_sql,
3678
resume_lb_sql => $resume_lb_sql,
3680
columns => $asc->{scols},
3683
boundaries => $asc->{boundaries},
3684
order_by => $order_by,
3689
$self->{row_est} = $row_est;
3690
$self->{nibbleno} = 0;
3691
$self->{have_rows} = 0;
3693
$self->{oktonibble} = 1;
3695
return bless $self, $class;
3701
if ( !$self->{oktonibble} ) {
3702
PTDEBUG && _d('Not ok to nibble');
3706
my %callback_args = (
3707
Cxn => $self->{Cxn},
3708
tbl => $self->{tbl},
3709
NibbleIterator => $self,
3712
if ($self->{nibbleno} == 0) {
3713
$self->_prepare_sths();
3714
$self->_get_bounds();
3715
if ( my $callback = $self->{callbacks}->{init} ) {
3716
$self->{oktonibble} = $callback->(%callback_args);
3717
PTDEBUG && _d('init callback returned', $self->{oktonibble});
3718
if ( !$self->{oktonibble} ) {
3719
$self->{no_more_boundaries} = 1;
3726
while ( $self->{have_rows} || $self->_next_boundaries() ) {
3727
if ( !$self->{have_rows} ) {
3728
$self->{nibbleno}++;
3729
PTDEBUG && _d($self->{nibble_sth}->{Statement}, 'params:',
3730
join(', ', (@{$self->{lower}}, @{$self->{upper}})));
3731
if ( my $callback = $self->{callbacks}->{exec_nibble} ) {
3732
$self->{have_rows} = $callback->(%callback_args);
3735
$self->{nibble_sth}->execute(@{$self->{lower}}, @{$self->{upper}});
3736
$self->{have_rows} = $self->{nibble_sth}->rows();
3738
PTDEBUG && _d($self->{have_rows}, 'rows in nibble', $self->{nibbleno});
3741
if ( $self->{have_rows} ) {
3742
my $row = $self->{nibble_sth}->fetchrow_arrayref();
3745
PTDEBUG && _d('Row', $self->{rowno}, 'in nibble',$self->{nibbleno});
3750
PTDEBUG && _d('No rows in nibble or nibble skipped');
3751
if ( my $callback = $self->{callbacks}->{after_nibble} ) {
3752
$callback->(%callback_args);
3755
$self->{have_rows} = 0;
3758
PTDEBUG && _d('Done nibbling');
3759
if ( my $callback = $self->{callbacks}->{done} ) {
3760
$callback->(%callback_args);
3768
return $self->{nibbleno};
3771
sub set_nibble_number {
3772
my ($self, $n) = @_;
3773
die "I need a number" unless $n;
3774
$self->{nibbleno} = $n;
3775
PTDEBUG && _d('Set new nibble number:', $n);
3781
return $self->{index};
3787
nibble => $self->{nibble_sth},
3788
explain_nibble => $self->{explain_nibble_sth},
3789
upper_boundary => $self->{ub_sth},
3790
explain_upper_boundary => $self->{explain_ub_sth},
3797
first_lower => $self->{first_lower},
3798
lower => $self->{lower},
3799
upper => $self->{upper},
3800
next_lower => $self->{next_lower},
3801
last_upper => $self->{last_upper},
3806
my ($self, $boundary, $values) = @_;
3807
die "I need a boundary parameter"
3809
die "Invalid boundary: $boundary"
3810
unless $boundary =~ m/^(?:lower|upper|next_lower|last_upper)$/;
3811
die "I need a values arrayref parameter"
3812
unless $values && ref $values eq 'ARRAY';
3813
$self->{$boundary} = $values;
3814
PTDEBUG && _d('Set new', $boundary, 'boundary:', Dumper($values));
3820
return $self->{one_nibble};
3825
return $self->{limit} + 1;
3828
# Sets the chunk size used for subsequent nibbles.
#
# Params:
#   $limit - new chunk size in rows; must be greater than zero.
#
# The value is stored as $limit - 1 in $self->{limit}.  Does nothing when
# the iterator is in one-nibble mode.
#
# Dies if $limit is missing, zero, or negative.
sub set_chunk_size {
   my ($self, $limit) = @_;
   return if $self->{one_nibble};
   # A negative size would previously slip through and yield a negative limit.
   die "Chunk size must be > 0" unless $limit && $limit > 0;
   $self->{limit} = $limit - 1;
   PTDEBUG && _d('Set new chunk size (LIMIT):', $limit);
   return;
}
3839
return $self->{sql};
3842
sub more_boundaries {
3844
return !$self->{no_more_boundaries};
3849
return $self->{row_est};
3852
# Chooses the index to nibble the table with.  Plain function, not a method.
#
# Named args:
#   Cxn, tbl, TableParser - unpacked here; tbl->{tbl_struct}->{keys} supplies
#                           the index definitions.
#   chunk_index           - optional index requested by the user; ignored if
#                           it does not exist in the table.
#   mysql_index           - optional index used when no user index applies.
#   All args are also passed through to _get_index_cardinality.
#
# Selection order:
#   1. The wanted index (user's, else MySQL's) if it is unique.
#   2. Without a wanted index: the first PRIMARY or unique index in
#      TableParser::sort_indexes order.
#   3. Otherwise the candidate with the highest cardinality; ties are broken
#      in favor of the index with more columns.
#
# Returns the chosen index name, or undef if there are no candidates.
sub _find_best_index {
   my (%args) = @_;
   my @required_args = qw(Cxn tbl TableParser);
   my ($cxn, $tbl, $tp) = @args{@required_args};
   my $tbl_struct = $tbl->{tbl_struct};
   my $indexes    = $tbl_struct->{keys};

   my $want_index = $args{chunk_index};
   if ( $want_index ) {
      PTDEBUG && _d('User wants to use index', $want_index);
      if ( !exists $indexes->{$want_index} ) {
         PTDEBUG && _d('Cannot use user index because it does not exist');
         $want_index = undef;
      }
   }

   if ( !$want_index && $args{mysql_index} ) {
      PTDEBUG && _d('MySQL wants to use index', $args{mysql_index});
      $want_index = $args{mysql_index};
   }

   my $best_index;
   my @possible_indexes;
   if ( $want_index ) {
      if ( $indexes->{$want_index}->{is_unique} ) {
         PTDEBUG && _d('Will use wanted index');
         $best_index = $want_index;
      }
      else {
         PTDEBUG && _d('Wanted index is a possible index');
         push @possible_indexes, $want_index;
      }
   }
   else {
      PTDEBUG && _d('Auto-selecting best index');
      foreach my $index ( $tp->sort_indexes($tbl_struct) ) {
         if ( $index eq 'PRIMARY' || $indexes->{$index}->{is_unique} ) {
            $best_index = $index;
            last;
         }
         else {
            push @possible_indexes, $index;
         }
      }
   }

   if ( !$best_index && @possible_indexes ) {
      PTDEBUG && _d('No PRIMARY or unique indexes;',
         'will use index with highest cardinality');
      foreach my $index ( @possible_indexes ) {
         $indexes->{$index}->{cardinality} = _get_index_cardinality(
            %args,
            index => $index,
         );
      }
      # Highest cardinality first.  The comparison must be $b against $a;
      # comparing $b with itself always yields 0 and ignores cardinality.
      @possible_indexes = sort {
         my $cmp
            = $indexes->{$b}->{cardinality} <=> $indexes->{$a}->{cardinality};
         if ( $cmp == 0 ) {
            # Same cardinality: prefer the index covering more columns.
            $cmp = scalar @{$indexes->{$b}->{cols}}
               <=> scalar @{$indexes->{$a}->{cols}};
         }
         $cmp;
      } @possible_indexes;
      $best_index = $possible_indexes[0];
   }

   PTDEBUG && _d('Best index:', $best_index);
   return $best_index;
}
3923
sub _get_index_cardinality {
3925
my @required_args = qw(Cxn tbl index Quoter);
3926
my ($cxn, $tbl, $index, $q) = @args{@required_args};
3928
my $sql = "SHOW INDEXES FROM " . $q->quote(@{$tbl}{qw(db tbl)})
3929
. " WHERE Key_name = '$index'";
3930
PTDEBUG && _d($sql);
3931
my $cardinality = 1;
3932
my $rows = $cxn->dbh()->selectall_hashref($sql, 'key_name');
3933
foreach my $row ( values %$rows ) {
3934
$cardinality *= $row->{cardinality} if $row->{cardinality};
3936
PTDEBUG && _d('Index', $index, 'cardinality:', $cardinality);
3937
return $cardinality;
3940
sub get_row_estimate {
3942
my @required_args = qw(Cxn tbl OptionParser TableParser Quoter);
3943
my ($cxn, $tbl, $o, $tp, $q) = @args{@required_args};
3945
if ( $args{where} ) {
3946
PTDEBUG && _d('WHERE clause, using explain plan for row estimate');
3947
my $table = $q->quote(@{$tbl}{qw(db tbl)});
3948
my $sql = "EXPLAIN SELECT * FROM $table WHERE $args{where}";
3949
PTDEBUG && _d($sql);
3950
my $expl = $cxn->dbh()->selectrow_hashref($sql);
3951
PTDEBUG && _d(Dumper($expl));
3952
return ($expl->{rows} || 0), $expl->{key};
3955
PTDEBUG && _d('No WHERE clause, using table status for row estimate');
3956
return $tbl->{tbl_status}->{rows} || 0;
3962
PTDEBUG && _d('Preparing statement handles');
3964
my $dbh = $self->{Cxn}->dbh();
3966
$self->{nibble_sth} = $dbh->prepare($self->{nibble_sql});
3967
$self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
3969
if ( !$self->{one_nibble} ) {
3970
$self->{ub_sth} = $dbh->prepare($self->{ub_sql});
3971
$self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
3980
if ( $self->{one_nibble} ) {
3981
if ( $self->{resume} ) {
3982
$self->{no_more_boundaries} = 1;
3987
my $dbh = $self->{Cxn}->dbh();
3989
$self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql});
3990
PTDEBUG && _d('First lower boundary:', Dumper($self->{first_lower}));
3992
if ( my $nibble = $self->{resume} ) {
3993
if ( defined $nibble->{lower_boundary}
3994
&& defined $nibble->{upper_boundary} ) {
3995
my $sth = $dbh->prepare($self->{resume_lb_sql});
3996
my @ub = split ',', $nibble->{upper_boundary};
3997
PTDEBUG && _d($sth->{Statement}, 'params:', @ub);
3999
$self->{next_lower} = $sth->fetchrow_arrayref();
4004
$self->{next_lower} = $self->{first_lower};
4006
PTDEBUG && _d('Next lower boundary:', Dumper($self->{next_lower}));
4008
if ( !$self->{next_lower} ) {
4009
PTDEBUG && _d('At end of table, or no more boundaries to resume');
4010
$self->{no_more_boundaries} = 1;
4013
$self->{last_upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
4014
PTDEBUG && _d('Last upper boundary:', Dumper($self->{last_upper}));
4019
sub _next_boundaries {
4022
if ( $self->{no_more_boundaries} ) {
4023
PTDEBUG && _d('No more boundaries');
4024
return; # stop nibbling
4027
if ( $self->{one_nibble} ) {
4028
$self->{lower} = $self->{upper} = [];
4029
$self->{no_more_boundaries} = 1; # for next call
4030
return 1; # continue nibbling
4033
if ( $self->identical_boundaries($self->{lower}, $self->{next_lower}) ) {
4034
PTDEBUG && _d('Infinite loop detected');
4035
my $tbl = $self->{tbl};
4036
my $index = $tbl->{tbl_struct}->{keys}->{$self->{index}};
4037
my $n_cols = scalar @{$index->{cols}};
4038
my $chunkno = $self->{nibbleno};
4039
die "Possible infinite loop detected! "
4040
. "The lower boundary for chunk $chunkno is "
4041
. "<" . join(', ', @{$self->{lower}}) . "> and the lower "
4042
. "boundary for chunk " . ($chunkno + 1) . " is also "
4043
. "<" . join(', ', @{$self->{next_lower}}) . ">. "
4044
. "This usually happens when using a non-unique single "
4045
. "column index. The current chunk index for table "
4046
. "$tbl->{db}.$tbl->{tbl} is $self->{index} which is"
4047
. ($index->{is_unique} ? '' : ' not') . " unique and covers "
4048
. ($n_cols > 1 ? "$n_cols columns" : "1 column") . ".\n";
4050
$self->{lower} = $self->{next_lower};
4052
if ( my $callback = $self->{callbacks}->{next_boundaries} ) {
4053
my $oktonibble = $callback->(
4054
Cxn => $self->{Cxn},
4055
tbl => $self->{tbl},
4056
NibbleIterator => $self,
4058
PTDEBUG && _d('next_boundaries callback returned', $oktonibble);
4059
if ( !$oktonibble ) {
4060
$self->{no_more_boundaries} = 1;
4061
return; # stop nibbling
4065
PTDEBUG && _d($self->{ub_sth}->{Statement}, 'params:',
4066
join(', ', @{$self->{lower}}), $self->{limit});
4067
$self->{ub_sth}->execute(@{$self->{lower}}, $self->{limit});
4068
my $boundary = $self->{ub_sth}->fetchall_arrayref();
4069
PTDEBUG && _d('Next boundary:', Dumper($boundary));
4070
if ( $boundary && @$boundary ) {
4071
$self->{upper} = $boundary->[0]; # this nibble
4072
if ( $boundary->[1] ) {
4073
$self->{next_lower} = $boundary->[1]; # next nibble
4076
$self->{no_more_boundaries} = 1; # for next call
4077
PTDEBUG && _d('Last upper boundary:', Dumper($boundary->[0]));
4081
$self->{no_more_boundaries} = 1; # for next call
4082
$self->{upper} = $self->{last_upper};
4083
PTDEBUG && _d('Last upper boundary:', Dumper($self->{upper}));
4085
$self->{ub_sth}->finish();
4087
return 1; # continue nibbling
4090
# Tells whether two boundaries hold the same values.
#
# Params:
#   $b1, $b2 - arrayrefs of boundary values, or undef.
#
# Returns 1 if both are undef, or if both are arrayrefs whose values are
# pairwise equal as strings; returns 0 otherwise.  An undef value is equal
# only to another undef value, so such values no longer trip the
# "uninitialized" warning that is fatal in this file.
#
# Dies if the two arrayrefs have different numbers of values.
sub identical_boundaries {
   my ($self, $b1, $b2) = @_;

   # Exactly one boundary is missing: they cannot be identical.
   return 0 if ($b1 && !$b2) || (!$b1 && $b2);

   # Both boundaries are missing: trivially identical.
   return 1 if !$b1 && !$b2;

   die "Boundaries have different numbers of values"
      if scalar @$b1 != scalar @$b2;  # shouldn't happen

   for my $i ( 0..$#{$b1} ) {
      my ($v1, $v2) = ($b1->[$i], $b2->[$i]);
      if ( defined $v1 && defined $v2 ) {
         return 0 if $v1 ne $v2; # diff
      }
      elsif ( defined $v1 || defined $v2 ) {
         return 0; # undef vs. a real value
      }
   }
   return 1;
}
4108
foreach my $key ( keys %$self ) {
4109
if ( $key =~ m/_sth$/ ) {
4110
PTDEBUG && _d('Finish', $key);
4111
$self->{$key}->finish();
4118
my ($package, undef, $line) = caller 0;
4119
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4120
map { defined $_ ? $_ : 'undef' }
4122
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4127
# ###########################################################################
4128
# End NibbleIterator package
4129
# ###########################################################################
4131
# ###########################################################################
4132
# OobNibbleIterator package
4133
# This package is a copy without comments from the original. The original
4134
# with comments and its test file can be found in the Bazaar repository at,
4135
# lib/OobNibbleIterator.pm
4136
# t/lib/OobNibbleIterator.t
4137
# See https://launchpad.net/percona-toolkit for more information.
4138
# ###########################################################################
4140
package OobNibbleIterator;
4141
use base 'NibbleIterator';
4144
use warnings FATAL => 'all';
4145
use English qw(-no_match_vars);
4146
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
4149
$Data::Dumper::Indent = 1;
4150
$Data::Dumper::Sortkeys = 1;
4151
$Data::Dumper::Quotekeys = 0;
4154
my ( $class, %args ) = @_;
4155
my @required_args = qw();
4156
foreach my $arg ( @required_args ) {
4157
die "I need a $arg argument" unless $args{$arg};
4160
my $self = $class->SUPER::new(%args);
4162
my $q = $self->{Quoter};
4163
my $o = $self->{OptionParser};
4164
my $where = $o->get('where');
4166
if ( !$self->one_nibble() ) {
4168
= ($args{past_dml} || "SELECT ")
4169
. ($args{past_select}
4170
|| join(', ', map { $q->quote($_) } @{$self->{sql}->{columns}}))
4171
. " FROM " . $self->{sql}->{from};
4174
= ($where ? " AND ($where)" : '')
4175
. " ORDER BY " . $self->{sql}->{order_by};
4179
. " WHERE " . $self->{sql}->{boundaries}->{'<'}
4181
. " /*past lower chunk*/";
4182
PTDEBUG && _d('Past lower statement:', $past_lower_sql);
4184
my $explain_past_lower_sql
4186
. ($args{past_select}
4187
|| join(', ', map { $q->quote($_) } @{$self->{sql}->{columns}}))
4188
. " FROM " . $self->{sql}->{from}
4189
. " WHERE " . $self->{sql}->{boundaries}->{'<'}
4191
. " /*explain past lower chunk*/";
4192
PTDEBUG && _d('Past lower statement:', $explain_past_lower_sql);
4196
. " WHERE " . $self->{sql}->{boundaries}->{'>'}
4198
. " /*past upper chunk*/";
4199
PTDEBUG && _d('Past upper statement:', $past_upper_sql);
4201
my $explain_past_upper_sql
4203
. ($args{past_select}
4204
|| join(', ', map { $q->quote($_) } @{$self->{sql}->{columns}}))
4205
. " FROM " . $self->{sql}->{from}
4206
. " WHERE " . $self->{sql}->{boundaries}->{'>'}
4208
. " /*explain past upper chunk*/";
4209
PTDEBUG && _d('Past upper statement:', $explain_past_upper_sql);
4211
$self->{past_lower_sql} = $past_lower_sql;
4212
$self->{past_upper_sql} = $past_upper_sql;
4213
$self->{explain_past_lower_sql} = $explain_past_lower_sql;
4214
$self->{explain_past_upper_sql} = $explain_past_upper_sql;
4216
$self->{past_nibbles} = [qw(lower upper)];
4217
if ( my $nibble = $args{resume} ) {
4218
if ( !defined $nibble->{lower_boundary}
4219
|| !defined $nibble->{upper_boundary} ) {
4220
$self->{past_nibbles} = !defined $nibble->{lower_boundary}
4225
PTDEBUG && _d('Nibble past', @{$self->{past_nibbles}});
4229
return bless $self, $class;
4232
sub more_boundaries {
4234
return $self->SUPER::more_boundaries() if $self->{one_nibble};
4235
return scalar @{$self->{past_nibbles}} ? 1 : 0;
4241
my $sths = $self->SUPER::statements();
4243
$sths->{past_lower_boundary} = $self->{past_lower_sth};
4244
$sths->{past_upper_boundary} = $self->{past_upper_sth};
4251
PTDEBUG && _d('Preparing out-of-bound statement handles');
4253
if ( !$self->{one_nibble} ) {
4254
my $dbh = $self->{Cxn}->dbh();
4255
$self->{past_lower_sth} = $dbh->prepare($self->{past_lower_sql});
4256
$self->{past_upper_sth} = $dbh->prepare($self->{past_upper_sql});
4257
$self->{explain_past_lower_sth} = $dbh->prepare($self->{explain_past_lower_sql});
4258
$self->{explain_past_upper_sth} = $dbh->prepare($self->{explain_past_upper_sql});
4261
return $self->SUPER::_prepare_sths();
4264
sub _next_boundaries {
4267
return $self->SUPER::_next_boundaries() unless $self->{no_more_boundaries};
4269
if ( my $past = shift @{$self->{past_nibbles}} ) {
4270
if ( $past eq 'lower' ) {
4271
PTDEBUG && _d('Nibbling values below lower boundary');
4272
$self->{nibble_sth} = $self->{past_lower_sth};
4273
$self->{explain_nibble_sth} = $self->{explain_past_lower_sth};
4274
$self->{lower} = [];
4275
$self->{upper} = $self->boundaries()->{first_lower};
4276
$self->{next_lower} = undef;
4278
elsif ( $past eq 'upper' ) {
4279
PTDEBUG && _d('Nibbling values above upper boundary');
4280
$self->{nibble_sth} = $self->{past_upper_sth};
4281
$self->{explain_nibble_sth} = $self->{explain_past_upper_sth};
4282
$self->{lower} = $self->boundaries()->{last_upper};
4283
$self->{upper} = [];
4284
$self->{next_lower} = undef;
4287
die "Invalid past nibble: $past";
4289
return 1; # continue nibbling
4292
PTDEBUG && _d('Done nibbling past boundaries');
4293
return; # stop nibbling
4298
foreach my $key ( keys %$self ) {
4299
if ( $key =~ m/_sth$/ ) {
4300
PTDEBUG && _d('Finish', $key);
4301
$self->{$key}->finish();
4308
my ($package, undef, $line) = caller 0;
4309
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4310
map { defined $_ ? $_ : 'undef' }
4312
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4317
# ###########################################################################
4318
# End OobNibbleIterator package
4319
# ###########################################################################
4321
# ###########################################################################
4190
4322
# Daemon package
4191
4323
# This package is a copy without comments from the original. The original
4192
4324
# with comments and its test file can be found in the Bazaar repository at,
5191
5877
# ########################################################################
5192
# Ready to work now.
5878
# Connect to the master.
5193
5879
# ########################################################################
5194
5880
my $vp = new VersionParser();
5195
my $tp = new TableParser(Quoter => $q);
5196
my $tc = new TableChecksum(Quoter=> $q, VersionParser => $vp);
5197
my $ms = new MasterSlave(VersionParser => $vp);
5198
my $du = new MySQLDump();
5199
my $ch = new TableChunker(Quoter => $q, MySQLDump => $du);
5200
my %common_modules = (
5212
my $main_dbh = get_cxn($hosts[0], %common_modules);
5214
# #########################################################################
5215
# Prepare --throttle-method.
5216
# #########################################################################
5217
my $throttle_method = $o->get('throttle-method');
5219
if ( lc($throttle_method) eq 'slavelag' ) {
5220
if ( $o->get('check-slave-lag') ) {
5221
MKDEBUG && _d('Using --check-slave-lag DSN for throttle');
5222
# OptionParser can't auto-copy DSN vals from a cmd line DSN
5223
# to an opt DSN, so we copy them manually.
5224
my $dsn = $dp->copy($hosts[0], $o->get('check-slave-lag'));
5225
push @slaves, { dsn=>$dsn, dbh=>get_cxn($dsn, %common_modules) };
5228
MKDEBUG && _d('Recursing to slaves for throttle');
5229
$ms->recurse_to_slaves(
5233
recurse => $o->get('recurse'),
5234
method => $o->get('recursion-method'),
5236
my ( $dsn, $dbh, $level, $parent ) = @_;
5237
return unless $level;
5238
MKDEBUG && _d('throttle slave:', $dp->as_string($dsn));
5239
$dbh->{InactiveDestroy} = 1; # Prevent destroying on fork.
5240
$dbh->{FetchHashKeyName} = 'NAME_lc';
5241
push @slaves, { dsn=>$dsn, dbh=>$dbh };
5249
# ########################################################################
5250
# Load --arg-table information.
5251
# ########################################################################
5253
if ( my $arg_tbl = $o->get('arg-table') ) {
5254
my %col_in_argtable;
5255
my $rows = $main_dbh->selectall_arrayref(
5256
"SELECT * FROM $arg_tbl", { Slice => {} });
5257
foreach my $row ( @$rows ) {
5258
die "Invalid entry in --arg-table: db and tbl must be set"
5259
unless $row->{db} && $row->{tbl};
5260
$args_for{$row->{db}}->{$row->{tbl}} = {
5261
map { $_ => $row->{$_} }
5262
grep { $overridable_args{$_} && defined $row->{$_} }
5265
if ( !%col_in_argtable ) { # do only once
5266
foreach my $key ( keys %$row ) {
5267
next if $key =~ m/^(db|tbl|ts)$/;
5268
die "Column $key (from $arg_tbl given by --arg-table) is not "
5269
. "an overridable argument" unless $overridable_args{$key};
5270
$col_in_argtable{$key} = 1;
5274
if ( $col_in_argtable{since} ) {
5275
$savesince_sth = $main_dbh->prepare(
5276
"UPDATE $arg_tbl SET since=COALESCE(?, NOW()) WHERE db=? AND tbl=?");
5280
# ########################################################################
5281
# Check for replication filters.
5282
# ########################################################################
5283
if ( $o->get('replicate') && $o->get('check-replication-filters') ) {
5284
MKDEBUG && _d("Recursing to slaves to check for replication filters");
5285
my @all_repl_filters;
5286
$ms->recurse_to_slaves(
5290
recurse => undef, # check for filters anywhere
5291
method => $o->get('recursion-method'),
5293
my ( $dsn, $dbh, $level, $parent ) = @_;
5294
my $repl_filters = $ms->get_replication_filters(dbh=>$dbh);
5295
if ( keys %$repl_filters ) {
5296
my $host = $dp->as_string($dsn);
5297
push @all_repl_filters,
5299
filters => $repl_filters,
5306
if ( @all_repl_filters ) {
5307
my $msg = "Cannot checksum with --replicate because replication "
5308
. "filters are set on these hosts:\n";
5309
foreach my $host ( @all_repl_filters ) {
5310
my $filters = $host->{filters};
5311
$msg .= " $host->{name}\n"
5312
. join("\n", map { " $_ = $host->{filters}->{$_}" }
5313
keys %{$host->{filters}})
5316
$msg .= "Please read the --check-replication-filters documentation "
5317
. "to learn how to solve this problem.";
5323
# ########################################################################
5324
# Check replication slaves if desired. If only --replicate-check is given,
5325
# then we will exit here. If --recheck is also given, then we'll continue
5326
# through the entire script but checksum only the inconsistent tables found
5328
# ########################################################################
5329
if ( defined $o->get('replicate-check') ) {
5330
MKDEBUG && _d("Recursing to slaves for replicate check, depth",
5331
$o->get('replicate-check'));
5332
my $callback = $o->get('recheck')
5333
? \&save_inconsistent_tbls
5334
: \&print_inconsistent_tbls;
5335
$ms->recurse_to_slaves(
5339
recurse => $o->get('replicate-check'),
5340
method => $o->get('recursion-method'),
5342
my ( $dsn, $dbh, $level, $parent ) = @_;
5343
my @tbls = $tc->find_replication_differences(
5344
$dbh, $o->get('replicate'));
5345
return unless @tbls;
5347
# Call the callback that does something useful with
5348
# the inconsistent tables.
5349
# o dbh db tbl args_for
5356
args_for => \%args_for,
5362
return $exit_status unless $o->get('recheck');
5365
# ########################################################################
5366
# Otherwise get ready to checksum table data, unless we have only to check
5367
# schemas in which case we can skip all such work, knowing already that we
5369
# ########################################################################
5370
if ( $checksum_table_data ) {
5371
# Verify that CONCAT_WS is compatible across all servers. On older
5372
# versions of MySQL it skips both empty strings and NULL; on newer
5374
if ( $o->get('verify') && @hosts > 1 ) {
5375
verify_checksum_compat(hosts=>\@hosts, %common_modules);
5378
($fetch_sth, $update_sth)
5379
= check_repl_table(dbh=>$main_dbh, %common_modules);
5382
$crc_wid = 16; # Wider than the widest CRC32.
5385
# ########################################################################
5386
# If resuming a previous run, figure out what the previous run finished.
5387
# ########################################################################
5388
if ( $o->get('replicate') && $o->get('resume-replicate') ) {
5389
$already_checksummed = read_repl_table(
5391
host => $hosts[0]->{h},
5395
elsif ( $o->get('resume') ) {
5396
$already_checksummed = parse_resume_file($o->get('resume'));
5399
# ########################################################################
5400
# Set transaction isolation level.
5401
# http://code.google.com/p/maatkit/issues/detail?id=720
5402
# ########################################################################
5403
if ( $o->get('replicate') ) {
5404
my $sql = "SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ";
5882
my $set_on_connect = sub {
5884
return if $o->get('explain');
5887
# https://bugs.launchpad.net/percona-toolkit/+bug/919352
5888
# The tool shouldn't blindly attempt to change binlog_format;
5889
# instead, it should check if it's already set to STATEMENT.
5890
# This is because starting with MySQL 5.1.29, changing the format
5891
# requires a SUPER user.
5892
if ( $vp->version_ge($dbh, '5.1.5') ) {
5893
$sql = 'SELECT @@binlog_format';
5894
PTDEBUG && _d($dbh, $sql);
5895
my ($original_binlog_format) = $dbh->selectrow_array($sql);
5896
PTDEBUG && _d('Original binlog_format:', $original_binlog_format);
5897
if ( $original_binlog_format !~ /STATEMENT/i ) {
5898
$sql = q{/*!50108 SET @@binlog_format := 'STATEMENT'*/};
5900
PTDEBUG && _d($dbh, $sql);
5903
if ( $EVAL_ERROR ) {
5904
die "Failed to $sql: $EVAL_ERROR\n"
5905
. "This tool requires binlog_format=STATEMENT, "
5906
. "but the current binlog_format is set to "
5907
."$original_binlog_format and an error occurred while "
5908
. "attempting to change it. If running MySQL 5.1.29 or newer, "
5909
. "setting binlog_format requires the SUPER privilege. "
5910
. "You will need to manually set binlog_format to 'STATEMENT' "
5911
. "before running this tool.\n";
5916
# Set transaction isolation level. We set binlog_format to STATEMENT,
5917
# but if the transaction isolation level is set to READ COMMITTED and the
5918
# --replicate table is in InnoDB format, the tool fails with the following
5921
# Binary logging not possible. Message: Transaction level 'READ-COMMITTED'
5922
# in InnoDB is not safe for binlog mode 'STATEMENT'
5924
# See also http://code.google.com/p/maatkit/issues/detail?id=720
5925
$sql = 'SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ';
5406
MKDEBUG && _d($main_dbh, $sql);
5407
$main_dbh->do($sql);
5927
PTDEBUG && _d($dbh, $sql);
5409
5930
if ( $EVAL_ERROR ) {
5410
5931
die "Failed to $sql: $EVAL_ERROR\n"
5411
5932
. "If the --replicate table is InnoDB and the default server "
5412
5933
. "transaction isolation level is not REPEATABLE-READ then "
5413
. "checksumming may fail with errors like \"Binary logging not "
5934
. "checksumming may fail with errors such as \"Binary logging not "
5414
5935
. "possible. Message: Transaction level 'READ-COMMITTED' in "
5415
5936
. "InnoDB is not safe for binlog mode 'STATEMENT'\". In that "
5416
5937
. "case you will need to manually set the transaction isolation "
5417
. "level to REPEATABLE-READ.";
5421
# ########################################################################
5422
# Iterate through databases and tables and do the checksums.
5423
# ########################################################################
5425
# Get table info for all hosts, all slaves, unless we're in the special
5426
# "repl-re-check" mode in which case %tables_to_checksum has already the
5427
# inconsistent tables that we need to re-checksum.
5430
args_for => \%args_for,
5432
) unless ($o->get('replicate-check') && $o->get('recheck'));
5434
# Finally, checksum the tables.
5435
foreach my $database ( keys %tables_to_checksum ) {
5436
my $tables = $tables_to_checksum{$database};
5437
$exit_status |= checksum_tables(
5938
. "level to REPEATABLE-READ.\n";
5941
# We set innodb_lock_wait_timeout=1 so that if this tool happens to cause
5942
# some locking, it will be more likely to be the victim than other
5943
# connections to the server, and thus disrupt the server less.
5944
$sql = 'SHOW SESSION VARIABLES LIKE "innodb_lock_wait_timeout"';
5945
PTDEBUG && _d($dbh, $sql);
5946
my (undef, $lock_wait_timeout) = $dbh->selectrow_array($sql);
5947
PTDEBUG && _d('innodb_lock_wait_timeout', $lock_wait_timeout);
5948
if ( ($lock_wait_timeout || 0) > $o->get('lock-wait-timeout') ) {
5949
$sql = 'SET SESSION innodb_lock_wait_timeout=1';
5951
PTDEBUG && _d($dbh, $sql);
5954
if ( $EVAL_ERROR ) {
5955
warn "Failed to $sql: $EVAL_ERROR\n"
5956
. "The current innodb_lock_wait_timeout value "
5957
. "$lock_wait_timeout is higher than the --lock-wait-timeout "
5958
. "value " . $o->get('lock-wait-timeout') . " and the variable "
5959
. "cannot be changed. innodb_lock_wait_timeout is only dynamic "
5960
. "when using the InnoDB plugin. To prevent this warning, either "
5961
. "specify --lock-wait-time=$lock_wait_timeout, or manually "
5962
. "set innodb_lock_wait_timeout to a value less than or equal "
5963
. "to " . $o->get('lock-wait-timeout') . " and restart MySQL.\n";
5968
# Do not call "new Cxn(" directly; use this sub so that set_on_connect
5969
# is applied to every cxn.
5970
# TODO: maybe this stuff only needs to be set on master cxn?
5971
my $make_cxn = sub {
5977
set => $args{set_vars} ? $set_on_connect : undef,
5979
eval { $cxn->connect() }; # connect or die trying
5980
if ( $EVAL_ERROR ) {
5981
die ts($EVAL_ERROR);
5986
# The dbh and dsn can be used before checksumming starts, but once
5987
# inside the main TABLE loop, only use the master cxn because its
5988
# dbh may be recreated.
5989
my $master_cxn = $make_cxn->(set_vars => 1, dsn_string => shift @ARGV);
5990
my $master_dbh = $master_cxn->dbh(); # just for brevity
5991
my $master_dsn = $master_cxn->dsn(); # just for brevity
5993
# ########################################################################
5994
# If this is not a dry run (--explain was not specified), then we're
5995
# going to checksum the tables, so do the necessary preparations and
5996
# checks. Else, this all can be skipped because all we need for a
5997
# dry run is a connection to the master.
5998
# ########################################################################
5999
my $q = new Quoter();
6000
my $tp = new TableParser(Quoter => $q);
6001
my $rc = new RowChecksum(Quoter=> $q, OptionParser => $o);
6002
my $ms = new MasterSlave(VersionParser => $vp);
6004
my $slaves; # all slaves (that we can find)
6005
my $slave_lag_cxns; # slaves whose lag we'll check
6007
my $replica_lag; # ReplicaLagWaiter object
6008
my $replica_lag_pr; # Progress for ReplicaLagWaiter
6009
my $sys_load; # MySQLStatusWaiter object
6010
my $sys_load_pr; # Progress for MySQLStatusWaiter object
6012
my $repl_table = $q->quote($q->split_unquote($o->get('replicate')));
6013
my $fetch_sth; # fetch chunk from repl table
6014
my $update_sth; # update master_crc and master_cnt in repl table
6015
my $delete_sth; # delete checksums for one db.tbl from repl table
6017
if ( !$o->get('explain') ) {
6018
# #####################################################################
6019
# Find and connect to slaves.
6020
# #####################################################################
6021
$slaves = $ms->get_slaves(
6028
return $make_cxn->(@_, prev_dsn => $master_cxn->dsn());
6031
PTDEBUG && _d(scalar @$slaves, 'slaves found');
6033
if ( $o->get('check-slave-lag') ) {
6034
PTDEBUG && _d('Will use --check-slave-lag to check for slave lag');
6035
my $cxn = $make_cxn->(
6036
dsn_string => $o->get('check-slave-lag'),
6037
prev_dsn => $master_cxn->dsn(),
6039
$slave_lag_cxns = [ $cxn ];
6042
PTDEBUG && _d('Will check slave lag on all slaves');
6043
$slave_lag_cxns = $slaves;
6046
# #####################################################################
6047
# Possibly check replication slaves and exit.
6048
# #####################################################################
6049
if ( $o->get('replicate-check') && $o->get('replicate-check-only') ) {
6050
PTDEBUG && _d('Will --replicate-check and exit');
6052
foreach my $slave ( @$slaves ) {
6053
my $diffs = $rc->find_replication_differences(
6054
dbh => $slave->dbh(),
6055
repl_table => $repl_table,
6057
PTDEBUG && _d(scalar @$diffs, 'checksum diffs on',
6061
if ( $o->get('quiet') < 2 ) {
6062
print_checksum_diffs(
6070
PTDEBUG && _d('Exit status', $exit_status, 'oktorun', $oktorun);
6071
return $exit_status;
6074
# #####################################################################
6075
# Check for replication filters.
6076
# #####################################################################
6077
if ( $o->get('check-replication-filters') ) {
6078
PTDEBUG && _d("Checking slave replication filters");
6079
my @all_repl_filters;
6080
foreach my $slave ( @$slaves ) {
6081
my $repl_filters = $ms->get_replication_filters(
6082
dbh => $slave->dbh(),
6084
if ( keys %$repl_filters ) {
6085
push @all_repl_filters,
6086
{ name => $slave->name(),
6087
filters => $repl_filters,
6091
if ( @all_repl_filters ) {
6092
my $msg = "Replication filters are set on these hosts:\n";
6093
foreach my $host ( @all_repl_filters ) {
6094
my $filters = $host->{filters};
6095
$msg .= " $host->{name}\n"
6096
. join("\n", map { " $_ = $host->{filters}->{$_}" }
6097
keys %{$host->{filters}})
6100
$msg .= "Please read the --check-replication-filters documentation "
6101
. "to learn how to solve this problem.";
6106
# #####################################################################
6107
# Check that the replication table exists, or possibly create it.
6108
# #####################################################################
6112
repl_table => $repl_table,
6118
if ( $EVAL_ERROR ) {
6119
die ts($EVAL_ERROR);
6122
# #####################################################################
6123
# Make a ReplicaLagWaiter to help wait for slaves after each chunk.
6124
# #####################################################################
6126
# Don't let the master dbh die while waiting for slaves because we
6127
# may wait a very long time for slaves.
6129
# This is called from within the main TABLE loop, so use the
6130
# master cxn; do not use $master_dbh.
6131
my $dbh = $master_cxn->dbh();
6132
if ( !$dbh || !$dbh->ping() ) {
6133
PTDEBUG && _d('Lost connection to master while waiting for slave lag');
6134
eval { $dbh = $master_cxn->connect() }; # connect or die trying
6135
if ( $EVAL_ERROR ) {
6136
$oktorun = 0; # Fatal error
6138
die "Lost connection to master while waiting for replica lag "
6142
$dbh->do("SELECT 'pt-table-checksum keepalive'");
6143
sleep $o->get('check-interval');
6149
my $dbh = $cxn->dbh();
6150
if ( !$dbh || !$dbh->ping() ) {
6151
PTDEBUG && _d('Lost connection to slave', $cxn->name(),
6152
'while waiting for slave lag');
6153
eval { $dbh = $cxn->connect() }; # connect or die trying
6154
if ( $EVAL_ERROR ) {
6155
$oktorun = 0; # Fatal error
6157
die "Lost connection to replica " . $cxn->name()
6158
. " while attempting to get its lag ($EVAL_ERROR)";
6161
return $ms->get_slave_lag($dbh);
6164
$replica_lag = new ReplicaLagWaiter(
6165
slaves => $slave_lag_cxns,
6166
max_lag => $o->get('max-lag'),
6167
oktorun => sub { return $oktorun },
6168
get_lag => $get_lag,
6174
my $sql = "SHOW GLOBAL STATUS LIKE ?";
6175
my $sth = $master_cxn->dbh()->prepare($sql);
6179
PTDEBUG && _d($sth->{Statement}, $var);
6180
$sth->execute($var);
6181
my (undef, $val) = $sth->fetchrow_array();
6186
$sys_load = new MySQLStatusWaiter(
6187
spec => $o->get('max-load'),
6188
get_status => $get_status,
6189
oktorun => sub { return $oktorun },
6193
if ( $o->get('progress') ) {
6194
$replica_lag_pr = new Progress(
6195
jobsize => scalar @$slaves,
6196
spec => $o->get('progress'),
6197
name => "Waiting for replicas to catch up", # not used
6200
$sys_load_pr = new Progress(
6201
jobsize => scalar @{$o->get('max-load')},
6202
spec => $o->get('progress'),
6203
name => "Waiting for --max-load", # not used
6207
# #####################################################################
6208
# Prepare statement handles to update the repl table on the master.
6209
# #####################################################################
6210
$fetch_sth = $master_dbh->prepare(
6211
"SELECT this_crc, this_cnt FROM $repl_table "
6212
. "WHERE db = ? AND tbl = ? AND chunk = ?");
6213
$update_sth = $master_dbh->prepare(
6214
"UPDATE $repl_table SET chunk_time = ?, master_crc = ?, master_cnt = ? "
6215
. "WHERE db = ? AND tbl = ? AND chunk = ?");
6216
$delete_sth = $master_dbh->prepare(
6217
"DELETE FROM $repl_table WHERE db = ? AND tbl = ?");
6218
} # !$o->get('explain')
6220
# ########################################################################
6221
# Checksum args and the DML part of the checksum query for each table.
6222
# ########################################################################
6223
my %crc_args = $rc->get_crc_args(dbh => $master_dbh);
6224
my $checksum_dml = "REPLACE INTO $repl_table "
6225
. "(db, tbl, chunk, chunk_index,"
6226
. " lower_boundary, upper_boundary, this_cnt, this_crc) "
6227
. "SELECT ?, ?, ?, ?, ?, ?,";
6228
my $past_cols = " COUNT(*), '0'";
6230
# ########################################################################
6231
# Get last chunk for --resume.
6232
# ########################################################################
6234
if ( $o->get('resume') ) {
6235
$last_chunk = last_chunk(
6237
repl_table => $repl_table,
6241
my $schema_iter = new SchemaIterator(
6243
resume => $last_chunk ? $q->quote(@{$last_chunk}{qw(db tbl)})
6245
keep_tbl_status => 1,
6252
!$schema_iter->table_is_allowed(@{$last_chunk}{qw(db tbl)}) ) {
6253
PTDEBUG && _d('Ignoring last table', @{$last_chunk}{qw(db tbl)},
6254
'and resuming from next table');
6255
$last_chunk = undef;
6258
# ########################################################################
6259
# Various variables and modules for checksumming the tables.
6260
# ########################################################################
6264
my $limit = $o->get('chunk-size-limit');
6265
my $tn = new TableNibbler(TableParser => $tp, Quoter => $q);
6266
my $retry = new Retry();
6268
# ########################################################################
6269
# Callbacks for each table's nibble iterator. All checksum work is done
6270
# in these callbacks and the subs that they call.
6271
# ########################################################################
6275
my $tbl = $args{tbl};
6276
my $nibble_iter = $args{NibbleIterator};
6279
if ( $last_chunk ) { # resuming
6280
if ( have_more_chunks(%args, last_chunk => $last_chunk) ) {
6281
$nibble_iter->set_nibble_number($last_chunk->{chunk});
6282
PTDEBUG && _d('Have more chunks; resuming from',
6283
$last_chunk->{chunk}, 'at', $last_chunk->{ts});
6284
if ( !$o->get('quiet') ) {
6285
print "Resuming from $tbl->{db}.$tbl->{tbl} chunk "
6286
. "$last_chunk->{chunk}, timestamp $last_chunk->{ts}\n";
6290
# Problem resuming or no next lower boundary.
6291
PTDEBUG && _d('No more chunks; resuming from next table');
6292
$oktonibble = 0; # don't nibble table; next table
6295
# Just need to call us once to kick-start the resume process.
6296
$last_chunk = undef;
6299
if ( $o->get('explain') ) {
6300
# --explain level 1: print the checksum and next boundary
6303
"-- $tbl->{db}.$tbl->{tbl}\n",
6305
my $statements = $nibble_iter->statements();
6306
foreach my $sth ( sort keys %$statements ) {
6307
next if $sth =~ m/^explain/;
6308
if ( $statements->{$sth} ) {
6309
print $statements->{$sth}->{Statement}, "\n\n";
6313
if ( $o->get('explain') < 2 ) {
6314
$oktonibble = 0; # don't nibble table; next table
6318
if ( $nibble_iter->one_nibble() ) {
6319
PTDEBUG && _d('Getting table row estimate on replicas');
6320
my $chunk_size_limit = $o->get('chunk-size-limit');
6322
foreach my $slave ( @$slaves ) {
6323
my ($n_rows) = NibbleIterator::get_row_estimate(
6326
where => $o->get('where') || "1=1",
6331
PTDEBUG && _d('Table on', $slave->name(),
6332
'has', $n_rows, 'rows');
6334
&& $n_rows > ($tbl->{chunk_size} * $chunk_size_limit) )
6336
PTDEBUG && _d('Table too large on', $slave->name());
6337
push @too_large, [$slave->name(), $n_rows || 0];
6341
if ( $o->get('quiet') < 2 ) {
6343
= "Skipping table $tbl->{db}.$tbl->{tbl} because"
6344
. " on the master it would be checksummed in one chunk"
6345
. " but on these replicas it has too many rows:\n";
6346
foreach my $info ( @too_large ) {
6347
$msg .= " $info->[1] rows on $info->[0]\n";
6349
$msg .= "The current chunk size limit is "
6350
. ($tbl->{chunk_size} * $chunk_size_limit)
6351
. " rows (chunk size=$tbl->{chunk_size}"
6352
. " * chunk size limit=$chunk_size_limit).\n";
6355
$tbl->{checksum_results}->{errors}++;
6360
if ( $oktonibble && $o->get('empty-replicate-table') ) {
6362
dbh => $master_cxn->dbh(),
6363
repl_table => $repl_table,
6367
PTDEBUG && _d($delete_sth->{Statement});
6368
$delete_sth->execute($tbl->{db}, $tbl->{tbl});
6371
# USE the correct db while checksumming this table. The "correct"
6372
# db is a complicated subject; see sub for comments.
6374
dbh => $master_cxn->dbh(),
6375
tbl => $tbl, # XXX working on this table
6376
repl_table => $repl_table,
6380
# #########################################################
6381
# XXX DO NOT CHANGE THE DB UNTIL THIS TABLE IS FINISHED XXX
6382
# #########################################################
6385
return $oktonibble; # continue nibbling table?
6387
next_boundaries => sub {
6389
my $tbl = $args{tbl};
6390
my $nibble_iter = $args{NibbleIterator};
6391
my $sth = $nibble_iter->statements();
6392
my $boundary = $nibble_iter->boundaries();
6394
return 1 if $nibble_iter->one_nibble();
6396
# Check that MySQL will use the nibble index for the next upper
6397
# boundary sql. This check applies to the next nibble. So if
6398
# the current nibble number is 5, then nibble 5 is already done
6399
# and we're checking nibble number 6.
6400
my $expl = explain_statement(
6402
sth => $sth->{explain_upper_boundary},
6403
vals => [ @{$boundary->{lower}}, $nibble_iter->chunk_size() ],
6405
if ( lc($expl->{key} || '')
6406
ne lc($nibble_iter->nibble_index() || '') ) {
6407
PTDEBUG && _d('Cannot nibble next chunk, aborting table');
6408
if ( $o->get('quiet') < 2 ) {
6410
= "Aborting table $tbl->{db}.$tbl->{tbl} at chunk "
6411
. ($nibble_iter->nibble_number() + 1)
6412
. " because it is not safe to chunk. Chunking should "
6414
. ($nibble_iter->nibble_index() || '?')
6415
. " index, but MySQL EXPLAIN reports that "
6416
. ($expl->{key} ? "the $expl->{key}" : "no")
6417
. " index will be used.\n";
6420
$tbl->{checksum_results}->{errors}++;
6421
return 0; # stop nibbling table
6424
# Once nibbling begins for a table, control does not return to this
6425
# tool until nibbling is done because, as noted above, all work is
6426
# done in these callbacks. This callback is the only place where we
6427
# can prematurely stop nibbling by returning false. This allows
6428
# Ctrl-C to stop the tool between nibbles instead of between tables.
6429
return $oktorun; # continue nibbling table?
6431
exec_nibble => sub {
6433
my $tbl = $args{tbl};
6434
my $nibble_iter = $args{NibbleIterator};
6435
my $sth = $nibble_iter->statements();
6436
my $boundary = $nibble_iter->boundaries();
6438
# Count every chunk, even if it's ultimately skipped, etc.
6439
$tbl->{checksum_results}->{n_chunks}++;
6441
# --explain level 2: print chunk,lower boundary values,upper
6443
if ( $o->get('explain') > 1 ) {
6444
my $chunk = $nibble_iter->nibble_number();
6445
if ( $nibble_iter->one_nibble() ) {
6446
printf "%d 1=1\n", $chunk;
6449
my $lb_quoted = join(
6450
',', map { defined $_ ? $_ : 'NULL'} @{$boundary->{lower}});
6451
my $ub_quoted = join(
6452
',', map { defined $_ ? $_ : 'NULL'} @{$boundary->{upper}});
6453
printf "%d %s %s\n", $chunk, $lb_quoted, $ub_quoted;
6455
if ( !$nibble_iter->more_boundaries() ) {
6456
print "\n"; # blank line between this table and the next table
6458
return 0; # next boundary
6461
# If the table is being chunked (i.e., it's not small enough to be
6462
# consumed by one nibble), then check index usage and chunk size.
6463
if ( !$nibble_iter->one_nibble() ) {
6464
my $expl = explain_statement(
6466
sth => $sth->{explain_nibble},
6467
vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
6470
= $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
6473
# Ensure that MySQL is using the chunk index.
6474
if ( lc($expl->{key} || '')
6475
ne lc($nibble_iter->nibble_index() || '') ) {
6476
PTDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
6477
"$tbl->{db}.$tbl->{tbl} not using chunk index, skipping");
6478
return 0; # next boundary
6481
# Check chunk size limit if the upper boundary and next lower
6482
# boundary are identical.
6484
my $boundary = $nibble_iter->boundaries();
6485
if ( $nibble_iter->identical_boundaries(
6486
$boundary->{upper}, $boundary->{next_lower})
6487
&& $oversize_chunk ) {
6488
PTDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
6489
"$tbl->{db}.$tbl->{tbl} is too large, skipping");
6490
return 0; # next boundary
6495
# Exec and time the chunk checksum query.
6496
$tbl->{nibble_time} = exec_nibble(
6502
PTDEBUG && _d('Nibble time:', $tbl->{nibble_time});
6504
# We're executing REPLACE queries which don't return rows.
6505
# Returning 0 from this callback causes the nibble iter to
6506
# get the next boundaries/nibble.
6509
after_nibble => sub {
6511
my $tbl = $args{tbl};
6512
my $nibble_iter = $args{NibbleIterator};
6514
# Don't need to do anything here if we're just --explain'ing.
6515
return if $o->get('explain');
6517
# Chunk/nibble number that we just inserted or skipped.
6518
my $chunk = $nibble_iter->nibble_number();
6520
# Nibble time will be undefined if the chunk was skipped.
6521
if ( !defined $tbl->{nibble_time} ) {
6522
PTDEBUG && _d('Skipping chunk', $chunk);
6523
$tbl->{checksum_results}->{skipped}++;
6527
# Max chunk number that worked. This may be less than the total
6528
# number of chunks if, for example, chunk 16 of 16 times out, but
6529
# chunk 15 worked. The max chunk is used for checking for diffs
6530
# on the slaves, in the done callback.
6531
$tbl->{max_chunk} = $chunk;
6533
# Fetch the checksum that we just executed from the replicate table.
6534
$fetch_sth->execute(@{$tbl}{qw(db tbl)}, $chunk);
6535
my ($crc, $cnt) = $fetch_sth->fetchrow_array();
6537
$tbl->{checksum_results}->{n_rows} += $cnt || 0;
6539
# We're working on the master, so update the checksum's master_cnt
6541
$update_sth->execute(
6542
# UPDATE repl_table SET
6543
sprintf('%.6f', $tbl->{nibble_time}), # chunk_time
6552
# Should be done automatically, but I like to be explicit.
6553
$fetch_sth->finish();
6554
$update_sth->finish();
6556
# Update rate, chunk size, and progress if the nibble actually
6557
# selected some rows.
6558
if ( ($cnt || 0) > 0 ) {
6559
# Update the rate of rows per second for the entire server.
6560
# This is used for the initial chunk size of the next table.
6561
$total_rows += $cnt;
6562
$total_time += $tbl->{nibble_time};
6563
$total_rate = int($total_rows / $total_time);
6564
PTDEBUG && _d('Total avg rate:', $total_rate);
6566
# Adjust chunk size. This affects the next chunk.
6567
if ( $o->get('chunk-time') ) {
6569
= $tbl->{rate}->update($cnt, $tbl->{nibble_time});
6571
if ( $tbl->{chunk_size} < 1 ) {
6572
# This shouldn't happen. WeightedAvgRate::update() may return
6573
# a value < 1, but minimum chunk size is 1.
6574
$tbl->{chunk_size} = 1;
6576
# This warning is printed once per table.
6577
if ( !$tbl->{warned_slow} && $o->get('quiet') < 2 ) {
6578
warn ts("Checksum queries for table "
6579
. "$tbl->{db}.$tbl->{tbl} are executing very slowly. "
6580
. "--chunk-size has been automatically reduced to 1. "
6581
. "Check that the server is not being overloaded, "
6582
. "or increase --chunk-time. The last chunk, number "
6583
. "$chunk of table $tbl->{db}.$tbl->{tbl}, "
6584
. "selected $cnt rows and took "
6585
. sprintf('%.3f', $tbl->{nibble_time})
6586
. " seconds to execute.\n");
6587
$tbl->{warned_slow} = 1;
6591
# Update chunk-size based on rows/s checksum rate.
6592
$nibble_iter->set_chunk_size($tbl->{chunk_size});
6595
# Every table should have a Progress obj; update it.
6596
if ( my $tbl_pr = $tbl->{progress} ) {
6597
$tbl_pr->update(sub {return $tbl->{checksum_results}->{n_rows}});
6601
# Wait forever for slaves to catch up.
6602
$replica_lag_pr->start() if $replica_lag_pr;
6603
$replica_lag->wait(Progress => $replica_lag_pr);
6605
# Wait forever for system load to abate.
6606
$sys_load_pr->start() if $sys_load_pr;
6607
$sys_load->wait(Progress => $sys_load_pr);
6611
done => sub { # done nibbling table
6613
my $tbl = $args{tbl};
6614
my $nibble_iter = $args{NibbleIterator};
6615
my $max_chunk = $tbl->{max_chunk};
6617
# Don't need to do anything here if we're just --explain'ing.
6618
return if $o->get('explain');
6620
# Wait for all slaves to run all checksum chunks,
6621
# then check for differences.
6622
if ( $max_chunk && $o->get('replicate-check') && scalar @$slaves ) {
6623
PTDEBUG && _d('Checking slave diffs');
6626
if ( $o->get('progress') ) {
6627
$check_pr = new Progress(
6628
jobsize => $max_chunk,
6629
spec => $o->get('progress'),
6630
name => "Waiting to check replicas for differences",
6634
# Wait for the last checksum of this table to replicate
6636
wait_for_last_checksum(
6638
repl_table => $repl_table,
6640
max_chunk => $max_chunk,
6641
check_pr => $check_pr,
6645
# Check each slave for checksum diffs.
6646
foreach my $slave ( @$slaves ) {
6648
my $diffs = $rc->find_replication_differences(
6649
dbh => $slave->dbh(),
6650
repl_table => $repl_table,
6651
where => "db='$tbl->{db}' AND tbl='$tbl->{tbl}'",
6653
PTDEBUG && _d(scalar @$diffs, 'checksum diffs on',
6656
$tbl->{checksum_results}->{diffs} = scalar @$diffs;
6660
if ( $o->get('quiet') < 2 ) {
6661
warn ts("Error checking for checksum differences of table "
6662
. "$tbl->{db}.$tbl->{tbl} on replica " . $slave->name()
6664
. "Check that the replica is running and has the "
6665
. "replicate table $repl_table.\n");
6667
$tbl->{checksum_results}->{errors}++;
6672
# Print table's checksum results if we're not being quiet,
6673
# else print if table has diffs and we're not being completely
6675
if ( !$o->get('quiet')
6676
|| $o->get('quiet') < 2 && $tbl->{checksum_results}->{diffs} ) {
6677
print_checksum_results(tbl => $tbl);
6684
# ########################################################################
6685
# Checksum each table.
6686
# ########################################################################
6689
while ( $oktorun && (my $tbl = $schema_iter->next()) ) {
6691
# Results, stats, and info related to checksumming this table can
6692
# be saved here. print_checksum_results() uses this info.
6693
$tbl->{checksum_results} = {};
6695
# Set table's initial chunk size. If this is the first table,
6696
# then total rate will be zero, so use --chunk-size. Or, if
6697
# --chunk-time=0, then only use --chunk-size for every table.
6698
# Else, the initial chunk size is based on the total rates of
6699
# rows/s from all previous tables. If --chunk-time is really
6700
# small, like 0.001, then Perl int() will probably round the
6701
# chunk size to zero, which is invalid, so we default to 1.
6702
my $chunk_time = $o->get('chunk-time');
6703
my $chunk_size = $chunk_time && $total_rate
6704
? int($total_rate * $chunk_time) || 1
6705
: $o->get('chunk-size');
6706
$tbl->{chunk_size} = $chunk_size;
6708
# Make a nibble iterator for this table. This should only fail
6709
# if the table has no indexes and is too large to checksum in
6711
my $checksum_cols = $rc->make_chunk_checksum(
6712
dbh => $master_cxn->dbh(),
6718
$nibble_iter = new OobNibbleIterator(
6721
chunk_size => $tbl->{chunk_size},
6722
chunk_index => $o->get('chunk-index'),
6723
dml => $checksum_dml,
6724
select => $checksum_cols,
6725
past_dml => $checksum_dml,
6726
past_select => $past_cols,
6727
callbacks => $callbacks,
6728
resume => $last_chunk,
6731
TableNibbler => $tn,
6736
if ( $EVAL_ERROR ) {
6737
if ( $o->get('quiet') < 2 ) {
6738
warn ts("Cannot checksum table $tbl->{db}.$tbl->{tbl}: "
6741
$tbl->{checksum_results}->{errors}++;
6744
# Init a new weighted avg rate calculator for the table.
6745
$tbl->{rate} = new WeightedAvgRate(target_t => $chunk_time);
6747
# Make a Progress obj for this table. It may not be used;
6748
# depends on how many rows, chunk size, how fast the server
6749
# is, etc. But just in case, all tables have a Progress obj.
6750
if ( $o->get('progress')
6751
&& !$nibble_iter->one_nibble()
6752
&& $nibble_iter->row_estimate() )
6754
$tbl->{progress} = new Progress(
6755
jobsize => $nibble_iter->row_estimate(),
6756
spec => $o->get('progress'),
6757
name => "Checksumming $tbl->{db}.$tbl->{tbl}",
6761
# Finally, checksum the table.
6762
# The "1 while" loop is necessary because we're executing REPLACE
6763
# statements which don't return rows and NibbleIterator only
6764
# returns if it has rows to return. So all the work is done via
6765
# the callbacks. -- print_checksum_results(), which is called
6766
# from the done callback, uses this start time.
6767
$tbl->{checksum_results}->{start_time} = time;
6768
1 while $nibble_iter->next();
6771
if ( $EVAL_ERROR ) {
6772
# This should not happen. If it does, it's probably some bug
6773
# or error that we're not catching.
6774
warn ts(($oktorun ? "Error " : "Fatal error ")
6775
. "checksumming table $tbl->{db}.$tbl->{tbl}: "
6777
$tbl->{checksum_results}->{errors}++;
6779
# Print whatever checksum results we got before dying, regardless
6780
# of --quiet because at this point we need all the info we can get.
6781
print_checksum_results(tbl => $tbl);
6784
# Update the tool's exit status.
6785
if ( $tbl->{checksum_results}->{errors}
6786
|| $tbl->{checksum_results}->{diffs} ) {
6791
PTDEBUG && _d('Exit status', $exit_status, 'oktorun', $oktorun);
5447
6792
return $exit_status;
5450
6795
# ############################################################################
5452
6797
# ############################################################################
5454
sub get_all_tbls_info {
5456
foreach my $arg ( qw(o dbh q tp du ch args_for) ) {
5457
die "I need a $arg argument" unless $args{$arg};
5459
my $dbh = $args{dbh};
5460
MKDEBUG && _d('Getting all schema objects');
5462
my $si = new SchemaIterator(
5464
OptionParser => $args{o},
5467
while ( my %schema_obj = $si->next_schema_object() ) {
5468
my $final_o = get_final_opts(
5472
save_tbl_to_checksum(
5475
final_o => $final_o,
5482
sub save_tbl_to_checksum {
5484
foreach my $arg ( qw(q ch du final_o tp dbh db tbl du tp ch vp) ) {
5485
die "I need a $arg argument" unless $args{$arg};
5490
my $final_o = $args{final_o};
5491
my $dbh = $args{dbh};
5493
my $tbl = $args{tbl};
5497
# Skip the table in which checksums are stored.
5498
return if ($final_o->get('replicate')
5499
&& $final_o->get('replicate') eq "$db.$tbl");
5501
eval { # Catch errors caused by tables being dropped during work.
5503
# Parse the table and determine a column that's chunkable. This is
5504
# used not only for chunking, but also for --since.
5505
my $create = $du->get_create_table($dbh, $q, $db, $tbl);
5506
my $struct = $tp->parse($create);
5508
# If there's a --where clause and the user didn't specify a chunk index
5509
# or a chunk column they want, then get MySQL's chosen index for the where clause
5510
# and make it the preferred index.
5511
# http://code.google.com/p/maatkit/issues/detail?id=378
5512
if ( $final_o->get('where')
5513
&& !$final_o->get('chunk-column')
5514
&& !$final_o->get('chunk-index') )
5516
my ($mysql_chosen_index) = $tp->find_possible_keys(
5517
$dbh, $db, $tbl, $q, $final_o->get('where'));
5518
MKDEBUG && _d("Index chosen by MySQL for --where:",
5519
$mysql_chosen_index);
5520
$final_o->set('chunk-index', $mysql_chosen_index)
5521
if $mysql_chosen_index;
5525
# Get the first chunkable column and index, taking into account
5526
# --chunk-column and --chunk-index. If either of those options
5527
# is specified, get_first_chunkable_column() will try to satisfy
5528
# the request but there's no guarantee either will be selected.
5529
# http://code.google.com/p/maatkit/issues/detail?id=519
5530
my ($chunk_col, $chunk_index) = $ch->get_first_chunkable_column(
5532
chunk_column => $final_o->get('chunk-column'),
5533
chunk_index => $final_o->get('chunk-index'),
5534
tbl_struct => $struct,
5538
if ( $final_o->get('use-index') && $chunk_col ) {
5539
my $hint = $vp->version_ge($dbh, '4.0.9') ? 'FORCE' : 'USE';
5540
$index_hint = "$hint INDEX (" . $q->quote($chunk_index) . ")";
5542
MKDEBUG && _d('Index hint:', $index_hint);
5544
my @chunks = '1=1'; # Default.
5545
my $rows_per_chunk = undef;
5547
if ( $final_o->get('chunk-size') ) {
5548
($rows_per_chunk) = $ch->size_to_rows(
5552
chunk_size => $final_o->get('chunk-size'),
5556
# Calculate chunks for this table.
5557
my %params = $ch->get_range_statistics(
5561
chunk_col => $chunk_col,
5562
tbl_struct => $struct,
5564
if ( !grep { !defined $params{$_} } qw(min max rows_in_range) ) {
5565
@chunks = $ch->calculate_chunks(
5569
tbl_struct => $struct,
5570
chunk_col => $chunk_col,
5571
chunk_size => $rows_per_chunk,
5572
zero_chunk => $final_o->get('zero-chunk'),
5573
chunk_range => $final_o->get('chunk-range'),
5576
$maxval = $params{max};
5581
push @{ $tables_to_checksum{$db} }, {
5586
column => $chunk_col,
5587
chunk_index => $chunk_index,
5588
chunk_size => $rows_per_chunk,
5590
index => $index_hint,
5592
final_o => $final_o,
5595
if ( $EVAL_ERROR ) {
5596
print_err($final_o, $EVAL_ERROR, $db, $tbl);
5602
# Checksum the tables in the given database.
5603
# A separate report for each database and its tables is printed.
5604
sub checksum_tables {
5606
foreach my $arg ( qw(tc du o q db dbh hosts tbls) ) {
5607
die "I need a $arg argument" unless $args{$arg};
5613
my $dbh = $args{dbh};
5614
my $hosts = $args{hosts};
5615
my $tbls = $args{tbls};
5618
my ($hdr, $explain);
5619
my $exit_status = 0;
5621
# NOTE: remember, you can't 'next TABLE' inside the eval{}.
5622
# NOTE: remember to use the final_o embedded within each $table, not $o
5623
foreach my $table ( @$tbls ) {
5624
MKDEBUG && _d("Doing", $db, '.', $table->{table});
5625
MKDEBUG && _d("Table:", Dumper($table));
5626
my $final_o = $table->{final_o};
5628
my $is_chunkable_table = 1; # table should be chunkable unless...
5630
# If there's a chunk size but no chunk index and unchunkable tables
5631
# aren't allowed (they're not by default), then table may still be
5632
# chunkable if it's small, i.e. total rows in table <= chunk size.
5633
if ( $table->{chunk_size}
5634
&& !$table->{chunk_index}
5635
&& !$final_o->get('unchunkable-tables') )
5637
$is_chunkable_table = is_chunkable_table(
5640
tbl => $table->{table},
5641
chunk_size => $table->{chunk_size},
5642
where => $final_o->{where},
5645
MKDEBUG && _d("Unchunkable table small enough to chunk:",
5646
$is_chunkable_table ? 'yes' : 'no');
5649
if ( !$is_chunkable_table ) {
5651
print "# cannot chunk $table->{database} $table->{table}\n";
5657
# Determine the checksum strategy for every table because it
5658
# might change given various --arg-table opts for each table.
5660
my ( $strat, $crc_type, $func, $opt_slice );
5661
if ( $checksum_table_data && $do_table ) {
5662
$strat_ref = determine_checksum_strat(
5667
( $strat, $crc_wid, $crc_type, $func, $opt_slice )
5668
= @$strat_ref{ qw(strat crc_wid crc_type func opt_slice) };
5669
MKDEBUG && _d("Checksum strat:", Dumper($strat_ref));
5672
# --schema doesn't use a checksum strategy, but do_tbl()
5673
# requires a strat arg.
5674
$strat = '--schema';
5676
$md5sum_fmt = "%-${crc_wid}s %s.%s.%s.%d\n";
5678
# Design and print header unless we are resuming in which case
5679
# we should have already re-printed the partial output of the
5680
# resume file in parse_resume_file(). This only has to be done
5681
# once and done here because we need $crc_wid which is determined
5682
# by the checksum strat above.
5684
if ( $o->get('tab') ) {
5685
$hdr = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n";
5686
$explain = "%s\t%s\t%s\n";
5689
my $max_tbl = max(5, map { length($_->{table}) } @$tbls);
5690
my $max_db = max(8, length($db));
5691
my $max_host = max(4, map { length($_->{h}) } @$hosts);
5692
$hdr = "%-${max_db}s %-${max_tbl}s %5s "
5693
. "%-${max_host}s %-6s %10s %${crc_wid}s %4s %4s %4s %4s\n";
5694
$explain = "%-${max_db}s %-${max_tbl}s %s\n";
5696
my @hdr_args = qw(DATABASE TABLE CHUNK HOST ENGINE
5697
COUNT CHECKSUM TIME WAIT STAT LAG);
5698
unless ( $o->get('quiet')
5699
|| $o->get('explain')
5700
|| $o->get('checksum')
5701
|| $o->get('resume') )
5703
printf($hdr, @hdr_args)
5704
or die "Cannot print: $OS_ERROR";
5708
# Clean out the replication table entry for this table.
5709
# http://code.google.com/p/maatkit/issues/detail?id=304
5710
if ( (my $replicate_table = $final_o->get('replicate'))
5711
&& !$final_o->get('explain') ) {
5712
use_repl_db(%args); # USE the proper replicate db
5713
my $max_chunkno = scalar @{$table->{chunks}} - 1;
5714
my $del_sql = "DELETE FROM $replicate_table "
5715
. "WHERE db=? AND tbl=? AND chunk > ?";
5716
MKDEBUG && _d($dbh, $del_sql, $db, $table->{table},$max_chunkno);
5717
$dbh->do($del_sql, {}, $db, $table->{table}, $max_chunkno);
5720
# If --since is given, figure out either
5721
# 1) for temporal sinces, if the table has an update time and that
5722
# time is newer than --since, then checksum the whole table,
5723
# otherwise skip it; or
5724
# 2) for "numerical" sinces, which column to use: either the
5725
# specified column (--sincecolumn) or the auto-discovered one,
5726
# whichever exists in the table, in that order.
5727
# Then, if --savesince is given, save either 1) the current timestamp
5728
# or 2) the resulting WHERE clause.
5729
if ( $final_o->get('since') ) {
5730
if ( is_temporal($final_o->get('since')) ) {
5731
MKDEBUG && _d('--since is temporal');
5733
= $du->get_table_status($dbh, $q, $db, $table->{table});
5734
my $time = $stat->{update_time};
5735
if ( $time && $time lt $final_o->get('since') ) {
5736
MKDEBUG && _d("Skipping table because --since value",
5737
$final_o->get('since'), "is newer than", $time);
5739
$table->{chunks} = [];
5743
MKDEBUG && _d('--since is numerical');
5744
# For numerical sinces, choose the column to apply --since to.
5745
# It may not be the column the user said to use! If the user
5746
# didn't specify a column that's good to chunk on, we'll use
5747
# something else instead.
5749
# $table->{column} is the first chunkable column returned from
5750
# the call to get_first_chunkable_column() in
5751
# save_tbl_to_checksum().
5753
grep { $_ && $table->{struct}->{is_col}->{$_} }
5754
( $table->{column}, $final_o->get('since-column') );
5757
MKDEBUG && _d('Column for numerical --since:',
5758
$db, '.', $table->{table}, '.', $sincecol);
5759
# This ends up being an additional WHERE clause.
5760
$table->{since} = $q->quote($sincecol)
5761
. '>=' . $q->quote_val($final_o->get('since'));
5764
MKDEBUG && _d('No column for numerical --since for',
5765
$db, '.', $table->{table});
5770
# ##################################################################
5771
# The query is independent of the chunk, so I make it once for every
5773
# ##################################################################
5775
if ( $checksum_table_data && $do_table ) {
5776
$query = $tc->make_checksum_query(
5778
tbl => $table->{table},
5779
tbl_struct => $table->{struct},
5780
algorithm => $strat,
5782
crc_wid => $crc_wid,
5783
crc_type => $crc_type,
5784
opt_slice => $opt_slice,
5785
cols => $final_o->get('columns'),
5786
sep => $final_o->get('separator'),
5787
replicate => $final_o->get('replicate'),
5788
float_precision => $final_o->get('float-precision'),
5789
trim => $final_o->get('trim'),
5790
ignorecols => $final_o->get('ignore-columns'),
5797
$exit_status |= checksum_chunks(
5802
explain => $explain,
5803
final_o => $final_o,
5807
# Save the --since value if
5808
# 1) it's temporal and the tbl had changed since --since; or
5809
# 2) it's "numerical" and it had a chunkable or nibble-able
5810
# column and it wasn't empty
5811
# See issues 121 and 122.
5812
if ( $final_o->get('save-since') && $savesince_sth ) {
5813
if ( is_temporal($final_o->get('since')) ) {
5815
"Saving temporal --since value: current timestamp for",
5816
$db, '.', $table->{table});
5817
$savesince_sth->execute(undef,
5818
$db, $table->{table});
5820
elsif ( defined $table->{maxval} ) {
5821
MKDEBUG && _d("Saving numerical --since value:",
5822
$table->{maxval}, "for", $db, '.', $table->{table});
5823
$savesince_sth->execute($table->{maxval},
5824
$db, $table->{table});
5827
MKDEBUG && _d("Cannot save --since value:",
5828
$table->{maxval}, "for", $db, '.', $table->{table});
5832
if ( $EVAL_ERROR ) {
5833
print_err($o, $EVAL_ERROR, $db, $table->{table});
5838
return $exit_status;
5841
sub checksum_chunks {
5843
foreach my $arg ( qw(dp final_o ms o q db tbl hosts hdr explain) ) {
5844
die "I need a $arg argument" unless $args{$arg};
5848
my $final_o = $args{final_o};
5853
my $dbh = $args{dbh};
5854
my @hosts = @{$args{hosts}};
5855
my $tbl = $args{tbl};
5857
my $retry = new Retry();
5859
# ##################################################################
5860
# This loop may seem suboptimal, because it causes a new child to be
5861
# forked for each table, for each host, for each chunk. It also
5862
# causes the program to parallelize only within the chunk; that is,
5863
# no two child processes are running on different chunks at a time.
5864
# This is by design. It lets me unlock the table on the master
5866
# ##################################################################
5867
my $exit_status = 0;
5868
my $num_chunks = scalar(@{$tbl->{chunks}});
5869
my $throttle_method = $o->get('throttle-method');
5870
MKDEBUG && _d('Checksumming', $num_chunks, 'chunks');
5872
foreach my $chunk_num ( 0 .. $num_chunks - 1 ) {
5874
if ( $final_o->get('chunk-size-limit')
5875
&& $final_o->get('chunk-size')
5876
&& $tbl->{chunk_size}
5877
&& !$final_o->get('explain') )
5879
my $is_oversize_chunk = is_oversize_chunk(
5881
db => $tbl->{database},
5882
tbl => $tbl->{table},
5883
chunk => $tbl->{chunks}->[$chunk_num],
5884
chunk_size => $tbl->{chunk_size},
5885
index_hint => $tbl->{index},
5886
where => [$final_o->get('where'), $tbl->{since}],
5887
limit => $final_o->get('chunk-size-limit'),
5890
if ( $is_oversize_chunk ) {
5892
if ( !$final_o->get('quiet') ) {
5893
if ( $final_o->get('checksum') ) {
5894
printf($md5sum_fmt, 'NULL', '',
5895
@{$tbl}{qw(database table)}, $chunk_num)
5896
or die "Cannot print: $OS_ERROR";
5900
@{$tbl}{qw(database table)}, $chunk_num,
5901
$hosts[0]->{h}, $tbl->{struct}->{engine}, 'OVERSIZE',
5902
'NULL', 'NULL', 'NULL', 'NULL', 'NULL')
5903
or die "Cannot print: $OS_ERROR";
5910
if ( $throttle_method eq 'slavelag' ) {
5912
if ( $o->get('progress') ) {
5914
jobsize => scalar @{$args{slaves}},
5915
spec => $o->get('progress'),
5916
name => "Wait for slave(s) to catch up",
5920
slaves => $args{slaves},
5921
max_lag => $o->get('max-lag'),
5922
check_interval => $o->get('check-interval'),
5929
if ( ($num_chunks > 1 || $final_o->get('single-chunk'))
5930
&& $checksum_table_data
5931
&& defined $final_o->get('probability')
5932
&& rand(100) >= $final_o->get('probability') ) {
5933
MKDEBUG && _d('Skipping chunk because of --probability');
5937
if ( $num_chunks > 1
5938
&& $checksum_table_data
5939
&& $final_o->get('modulo')
5940
&& ($chunk_num % $final_o->get('modulo') != $final_o->get('offset')))
5942
MKDEBUG && _d('Skipping chunk', $chunk_num, 'because of --modulo');
5946
my $chunk_start_time = gettimeofday();
5947
MKDEBUG && _d('Starting chunk', $chunk_num, 'at', $chunk_start_time);
5949
if ( $final_o->get('replicate') ) {
5950
# We're in --replicate mode.
5952
# If resuming, check if this db.tbl.chunk.host can be skipped.
5953
if ( $o->get('resume-replicate') ) {
5954
if ( already_checksummed($tbl->{database},
5958
print "# already checksummed:"
5959
. " $tbl->{database}"
5964
unless $o->get('quiet');
5969
$hosts[0]->{dbh} ||= $dbh;
5979
# We're in "normal" mode. Lock table and get position on the master.
5981
if ( !$final_o->get('explain') ) {
5982
if ( $final_o->get('lock') ) {
5983
my $sql = "LOCK TABLES "
5984
. $q->quote($db, $tbl->{table}) . " READ";
5985
MKDEBUG && _d($sql);
5988
if ( $final_o->get('wait') ) {
5989
$tbl->{master_status} = $ms->get_master_status($dbh);
5995
foreach my $i ( 0 .. $#hosts ) {
5996
my $is_master = $i == 0; # First host is assumed to be master.
5997
my $host = $hosts[$i];
5999
# Open a single connection for each host. Re-use the
6000
# connection for the master/single host.
6002
$dbh->{InactiveDestroy} = 1; # Ensure that this is set.
6003
$host->{dbh} ||= $dbh;
6006
$host->{dbh} ||= get_cxn($host, %args);
6009
# If resuming, check if this db.tbl.chunk.host can be skipped.
6010
if ( $final_o->get('resume') ) {
6011
next HOST if already_checksummed($tbl->{database},
6017
# Fork, but only if there's more than one host.
6018
my $pid = @hosts > 1 ? fork() : undef;
6020
if ( @hosts == 1 || (defined($pid) && $pid == 0) ) {
6021
# Do the work (I'm a child, or there's only one host)
6028
dbh => $host->{dbh},
6032
if ( $EVAL_ERROR ) {
6033
print_err($o, $EVAL_ERROR, $db, $tbl->{table},
6034
$dp->as_string($host));
6035
exit(1) if @hosts > 1; # exit only if I'm a child
6038
exit(0) if @hosts > 1; # exit only if I'm a child
6040
elsif ( @hosts > 1 && !defined($pid) ) {
6041
die("Unable to fork!");
6044
# I already exited if I'm a child, so I'm the parent.
6045
$children{$host->{h}} = $pid if @hosts > 1;
6048
# Wait for the children to exit.
6049
foreach my $host ( keys %children ) {
6050
my $pid = waitpid($children{$host}, 0);
6051
MKDEBUG && _d("Child", $pid, "exited with", $CHILD_ERROR);
6052
$exit_status ||= $CHILD_ERROR >> 8;
6054
if ( ($final_o->get('lock') && !$final_o->get('explain')) ) {
6055
my $sql = "UNLOCK TABLES";
6056
MKDEBUG && _d($dbh, $sql);
6061
my $chunk_stop_time = gettimeofday();
6062
MKDEBUG && _d('Finished chunk at', $chunk_stop_time);
6064
# --sleep between chunks. Don't sleep if this is the last/only chunk.
6065
if ( $chunk_num < $num_chunks - 1 ) {
6066
if ( $final_o->get('sleep') && !$final_o->get('explain') ) {
6067
MKDEBUG && _d('Sleeping', $final_o->get('sleep'));
6068
sleep($final_o->get('sleep'));
6070
elsif ( $final_o->get('sleep-coef') && !$final_o->get('explain') ) {
6072
= ($chunk_stop_time - $chunk_start_time)
6073
* $final_o->get('sleep-coef');
6074
MKDEBUG && _d('Sleeping', $sleep_time);
6075
if ( $sleep_time < 0 ) {
6076
warn "Calculated invalid sleep time: "
6077
. "$sleep_time = ($chunk_stop_time - $chunk_start_time) * "
6078
. $final_o->get('sleep-coef')
6079
. ". Sleep time set to 1 second instead.";
6085
} # End foreach CHUNK
6087
return $exit_status;
6090
# Override the command-line arguments with those from --arg-table
6091
# if necessary. Returns a cloned OptionParser object ($final_o).
6092
# This clone is only a partial OptionParser object.
6093
sub get_final_opts {
6095
foreach my $arg ( qw(o dbh db tbl args_for) ) {
6096
die "I need a $arg argument" unless $args{$arg};
6099
my $dbh = $args{dbh};
6101
my $tbl = $args{tbl};
6102
my $args_for = $args{args_for};
6104
my $final_o = $o->clone();
6105
if ( my $override = $args_for->{$db}->{$tbl} ) {
6106
map { $final_o->set($_, $override->{$_}); } keys %$override;
6109
# --since and --offset are potentially expressions that should be
6110
# evaluated by the DB server. This has to be done after the override
6111
# from the --arg-table table.
6112
foreach my $opt ( qw(since offset) ) {
6113
# Don't get MySQL to evaluate if it's temporal, as 2008-08-01 --> 1999
6114
my $val = $final_o->get($opt);
6115
if ( $val && !is_temporal($val) ) {
6116
$final_o->set($opt, eval_expr($opt, $val, $dbh));
6125
return $val && $val =~ m/^\d{4}-\d{2}-\d{2}(?:.[0-9:]+)?/;
6128
sub print_inconsistent_tbls {
6130
foreach my $arg ( qw(o dp dsn tbls) ) {
6131
die "I need a $arg argument" unless $args{$arg};
6135
my $dsn = $args{dsn};
6136
my $tbls = $args{tbls};
6138
return if $o->get('quiet');
6140
my @headers = qw(db tbl chunk cnt_diff crc_diff boundaries);
6141
print "Differences on " . $dp->as_string($dsn, [qw(h P F)]) . "\n";
6142
my $max_db = max(5, map { length($_->{db}) } @$tbls);
6143
my $max_tbl = max(5, map { length($_->{tbl}) } @$tbls);
6144
my $fmt = "%-${max_db}s %-${max_tbl}s %5s %8s %8s %s\n";
6145
printf($fmt, map { uc } @headers) or die "Cannot print: $OS_ERROR";
6146
foreach my $tbl ( @$tbls ) {
6147
printf($fmt, @{$tbl}{@headers}) or die "Cannot print: $OS_ERROR";
6149
print "\n" or die "Cannot print: $OS_ERROR";
6154
sub save_inconsistent_tbls {
6156
foreach my $arg ( qw(dbh tbls) ) {
6157
die "I need a $arg argument" unless $args{$arg};
6159
my $dbh = $args{dbh};
6160
my $tbls = $args{tbls};
6162
foreach my $tbl ( @$tbls ) {
6163
MKDEBUG && _d("Will recheck", $tbl->{db}, '.', $tbl->{tbl},
6164
"(chunk:", $tbl->{boundaries}, ')');
6165
my $final_o = get_final_opts(
6170
my $chunks = [ $tbl->{boundaries} ];
6171
save_tbl_to_checksum(
6175
final_o => $final_o,
6181
# The value may be an expression like 'NOW() - INTERVAL 7 DAY'
6182
# and we should evaluate it.
6184
my ( $name, $val, $dbh ) = @_;
6187
($result) = $dbh->selectrow_array("SELECT $val");
6188
MKDEBUG && _d("option", $name, "evaluates to:", $result);
6190
if ( $EVAL_ERROR && MKDEBUG ) {
6192
_d("Error evaluating option", $name, $EVAL_ERROR);
6197
sub determine_checksum_strat {
6199
foreach my $arg ( qw(o dbh tc) ) {
6200
die "I need a $arg argument" unless $args{$arg};
6203
my $dbh = $args{dbh};
6206
my $ret = { # return vals in easy-to-swallow hash form
6208
crc_type => 'varchar',
6214
$ret->{strat} = $tc->best_algorithm(
6215
algorithm => $o->get('algorithm'),
6217
where => $o->get('where') || $o->get('since'),
6218
chunk => $o->get('chunk-size'),
6219
replicate => $o->get('replicate'),
6220
count => $o->get('count'),
6223
if ( $o->get('algorithm') && $o->get('algorithm') ne $ret->{strat} ) {
6224
warn "--algorithm=".$o->get('algorithm')." can't be used; "
6225
. "falling back to $ret->{strat}\n";
6228
# If using a cryptographic hash strategy, decide what hash function to use,
6229
# and if using BIT_XOR whether and which slice to place the user variable in.
6230
if ( $tc->is_hash_algorithm( $ret->{strat} ) ) {
6231
$ret->{func} = $tc->choose_hash_func(
6232
function => $o->get('function'),
6235
if ( $o->get('function') && $o->get('function') ne $ret->{func} ) {
6236
warn "Checksum function ".$o->get('function')." cannot be used; "
6237
. "using $ret->{func}\n";
6239
$ret->{crc_wid} = $tc->get_crc_wid($dbh, $ret->{func});
6240
($ret->{crc_type}) = $tc->get_crc_type($dbh, $ret->{func});
6242
if ( $o->get('optimize-xor') && $ret->{strat} eq 'BIT_XOR' ) {
6243
if ( $ret->{crc_type} !~ m/int$/ ) {
6245
= $tc->optimize_xor(dbh => $dbh, function => $ret->{func});
6246
if ( !defined $ret->{opt_slice} ) {
6247
warn "Cannot use --optimize-xor, disabling";
6248
$o->set('optimize-xor', 0);
6252
# FNV_64 doesn't need the optimize_xor gizmo.
6253
$o->get('optimize-xor', 0);
6261
sub verify_checksum_compat {
6263
foreach my $arg ( qw(o hosts) ) {
6264
die "I need a $arg argument" unless $args{$arg};
6267
my $hosts = $args{hosts};
6270
foreach my $host ( @$hosts ) {
6271
my $dbh = get_cxn($host, %args);
6272
my $sql = "SELECT MD5(CONCAT_WS(',', '1', ''))";
6273
MKDEBUG && _d($dbh, $sql);
6274
my $cks = $dbh->selectall_arrayref($sql)->[0]->[0];
6275
push @verify_sums, {
6277
ver => $dbh->{mysql_serverinfo},
6281
if ( unique(map { $_->{sum} } @verify_sums ) > 1 ) {
6282
my $max = max(map { length($_->{h}) } @$hosts);
6283
die "Not all servers have compatible versions. Some return different\n"
6284
. "checksum values for the same query, and cannot be compared. This\n"
6285
. "behavior changed in MySQL 4.0.14. Here is info on each host:\n\n"
6287
map { sprintf("%-${max}s %-32s %s", @{$_}{qw(host sum ver)}) }
6288
{ host => 'HOST', sum => 'CHECKSUM', ver => 'VERSION'},
6291
. "\n\nYou can disable this check with --no-verify.\n";
6296
# Check for existence and privileges on the replication table before
6297
# starting, and prepare the statements that will be used to update it.
6298
# Also clean out the checksum table. And create it if needed.
6299
# Returns fetch and update statement handles.
6800
my ($s, $m, $h, $d, $M) = localtime;
6801
my $ts = sprintf('%02d-%02dT%02d:%02d:%02d', $M+1, $d, $h, $m, $s);
6802
return $msg ? "$ts $msg" : $ts;
6806
# Completely ignore these error codes.
6808
# Error: 1592 SQLSTATE: HY000 (ER_BINLOG_UNSAFE_STATEMENT)
6809
# Message: Statement may not be safe to log in statement format.
6810
# Ignore this warning because we have purposely set statement-based
6815
# Warn once per-table for these error codes if the error message
6816
# matches the pattern.
6818
# Error: 1265 SQLSTATE: 01000 (WARN_DATA_TRUNCATED)
6819
# Message: Data truncated for column '%s' at row %ld
6822
# use MySQL's message for this warning
6828
my @required_args = qw(Cxn tbl NibbleIterator Retry Quoter OptionParser);
6829
foreach my $arg ( @required_args ) {
6830
die "I need a $arg argument" unless $args{$arg};
6832
my ($cxn, $tbl, $nibble_iter, $retry, $q, $o)= @args{@required_args};
6834
my $dbh = $cxn->dbh();
6835
my $sth = $nibble_iter->statements();
6836
my $boundary = $nibble_iter->boundaries();
6837
my $lb_quoted = $q->serialize_list(@{$boundary->{lower}});
6838
my $ub_quoted = $q->serialize_list(@{$boundary->{upper}});
6839
my $chunk = $nibble_iter->nibble_number();
6840
my $chunk_index = $nibble_iter->nibble_index();
6842
return $retry->retry(
6843
tries => $o->get('retries'),
6844
wait => sub { return; },
6846
# ###################################################################
6847
# Start timing the checksum query.
6848
# ###################################################################
6851
# Execute the REPLACE...SELECT checksum query.
6852
PTDEBUG && _d($sth->{nibble}->{Statement},
6853
'lower boundary:', @{$boundary->{lower}},
6854
'upper boundary:', @{$boundary->{upper}});
6855
$sth->{nibble}->execute(
6856
# REPLACE INTO repl_table SELECT
6859
$chunk, # chunk (number)
6860
$chunk_index, # chunk_index
6861
$lb_quoted, # lower_boundary
6862
$ub_quoted, # upper_boundary
6863
# this_cnt, this_crc WHERE
6864
@{$boundary->{lower}}, # lower boundary values
6865
@{$boundary->{upper}}, # upper boundary values
6869
# ###################################################################
6870
# End timing the checksum query.
6871
# ###################################################################
6873
# Check if checksum query caused any warnings.
6874
my $sql_warn = 'SHOW WARNINGS';
6875
PTDEBUG && _d($sql_warn);
6876
my $warnings = $dbh->selectall_arrayref($sql_warn, { Slice => {} } );
6877
foreach my $warning ( @$warnings ) {
6878
my $code = ($warning->{code} || 0);
6879
my $message = $warning->{message};
6880
if ( $ignore_code{$code} ) {
6881
PTDEBUG && _d('Ignoring warning:', $code, $message);
6884
elsif ( $warn_code{$code}
6885
&& (!$warn_code{$code}->{pattern}
6886
|| $message =~ m/$warn_code{$code}->{pattern}/) )
6888
if ( !$tbl->{"warned_code_$code"} ) { # warn once per table
6889
if ( $o->get('quiet') < 2 ) {
6890
warn ts("Checksum query for table $tbl->{db}.$tbl->{tbl} "
6891
. "caused MySQL error $code: "
6892
. ($warn_code{$code}->{message}
6893
? $warn_code{$code}->{message}
6897
$tbl->{"warned_code_$code"} = 1;
6898
$tbl->{checksum_results}->{errors}++;
6902
# This die will propagate to fail which will return 0
6903
# and propagate it to final_fail which will die with
6904
# this error message. (So don't wrap it in ts().)
6905
die "Checksum query for table $tbl->{db}.$tbl->{tbl} "
6906
. "caused MySQL error $code:\n"
6907
. " Level: " . ($warning->{level} || '') . "\n"
6908
. " Code: " . ($warning->{code} || '') . "\n"
6909
. " Message: " . ($warning->{message} || '') . "\n"
6910
. " Query: " . $sth->{nibble}->{Statement} . "\n";
6914
# Success: no warnings, no errors. Return nibble time.
6915
return $t_end - $t_start;
6919
my $error = $args{error};
6921
if ( $error =~ m/Lock wait timeout exceeded/
6922
|| $error =~ m/Query execution was interrupted/
6924
# These errors/warnings can be retried, so don't print
6925
# a warning yet; do that in final_fail.
6928
elsif ( $error =~ m/MySQL server has gone away/
6929
|| $error =~ m/Lost connection to MySQL server/
6931
# The 2nd pattern means that MySQL itself died or was stopped.
6932
# The 3rd pattern means that our cxn was killed (KILL <id>).
6933
eval { $dbh = $cxn->connect(); };
6934
return 1 unless $EVAL_ERROR; # reconnected, retry checksum query
6935
$oktorun = 0; # failed to reconnect, exit tool
6938
# At this point, either the error/warning cannot be retried,
6939
# or we failed to reconnect. So stop trying and call final_fail.
6944
my $error = $args{error};
6946
if ( $error =~ /Lock wait timeout exceeded/
6947
|| $error =~ /Query execution was interrupted/
6949
# These errors/warnings are not fatal but only cause this
6950
# nibble to be skipped.
6951
if ( $o->get('quiet') < 2 ) {
6954
return; # skip this nibble
6957
# This die will be caught by the eval inside the TABLE loop.
6958
# Checksumming for this table will stop, which is probably
6959
# good because by this point the error or warning indicates
6960
# that something fundamental is broken or wrong. Checksumming
6961
# will continue with the next table, unless the fail code set
6962
# oktorun=0, in which case the error/warning is fatal.
6963
die "Error executing checksum query: $args{error}\n";
6970
my $line_fmt = "%14s %6s %6s %8s %7s %7s %7s %-s\n";
6971
my @headers = qw(TS ERRORS DIFFS ROWS CHUNKS SKIPPED TIME TABLE);
6973
sub print_checksum_results {
6975
my @required_args = qw(tbl);
6976
foreach my $arg ( @required_args ) {
6977
die "I need a $arg argument" unless $args{$arg};
6979
my ($tbl) = @args{@required_args};
6981
if ($print_header) {
6982
printf $line_fmt, @headers;
6986
my $res = $tbl->{checksum_results};
6989
$res->{errors} || 0,
6991
$res->{n_rows} || 0,
6992
$res->{n_chunks} || 0,
6993
$res->{skipped} || 0,
6994
sprintf('%.3f', $res->{start_time} ? time - $res->{start_time} : 0),
6995
"$tbl->{db}.$tbl->{tbl}";
7002
my @headers = qw(table chunk cnt_diff crc_diff chunk_index lower_boundary upper_boundary);
7004
sub print_checksum_diffs {
7006
my @required_args = qw(cxn diffs);
7007
foreach my $arg ( @required_args ) {
7008
die "I need a $arg argument" unless $args{$arg};
7010
my ($cxn, $diffs) = @args{@required_args};
7012
print "Differences on ", $cxn->name(), "\n";
7013
print join(' ', map { uc $_ } @headers), "\n";
7014
foreach my $diff ( @$diffs ) {
7015
print join(' ', map { defined $_ ? $_ : '' } @{$diff}{@headers}), "\n";
6300
7023
sub check_repl_table {
6301
7024
my ( %args ) = @_;
6302
foreach my $arg ( qw(o dbh tp q) ) {
7025
my @required_args = qw(dbh repl_table OptionParser TableParser Quoter);
7026
foreach my $arg ( @required_args ) {
6303
7027
die "I need a $arg argument" unless $args{$arg};
6306
my $dbh = $args{dbh};
6310
my $replicate_table = $o->get('replicate');
6311
return unless $replicate_table;
6313
use_repl_db(%args); # USE the proper replicate db
6315
my ($db, $tbl) = $q->split_unquote($replicate_table);
7029
my ($dbh, $repl_table, $o, $tp, $q) = @args{@required_args};
7030
PTDEBUG && _d('Checking --replicate table', $repl_table);
7032
# If the repl db doesn't exist, auto-create it, maybe.
7033
my ($db, $tbl) = $q->split_unquote($repl_table);
7034
my $sql = "SHOW DATABASES LIKE '$db'";
7035
PTDEBUG && _d($sql);
7036
my @db_exists = $dbh->selectrow_array($sql);
7037
if ( !@db_exists && $o->get('create-replicate-table') ) {
7038
$sql = "CREATE DATABASE " . $q->quote($db) . " /* pt-table-checksum */";
7042
if ( $EVAL_ERROR ) {
7043
die "--replicate database $db does not exist and it cannot be "
7044
. "created automatically. You need to create the database.\n";
7048
# USE the correct db (probably the repl db, but maybe --replicate-database).
7051
# Check if the repl table exists; if not, create it, maybe.
6316
7052
my $tbl_exists = $tp->check_table(
6405
7159
my $opt = $o->get('replicate-database') ? "--replicate-database"
6406
7160
: "--replicate database";
6407
7161
if ( $EVAL_ERROR =~ m/unknown database/i ) {
6408
die "$opt `$db` does not exist: $EVAL_ERROR";
7162
die "$opt $db does not exist. You need to create the "
7163
. "database or specify a database for $opt that exists.\n";
6411
die "Error using $opt `$db`: $EVAL_ERROR";
7166
die "Error using $opt $db: $EVAL_ERROR\n";
6418
# Returns 1 on successful creation of the replicate table,
6420
7173
sub create_repl_table {
6421
7174
my ( %args ) = @_;
6422
foreach my $arg ( qw(o dbh) ) {
7175
my @required_args = qw(dbh repl_table OptionParser);
7176
foreach my $arg ( @required_args ) {
6423
7177
die "I need a $arg argument" unless $args{$arg};
6426
my $dbh = $args{dbh};
6428
my $replicate_table = $o->get('replicate');
6430
my $sql = $o->read_para_after(
6431
__FILE__, qr/MAGIC_create_replicate/);
6432
$sql =~ s/CREATE TABLE checksum/CREATE TABLE $replicate_table/;
7179
my ($dbh, $repl_table, $o) = @args{@required_args};
7180
PTDEBUG && _d('Creating --replicate table', $repl_table);
7181
my $sql = $o->read_para_after(__FILE__, qr/MAGIC_create_replicate/);
7182
$sql =~ s/CREATE TABLE checksums/CREATE TABLE $repl_table/;
6433
7183
$sql =~ s/;$//;
6434
MKDEBUG && _d($dbh, $sql);
7184
PTDEBUG && _d($dbh, $sql);
6436
7186
$dbh->do($sql);
6438
7188
if ( $EVAL_ERROR ) {
6439
MKDEBUG && _d('--create-replicate-table failed:', $EVAL_ERROR);
6446
sub read_repl_table {
6448
foreach my $arg ( qw(o dbh host) ) {
6449
die "I need a $arg argument" unless $args{$arg};
6452
my $dbh = $args{dbh};
6453
my $host = $args{host};
6455
my $replicate_table = $o->get('replicate');
6456
die "Cannot read replicate table because --replicate was not specified"
6457
unless $replicate_table;
6459
# Read checksums from replicate table.
6460
my $already_checksummed;
6462
= $dbh->selectall_arrayref("SELECT db, tbl, chunk FROM $replicate_table");
6464
# Save each finished checksum.
6465
foreach my $checksum ( @$checksums ) {
6466
my ( $db, $tbl, $chunk ) = @$checksum[0..2];
6467
$already_checksummed->{$db}->{$tbl}->{$chunk}->{$host} = 1;
6470
return $already_checksummed;
6473
sub parse_resume_file {
6474
my ( $resume_file ) = @_;
6476
open my $resume_fh, '<', $resume_file
6477
or die "Cannot open resume file $resume_file: $OS_ERROR";
6479
# The resume file, being the output from a previous run, should
6480
# have the columns DATABASE TABLE CHUNK HOST ... (in that order).
6481
# We only need those first 4 columns. We re-print every line of
6482
# the resume file so the end result will be the whole, finished
6483
# output: what the previous run got done plus what we are about
6484
# to resume and finish.
6485
my $already_checksummed;
6486
while ( my $line = <$resume_fh> ) {
6487
# Re-print every line.
6490
# If the line is a checksum line, parse from it the db, tbl,
6491
# chunk and host.
6492
if ( $line =~ m/^\S+\s+\S+\s+\d+\s+/ ) {
6493
my ( $db, $tbl, $chunk, $host ) = $line =~ m/(\S+)/g;
6494
$already_checksummed->{$db}->{$tbl}->{$chunk}->{$host} = 1;
6499
MKDEBUG && _d("Already checksummed:", Dumper($already_checksummed));
6501
return $already_checksummed;
6504
sub already_checksummed {
6505
my ( $d, $t, $c, $h ) = @_; # db, tbl, chunk num, host
6506
if ( exists $already_checksummed->{$d}->{$t}->{$c}->{$h} ) {
6507
MKDEBUG && _d("Skipping chunk because of --resume:", $d, $t, $c, $h);
6513
sub do_tbl_replicate {
6514
my ( $chunk_num, %args ) = @_;
6515
foreach my $arg ( qw(q host query tbl hdr explain final_o ch retry) ) {
6516
die "I need a $arg argument" unless $args{$arg};
6519
my $final_o = $args{final_o};
6521
my $host = $args{host};
6522
my $hdr = $args{hdr};
6523
my $explain = $args{explain};
6524
my $tbl = $args{tbl};
6525
my $retry = $args{retry};
6527
MKDEBUG && _d('Replicating chunk', $chunk_num,
6528
'of table', $tbl->{database}, '.', $tbl->{table},
6529
'on', $host->{h}, ':', $host->{P});
6531
my $dbh = $host->{dbh};
6534
use_repl_db(%args); # USE the proper replicate db
6539
$sql = $ch->inject_chunks(
6540
query => $args{query},
6541
database => $tbl->{database},
6542
table => $tbl->{table},
6543
chunks => $tbl->{chunks},
6544
chunk_num => $chunk_num,
6545
where => [$final_o->get('where'), $tbl->{since}],
6546
index_hint => $tbl->{index},
6549
if ( MKDEBUG && $chunk_num == 0 ) {
6550
_d("SQL for inject chunk 0:", $sql);
6553
my $where = $tbl->{chunks}->[$chunk_num];
6554
if ( $final_o->get('explain') ) {
6555
if ( $chunk_num == 0 ) {
6556
printf($explain, @{$tbl}{qw(database table)}, $sql)
6557
or die "Cannot print: $OS_ERROR";
6559
printf($explain, @{$tbl}{qw(database table)}, $where)
6560
or die "Cannot print: $OS_ERROR";
6564
# Actually run the checksum query
6567
wait => sub { return; },
6570
$dbh->do('SET @crc := "", @cnt := 0 /*!50108 , '
6571
. '@@binlog_format := "STATEMENT"*/');
6572
$dbh->do($sql, {}, @{$tbl}{qw(database table)}, $where);
6576
die $EVAL_ERROR; # caught in checksum_tables()
6580
# Catch any warnings thrown....
6581
my $sql_warn = 'SHOW WARNINGS';
6582
MKDEBUG && _d($sql_warn);
6583
my $warnings = $dbh->selectall_arrayref($sql_warn, { Slice => {} } );
6584
foreach my $warning ( @$warnings ) {
6585
if ( $warning->{message} =~ m/Data truncated for column 'boundaries'/ ) {
6586
_d("Warning: WHERE clause too large for boundaries column; ",
6587
"pt-table-sync may fail; value:", $where);
6589
elsif ( ($warning->{code} || 0) == 1592 ) {
6590
# Error: 1592 SQLSTATE: HY000 (ER_BINLOG_UNSAFE_STATEMENT)
6591
# Message: Statement may not be safe to log in statement format.
6592
# Ignore this warning because we have purposely set statement-based
6594
MKDEBUG && _d('Ignoring warning:', $warning->{message});
6597
# die doesn't permit extra line breaks so warn then die.
6598
warn "\nChecksum query caused a warning:\n"
6600
map { "\t$_: " . $warning->{$_} || '' } qw(level code message)
6602
. "\n\tquery: $sql\n\n";
6607
# Update the master_crc etc columns
6608
$fetch_sth->execute(@{$tbl}{qw(database table)}, $chunk_num);
6609
( $crc, $cnt ) = $fetch_sth->fetchrow_array();
6610
$update_sth->execute($crc, $cnt, @{$tbl}{qw(database table)}, $chunk_num);
6614
if ( !$final_o->get('quiet') && !$final_o->get('explain') ) {
6615
if ( $final_o->get('checksum') ) {
6616
printf($md5sum_fmt, $crc, $host->{h},
6617
@{$tbl}{qw(database table)}, $chunk_num)
6618
or die "Cannot print: $OS_ERROR";
6622
@{$tbl}{qw(database table)}, $chunk_num,
6623
$host->{h}, $tbl->{struct}->{engine}, $cnt, $crc,
6624
$end - $beg, 'NULL', 'NULL', 'NULL')
6625
or die "Cannot print: $OS_ERROR";
6633
my ( $chunk_num, $is_master, %args ) = @_;
6634
foreach my $arg ( qw(du final_o ms q tc dbh host tbl hdr explain strat) ) {
6635
die "I need a $arg argument" unless $args{$arg};
6638
my $final_o = $args{final_o};
6643
my $host = $args{host};
6644
my $tbl = $args{tbl};
6645
my $explain = $args{explain};
6646
my $hdr = $args{hdr};
6647
my $strat = $args{strat};
6649
MKDEBUG && _d('Checksumming chunk', $chunk_num,
6650
'of table', $tbl->{database}, '.', $tbl->{table},
6651
'on', $host->{h}, ':', $host->{P},
6652
'using algorithm', $strat);
6654
my $dbh = $host->{dbh};
6655
$dbh->do("USE " . $q->quote($tbl->{database}));
6662
# Begin timing the checksum operation.
6665
# I'm a slave. Wait to catch up to the master. Calculate slave lag.
6666
if ( !$is_master && !$final_o->get('explain') ) {
6667
if ( $final_o->get('wait') ) {
6668
MKDEBUG && _d('Waiting to catch up to master for --wait');
6669
my $result = $ms->wait_for_master(
6670
master_status => $tbl->{master_status},
6672
timeout => $final_o->get('wait'),
6674
$sta = $result && defined $result->{result}
6679
if ( $final_o->get('slave-lag') ) {
6680
MKDEBUG && _d('Getting slave lag for --slave-lag');
6681
my $res = $ms->get_slave_status($dbh);
6682
$lag = $res && defined $res->{seconds_behind_master}
6683
? $res->{seconds_behind_master}
6688
# Time the checksum operation and the wait-for-master operation separately.
6691
# Check that table exists on slave.
6693
if ( !$is_master || !$checksum_table_data ) {
6694
$have_table = $tp->check_table(
6696
db => $tbl->{database},
6697
tbl => $tbl->{table},
6699
warn "$tbl->{database}.$tbl->{table} does not exist on slave"
6700
. ($host->{h} ? " $host->{h}" : '')
6701
. ($host->{P} ? ":$host->{P}" : '')
6705
if ( $have_table ) {
6706
# Do the checksum operation.
6707
if ( $checksum_table_data ) {
6708
if ( $strat eq 'CHECKSUM' ) {
6709
if ( $final_o->get('crc') ) {
6710
$crc = do_checksum(%args);
6712
if ( $final_o->get('count') ) {
6713
$cnt = do_count($chunk_num, %args);
6716
elsif ( $final_o->get('crc') ) {
6717
( $cnt, $crc ) = do_var_crc($chunk_num, %args);
6721
$cnt = do_count($chunk_num, %args);
6724
else { # Checksum SHOW CREATE TABLE for --schema.
6726
= $du->get_create_table($dbh, $q, $tbl->{database}, $tbl->{table});
6727
$create = $create->[1];
6728
$create = $tp->remove_auto_increment($create);
6729
$crc = $tc->crc32($create);
6735
if ( !$final_o->get('quiet') && !$final_o->get('explain') ) {
6736
if ( $final_o->get('checksum') ) {
6737
printf($md5sum_fmt, $crc, $host->{h},
6738
@{$tbl}{qw(database table)}, $chunk_num)
6739
or die "Cannot print: $OS_ERROR";
6743
@{$tbl}{qw(database table)}, $chunk_num,
6744
$host->{h}, $tbl->{struct}->{engine}, $cnt, $crc,
6745
$end - $mid, $mid - $beg, $sta, $lag)
6746
or die "Cannot print: $OS_ERROR";
6754
my ( $dsn, %args ) = @_;
6755
foreach my $arg ( qw(o dp) ) {
6756
die "I need a $arg argument" unless $args{$arg};
6761
if ( $o->get('ask-pass') && !defined $dsn->{p} ) {
6762
$dsn->{p} = OptionParser::prompt_noecho("Enter password for $dsn->{h}: ");
6765
my $ac = $o->get('lock') ? 0 : 1;
6766
my $dbh = $dp->get_dbh(
6767
$dp->get_cxn_params($dsn), { AutoCommit => $ac });
6768
$dp->fill_in_dsn($dbh, $dsn);
6769
$dbh->{InactiveDestroy} = 1; # Prevent destroying on fork.
6770
$dbh->{FetchHashKeyName} = 'NAME_lc';
6775
my ( $chunk_num, %args ) = @_;
6776
foreach my $arg ( qw(ch dbh query tbl explain final_o) ) {
6777
die "I need a $arg argument" unless $args{$arg};
6779
my $final_o = $args{final_o};
6781
my $tbl = $args{tbl};
6782
my $explain = $args{explain};
6783
my $dbh = $args{dbh};
6785
MKDEBUG && _d("do_var_crc for", $tbl->{table});
6787
my $sql = $ch->inject_chunks(
6788
query => $args{query},
6789
database => $tbl->{database},
6790
table => $tbl->{table},
6791
chunks => $tbl->{chunks},
6792
chunk_num => $chunk_num,
6793
where => [$final_o->get('where'), $tbl->{since}],
6794
index_hint => $tbl->{index},
6797
if ( MKDEBUG && $chunk_num == 0 ) {
6798
_d("SQL for chunk 0:", $sql);
6801
if ( $final_o->get('explain') ) {
6802
if ( $chunk_num == 0 ) {
6803
printf($explain, @{$tbl}{qw(database table)}, $sql)
6804
or die "Cannot print: $OS_ERROR";
6806
printf($explain, @{$tbl}{qw(database table)},$tbl->{chunks}->[$chunk_num])
6807
or die "Cannot print: $OS_ERROR";
6811
$dbh->do('set @crc := "", @cnt := 0');
6812
my $res = $dbh->selectall_arrayref($sql, { Slice => {} })->[0];
6813
return ($res->{cnt}, $res->{crc});
6818
foreach my $arg ( qw(dbh query tbl explain final_o) ) {
6819
die "I need a $arg argument" unless $args{$arg};
6821
my $dbh = $args{dbh};
6822
my $final_o = $args{final_o};
6823
my $tbl = $args{tbl};
6824
my $query = $args{query};
6825
my $explain = $args{explain};
6827
MKDEBUG && _d("do_checksum for", $tbl->{table});
6829
if ( $final_o->get('explain') ) {
6830
printf($explain, @{$tbl}{qw(database table)}, $query)
6831
or die "Cannot print: $OS_ERROR";
6834
my $res = $dbh->selectrow_hashref($query);
6836
my ($key) = grep { m/checksum/i } keys %$res;
6837
return defined $res->{$key} ? $res->{$key} : 'NULL';
6845
my ( $chunk_num, %args ) = @_;
6846
foreach my $arg ( qw(q dbh tbl explain final_o) ) {
6847
die "I need a $arg argument" unless $args{$arg};
6849
my $final_o = $args{final_o};
6850
my $tbl = $args{tbl};
6851
my $explain = $args{explain};
6852
my $dbh = $args{dbh};
6855
MKDEBUG && _d("do_count for", $tbl->{table});
6857
my $sql = "SELECT COUNT(*) FROM "
6858
. $q->quote(@{$tbl}{qw(database table)});
6859
if ( $final_o->get('where') || $final_o->get('since') ) {
6860
my $where_since = ($final_o->get('where'), $final_o->get('since'));
6862
. join(" AND ", map { "($_)" } grep { $_ } @$where_since )
6865
if ( $final_o->get('explain') ) {
6866
printf($explain, @{$tbl}{qw(database table)}, $sql)
6867
or die "Cannot print: $OS_ERROR";
6870
return $dbh->selectall_arrayref($sql)->[0]->[0];
6878
grep { !$seen{$_}++ } @_;
6881
# Tries to extract the MySQL error message and print it
6883
my ( $o, $msg, $db, $tbl, $host ) = @_;
6884
return if !defined $msg
6885
# Honor --quiet in the (common?) event of dropped tables or deadlocks
6886
or ($o->get('quiet')
6887
&& $EVAL_ERROR =~ m/: Table .*? doesn't exist|Deadlock found/);
6888
$msg =~ s/^.*?failed: (.*?) at \S+ line (\d+).*$/$1 at line $2/s;
6890
if ( $db && $tbl ) {
6891
$msg .= " while doing $db.$tbl";
6894
$msg .= " on $host";
6896
print STDERR $msg, "\n";
6899
# Returns when Seconds_Behind_Master on all the given slaves
6900
# is < max_lag, waits check_interval seconds between checks
6901
# if a slave is lagging too much.
6902
sub wait_for_slaves {
6904
my $slaves = $args{slaves};
6905
my $max_lag = $args{max_lag};
6906
my $check_interval = $args{check_interval};
6907
my $dp = $args{DSNParser};
6908
my $ms = $args{MasterSlave};
6909
my $pr = $args{progress};
6911
return unless scalar @$slaves;
6912
my $n_slaves = @$slaves;
6916
# If you use the default Progress report callback, you'll need to
6917
# add Transformers.pm to this tool.
6919
$pr_callback = sub {
6920
my ($fraction, $elapsed, $remaining, $eta, $slave_no) = @_;
6922
print STDERR "Waiting for slave(s) to catchup...\n";
6926
print STDERR "Still waiting ($elapsed seconds)...\n";
6930
$pr->set_callback($pr_callback);
6933
for my $slave_no ( 0..($n_slaves-1) ) {
6934
my $slave = $slaves->[$slave_no];
6935
MKDEBUG && _d('Checking slave', $dp->as_string($slave->{dsn}),
6936
'lag for throttle');
6937
my $lag = $ms->get_slave_lag($slave->{dbh});
6938
while ( !defined $lag || $lag > $max_lag ) {
6939
MKDEBUG && _d('Slave lag', $lag, '>', $max_lag,
6940
'; sleeping', $check_interval);
6942
# Report what we're waiting for before we wait.
6943
$pr->update(sub { return $slave_no; }) if $pr;
6945
sleep $check_interval;
6946
$lag = $ms->get_slave_lag($slave->{dbh});
6948
MKDEBUG && _d('Slave ready, lag', $lag, '<=', $max_lag);
6954
# Sub: is_oversize_chunk
6955
# Determine if the chunk is oversize.
6960
# Required Arguments:
6962
# * db - db name, not quoted
6963
# * tbl - tbl name, not quoted
6964
# * chunk_size - chunk size in number of rows
6965
# * chunk - chunk, e.g. "`a` > 10"
6966
# * limit - oversize if rows > factor * chunk_size
6967
# * Quoter - <Quoter> object
6969
# Optional Arguments:
6970
# * where - Arrayref of WHERE clauses added to chunk
6971
# * index_hint - FORCE INDEX clause
6974
# True if EXPLAIN rows is >= chunk_size * limit, else false
6975
sub is_oversize_chunk {
6977
my @required_args = qw(dbh db tbl chunk_size chunk limit Quoter);
6978
foreach my $arg ( @required_args ) {
6979
die "I need a $arg argument" unless $args{$arg};
6982
my $where = [$args{chunk}, $args{where} ? @{$args{where}} : ()];
6985
$expl = _explain(%args, where => $where);
6987
if ( $EVAL_ERROR ) {
6988
# This shouldn't happen in production but happens in testing because
6989
# we chunk tables that don't actually exist.
6990
MKDEBUG && _d("Failed to EXPLAIN chunk:", $EVAL_ERROR);
6991
return $args{chunk};
6993
MKDEBUG && _d("Chunk", $args{chunk}, "covers", ($expl->{rows} || 0), "rows");
6995
return ($expl->{rows} || 0) >= $args{chunk_size} * $args{limit} ? 1 : 0;
6998
# Sub: is_chunkable_table
6999
# Determine if the table is chunkable.
7004
# Required Arguments:
7006
# * db - db name, not quoted
7007
# * tbl - tbl name, not quoted
7008
# * chunk_size - chunk size in number of rows
7009
# * Quoter - <Quoter> object
7011
# Optional Arguments:
7012
# * where - Arrayref of WHERE clauses added to chunk
7013
# * index_hint - FORCE INDEX clause
7016
# True if EXPLAIN rows is <= chunk_size, else false
7017
sub is_chunkable_table {
7019
my @required_args = qw(dbh db tbl chunk_size Quoter);
7020
foreach my $arg ( @required_args ) {
7021
die "I need a $arg argument" unless $args{$arg};
7026
$expl = _explain(%args);
7028
if ( $EVAL_ERROR ) {
7029
# This shouldn't happen in production but happens in testing because
7030
# we chunk tables that don't actually exist.
7031
MKDEBUG && _d("Failed to EXPLAIN table:", $EVAL_ERROR);
7032
return; # err on the side of caution: not chunkable if not explainable
7034
MKDEBUG && _d("Table has", ($expl->{rows} || 0), "rows");
7036
return ($expl->{rows} || 0) <= $args{chunk_size} ? 1 : 0;
7040
# EXPLAIN a chunk or table.
7045
# Required Arguments:
7047
# * db - db name, not quoted
7048
# * tbl - tbl name, not quoted
7049
# * Quoter - <Quoter> object
7051
# Optional Arguments:
7052
# * where - Arrayref of WHERE clauses added to chunk
7053
# * index_hint - FORCE INDEX clause
7056
# Hashref of first EXPLAIN row
7059
my @required_args = qw(dbh db tbl Quoter);
7060
foreach my $arg ( @required_args ) {
7061
die "I need a $arg argument" unless $args{$arg};
7063
my ($dbh, $db, $tbl, $q) = @args{@required_args};
7065
my $db_tbl = $q->quote($db, $tbl);
7067
if ( $args{where} && @{$args{where}} ) {
7068
$where = join(" AND ", map { "($_)" } grep { defined } @{$args{where}});
7070
my $sql = "EXPLAIN SELECT * FROM $db_tbl"
7071
. ($args{index_hint} ? " $args{index_hint}" : "")
7072
. ($args{where} ? " WHERE $where" : "");
7073
MKDEBUG && _d($dbh, $sql);
7075
my $expl = $dbh->selectrow_hashref($sql);
7189
die ts("--create-replicate-table failed: $EVAL_ERROR");
7195
# Sub: explain_statement
7196
# EXPLAIN a statement.
7198
# Required Arguments:
7199
# * tbl - Standard tbl hashref
7200
# * sth - Sth with EXPLAIN <statement>
7201
# * vals - Values for sth, if any
7204
# Hashref with EXPLAIN plan
7205
sub explain_statement {
7207
my @required_args = qw(tbl sth vals);
7208
foreach my $arg ( @required_args ) {
7209
die "I need a $arg argument" unless defined $args{$arg};
7211
my ($tbl, $sth, $vals) = @args{@required_args};
7215
PTDEBUG && _d($sth->{Statement}, 'params:', @$vals);
7216
$sth->execute(@$vals);
7217
$expl = $sth->fetchrow_hashref();
7220
if ( $EVAL_ERROR ) {
7221
# This shouldn't happen.
7222
warn ts("Error executing " . $sth->{Statement} . ": $EVAL_ERROR\n");
7223
$tbl->{checksum_results}->{errors}++;
7225
PTDEBUG && _d('EXPLAIN plan:', Dumper($expl));
7231
my @required_args = qw(dbh repl_table);
7232
foreach my $arg ( @required_args ) {
7233
die "I need a $arg argument" unless $args{$arg};
7235
my ($dbh, $repl_table, $q) = @args{@required_args};
7236
PTDEBUG && _d('Getting last chunk for --resume');
7238
my $sql = "SELECT * FROM $repl_table FORCE INDEX (ts_db_tbl) "
7239
. "WHERE master_cnt IS NOT NULL "
7240
. "ORDER BY ts DESC, db DESC, tbl DESC LIMIT 1";
7241
PTDEBUG && _d($sql);
7242
my $sth = $dbh->prepare($sql);
7244
my $last_chunk = $sth->fetchrow_hashref();
7246
PTDEBUG && _d('Last chunk:', Dumper($last_chunk));
7248
if ( !$last_chunk || !$last_chunk->{ts} ) {
7249
PTDEBUG && _d('Replicate table is empty; will not resume');
7256
sub have_more_chunks {
7258
my @required_args = qw(tbl last_chunk NibbleIterator);
7259
foreach my $arg ( @required_args ) {
7260
die "I need a $arg argument" unless $args{$arg};
7262
my ($tbl, $last_chunk, $nibble_iter) = @args{@required_args};
7263
PTDEBUG && _d('Checking for more chunks beyond last chunk');
7265
# If there's no next lower boundary, then this is the last
7266
# chunk of the table.
7267
if ( !$nibble_iter->more_boundaries() ) {
7268
PTDEBUG && _d('No more boundaries');
7272
# The previous chunk index must match the current chunk index,
7273
# else we don't know what to do.
7274
my $chunk_index = lc($nibble_iter->nibble_index() || '');
7275
if (lc($last_chunk->{chunk_index} || '') ne $chunk_index) {
7276
warn ts("Cannot resume from table $tbl->{db}.$tbl->{tbl} chunk "
7277
. "$last_chunk->{chunk} because the chunk indexes are different: "
7278
. ($last_chunk->{chunk_index} ? $last_chunk->{chunk_index}
7280
. " was used originally but "
7281
. ($chunk_index ? $chunk_index : "no index")
7282
. " is used now. If the table has not changed significantly, "
7283
. "this may be caused by running the tool with different command "
7284
. "line options. This table will be skipped and checksumming "
7285
. "will resume with the next table.\n");
7286
$tbl->{checksum_results}->{errors}++;
7290
return 1; # more chunks
7293
sub wait_for_last_checksum {
7295
my @required_args = qw(tbl repl_table slaves max_chunk OptionParser);
7296
foreach my $arg ( @required_args ) {
7297
die "I need a $arg argument" unless defined $args{$arg};
7299
my ($tbl, $repl_table, $slaves, $max_chunk, $o) = @args{@required_args};
7300
my $check_pr = $args{check_pr};
7302
# Requiring "AND master_crc IS NOT NULL" avoids a race condition
7303
# when the system is fast but replication is slow. In such cases,
7304
# we can select on the slave before the update for $update_sth
7305
# replicates; this causes a false-positive diff.
7306
my $sql = "SELECT MAX(chunk) FROM $repl_table "
7307
. "WHERE db='$tbl->{db}' AND tbl='$tbl->{tbl}' "
7308
. "AND master_crc IS NOT NULL";
7309
PTDEBUG && _d($sql);
7312
my $n_slaves = scalar @$slaves - 1;
7315
while ( $oktorun && ($chunks[0] || 0) < $max_chunk ) {
7317
for my $i ( 0..$n_slaves ) {
7318
my $slave = $slaves->[$i];
7319
if ( $skip_slave{$i} ) {
7320
PTDEBUG && _d('Skipping slave', $slave->name(),
7321
'due to previous error it caused');
7325
my ($chunk) = $slave->dbh()->selectrow_array($sql);
7326
PTDEBUG && _d($slave->name(), 'max chunk:', $chunk);
7327
push @chunks, $chunk || 0;
7330
if ( $o->get('quiet') < 2 ) {
7331
warn ts("Error waiting for the last checksum of table "
7332
. "$tbl->{db}.$tbl->{tbl} to replicate to "
7333
. "replica " . $slave->name() . ": $EVAL_ERROR\n"
7334
. "Check that the replica is running and has the "
7335
. "replicate table $repl_table. Checking the replica "
7336
. "for checksum differences will probably cause "
7337
. "another error.\n");
7339
$tbl->{checksum_results}->{errors}++;
7340
$skip_slave{$i} = 1;
7344
@chunks = sort { $a <=> $b } @chunks;
7345
if ( $chunks[0] < $max_chunk ) {
7347
$check_pr->update(sub { return $chunks[0]; });
7350
# We shouldn't wait long here because we already waited
7351
# for all slaves to catchup at least until --max-lag.
7352
$sleep_time += 0.25 if $sleep_time <= $o->get('max-lag');
7353
PTDEBUG && _d('Sleep', $sleep_time, 'waiting for chunks');
7360
# Catches signals so we can exit gracefully.
7362
my ( $signal ) = @_;
7364
print STDERR "# Caught SIG$signal.\n";
7368
print STDERR "# Exiting on SIG$signal.\n";
7080
7374
my ($package, undef, $line) = caller 0;
7081
7375
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
7157
7438
=head1 DESCRIPTION
7159
pt-table-checksum generates table checksums for MySQL tables, typically
7160
useful for verifying your slaves are in sync with the master. The checksums
7161
are generated by a query on the server, and there is very little network
7162
traffic as a result.
7164
Checksums typically take about twice as long as COUNT(*) on very large InnoDB
7165
tables in my tests. For smaller tables, COUNT(*) is a good bit faster than
7166
the checksums. See L<"--algorithm"> for more details on performance.
7168
If you specify more than one server, pt-table-checksum assumes the first
7169
server is the master and others are slaves. Checksums are parallelized for
7170
speed, forking off a child process for each table. Duplicate server names are
7171
ignored, but if you want to checksum a server against itself you can use two
7172
different forms of the hostname (for example, "localhost 127.0.0.1", or
7173
"h=localhost,P=3306 h=localhost,P=3307").
7175
If you want to compare the tables in one database to those in another database
7176
on the same server, just checksum both databases:
7178
pt-table-checksum --databases db1,db2
7180
You can then use L<pt-checksum-filter> to compare the results in both databases
7183
pt-table-checksum examines table structure only on the first host specified,
7184
so if anything differs on the others, it won't notice. It ignores views.
7186
The checksums work on MySQL version 3.23.58 through 6.0-alpha. They will not
7187
necessarily produce the same values on all versions. Differences in
7188
formatting and/or space-padding between 4.1 and 5.0, for example, will cause
7189
the checksums to be different.
7191
=head1 SPECIFYING HOSTS
7193
Each host is specified on the command line as a DSN. A DSN is a comma-separated
7194
list of C<option=value> pairs. The most basic DSN is C<h=host> to specify
7195
the hostname of the server and use default for everything else (port, etc.).
7196
See L<"DSN OPTIONS"> for more information.
7198
DSN options that are listed as C<copy: yes> are copied from the first DSN
7199
to subsequent DSNs that do not specify the DSN option. For example,
7200
C<h=host1,P=12345 h=host2> is equivalent to C<h=host1,P=12345 h=host2,P=12345>.
7201
This allows you to avoid repeating DSN options that have the same value
7204
Connection-related command-line options like L<"--user"> and L<"--password">
7205
provide default DSN values for the corresponding DSN options indicated by
7206
the short form of each option. For example, the short form of L<"--user">
7207
is C<-u> which corresponds to the C<u> DSN option, so C<--user bob h=host>
7208
is equivalent to C<h=host,u=bob>. These defaults apply to all DSNs that
7209
do not specify the DSN option.
7211
The DSN option value precedence from highest to lowest is:
7213
* explicit values in each DSN on the command-line
7214
* copied values from the first DSN
7215
* default values from connection-related command-line options
7217
If you are confused about how pt-table-checksum will connect to your servers,
7218
use the L<"--explain-hosts"> option and it will tell you.
7220
=head1 HOW FAST IS IT?
7222
Speed and efficiency are important, because the typical use case is checksumming
7223
large amounts of data.
7225
C<pt-table-checksum> is designed to do very little work itself, and generates
7226
very little network traffic aside from inspecting table structures with C<SHOW
7227
CREATE TABLE>. The results of checksum queries are typically 40-character or
7230
The MySQL server does the bulk of the work, in the form of the checksum queries.
7231
The following benchmarks show the checksum query times for various checksum
7232
algorithms. The first two results are simply running C<COUNT(col8)> and
7233
C<CHECKSUM TABLE> on the table. C<CHECKSUM TABLE> is just C<CRC32> under the
7234
hood, but it's implemented inside the storage engine layer instead of at the
7237
ALGORITHM HASH FUNCTION EXTRA TIME
7238
============== ============= ============== =====
7243
BIT_XOR MD5 --optimize-xor 80.0
7245
BIT_XOR SHA1 --optimize-xor 90.1
7250
The tests are entirely CPU-bound. The sample data is an InnoDB table with the
7251
following structure:
7260
col7 smallint unsigned NOT NULL,
7261
col8 timestamp NOT NULL,
7262
PRIMARY KEY (col2, col1),
7267
The table has 4303585 rows, 365969408 bytes of data and 173457408 bytes of
7268
indexes. The server is a Dell PowerEdge 1800 with dual 32-bit Xeon 2.8GHz
7269
processors and 2GB of RAM. The tests are fully CPU-bound, and the server is
7270
otherwise idle. The results are generally consistent to within a tenth of a
7271
second on repeated runs.
7273
C<CRC32> is the default checksum function to use, and should be enough for most
7274
cases. If you need stronger guarantees that your data is identical, you should
7275
use one of the other functions.
7277
=head1 ALGORITHM SELECTION
7279
The L<"--algorithm"> option allows you to specify which algorithm you would
7280
like to use, but it does not guarantee that pt-table-checksum will use this
7281
algorithm. pt-table-checksum will ultimately select the best algorithm possible
7282
given various factors such as the MySQL version and other command line options.
7284
The three basic algorithms in descending order of preference are CHECKSUM,
7285
BIT_XOR and ACCUM. CHECKSUM cannot be used if any one of these criteria
7290
* --chunk-size is used
7291
* --replicate is used
7293
* MySQL version less than 4.1.1
7295
The BIT_XOR algorithm also requires MySQL version 4.1.1 or later.
7297
After checking these criteria, if the requested L<"--algorithm"> remains then it
7298
is used, otherwise the first remaining algorithm with the highest preference
7301
=head1 CONSISTENT CHECKSUMS
7303
If you are using this tool to verify your slaves still have the same data as the
7304
master, which is why I wrote it, you should read this section.
7306
The best way to do this with replication is to use the L<"--replicate"> option.
7307
When the queries are finished running on the master and its slaves, you can go
7308
to the slaves and issue SQL queries to see if any tables are different from the
7309
master. Try the following:
7311
SELECT db, tbl, chunk, this_cnt-master_cnt AS cnt_diff,
7312
this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc)
7315
WHERE master_cnt <> this_cnt OR master_crc <> this_crc
7316
OR ISNULL(master_crc) <> ISNULL(this_crc);
7318
The L<"--replicate-check"> option can do this query for you. If you can't use
7319
this method, try the following:
7325
If your servers are not being written to, you can just run the tool with no
7328
pt-table-checksum server1 server2 ... serverN
7332
If the servers are being written to, you need some way to make sure they are
7333
consistent at the moment you run the checksums. For situations other than
7334
master-slave replication, you will have to figure this out yourself. You may be
7335
able to use the L<"--where"> option with a date or time column to only checksum
7336
data that's not recent.
7340
If you are checksumming a master and slaves, you can do a fast parallel
7341
checksum and assume the slaves are caught up to the master. In practice, this
7342
tends to work well except for tables which are constantly updated. You can
7343
use the L<"--slave-lag"> option to see how far behind each slave was when it
7344
checksummed a given table. This can help you decide whether to investigate
7349
The next most disruptive technique is to lock the table on the master, then take
7350
checksums. This should prevent changes from propagating to the slaves. You can
7351
just lock on the master (with L<"--lock">), or you can both lock on the master
7352
and wait on the slaves till they reach that point in the master's binlog
7353
(L<"--wait">). Which is better depends on your workload; only you know that.
7357
If you decide to make the checksums on the slaves wait until they're guaranteed
7358
to be caught up to the master, the algorithm looks like this:
7365
Slave(s): wait for pos, then checksum
7367
Master: unlock table
7372
What I typically do when I'm not using the L<"--replicate"> option is simply run
7373
the tool on all servers with no further options. This runs fast, parallel,
7374
non-blocking checksums simultaneously. If there are tables that look different,
7375
I re-run with L<"--wait">=600 on the tables in question. This makes the tool
7376
lock on the master as explained above.
7440
pt-table-checksum is designed to do the right thing by default in almost every
7441
case. When in doubt, use L<"--explain"> to see how the tool will checksum a
7442
table. The following is a high-level overview of how the tool functions.
7444
In contrast to older versions of pt-table-checksum, this tool is focused on a
7445
single purpose, and does not have a lot of complexity or support many different
7446
checksumming techniques. It executes checksum queries on only one server, and
7447
these flow through replication to re-execute on replicas. If you need the older
7448
behavior, you can use Percona Toolkit version 1.0.
7450
pt-table-checksum connects to the server you specify, and finds databases and
7451
tables that match the filters you specify (if any). It works one table at a
7452
time, so it does not accumulate large amounts of memory or do a lot of work
7453
before beginning to checksum. This makes it usable on very large servers. We
7454
have used it on servers with hundreds of thousands of databases and tables, and
7455
trillions of rows. No matter how large the server is, pt-table-checksum works
7458
One reason it can work on very large tables is that it divides each table into
7459
chunks of rows, and checksums each chunk with a single REPLACE..SELECT query.
7460
It varies the chunk size to make the checksum queries run in the desired amount
7461
of time. The goal of chunking the tables, instead of doing each table with a
7462
single big query, is to ensure that checksums are unintrusive and don't cause
7463
too much replication lag or load on the server. That's why the target time for
7464
each chunk is 0.5 seconds by default.
7466
The tool keeps track of how quickly the server is able to execute the queries,
7467
and adjusts the chunks as it learns more about the server's performance. It
7468
uses an exponentially decaying weighted average to keep the chunk size stable,
7469
yet remain responsive if the server's performance changes during checksumming
7470
for any reason. This means that the tool will quickly throttle itself if your
7471
server becomes heavily loaded during a traffic spike or a background task, for
7474
Chunking is accomplished by a technique that we used to call "nibbling" in other
7475
tools in Percona Toolkit. It is the same technique used for pt-archiver, for
7476
example. The legacy chunking algorithms used in older versions of
7477
pt-table-checksum are removed, because they did not result in predictably sized
7478
chunks, and didn't work well on many tables. All that is required to divide a
7479
table into chunks is an index of some sort (preferably a primary key or unique
7480
index). If there is no index, and the table contains a suitably small number of
7481
rows, the tool will checksum the table in a single chunk.
7483
pt-table-checksum has many other safeguards to ensure that it does not interfere
7484
with any server's operation, including replicas. To accomplish this,
7485
pt-table-checksum detects replicas and connects to them automatically. (If this
7486
fails, you can give it a hint with the L<"--recursion-method"> option.)
7488
The tool monitors replicas continually. If any replica falls too far behind in
7489
replication, pt-table-checksum pauses to allow it to catch up. If any replica
7490
has an error, or replication stops, pt-table-checksum pauses and waits. In
7491
addition, pt-table-checksum looks for common causes of problems, such as
7492
replication filters, and refuses to operate unless you force it to. Replication
7493
filters are dangerous, because the queries that pt-table-checksum executes could
7494
potentially conflict with them and cause replication to fail.
7496
pt-table-checksum verifies that chunks are not too large to checksum safely. It
7497
performs an EXPLAIN query on each chunk, and skips chunks that might be larger
7498
than the desired number of rows. You can configure the sensitivity of this
7499
safeguard with the L<"--chunk-size-limit"> option. If a table will be
7500
checksummed in a single chunk because it has a small number of rows, then
7501
pt-table-checksum additionally verifies that the table isn't oversized on
7502
replicas. This avoids the following scenario: a table is empty on the master
7503
but is very large on a replica, and is checksummed in a single large query,
7504
which causes a very long delay in replication.
7506
There are several other safeguards. For example, pt-table-checksum sets its
7507
session-level innodb_lock_wait_timeout to 1 second, so that if there is a lock
7508
wait, it will be the victim instead of causing other queries to time out.
7509
Another safeguard checks the load on the database server, and pauses if the load
7510
is too high. There is no single right answer for how to do this, but by default
7511
pt-table-checksum will pause if there are more than 25 concurrently executing
7512
queries. You should probably set a sane value for your server with the
7513
L<"--max-load"> option.
7515
Checksumming usually is a low-priority task that should yield to other work on
7516
the server. However, a tool that must be restarted constantly is difficult to
7517
use. Thus, pt-table-checksum is very resilient to errors. For example, if the
7518
database administrator needs to kill pt-table-checksum's queries for any reason,
7519
that is not a fatal error. Users often run pt-kill to kill any long-running
7520
checksum queries. The tool will retry a killed query once, and if it fails
7521
again, it will move on to the next chunk of that table. The same behavior
7522
applies if there is a lock wait timeout. The tool will print a warning if such
7523
an error happens, but only once per table. If the connection to any server
7524
fails, pt-table-checksum will attempt to reconnect and continue working.
7526
If pt-table-checksum encounters a condition that causes it to stop completely,
7527
it is easy to resume it with the L<"--resume"> option. It will begin from the
7528
last chunk of the last table that it processed. You can also safely stop the
7529
tool with CTRL-C. It will finish the chunk it is currently processing, and then
7530
exit. You can resume it as usual afterwards.
7532
After pt-table-checksum finishes checksumming all of the chunks in a table, it
7533
pauses and waits for all detected replicas to finish executing the checksum
7534
queries. Once that is finished, it checks all of the replicas to see if they
7535
have the same data as the master, and then prints a line of output with the
7536
results. You can see a sample of its output later in this documentation.
7538
The tool prints progress indicators during time-consuming operations. It prints
7539
a progress indicator as each table is checksummed. The progress is computed by
7540
the estimated number of rows in the table. It will also print a progress report
7541
when it pauses to wait for replication to catch up, and when it is waiting to
7542
check replicas for differences from the master. You can make the output less
7543
verbose with the L<"--quiet"> option.
7545
If you wish, you can query the checksum tables manually to get a report of which
7546
tables and chunks have differences from the master. The following query will
7547
report every database and table with differences, along with a summary of the
7548
number of chunks and rows possibly affected:
7550
SELECT db, tbl, SUM(this_cnt) AS total_rows, COUNT(*) AS chunks
7551
FROM percona.checksums
7553
master_cnt <> this_cnt
7554
OR master_crc <> this_crc
7555
OR ISNULL(master_crc) <> ISNULL(this_crc))
7558
The table referenced in that query is the checksum table, where the checksums
7559
are stored. Each row in the table contains the checksum of one chunk of data
7560
from some table in the server.
7562
Version 2.0 of pt-table-checksum is not backwards compatible with pt-table-sync
7563
version 1.0. In some cases this is not a serious problem. Adding a
7564
"boundaries" column to the table, and then updating it with a manually generated
7565
WHERE clause, may suffice to let pt-table-sync version 1.0 interoperate with
7566
pt-table-checksum version 2.0. Assuming an integer primary key named 'id', you
7567
can try something like the following:
7569
ALTER TABLE checksums ADD boundaries VARCHAR(500);
7571
SET boundaries = COALESCE(CONCAT('id BETWEEN ', lower_boundary,
7572
' AND ', upper_boundary), '1=1');
7380
Output is to STDOUT, one line per server and table, with header lines for each
7381
database. I tried to make the output easy to process with awk. For this reason
7382
columns are always present. If there's no value, pt-table-checksum prints
7385
The default is column-aligned output for human readability, but you can change
7386
it to tab-separated if you want. Use the L<"--tab"> option for this.
7388
Output is unsorted, though all lines for one table should be output together.
7389
For speed, all checksums are done in parallel (as much as possible) and may
7390
complete out of the order in which they were started. You might want to run
7391
them through another script or command-line utility to make sure they are in the
7392
order you want. If you pipe the output through L<pt-checksum-filter>, you
7393
can sort the output and/or avoid seeing output about tables that have no
7396
The columns in the output are as follows. The database, table, and chunk come
7397
first so you can sort by them easily (they are the "primary key").
7399
Output from L<"--replicate-check"> and L<"--checksum"> are different.
7405
The database the table is in.
7576
The tool prints tabular results, one line per table:
7578
TS ERRORS DIFFS ROWS CHUNKS SKIPPED TIME TABLE
7579
10-20T08:36:50 0 0 200 1 0 0.005 db1.tbl1
7580
10-20T08:36:50 0 0 603 7 0 0.035 db1.tbl2
7581
10-20T08:36:50 0 0 16 1 0 0.003 db2.tbl3
7582
10-20T08:36:50 0 0 600 6 0 0.024 db2.tbl4
7584
Errors, warnings, and progress reports are printed to standard error. See also
7587
Each table's results are printed when the tool finishes checksumming the table.
7588
The columns are as follows:
7594
The timestamp (without the year) when the tool finished checksumming the table.
7598
The number of errors and warnings that occurred while checksumming the table.
7599
Errors and warnings are printed to standard error while the table is in
7604
The number of chunks that differ from the master on one or more replicas.
7605
If C<--no-replicate-check> is specified, this column will always have zeros.
7606
If L<"--replicate-check-only"> is specified, then only tables with differences
7611
The number of rows selected and checksummed from the table. It might be
7612
different from the number of rows in the table if you use the --where option.
7616
The number of chunks into which the table was divided.
7620
The number of chunks that were skipped due to errors or warnings, or because
7621
they were oversized.
7625
The time elapsed while checksumming the table.
7629
The database and table that was checksummed.
7633
If L<"--replicate-check-only"> is specified, only checksum differences on
7634
detected replicas are printed. The output is different: one paragraph per
7635
replica, one checksum difference per line, and values are separated by spaces:
7637
Differences on h=127.0.0.1,P=12346
7638
TABLE CHUNK CNT_DIFF CRC_DIFF CHUNK_INDEX LOWER_BOUNDARY UPPER_BOUNDARY
7639
db1.tbl1 1 0 1 PRIMARY 1 100
7640
db1.tbl1 6 0 1 PRIMARY 501 600
7642
Differences on h=127.0.0.1,P=12347
7643
TABLE CHUNK CNT_DIFF CRC_DIFF CHUNK_INDEX LOWER_BOUNDARY UPPER_BOUNDARY
7644
db1.tbl1 1 0 1 PRIMARY 1 100
7645
db2.tbl2 9 5 0 PRIMARY 101 200
7647
The first line of a paragraph indicates the replica with differences.
7648
In this example there are two: h=127.0.0.1,P=12346 and h=127.0.0.1,P=12347.
7649
The columns are as follows:
7655
The database and table that differs from the master.
7413
The chunk (see L<"--chunk-size">). Zero if you are not doing chunked checksums.
7417
The server's hostname.
7421
The table's storage engine.
7425
The table's row count, unless you specified to skip it. If C<OVERSIZE> is
7426
printed, the chunk was skipped because the actual number of rows was greater
7427
than L<"--chunk-size"> times L<"--chunk-size-limit">.
7431
The table's checksum, unless you specified to skip it or the table has no rows.
7432
Some types of checksums will be 0 if there are no rows; others will print NULL.
7436
How long it took to checksum the C<CHUNK>, not including C<WAIT> time.
7437
Total checksum time is C<WAIT + TIME>.
7441
How long the slave waited to catch up to its master before beginning to
7442
checksum. C<WAIT> is always 0 for the master. See L<"--wait">.
7446
The return value of MASTER_POS_WAIT(). C<STAT> is always C<NULL> for the
7451
How far the slave lags the master, as reported by SHOW SLAVE STATUS.
7452
C<LAG> is always C<NULL> for the master.
7659
The chunk number of the table that differs from the master.
7663
The number of chunk rows on the replica minus the number of chunk rows
7668
1 if the CRC of the chunk on the replica is different than the CRC of the
7669
chunk on the master, else 0.
7673
The index used to chunk the table.
7675
=item LOWER_BOUNDARY
7677
The index values that define the lower boundary of the chunk.
7679
=item UPPER_BOUNDARY
7681
The index values that define the upper boundary of the chunk.
7456
=head1 REPLICATE TABLE MAINTENANCE
7458
If you use L<"--replicate"> to store and replicate checksums, you may need to
7459
perform maintenance on the replicate table from time to time to remove old
7460
checksums. This section describes when checksums in the replicate table are
7461
deleted automatically by pt-table-checksum and when you must manually delete
7464
Before starting, pt-table-checksum calculates chunks for each table, even
7465
if L<"--chunk-size"> is not specified (in that case there is one chunk: "1=1").
7466
Then, before checksumming each table, the tool deletes checksum chunks in the
7467
replicate table greater than the current number of chunks. For example,
7468
if a table is chunked into 100 chunks, 0-99, then pt-table-checksum does:
7470
DELETE FROM replicate table WHERE db=? AND tbl=? AND chunk > 99
7472
That removes any high-end chunks from previous runs which no longer exist.
7473
Currently, this operation cannot be disabled.
7475
If you use L<"--resume">, L<"--resume-replicate">, or L<"--modulo">, then
7476
you need to be careful that the number of rows in a table does not decrease
7477
so much that the number of chunks decreases too, else some checksum chunks may
7478
be deleted. The one exception is if only rows at the high end of the range
7479
are deleted. In that case, the high-end chunks are deleted and lower chunks
7480
remain unchanged. An increasing number of rows or chunks should not cause
7481
any adverse effects.
7483
Changing the L<"--chunk-size"> between runs with L<"--resume">,
7484
L<"--resume-replicate">, or L<"--modulo"> can cause odd or invalid checksums.
7485
You should not do this. It won't work with the resume options. With
7486
L<"--modulo">, the safest thing to do is manually delete all the rows in
7487
the replicate table for the table in question and start over.
7489
If the replicate table becomes cluttered with old or invalid checksums
7490
and the auto-delete operation is not deleting them, then you will need to
7491
manually clean up the replicate table. Alternatively, if you specify
7492
L<"--empty-replicate-table">, then the tool deletes every row in the
7495
7685
=head1 EXIT STATUS
7497
An exit status of 0 (sometimes also called a return value or return code)
7498
indicates success. If there is an error checksumming any table, the exit status
7501
When running L<"--replicate-check">, if any slave has chunks that differ from
7502
the master, the exit status is 1.
7506
If you are using innotop (see L<http://code.google.com/p/innotop>),
7507
mytop, or another tool to watch currently running MySQL queries, you may see
7508
the checksum queries. They look similar to this:
7510
REPLACE /*test.test_tbl:'2'/'5'*/ INTO test.checksum(db, ...
7512
Since pt-table-checksum's queries run for a long time and tend to be
7513
textually very long, and thus won't fit on one screen of these monitoring
7514
tools, I've been careful to place a comment at the beginning of the query so
7515
you can see what it is and what it's doing. The comment contains the name of
7516
the table that's being checksummed, the chunk it is currently checksumming,
7517
and how many chunks will be checksummed. In the case above, it is
7518
checksumming chunk 2 of 5 in table test.test_tbl.
7687
A non-zero exit status indicates errors, warnings, or checksum differences.
7522
L<"--schema"> is restricted to option groups Connection, Filter, Output, Help, Config, Safety.
7524
L<"--empty-replicate-table">, L<"--resume"> and L<"--resume-replicate"> are mutually exclusive.
7526
7691
This tool accepts additional command-line arguments. Refer to the
7527
7692
L<"SYNOPSIS"> and usage information for details.
7535
Checksum algorithm (ACCUM|CHECKSUM|BIT_XOR).
7537
Specifies which checksum algorithm to use. Valid arguments are CHECKSUM,
7538
BIT_XOR and ACCUM. The latter two do cryptographic hash checksums.
7539
See also L<"ALGORITHM SELECTION">.
7541
CHECKSUM is built into MySQL, but has some disadvantages. BIT_XOR and ACCUM are
7542
implemented by SQL queries. They use a cryptographic hash of all columns
7543
concatenated together with a separator, followed by a bitmap of each nullable
7544
column that is NULL (necessary because CONCAT_WS() skips NULL columns).
7546
CHECKSUM is the default. This method uses MySQL's built-in CHECKSUM TABLE
7547
command, which is a CRC32 behind the scenes. It cannot be used before MySQL
7548
4.1.1, and various options disable it as well. It does not simultaneously count
7549
rows; that requires an extra COUNT(*) query. This is a good option when you are
7550
using MyISAM tables with live checksums enabled; in this case both the COUNT(*)
7551
and CHECKSUM queries will run very quickly.
7553
The BIT_XOR algorithm is available for MySQL 4.1.1 and newer. It uses
7554
BIT_XOR(), which is order-independent, to reduce all the rows to a single
7557
ACCUM uses a user variable as an accumulator. It reduces each row to a single
7558
checksum, which is concatenated with the accumulator and re-checksummed. This
7559
technique is order-dependent. If the table has a primary key, it will be used
7560
to order the results for consistency; otherwise it's up to chance.
7562
The pathological worst case is where identical rows will cancel each other out
7563
in the BIT_XOR. In this case you will not be able to distinguish a table full
7564
of one value from a table full of another value. The ACCUM algorithm will
7567
However, the ACCUM algorithm is order-dependent, so if you have two tables
7568
with identical data but the rows are out of order, you'll get different
7569
checksums with ACCUM.
7571
If a given algorithm won't work for some reason, pt-table-checksum falls back to
7572
another. The least common denominator is ACCUM, which works on MySQL 3.23.2 and
7579
The database.table with arguments for each table to checksum.
7581
This table may be named anything you wish. It must contain at least the
7584
CREATE TABLE checksum_args (
7585
db char(64) NOT NULL,
7586
tbl char(64) NOT NULL,
7587
-- other columns as desired
7588
PRIMARY KEY (db, tbl)
7591
In addition to the columns shown, it may contain any of the other columns listed
7592
here (Note: this list is used by the code, MAGIC_overridable_args):
7594
algorithm chunk-column chunk-index chunk-size columns count crc function lock
7595
modulo use-index offset optimize-xor chunk-size-limit probability separator
7596
save-since single-chunk since since-column sleep sleep-coef trim wait where
7598
Each of these columns corresponds to the long form of a command-line option.
7599
Each column should be NULL-able. Column names with hyphens should be enclosed
7600
in backticks (e.g. `chunk-size`) when the table is created. The data type does
7601
not matter, but it's suggested you use a sensible data type to prevent garbage
7604
When C<pt-table-checksum> checksums a table, it will look for a matching entry
7605
in this table. Any column that has a defined value will override the
7606
corresponding command-line argument for the table being currently processed.
7607
In this way it is possible to specify custom command-line arguments for any
7610
If you add columns to the table that aren't in the above list of allowable
7611
columns, it's an error. The exceptions are C<db>, C<tbl>, and C<ts>. The C<ts>
7612
column can be used as a timestamp for easy visibility into the last time the
7613
C<since> column was updated with L<"--save-since">.
7615
This table is assumed to be located on the first server given on the
7618
7696
=item --ask-pass
7620
7698
group: Connection
8069
8054
Port number to use for connection.
8073
type: int; default: 100
8075
Checksums will be run with this percent probability.
8077
This is an integer between 1 and 100. If 100, every chunk of every table will
8078
certainly be checksummed. If less than that, there is a chance that some chunks
8079
of some tables will be skipped. This is useful for routine jobs designed to
8080
randomly sample bits of tables without checksumming the whole server. By
8081
default, if a table is not chunkable, it will be checksummed every time even
8082
when the probability is less than 100. You can override this with
8083
L<"--single-chunk">.
8085
See also L<"--modulo">.
8087
8056
=item --progress
8089
8058
type: array; default: time,30
8091
Print progress reports to STDERR. Currently, this feature is only for when
8092
L<"--throttle-method"> waits for slaves to catch up.
8060
Print progress reports to STDERR.
8094
8062
The value is a comma-separated list with two parts. The first part can be
8095
8063
percentage, time, or iterations; the second part specifies how often an update
8096
should be printed, in percentage, seconds, or number of iterations.
8064
should be printed, in percentage, seconds, or number of iterations. The tool
8065
prints progress reports for a variety of time-consuming operations, including
8066
waiting for replicas to catch up if they become lagged.
8100
short form: -q; group: Output
8102
Do not print checksum results.
8106
Re-checksum chunks that L<"--replicate-check"> found to be different.
8070
short form: -q; cumulative: yes; default: 0
8072
Print only the most important information (disables L<"--progress">).
8073
Specifying this option once causes the tool to print only errors, warnings, and
8074
tables that have checksum differences.
8076
Specifying this option twice causes the tool to print only errors. In this
8077
case, you can use the tool's exit status to determine if there were any warnings
8078
or checksum differences.
8108
8080
=item --recurse
8110
type: int; group: Throttle
8112
Number of levels to recurse in the hierarchy when discovering slaves.
8113
Default is infinite.
8115
See L<"--recursion-method">.
8084
Number of levels to recurse in the hierarchy when discovering replicas.
8085
Default is infinite. See also L<"--recursion-method">.
8117
8087
=item --recursion-method
8121
Preferred recursion method for discovering slaves.
8123
Possible methods are:
8091
Preferred recursion method for discovering replicas. Possible methods are:
8126
=========== ================
8094
=========== ==================
8127
8095
processlist SHOW PROCESSLIST
8128
8096
hosts SHOW SLAVE HOSTS
8130
The processlist method is preferred because SHOW SLAVE HOSTS is not reliable.
8131
However, the hosts method is required if the server uses a non-standard
8132
port (not 3306). Usually pt-table-checksum does the right thing and finds
8133
the slaves, but you may give a preferred method and it will be used first.
8134
If it doesn't find any slaves, the other methods will be tried.
8097
dsn=DSN DSNs from a table
8099
The processlist method is the default, because SHOW SLAVE HOSTS is not
8100
reliable. However, the hosts method can work better if the server uses a
8101
non-standard port (not 3306). The tool usually does the right thing and
8102
finds all replicas, but you may give a preferred method and it will be used
8105
The hosts method requires replicas to be configured with report_host,
8108
The dsn method is special: it specifies a table from which other DSN strings
8109
are read. The specified DSN must specify a D and t, or a database-qualified
8110
t. The DSN table should have the following structure:
8112
CREATE TABLE `dsns` (
8113
`id` int(11) NOT NULL AUTO_INCREMENT,
8114
`parent_id` int(11) DEFAULT NULL,
8115
`dsn` varchar(255) NOT NULL,
8119
To make the tool monitor only the hosts 10.10.1.16 and 10.10.1.17 for
8120
replication lag and checksum differences, insert the values C<h=10.10.1.16> and
8121
C<h=10.10.1.17> into the table. Currently, the DSNs are ordered by id, but id
8122
and parent_id are otherwise ignored.
8136
8124
=item --replicate
8140
Replicate checksums to slaves (disallows --algorithm CHECKSUM).
8142
This option enables a completely different checksum strategy for a consistent,
8143
lock-free checksum across a master and its slaves. Instead of running the
8144
checksum queries on each server, you run them only on the master. You specify a
8145
table, fully qualified in db.table format, to insert the results into. The
8146
checksum queries will insert directly into the table, so they will be replicated
8147
through the binlog to the slaves.
8149
When the queries are finished replicating, you can run a simple query on each
8150
slave to see which tables have differences from the master. With the
8151
L<"--replicate-check"> option, pt-table-checksum can run the query for you to
8152
make it even easier. See L<"CONSISTENT CHECKSUMS"> for details.
8154
If you find tables that have differences, you can use the chunk boundaries in a
8155
WHERE clause with L<pt-table-sync> to help repair them more efficiently. See
8156
L<pt-table-sync> for details.
8158
The table must have at least these columns: db, tbl, chunk, boundaries,
8159
this_crc, master_crc, this_cnt, master_cnt. The table may be named anything you
8160
wish. Here is a suggested table structure, which is automatically used for
8161
L<"--create-replicate-table"> (MAGIC_create_replicate):
8163
CREATE TABLE checksum (
8164
db char(64) NOT NULL,
8165
tbl char(64) NOT NULL,
8167
boundaries char(100) NOT NULL,
8168
this_crc char(40) NOT NULL,
8169
this_cnt int NOT NULL,
8170
master_crc char(40) NULL,
8171
master_cnt int NULL,
8172
ts timestamp NOT NULL,
8173
PRIMARY KEY (db, tbl, chunk)
8176
Be sure to choose an appropriate storage engine for the checksum table. If you
8177
are checksumming InnoDB tables, for instance, a deadlock will break replication
8178
if the checksum table is non-transactional, because the transaction will still
8179
be written to the binlog. It will then replay without a deadlock on the
8180
slave and break replication with "different error on master and slave." This
8181
is not a problem with pt-table-checksum; it's a problem with MySQL
8182
replication, and you can read more about it in the MySQL manual.
8184
This works only with statement-based replication (pt-table-checksum will switch
8185
the binlog format to STATEMENT for the duration of the session if your server
8186
uses row-based replication).
8188
In contrast to running the tool against multiple servers at once, using this
8189
option eliminates the complexities of synchronizing checksum queries across
8190
multiple servers, which normally requires locking and unlocking, waiting for
8191
master binlog positions, and so on. Thus, it disables L<"--lock">, L<"--wait">,
8192
and L<"--slave-lag"> (but not L<"--check-slave-lag">, which is a way to throttle
8193
the execution speed).
8195
The checksum queries actually do a REPLACE into this table, so existing rows
8196
need not be removed before running. However, you may wish to do this anyway to
8197
remove rows related to tables that don't exist anymore. The
8198
L<"--empty-replicate-table"> option does this for you.
8200
Since the table must be qualified with a database (e.g. C<db.checksums>),
8201
pt-table-checksum will only USE this database. This may be important if any
8202
replication options are set because it could affect whether or not changes
8203
to the table are replicated.
8205
If the slaves have any --replicate-do-X or --replicate-ignore-X options, you
8206
should be careful not to checksum any databases or tables that exist on the
8207
master and not the slaves. Changes to such tables may not normally be executed
8208
on the slaves because of the --replicate options, but the checksum queries
8209
modify the contents of the table that stores the checksums, not the tables whose
8210
data you are checksumming. Therefore, these queries will be executed on the
8211
slave, and if the table or database you're checksumming does not exist, the
8212
queries will cause replication to fail. For more information on replication
8213
rules, see L<http://dev.mysql.com/doc/en/replication-rules.html>.
8215
The table specified by L<"--replicate"> will never be checksummed itself.
8217
=item --replicate-check
8221
Check results in L<"--replicate"> table, to the specified depth. You must use
8222
this after you run the tool normally; it skips the checksum step and only checks
8225
It recursively finds differences recorded in the table given by
8226
L<"--replicate">. It recurses to the depth you specify: 0 is no recursion
8227
(check only the server you specify), 1 is check the server and its slaves, 2 is
8228
check the slaves of its slaves, and so on.
8230
It finds differences by running the query shown in L<"CONSISTENT CHECKSUMS">,
8231
and prints results, then exits after printing. This is just a convenient way of
8232
running the query so you don't have to do it manually.
8234
The output is one informational line per slave host, followed by the results
8235
of the query, if any. If L<"--quiet"> is specified, there is no output. If
8236
there are no differences between the master and any slave, there is no output.
8237
If any slave has chunks that differ from the master, pt-table-checksum's
8238
exit status is 1; otherwise it is 0.
8240
This option makes C<pt-table-checksum> look for slaves by running C<SHOW
8241
PROCESSLIST>. If it finds connections that appear to be from slaves, it derives
8242
connection information for each slave with the same default-and-override method
8243
described in L<"SPECIFYING HOSTS">.
8245
If C<SHOW PROCESSLIST> doesn't return any rows, C<pt-table-checksum> looks at
8246
C<SHOW SLAVE HOSTS> instead. The host and port, and user and password if
8247
available, from C<SHOW SLAVE HOSTS> are combined into a DSN and used as the
8248
argument. This requires slaves to be configured with C<report-host>,
8249
C<report-port> and so on.
8251
This requires the @@SERVER_ID system variable, so it works only on MySQL
8126
type: string; default: percona.checksums
8128
Write checksum results to this table. The replicate table must have this
8129
structure (MAGIC_create_replicate):
8131
CREATE TABLE checksums (
8132
db char(64) NOT NULL,
8133
tbl char(64) NOT NULL,
8135
chunk_time float NULL,
8136
chunk_index varchar(200) NULL,
8137
lower_boundary text NULL,
8138
upper_boundary text NULL,
8139
this_crc char(40) NOT NULL,
8140
this_cnt int NOT NULL,
8141
master_crc char(40) NULL,
8142
master_cnt int NULL,
8143
ts timestamp NOT NULL,
8144
PRIMARY KEY (db, tbl, chunk),
8145
INDEX ts_db_tbl (ts, db, tbl)
8148
By default, L<"--[no]create-replicate-table"> is true, so the database and
8149
the table specified by this option are created automatically if they do not
8152
Be sure to choose an appropriate storage engine for the replicate table. If you
8153
are checksumming InnoDB tables, and you use MyISAM for this table, a deadlock
8154
will break replication, because the mixture of transactional and
8155
non-transactional tables in the checksum statements will cause it to be written
8156
to the binlog even though it had an error. It will then replay without a
8157
deadlock on the replicas, and break replication with "different error on master
8158
and slave." This is not a problem with pt-table-checksum; it's a problem with
8159
MySQL replication, and you can read more about it in the MySQL manual.
8161
The replicate table is never checksummed (the tool automatically adds this
8162
table to L<"--ignore-tables">).
8164
=item --[no]replicate-check
8168
Check replicas for data differences after finishing each table. The tool finds
8169
differences by executing a simple SELECT statement on all detected replicas.
8170
The query compares the replica's checksum results to the master's checksum
8171
results. It reports differences in the DIFFS column of the output.
8173
=item --replicate-check-only
8175
Check replicas for consistency without executing checksum queries.
8176
This option is used only with L<"--[no]replicate-check">. If specified,
8177
pt-table-checksum doesn't checksum any tables. It checks replicas for
8178
differences found by previous checksumming, and then exits. It might be useful
8179
if you run pt-table-checksum quietly in a cron job, for example, and later want
8180
a report on the results of the cron job, perhaps to implement a Nagios check.
8254
8182
=item --replicate-database
8258
C<USE> only this database with L<"--replicate">. By default, pt-table-checksum
8259
executes USE to set its default database to the database that contains the table
8260
it's currently working on. It changes its default database as it works on
8261
different tables. This is a best effort to avoid problems with replication
8262
filters such as binlog_ignore_db and replicate_ignore_db. However, replication
8263
filters can create a situation where there simply is no one right way to do
8264
things. Some statements might not be replicated, and others might cause
8265
replication to fail on the slaves. In such cases, it is up to the user to
8266
specify a safe default database. This option specifies a default database that
8267
pt-table-checksum selects with USE, and never changes afterwards. See also
8268
L<"--[no]check-replication-filters">.
8186
USE only this database. By default, pt-table-checksum executes USE to select
8187
the database that contains the table it's currently working on. This is a
8188
best effort to avoid problems with replication filters such as binlog_ignore_db
8189
and replicate_ignore_db. However, replication filters can create a situation
8190
where there simply is no one right way to do things. Some statements might not
8191
be replicated, and others might cause replication to fail. In such cases, you
8192
can use this option to specify a default database that pt-table-checksum selects
8193
with USE, and never changes. See also L<"--[no]check-replication-filters">.
8274
Resume checksum using given output file from a previously interrupted run.
8276
The given output file should be the literal output from a previous run of
8277
C<pt-table-checksum>. For example:
8279
pt-table-checksum host1 host2 -C 100 > checksum_results.txt
8280
pt-table-checksum host1 host2 -C 100 --resume checksum_results.txt
8282
The command line options given to the first run and the resumed run must
8283
be identical (except, of course, for --resume). If they are not, the result
8284
will be unpredictable and probably wrong.
8286
L<"--resume"> does not work with L<"--replicate">; for that, use
8287
L<"--resume-replicate">.
8289
=item --resume-replicate
8291
Resume L<"--replicate">.
8293
This option resumes a previous checksum operation using L<"--replicate">.
8294
It is like L<"--resume"> but does not require an output file. Instead,
8295
it uses the checksum table given to L<"--replicate"> to determine where to
8296
resume the checksum operation.
8300
When L<"--arg-table"> and L<"--since"> are given, save the current L<"--since">
8301
value into that table's C<since> column after checksumming. In this way you can
8302
incrementally checksum tables by starting where the last one finished.
8304
The value to be saved could be the current timestamp, or it could be the maximum
8305
existing value of the column given by L<"--since-column">. It depends on what
8306
options are in effect. See the description of L<"--since"> to see how
8307
timestamps are different from ordinary values.
8311
Checksum C<SHOW CREATE TABLE> instead of table data.
8197
Resume checksumming from the last completed chunk (disables
8198
L<"--[no]empty-replicate-table">). If the tool stops before it checksums all
8199
tables, this option makes checksumming resume from the last chunk of the last
8200
table that it finished.
8204
type: int; default: 2
8206
Retry a chunk this many times when there is a nonfatal error. Nonfatal errors
8207
are problems such as a lock wait timeout or the query being killed.
8313
8209
=item --separator
8315
8211
type: string; default: #
8317
The separator character used for CONCAT_WS().
8319
This character is used to join the values of columns when checksumming with
8320
L<"--algorithm"> of BIT_XOR or ACCUM.
8213
The separator character used for CONCAT_WS(). This character is used to join
8214
the values of columns when checksumming.
8322
8216
=item --set-vars