3
# This program is copyright 2009-@CURRENTYEAR@ Percona Inc.
4
# Feedback and improvements are welcome.
6
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
7
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
8
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
10
# This program is free software; you can redistribute it and/or modify it under
11
# the terms of the GNU General Public License as published by the Free Software
12
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
13
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
16
# You should have received a copy of the GNU General Public License along with
17
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
18
# Place, Suite 330, Boston, MA 02111-1307 USA.
21
use warnings FATAL => 'all';
23
our $VERSION = '@VERSION@';
24
our $DISTRIB = '@DISTRIB@';
25
our $SVN_REV = sprintf("%d", (q$Revision: 7531 $ =~ m/(\d+)/g, 0));
27
# ###########################################################################
28
# DSNParser package 7388
29
# This package is a copy without comments from the original. The original
30
# with comments and its test file can be found in the SVN repository at,
31
# trunk/common/DSNParser.pm
32
# trunk/common/t/DSNParser.t
33
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
34
# ###########################################################################
39
use warnings FATAL => 'all';
40
use English qw(-no_match_vars);
41
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
44
$Data::Dumper::Indent = 0;
45
$Data::Dumper::Quotekeys = 0;
50
my $have_dbi = $EVAL_ERROR ? 0 : 1;
54
my ( $class, %args ) = @_;
55
foreach my $arg ( qw(opts) ) {
56
die "I need a $arg argument" unless $args{$arg};
59
opts => {} # h, P, u, etc. Should come from DSN OPTIONS section in POD.
61
foreach my $opt ( @{$args{opts}} ) {
62
if ( !$opt->{key} || !$opt->{desc} ) {
63
die "Invalid DSN option: ", Dumper($opt);
65
MKDEBUG && _d('DSN option:',
67
map { "$_=" . (defined $opt->{$_} ? ($opt->{$_} || '') : 'undef') }
71
$self->{opts}->{$opt->{key}} = {
74
copy => $opt->{copy} || 0,
77
return bless $self, $class;
81
my ( $self, $prop, $value ) = @_;
83
MKDEBUG && _d('Setting', $prop, 'property');
84
$self->{$prop} = $value;
86
return $self->{$prop};
90
my ( $self, $dsn, $prev, $defaults ) = @_;
92
MKDEBUG && _d('No DSN to parse');
95
MKDEBUG && _d('Parsing', $dsn);
100
my $opts = $self->{opts};
102
foreach my $dsn_part ( split(/,/, $dsn) ) {
103
if ( my ($prop_key, $prop_val) = $dsn_part =~ m/^(.)=(.*)$/ ) {
104
$given_props{$prop_key} = $prop_val;
107
MKDEBUG && _d('Interpreting', $dsn_part, 'as h=', $dsn_part);
108
$given_props{h} = $dsn_part;
112
foreach my $key ( keys %$opts ) {
113
MKDEBUG && _d('Finding value for', $key);
114
$final_props{$key} = $given_props{$key};
115
if ( !defined $final_props{$key}
116
&& defined $prev->{$key} && $opts->{$key}->{copy} )
118
$final_props{$key} = $prev->{$key};
119
MKDEBUG && _d('Copying value for', $key, 'from previous DSN');
121
if ( !defined $final_props{$key} ) {
122
$final_props{$key} = $defaults->{$key};
123
MKDEBUG && _d('Copying value for', $key, 'from defaults');
127
foreach my $key ( keys %given_props ) {
128
die "Unknown DSN option '$key' in '$dsn'. For more details, "
129
. "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
130
. "for complete documentation."
131
unless exists $opts->{$key};
133
if ( (my $required = $self->prop('required')) ) {
134
foreach my $key ( keys %$required ) {
135
die "Missing required DSN option '$key' in '$dsn'. For more details, "
136
. "please use the --help option, or try 'perldoc $PROGRAM_NAME' "
137
. "for complete documentation."
138
unless $final_props{$key};
142
return \%final_props;
146
my ( $self, $o ) = @_;
147
die 'I need an OptionParser object' unless ref $o eq 'OptionParser';
150
map { "$_=".$o->get($_); }
151
grep { $o->has($_) && $o->get($_) }
152
keys %{$self->{opts}}
154
MKDEBUG && _d('DSN string made from options:', $dsn_string);
155
return $self->parse($dsn_string);
159
my ( $self, $dsn, $props ) = @_;
160
return $dsn unless ref $dsn;
161
my %allowed = $props ? map { $_=>1 } @$props : ();
163
map { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) }
164
grep { defined $dsn->{$_} && $self->{opts}->{$_} }
165
grep { !$props || $allowed{$_} }
172
= "DSN syntax is key=value[,key=value...] Allowable DSN keys:\n\n"
173
. " KEY COPY MEANING\n"
174
. " === ==== =============================================\n";
175
my %opts = %{$self->{opts}};
176
foreach my $key ( sort keys %opts ) {
178
. ($opts{$key}->{copy} ? 'yes ' : 'no ')
179
. ($opts{$key}->{desc} || '[No description]')
182
$usage .= "\n If the DSN is a bareword, the word is treated as the 'h' key.\n";
187
my ( $self, $info ) = @_;
189
my %opts = %{$self->{opts}};
190
my $driver = $self->prop('dbidriver') || '';
191
if ( $driver eq 'Pg' ) {
192
$dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';'
193
. join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
194
grep { defined $info->{$_} }
198
$dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';'
199
. join(';', map { "$opts{$_}->{dsn}=$info->{$_}" }
200
grep { defined $info->{$_} }
202
. ';mysql_read_default_group=client';
205
return ($dsn, $info->{u}, $info->{p});
209
my ( $self, $dbh, $dsn ) = @_;
210
my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name');
211
my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()');
213
$dsn->{h} ||= $vars->{hostname}->{Value};
214
$dsn->{S} ||= $vars->{'socket'}->{Value};
215
$dsn->{P} ||= $vars->{port}->{Value};
221
my ( $self, $cxn_string, $user, $pass, $opts ) = @_;
227
ShowErrorStatement => 1,
228
mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0),
230
@{$defaults}{ keys %$opts } = values %$opts;
232
if ( $opts->{mysql_use_result} ) {
233
$defaults->{mysql_use_result} = 1;
237
die "Cannot connect to MySQL because the Perl DBI module is not "
238
. "installed or not found. Run 'perl -MDBI' to see the directories "
239
. "that Perl searches for DBI. If DBI is not installed, try:\n"
240
. " Debian/Ubuntu apt-get install libdbi-perl\n"
241
. " RHEL/CentOS yum install perl-DBI\n"
242
. " OpenSolaris pgk install pkg:/SUNWpmdbi\n";
248
while ( !$dbh && $tries-- ) {
249
MKDEBUG && _d($cxn_string, ' ', $user, ' ', $pass, ' {',
250
join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults ), '}');
253
$dbh = DBI->connect($cxn_string, $user, $pass, $defaults);
255
if ( $cxn_string =~ m/mysql/i ) {
258
$sql = 'SELECT @@SQL_MODE';
259
MKDEBUG && _d($dbh, $sql);
260
my ($sql_mode) = $dbh->selectrow_array($sql);
262
$sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1'
263
. '/*!40101, @@SQL_MODE=\'NO_AUTO_VALUE_ON_ZERO'
264
. ($sql_mode ? ",$sql_mode" : '')
266
MKDEBUG && _d($dbh, $sql);
269
if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) {
270
$sql = "/*!40101 SET NAMES $charset*/";
271
MKDEBUG && _d($dbh, ':', $sql);
273
MKDEBUG && _d('Enabling charset for STDOUT');
274
if ( $charset eq 'utf8' ) {
275
binmode(STDOUT, ':utf8')
276
or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
279
binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR";
283
if ( $self->prop('set-vars') ) {
284
$sql = "SET " . $self->prop('set-vars');
285
MKDEBUG && _d($dbh, ':', $sql);
290
if ( !$dbh && $EVAL_ERROR ) {
291
MKDEBUG && _d($EVAL_ERROR);
292
if ( $EVAL_ERROR =~ m/not a compiled character set|character set utf8/ ) {
293
MKDEBUG && _d('Going to try again without utf8 support');
294
delete $defaults->{mysql_enable_utf8};
296
elsif ( $EVAL_ERROR =~ m/locate DBD\/mysql/i ) {
297
die "Cannot connect to MySQL because the Perl DBD::mysql module is "
298
. "not installed or not found. Run 'perl -MDBD::mysql' to see "
299
. "the directories that Perl searches for DBD::mysql. If "
300
. "DBD::mysql is not installed, try:\n"
301
. " Debian/Ubuntu apt-get install libdbd-mysql-perl\n"
302
. " RHEL/CentOS yum install perl-DBD-MySQL\n"
303
. " OpenSolaris pgk install pkg:/SUNWapu13dbd-mysql\n";
311
MKDEBUG && _d('DBH info: ',
313
Dumper($dbh->selectrow_hashref(
314
'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')),
315
'Connection info:', $dbh->{mysql_hostinfo},
316
'Character set info:', Dumper($dbh->selectall_arrayref(
317
'SHOW VARIABLES LIKE "character_set%"', { Slice => {}})),
318
'$DBD::mysql::VERSION:', $DBD::mysql::VERSION,
319
'$DBI::VERSION:', $DBI::VERSION,
326
my ( $self, $dbh ) = @_;
327
if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) {
330
my ( $hostname, $one ) = $dbh->selectrow_array(
331
'SELECT /*!50038 @@hostname, */ 1');
336
my ( $self, $dbh ) = @_;
337
MKDEBUG && $self->print_active_handles($dbh);
341
sub print_active_handles {
342
my ( $self, $thing, $level ) = @_;
344
printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level,
345
$thing, (($thing->{Type} || '') eq 'st' ? $thing->{Statement} || '' : ''))
346
or die "Cannot print: $OS_ERROR";
347
foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) {
348
$self->print_active_handles( $handle, $level + 1 );
353
my ( $self, $dsn_1, $dsn_2, %args ) = @_;
354
die 'I need a dsn_1 argument' unless $dsn_1;
355
die 'I need a dsn_2 argument' unless $dsn_2;
359
if ( $args{overwrite} ) {
360
$val = defined $dsn_1->{$key} ? $dsn_1->{$key} : $dsn_2->{$key};
363
$val = defined $dsn_2->{$key} ? $dsn_2->{$key} : $dsn_1->{$key};
366
} keys %{$self->{opts}};
371
my ($package, undef, $line) = caller 0;
372
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
373
map { defined $_ ? $_ : 'undef' }
375
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
380
# ###########################################################################
381
# End DSNParser package
382
# ###########################################################################
384
# ###########################################################################
385
# OptionParser package 7102
386
# This package is a copy without comments from the original. The original
387
# with comments and its test file can be found in the SVN repository at,
388
# trunk/common/OptionParser.pm
389
# trunk/common/t/OptionParser.t
390
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
391
# ###########################################################################
393
package OptionParser;
396
use warnings FATAL => 'all';
397
use List::Util qw(max);
398
use English qw(-no_match_vars);
399
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
403
my $POD_link_re = '[LC]<"?([^">]+)"?>';
406
my ( $class, %args ) = @_;
407
my @required_args = qw();
408
foreach my $arg ( @required_args ) {
409
die "I need a $arg argument" unless $args{$arg};
412
my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
413
$program_name ||= $PROGRAM_NAME;
414
my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.';
426
head1 => 'OPTIONS', # These args are used internally
427
skip_rules => 0, # to instantiate another Option-
428
item => '--(.*)', # Parser obj that parses the
429
attributes => \%attributes, # DSN OPTIONS section. Tools
430
parse_attributes => \&_parse_attribs, # don't tinker with these args.
434
strict => 1, # disabled by a special rule
435
program_name => $program_name,
441
allowed_groups => {},
443
rules => [], # desc of rules for --help
444
mutex => [], # rule: opts are mutually exclusive
445
atleast1 => [], # rule: at least one opt is required
446
disables => {}, # rule: opt disables other opts
447
defaults_to => {}, # rule: opt defaults to value of other opt
450
"/etc/maatkit/maatkit.conf",
451
"/etc/maatkit/$program_name.conf",
452
"$home/.maatkit.conf",
453
"$home/.$program_name.conf",
456
string => 's', # standard Getopt type
457
int => 'i', # standard Getopt type
458
float => 'f', # standard Getopt type
459
Hash => 'H', # hash, formed from a comma-separated list
460
hash => 'h', # hash as above, but only if a value is given
461
Array => 'A', # array, similar to Hash
462
array => 'a', # array, similar to hash
464
size => 'z', # size with kMG suffix (powers of 2^10)
465
time => 'm', # time, with an optional suffix of s/h/m/d
469
return bless $self, $class;
473
my ( $self, $file ) = @_;
474
$file ||= $self->{file} || __FILE__;
475
my @specs = $self->_pod_to_specs($file);
476
$self->_parse_specs(@specs);
478
open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
479
my $contents = do { local $/ = undef; <$fh> };
481
if ( $contents =~ m/^=head1 DSN OPTIONS/m ) {
482
MKDEBUG && _d('Parsing DSN OPTIONS');
487
my $parse_dsn_attribs = sub {
488
my ( $self, $option, $attribs ) = @_;
490
my $val = $attribs->{$_};
492
$val = $val eq 'yes' ? 1
495
$attribs->{$_} = $val;
503
my $dsn_o = new OptionParser(
504
description => 'DSN OPTIONS',
505
head1 => 'DSN OPTIONS',
506
dsn => 0, # XXX don't infinitely recurse!
507
item => '\* (.)', # key opts are a single character
508
skip_rules => 1, # no rules before opts
509
attributes => $dsn_attribs,
510
parse_attributes => $parse_dsn_attribs,
514
key => $_->{spec}->{key},
515
dsn => $_->{spec}->{dsn},
516
copy => $_->{spec}->{copy},
520
} $dsn_o->_pod_to_specs($file);
521
$self->{DSNParser} = DSNParser->new(opts => \@dsn_opts);
529
return $self->{DSNParser};
532
sub get_defaults_files {
534
return @{$self->{default_files}};
538
my ( $self, $file ) = @_;
539
$file ||= $self->{file} || __FILE__;
540
open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR";
546
local $INPUT_RECORD_SEPARATOR = '';
547
while ( $para = <$fh> ) {
548
next unless $para =~ m/^=head1 $self->{head1}/;
552
while ( $para = <$fh> ) {
553
last if $para =~ m/^=over/;
554
next if $self->{skip_rules};
557
$para =~ s/$POD_link_re/$1/go;
558
MKDEBUG && _d('Option rule:', $para);
562
die "POD has no $self->{head1} section" unless $para;
565
if ( my ($option) = $para =~ m/^=item $self->{item}/ ) {
567
MKDEBUG && _d($para);
570
$para = <$fh>; # read next paragraph, possibly attributes
572
if ( $para =~ m/: / ) { # attributes
575
my ( $attrib, $val) = split(/: /, $_);
576
die "Unrecognized attribute for --$option: $attrib"
577
unless $self->{attributes}->{$attrib};
579
} split(/; /, $para);
580
if ( $attribs{'short form'} ) {
581
$attribs{'short form'} =~ s/-//;
583
$para = <$fh>; # read next paragraph, probably short help desc
586
MKDEBUG && _d('Option has no attributes');
591
$para =~ s/$POD_link_re/$1/go;
593
$para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s;
594
MKDEBUG && _d('Short help:', $para);
596
die "No description after option spec $option" if $para =~ m/^=item/;
598
if ( my ($base_option) = $option =~ m/^\[no\](.*)/ ) {
599
$option = $base_option;
600
$attribs{'negatable'} = 1;
604
spec => $self->{parse_attributes}->($self, $option, \%attribs),
606
. (defined $attribs{default} ? " (default $attribs{default})" : ''),
607
group => ($attribs{'group'} ? $attribs{'group'} : 'default'),
610
while ( $para = <$fh> ) {
612
if ( $para =~ m/^=head1/ ) {
613
$para = undef; # Can't 'last' out of a do {} block.
616
last if $para =~ m/^=item /;
620
die "No valid specs in $self->{head1}" unless @specs;
623
return @specs, @rules;
627
my ( $self, @specs ) = @_;
628
my %disables; # special rule that requires deferred checking
630
foreach my $opt ( @specs ) {
631
if ( ref $opt ) { # It's an option spec, not a rule.
632
MKDEBUG && _d('Parsing opt spec:',
633
map { ($_, '=>', $opt->{$_}) } keys %$opt);
635
my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
637
die "Cannot parse long option from spec $opt->{spec}";
639
$opt->{long} = $long;
641
die "Duplicate long option --$long" if exists $self->{opts}->{$long};
642
$self->{opts}->{$long} = $opt;
644
if ( length $long == 1 ) {
645
MKDEBUG && _d('Long opt', $long, 'looks like short opt');
646
$self->{short_opts}->{$long} = $long;
650
die "Duplicate short option -$short"
651
if exists $self->{short_opts}->{$short};
652
$self->{short_opts}->{$short} = $long;
653
$opt->{short} = $short;
656
$opt->{short} = undef;
659
$opt->{is_negatable} = $opt->{spec} =~ m/!/ ? 1 : 0;
660
$opt->{is_cumulative} = $opt->{spec} =~ m/\+/ ? 1 : 0;
661
$opt->{is_required} = $opt->{desc} =~ m/required/ ? 1 : 0;
663
$opt->{group} ||= 'default';
664
$self->{groups}->{ $opt->{group} }->{$long} = 1;
666
$opt->{value} = undef;
669
my ( $type ) = $opt->{spec} =~ m/=(.)/;
670
$opt->{type} = $type;
671
MKDEBUG && _d($long, 'type:', $type);
674
$opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ );
676
if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) {
677
$self->{defaults}->{$long} = defined $def ? $def : 1;
678
MKDEBUG && _d($long, 'default:', $def);
681
if ( $long eq 'config' ) {
682
$self->{defaults}->{$long} = join(',', $self->get_defaults_files());
685
if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) {
686
$disables{$long} = $dis;
687
MKDEBUG && _d('Deferring check of disables rule for', $opt, $dis);
690
$self->{opts}->{$long} = $opt;
692
else { # It's an option rule, not a spec.
693
MKDEBUG && _d('Parsing rule:', $opt);
694
push @{$self->{rules}}, $opt;
695
my @participants = $self->_get_participants($opt);
698
if ( $opt =~ m/mutually exclusive|one and only one/ ) {
700
push @{$self->{mutex}}, \@participants;
701
MKDEBUG && _d(@participants, 'are mutually exclusive');
703
if ( $opt =~ m/at least one|one and only one/ ) {
705
push @{$self->{atleast1}}, \@participants;
706
MKDEBUG && _d(@participants, 'require at least one');
708
if ( $opt =~ m/default to/ ) {
710
$self->{defaults_to}->{$participants[0]} = $participants[1];
711
MKDEBUG && _d($participants[0], 'defaults to', $participants[1]);
713
if ( $opt =~ m/restricted to option groups/ ) {
715
my ($groups) = $opt =~ m/groups ([\w\s\,]+)/;
716
my @groups = split(',', $groups);
717
%{$self->{allowed_groups}->{$participants[0]}} = map {
722
if( $opt =~ m/accepts additional command-line arguments/ ) {
725
MKDEBUG && _d("Strict mode disabled by rule");
728
die "Unrecognized option rule: $opt" unless $rule_ok;
732
foreach my $long ( keys %disables ) {
733
my @participants = $self->_get_participants($disables{$long});
734
$self->{disables}->{$long} = \@participants;
735
MKDEBUG && _d('Option', $long, 'disables', @participants);
741
sub _get_participants {
742
my ( $self, $str ) = @_;
744
foreach my $long ( $str =~ m/--(?:\[no\])?([\w-]+)/g ) {
745
die "Option --$long does not exist while processing rule $str"
746
unless exists $self->{opts}->{$long};
747
push @participants, $long;
749
MKDEBUG && _d('Participants for', $str, ':', @participants);
750
return @participants;
755
my %opts = %{$self->{opts}};
761
my %short_opts = %{$self->{short_opts}};
766
my ( $self, %defaults ) = @_;
767
$self->{defaults} = {};
768
foreach my $long ( keys %defaults ) {
769
die "Cannot set default for nonexistent option $long"
770
unless exists $self->{opts}->{$long};
771
$self->{defaults}->{$long} = $defaults{$long};
772
MKDEBUG && _d('Default val for', $long, ':', $defaults{$long});
779
return $self->{defaults};
784
return $self->{groups};
788
my ( $self, $opt, $val ) = @_;
789
my $long = exists $self->{opts}->{$opt} ? $opt
790
: exists $self->{short_opts}->{$opt} ? $self->{short_opts}->{$opt}
791
: die "Getopt::Long gave a nonexistent option: $opt";
793
$opt = $self->{opts}->{$long};
794
if ( $opt->{is_cumulative} ) {
798
$opt->{value} = $val;
801
MKDEBUG && _d('Got option', $long, '=', $val);
807
foreach my $long ( keys %{$self->{opts}} ) {
808
$self->{opts}->{$long}->{got} = 0;
809
$self->{opts}->{$long}->{value}
810
= exists $self->{defaults}->{$long} ? $self->{defaults}->{$long}
811
: $self->{opts}->{$long}->{is_cumulative} ? 0
814
$self->{got_opts} = 0;
816
$self->{errors} = [];
818
if ( @ARGV && $ARGV[0] eq "--config" ) {
820
$self->_set_option('config', shift @ARGV);
822
if ( $self->has('config') ) {
824
foreach my $filename ( split(',', $self->get('config')) ) {
826
push @extra_args, $self->_read_config_file($filename);
829
if ( $self->got('config') ) {
837
unshift @ARGV, @extra_args;
840
Getopt::Long::Configure('no_ignore_case', 'bundling');
842
map { $_->{spec} => sub { $self->_set_option(@_); } }
843
grep { $_->{long} ne 'config' } # --config is handled specially above.
844
values %{$self->{opts}}
845
) or $self->save_error('Error parsing options');
847
if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) {
848
printf("%s Ver %s Distrib %s Changeset %s\n",
849
$self->{program_name}, $main::VERSION, $main::DISTRIB, $main::SVN_REV)
850
or die "Cannot print: $OS_ERROR";
854
if ( @ARGV && $self->{strict} ) {
855
$self->save_error("Unrecognized command-line options @ARGV");
858
foreach my $mutex ( @{$self->{mutex}} ) {
859
my @set = grep { $self->{opts}->{$_}->{got} } @$mutex;
861
my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
862
@{$mutex}[ 0 .. scalar(@$mutex) - 2] )
863
. ' and --'.$self->{opts}->{$mutex->[-1]}->{long}
864
. ' are mutually exclusive.';
865
$self->save_error($err);
869
foreach my $required ( @{$self->{atleast1}} ) {
870
my @set = grep { $self->{opts}->{$_}->{got} } @$required;
872
my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
873
@{$required}[ 0 .. scalar(@$required) - 2] )
874
.' or --'.$self->{opts}->{$required->[-1]}->{long};
875
$self->save_error("Specify at least one of $err");
879
$self->_check_opts( keys %{$self->{opts}} );
880
$self->{got_opts} = 1;
885
my ( $self, @long ) = @_;
886
my $long_last = scalar @long;
888
foreach my $i ( 0..$#long ) {
889
my $long = $long[$i];
891
my $opt = $self->{opts}->{$long};
893
if ( exists $self->{disables}->{$long} ) {
894
my @disable_opts = @{$self->{disables}->{$long}};
895
map { $self->{opts}->{$_}->{value} = undef; } @disable_opts;
896
MKDEBUG && _d('Unset options', @disable_opts,
897
'because', $long,'disables them');
900
if ( exists $self->{allowed_groups}->{$long} ) {
902
my @restricted_groups = grep {
903
!exists $self->{allowed_groups}->{$long}->{$_}
904
} keys %{$self->{groups}};
907
foreach my $restricted_group ( @restricted_groups ) {
909
foreach my $restricted_opt (
910
keys %{$self->{groups}->{$restricted_group}} )
912
next RESTRICTED_OPT if $restricted_opt eq $long;
913
push @restricted_opts, $restricted_opt
914
if $self->{opts}->{$restricted_opt}->{got};
918
if ( @restricted_opts ) {
920
if ( @restricted_opts == 1 ) {
921
$err = "--$restricted_opts[0]";
925
map { "--$self->{opts}->{$_}->{long}" }
927
@restricted_opts[0..scalar(@restricted_opts) - 2]
929
. ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long};
931
$self->save_error("--$long is not allowed with $err");
936
elsif ( $opt->{is_required} ) {
937
$self->save_error("Required option --$long must be specified");
940
$self->_validate_type($opt);
941
if ( $opt->{parsed} ) {
945
MKDEBUG && _d('Temporarily failed to parse', $long);
949
die "Failed to parse options, possibly due to circular dependencies"
950
if @long == $long_last;
958
my ( $self, $opt ) = @_;
961
if ( !$opt->{type} ) {
966
my $val = $opt->{value};
968
if ( $val && $opt->{type} eq 'm' ) { # type time
969
MKDEBUG && _d('Parsing option', $opt->{long}, 'as a time value');
970
my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
972
my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/;
974
MKDEBUG && _d('No suffix given; using', $suffix, 'for',
975
$opt->{long}, '(value:', $val, ')');
977
if ( $suffix =~ m/[smhd]/ ) {
978
$val = $suffix eq 's' ? $num # Seconds
979
: $suffix eq 'm' ? $num * 60 # Minutes
980
: $suffix eq 'h' ? $num * 3600 # Hours
981
: $num * 86400; # Days
982
$opt->{value} = ($prefix || '') . $val;
983
MKDEBUG && _d('Setting option', $opt->{long}, 'to', $val);
986
$self->save_error("Invalid time suffix for --$opt->{long}");
989
elsif ( $val && $opt->{type} eq 'd' ) { # type DSN
990
MKDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN');
992
my $from_key = $self->{defaults_to}->{ $opt->{long} };
994
MKDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN');
995
if ( $self->{opts}->{$from_key}->{parsed} ) {
996
$prev = $self->{opts}->{$from_key}->{value};
999
MKDEBUG && _d('Cannot parse', $opt->{long}, 'until',
1000
$from_key, 'parsed');
1004
my $defaults = $self->{DSNParser}->parse_options($self);
1005
$opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults);
1007
elsif ( $val && $opt->{type} eq 'z' ) { # type size
1008
MKDEBUG && _d('Parsing option', $opt->{long}, 'as a size value');
1009
$self->_parse_size($opt, $val);
1011
elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) {
1012
$opt->{value} = { map { $_ => 1 } split(/(?<!\\),\s*/, ($val || '')) };
1014
elsif ( $opt->{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) {
1015
$opt->{value} = [ split(/(?<!\\),\s*/, ($val || '')) ];
1018
MKDEBUG && _d('Nothing to validate for option',
1019
$opt->{long}, 'type', $opt->{type}, 'value', $val);
1027
my ( $self, $opt ) = @_;
1028
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1029
die "Option $opt does not exist"
1030
unless $long && exists $self->{opts}->{$long};
1031
return $self->{opts}->{$long}->{value};
1035
my ( $self, $opt ) = @_;
1036
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1037
die "Option $opt does not exist"
1038
unless $long && exists $self->{opts}->{$long};
1039
return $self->{opts}->{$long}->{got};
1043
my ( $self, $opt ) = @_;
1044
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1045
return defined $long ? exists $self->{opts}->{$long} : 0;
1049
my ( $self, $opt, $val ) = @_;
1050
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
1051
die "Option $opt does not exist"
1052
unless $long && exists $self->{opts}->{$long};
1053
$self->{opts}->{$long}->{value} = $val;
1058
my ( $self, $error ) = @_;
1059
push @{$self->{errors}}, $error;
1065
return $self->{errors};
1070
warn "No usage string is set" unless $self->{usage}; # XXX
1071
return "Usage: " . ($self->{usage} || '') . "\n";
1076
warn "No description string is set" unless $self->{description}; # XXX
1077
my $descr = ($self->{description} || $self->{program_name} || '')
1078
. " For more details, please use the --help option, "
1079
. "or try 'perldoc $PROGRAM_NAME' "
1080
. "for complete documentation.";
1081
$descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g)
1082
unless $ENV{DONT_BREAK_LINES};
1083
$descr =~ s/ +$//mg;
1087
sub usage_or_errors {
1088
my ( $self, $file, $return ) = @_;
1089
$file ||= $self->{file} || __FILE__;
1091
if ( !$self->{description} || !$self->{usage} ) {
1092
MKDEBUG && _d("Getting description and usage from SYNOPSIS in", $file);
1093
my %synop = $self->_parse_synopsis($file);
1094
$self->{description} ||= $synop{description};
1095
$self->{usage} ||= $synop{usage};
1096
MKDEBUG && _d("Description:", $self->{description},
1097
"\nUsage:", $self->{usage});
1100
if ( $self->{opts}->{help}->{got} ) {
1101
print $self->print_usage() or die "Cannot print usage: $OS_ERROR";
1102
exit 0 unless $return;
1104
elsif ( scalar @{$self->{errors}} ) {
1105
print $self->print_errors() or die "Cannot print errors: $OS_ERROR";
1106
exit 0 unless $return;
1114
my $usage = $self->usage() . "\n";
1115
if ( (my @errors = @{$self->{errors}}) ) {
1116
$usage .= join("\n * ", 'Errors in command-line arguments:', @errors)
1119
return $usage . "\n" . $self->descr();
1124
die "Run get_opts() before print_usage()" unless $self->{got_opts};
1125
my @opts = values %{$self->{opts}};
1129
length($_->{long}) # option long name
1130
+ ($_->{is_negatable} ? 4 : 0) # "[no]" if opt is negatable
1131
+ ($_->{type} ? 2 : 0) # "=x" where x is the opt type
1138
+ ($self->{opts}->{$_}->{is_negatable} ? 4 : 0)
1139
+ ($self->{opts}->{$_}->{type} ? 2 : 0)
1141
values %{$self->{short_opts}});
1143
my $lcol = max($maxl, ($maxs + 3));
1144
my $rcol = 80 - $lcol - 6;
1145
my $rpad = ' ' x ( 80 - $rcol );
1147
$maxs = max($lcol - 3, $maxs);
1149
my $usage = $self->descr() . "\n" . $self->usage();
1151
my @groups = reverse sort grep { $_ ne 'default'; } keys %{$self->{groups}};
1152
push @groups, 'default';
1154
foreach my $group ( reverse @groups ) {
1155
$usage .= "\n".($group eq 'default' ? 'Options' : $group).":\n\n";
1157
sort { $a->{long} cmp $b->{long} }
1158
grep { $_->{group} eq $group }
1161
my $long = $opt->{is_negatable} ? "[no]$opt->{long}" : $opt->{long};
1162
my $short = $opt->{short};
1163
my $desc = $opt->{desc};
1165
$long .= $opt->{type} ? "=$opt->{type}" : "";
1167
if ( $opt->{type} && $opt->{type} eq 'm' ) {
1168
my ($s) = $desc =~ m/\(suffix (.)\)/;
1170
$desc =~ s/\s+\(suffix .\)//;
1171
$desc .= ". Optional suffix s=seconds, m=minutes, h=hours, "
1172
. "d=days; if no suffix, $s is used.";
1174
$desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g);
1177
$usage .= sprintf(" --%-${maxs}s -%s %s\n", $long, $short, $desc);
1180
$usage .= sprintf(" --%-${lcol}s %s\n", $long, $desc);
1185
$usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n";
1187
if ( (my @rules = @{$self->{rules}}) ) {
1188
$usage .= "\nRules:\n\n";
1189
$usage .= join("\n", map { " $_" } @rules) . "\n";
1191
if ( $self->{DSNParser} ) {
1192
$usage .= "\n" . $self->{DSNParser}->usage();
1194
$usage .= "\nOptions and values after processing arguments:\n\n";
1195
foreach my $opt ( sort { $a->{long} cmp $b->{long} } @opts ) {
1196
my $val = $opt->{value};
1197
my $type = $opt->{type} || '';
1198
my $bool = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
1199
$val = $bool ? ( $val ? 'TRUE' : 'FALSE' )
1200
: !defined $val ? '(No value)'
1201
: $type eq 'd' ? $self->{DSNParser}->as_string($val)
1202
: $type =~ m/H|h/ ? join(',', sort keys %$val)
1203
: $type =~ m/A|a/ ? join(',', @$val)
1205
$usage .= sprintf(" --%-${lcol}s %s\n", $opt->{long}, $val);
1211
shift @_ if ref $_[0] eq __PACKAGE__;
1212
my ( $prompt ) = @_;
1213
local $OUTPUT_AUTOFLUSH = 1;
1215
or die "Cannot print: $OS_ERROR";
1218
require Term::ReadKey;
1219
Term::ReadKey::ReadMode('noecho');
1220
chomp($response = <STDIN>);
1221
Term::ReadKey::ReadMode('normal');
1223
or die "Cannot print: $OS_ERROR";
1225
if ( $EVAL_ERROR ) {
1226
die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR";
1232
print '# ', $^X, ' ', $], "\n";
1233
my $uname = `uname -a`;
1235
$uname =~ s/\s+/ /g;
1238
printf("# %s Ver %s Distrib %s Changeset %s line %d\n",
1239
$PROGRAM_NAME, ($main::VERSION || ''), ($main::DISTRIB || ''),
1240
($main::SVN_REV || ''), __LINE__);
1241
print('# Arguments: ',
1242
join(' ', map { my $a = "_[$_]_"; $a =~ s/\n/\n# /g; $a; } @ARGV), "\n");
1245
sub _read_config_file {
1246
my ( $self, $filename ) = @_;
1247
open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n";
1253
while ( my $line = <$fh> ) {
1255
next LINE if $line =~ m/^\s*(?:\#|\;|$)/;
1256
$line =~ s/\s+#.*$//g;
1257
$line =~ s/^\s+|\s+$//g;
1258
if ( $line eq '--' ) {
1264
&& (my($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/)
1266
push @args, grep { defined $_ } ("$prefix$opt", $arg);
1268
elsif ( $line =~ m/./ ) {
1272
die "Syntax error in file $filename at line $INPUT_LINE_NUMBER";
1279
sub read_para_after {
1280
my ( $self, $file, $regex ) = @_;
1281
open my $fh, "<", $file or die "Can't open $file: $OS_ERROR";
1282
local $INPUT_RECORD_SEPARATOR = '';
1284
while ( $para = <$fh> ) {
1285
next unless $para =~ m/^=pod$/m;
1288
while ( $para = <$fh> ) {
1289
next unless $para =~ m/$regex/;
1294
close $fh or die "Can't close $file: $OS_ERROR";
1302
my $hashref = $self->{$_};
1304
foreach my $key ( keys %$hashref ) {
1305
my $ref = ref $hashref->{$key};
1306
$val_copy->{$key} = !$ref ? $hashref->{$key}
1307
: $ref eq 'HASH' ? { %{$hashref->{$key}} }
1308
: $ref eq 'ARRAY' ? [ @{$hashref->{$key}} ]
1312
} qw(opts short_opts defaults);
1314
foreach my $scalar ( qw(got_opts) ) {
1315
$clone{$scalar} = $self->{$scalar};
1318
return bless \%clone;
1322
my ( $self, $opt, $val ) = @_;
1324
if ( lc($val || '') eq 'null' ) {
1325
MKDEBUG && _d('NULL size for', $opt->{long});
1326
$opt->{value} = 'null';
1330
my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);
1331
my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
1332
if ( defined $num ) {
1334
$num *= $factor_for{$factor};
1335
MKDEBUG && _d('Setting option', $opt->{y},
1336
'to num', $num, '* factor', $factor);
1338
$opt->{value} = ($pre || '') . $num;
1341
$self->save_error("Invalid size for --$opt->{long}");
1346
sub _parse_attribs {
1347
my ( $self, $option, $attribs ) = @_;
1348
my $types = $self->{types};
1350
. ($attribs->{'short form'} ? '|' . $attribs->{'short form'} : '' )
1351
. ($attribs->{'negatable'} ? '!' : '' )
1352
. ($attribs->{'cumulative'} ? '+' : '' )
1353
. ($attribs->{'type'} ? '=' . $types->{$attribs->{type}} : '' );
1356
sub _parse_synopsis {
1357
my ( $self, $file ) = @_;
1358
$file ||= $self->{file} || __FILE__;
1359
MKDEBUG && _d("Parsing SYNOPSIS in", $file);
1361
local $INPUT_RECORD_SEPARATOR = ''; # read paragraphs
1362
open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
1364
1 while defined($para = <$fh>) && $para !~ m/^=head1 SYNOPSIS/;
1365
die "$file does not contain a SYNOPSIS section" unless $para;
1367
for ( 1..2 ) { # 1 for the usage, 2 for the description
1372
MKDEBUG && _d("Raw SYNOPSIS text:", @synop);
1373
my ($usage, $desc) = @synop;
1374
die "The SYNOPSIS section in $file is not formatted properly"
1375
unless $usage && $desc;
1377
$usage =~ s/^\s*Usage:\s+(.+)/$1/;
1381
$desc =~ s/\s{2,}/ /g;
1382
$desc =~ s/\. ([A-Z][a-z])/. $1/g;
1386
description => $desc,
1392
my ($package, undef, $line) = caller 0;
1393
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1394
map { defined $_ ? $_ : 'undef' }
1396
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1401
# ###########################################################################
1402
# End OptionParser package
1403
# ###########################################################################
1405
# ###########################################################################
1406
# SlowLogParser package 7522
1407
# This package is a copy without comments from the original. The original
1408
# with comments and its test file can be found in the SVN repository at,
1409
# trunk/common/SlowLogParser.pm
1410
# trunk/common/t/SlowLogParser.t
1411
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
1412
# ###########################################################################
1413
package SlowLogParser;
1416
use warnings FATAL => 'all';
1417
use English qw(-no_match_vars);
1420
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
1427
return bless $self, $class;
1430
my $slow_log_ts_line = qr/^# Time: ([0-9: ]{15})/;
1431
my $slow_log_uh_line = qr/# User\@Host: ([^\[]+|\[[^[]+\]).*?@ (\S*) \[(.*)\]/;
1432
my $slow_log_hd_line = qr{
1434
T[cC][pP]\s[pP]ort:\s+\d+ # case differs on windows/unix
1436
[/A-Z].*mysqld,\sVersion.*(?:started\swith:|embedded\slibrary)
1443
my ( $self, %args ) = @_;
1444
my @required_args = qw(next_event tell);
1445
foreach my $arg ( @required_args ) {
1446
die "I need a $arg argument" unless $args{$arg};
1448
my ($next_event, $tell) = @args{@required_args};
1450
my $pending = $self->{pending};
1451
local $INPUT_RECORD_SEPARATOR = ";\n#";
1452
my $trimlen = length($INPUT_RECORD_SEPARATOR);
1453
my $pos_in_log = $tell->();
1458
defined($stmt = shift @$pending)
1459
or defined($stmt = $next_event->())
1461
my @properties = ('cmd', 'Query', 'pos_in_log', $pos_in_log);
1462
$pos_in_log = $tell->();
1464
if ( $stmt =~ s/$slow_log_hd_line//go ){ # Throw away header lines in log
1465
my @chunks = split(/$INPUT_RECORD_SEPARATOR/o, $stmt);
1466
if ( @chunks > 1 ) {
1467
MKDEBUG && _d("Found multiple chunks");
1468
$stmt = shift @chunks;
1469
unshift @$pending, @chunks;
1473
$stmt = '#' . $stmt unless $stmt =~ m/\A#/;
1474
$stmt =~ s/;\n#?\Z//;
1477
my ($got_ts, $got_uh, $got_ac, $got_db, $got_set, $got_embed);
1479
my $len = length($stmt);
1482
while ( $stmt =~ m/^(.*)$/mg ) { # /g is important, requires scalar match.
1483
$pos = pos($stmt); # Be careful not to mess this up!
1484
my $line = $1; # Necessary for /g and pos() to work.
1485
MKDEBUG && _d($line);
1487
if ($line =~ m/^(?:#|use |SET (?:last_insert_id|insert_id|timestamp))/o) {
1489
if ( !$got_ts && (my ( $time ) = $line =~ m/$slow_log_ts_line/o)) {
1490
MKDEBUG && _d("Got ts", $time);
1491
push @properties, 'ts', $time;
1494
&& ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
1496
MKDEBUG && _d("Got user, host, ip", $user, $host, $ip);
1497
push @properties, 'user', $user, 'host', $host, 'ip', $ip;
1503
&& ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
1505
MKDEBUG && _d("Got user, host, ip", $user, $host, $ip);
1506
push @properties, 'user', $user, 'host', $host, 'ip', $ip;
1510
elsif (!$got_ac && $line =~ m/^# (?:administrator command:.*)$/) {
1511
MKDEBUG && _d("Got admin command");
1512
$line =~ s/^#\s+//; # string leading "# ".
1513
push @properties, 'cmd', 'Admin', 'arg', $line;
1514
push @properties, 'bytes', length($properties[-1]);
1519
elsif ( $line =~ m/^# +[A-Z][A-Za-z_]+: \S+/ ) { # Make the test cheap!
1520
MKDEBUG && _d("Got some line with properties");
1522
if ( $line =~ m/Schema:\s+\w+: / ) {
1523
MKDEBUG && _d('Removing empty Schema attrib');
1524
$line =~ s/Schema:\s+//;
1525
MKDEBUG && _d($line);
1528
my @temp = $line =~ m/(\w+):\s+(\S+|\Z)/g;
1529
push @properties, @temp;
1532
elsif ( !$got_db && (my ( $db ) = $line =~ m/^use ([^;]+)/ ) ) {
1533
MKDEBUG && _d("Got a default database:", $db);
1534
push @properties, 'db', $db;
1538
elsif (!$got_set && (my ($setting) = $line =~ m/^SET\s+([^;]*)/)) {
1539
MKDEBUG && _d("Got some setting:", $setting);
1540
push @properties, split(/,|\s*=\s*/, $setting);
1544
if ( !$found_arg && $pos == $len ) {
1545
MKDEBUG && _d("Did not find arg, looking for special cases");
1546
local $INPUT_RECORD_SEPARATOR = ";\n";
1547
if ( defined(my $l = $next_event->()) ) {
1550
MKDEBUG && _d("Found admin statement", $l);
1551
push @properties, 'cmd', 'Admin', 'arg', $l;
1552
push @properties, 'bytes', length($properties[-1]);
1556
MKDEBUG && _d("I can't figure out what to do with this line");
1562
MKDEBUG && _d("Got the query/arg line");
1563
my $arg = substr($stmt, $pos - length($line));
1564
push @properties, 'arg', $arg, 'bytes', length($arg);
1565
if ( $args{misc} && $args{misc}->{embed}
1566
&& ( my ($e) = $arg =~ m/($args{misc}->{embed})/)
1568
push @properties, $e =~ m/$args{misc}->{capture}/g;
1574
MKDEBUG && _d('Properties of event:', Dumper(\@properties));
1575
my $event = { @properties };
1576
if ( $args{stats} ) {
1577
$args{stats}->{events_read}++;
1578
$args{stats}->{events_parsed}++;
1584
$args{oktorun}->(0) if $args{oktorun};
1589
my ($package, undef, $line) = caller 0;
1590
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1591
map { defined $_ ? $_ : 'undef' }
1593
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1598
# ###########################################################################
1599
# End SlowLogParser package
1600
# ###########################################################################
1602
# ###########################################################################
1603
# Transformers package 7226
1604
# This package is a copy without comments from the original. The original
1605
# with comments and its test file can be found in the SVN repository at,
1606
# trunk/common/Transformers.pm
1607
# trunk/common/t/Transformers.t
1608
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
1609
# ###########################################################################
1611
package Transformers;
1614
use warnings FATAL => 'all';
1615
use English qw(-no_match_vars);
1616
use Time::Local qw(timegm timelocal);
1617
use Digest::MD5 qw(md5_hex);
1619
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
1622
our @ISA = qw(Exporter);
1623
our %EXPORT_TAGS = ();
1625
our @EXPORT_OK = qw(
1639
our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;
1640
our $proper_ts = qr/(\d\d\d\d)-(\d\d)-(\d\d)[T ](\d\d):(\d\d):(\d\d)(\.\d+)?/;
1641
our $n_ts = qr/(\d{1,5})([shmd]?)/; # Limit \d{1,5} because \d{6} looks
1644
my ( $t, %args ) = @_;
1645
my $p_ms = defined $args{p_ms} ? $args{p_ms} : 0; # precision for ms vals
1646
my $p_s = defined $args{p_s} ? $args{p_s} : 0; # precision for s vals
1651
$t = sprintf('%.17f', $t) if $t =~ /e/;
1653
$t =~ s/\.(\d{1,6})\d*/\.$1/;
1655
if ($t > 0 && $t <= 0.000999) {
1656
$f = ($t * 1000000) . 'us';
1658
elsif ($t >= 0.001000 && $t <= 0.999999) {
1659
$f = sprintf("%.${p_ms}f", $t * 1000);
1660
$f = ($f * 1) . 'ms'; # * 1 to remove insignificant zeros
1663
$f = sprintf("%.${p_s}f", $t);
1664
$f = ($f * 1) . 's'; # * 1 to remove insignificant zeros
1667
$f = 0; # $t should = 0 at this point
1674
my ( $is, $of, %args ) = @_;
1675
my $p = $args{p} || 0; # float precision
1676
my $fmt = $p ? "%.${p}f" : "%d";
1677
return sprintf $fmt, ($is * 100) / ($of ||= 1);
1681
my ( $secs, $fmt ) = @_;
1683
return '00:00' unless $secs;
1685
$fmt ||= $secs >= 86_400 ? 'd'
1686
: $secs >= 3_600 ? 'h'
1690
$fmt eq 'd' ? sprintf(
1691
"%d+%02d:%02d:%02d",
1692
int($secs / 86_400),
1693
int(($secs % 86_400) / 3_600),
1694
int(($secs % 3_600) / 60),
1696
: $fmt eq 'h' ? sprintf(
1698
int(($secs % 86_400) / 3_600),
1699
int(($secs % 3_600) / 60),
1703
int(($secs % 3_600) / 60),
1708
my ( $val, $default_suffix ) = @_;
1709
die "I need a val argument" unless defined $val;
1711
my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
1712
$suffix = $suffix || $default_suffix || 's';
1713
if ( $suffix =~ m/[smhd]/ ) {
1714
$t = $suffix eq 's' ? $num * 1 # Seconds
1715
: $suffix eq 'm' ? $num * 60 # Minutes
1716
: $suffix eq 'h' ? $num * 3600 # Hours
1717
: $num * 86400; # Days
1719
$t *= -1 if $prefix && $prefix eq '-';
1722
die "Invalid suffix for $val: $suffix";
1728
my ( $num, %args ) = @_;
1729
my $p = defined $args{p} ? $args{p} : 2; # float precision
1730
my $d = defined $args{d} ? $args{d} : 1_024; # divisor
1732
my @units = ('', qw(k M G T P E Z Y));
1733
while ( $num >= $d && $n < @units - 1 ) {
1745
my ( $time, $gmt ) = @_;
1746
my ( $sec, $min, $hour, $mday, $mon, $year )
1747
= $gmt ? gmtime($time) : localtime($time);
1750
my $val = sprintf("%d-%02d-%02dT%02d:%02d:%02d",
1751
$year, $mon, $mday, $hour, $min, $sec);
1752
if ( my ($us) = $time =~ m/(\.\d+)$/ ) {
1753
$us = sprintf("%.6f", $us);
1760
sub parse_timestamp {
1762
if ( my($y, $m, $d, $h, $i, $s, $f)
1763
= $val =~ m/^$mysql_ts$/ )
1765
return sprintf "%d-%02d-%02d %02d:%02d:"
1766
. (defined $f ? '%09.6f' : '%02d'),
1767
$y + 2000, $m, $d, $h, $i, (defined $f ? $s + $f : $s);
1772
sub unix_timestamp {
1773
my ( $val, $gmt ) = @_;
1774
if ( my($y, $m, $d, $h, $i, $s, $us) = $val =~ m/^$proper_ts$/ ) {
1776
? timegm($s, $i, $h, $d, $m - 1, $y)
1777
: timelocal($s, $i, $h, $d, $m - 1, $y);
1778
if ( defined $us ) {
1779
$us = sprintf('%.6f', $us);
1787
sub any_unix_timestamp {
1788
my ( $val, $callback ) = @_;
1790
if ( my ($n, $suffix) = $val =~ m/^$n_ts$/ ) {
1791
$n = $suffix eq 's' ? $n # Seconds
1792
: $suffix eq 'm' ? $n * 60 # Minutes
1793
: $suffix eq 'h' ? $n * 3600 # Hours
1794
: $suffix eq 'd' ? $n * 86400 # Days
1795
: $n; # default: Seconds
1796
MKDEBUG && _d('ts is now - N[shmd]:', $n);
1799
elsif ( $val =~ m/^\d{9,}/ ) {
1800
MKDEBUG && _d('ts is already a unix timestamp');
1803
elsif ( my ($ymd, $hms) = $val =~ m/^(\d{6})(?:\s+(\d+:\d+:\d+))?/ ) {
1804
MKDEBUG && _d('ts is MySQL slow log timestamp');
1805
$val .= ' 00:00:00' unless $hms;
1806
return unix_timestamp(parse_timestamp($val));
1808
elsif ( ($ymd, $hms) = $val =~ m/^(\d{4}-\d\d-\d\d)(?:[T ](\d+:\d+:\d+))?/) {
1809
MKDEBUG && _d('ts is properly formatted timestamp');
1810
$val .= ' 00:00:00' unless $hms;
1811
return unix_timestamp($val);
1814
MKDEBUG && _d('ts is MySQL expression');
1815
return $callback->($val) if $callback && ref $callback eq 'CODE';
1818
MKDEBUG && _d('Unknown ts type:', $val);
1824
my $checksum = uc substr(md5_hex($val), -16);
1825
MKDEBUG && _d($checksum, 'checksum for', $val);
1830
my ( $string ) = @_;
1831
return unless $string;
1832
my $poly = 0xEDB88320;
1833
my $crc = 0xFFFFFFFF;
1834
foreach my $char ( split(//, $string) ) {
1835
my $comp = ($crc ^ ord($char)) & 0xFF;
1837
$comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1;
1839
$crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp;
1841
return $crc ^ 0xFFFFFFFF;
1845
my ($package, undef, $line) = caller 0;
1846
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1847
map { defined $_ ? $_ : 'undef' }
1849
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1854
# ###########################################################################
1855
# End Transformers package
1856
# ###########################################################################
1858
# ###########################################################################
1859
# QueryRewriter package 7473
1860
# This package is a copy without comments from the original. The original
1861
# with comments and its test file can be found in the SVN repository at,
1862
# trunk/common/QueryRewriter.pm
1863
# trunk/common/t/QueryRewriter.t
1864
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
1865
# ###########################################################################
1867
use warnings FATAL => 'all';
1869
package QueryRewriter;
1871
use English qw(-no_match_vars);
1873
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
1875
our $verbs = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT
1876
|UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi;
1877
my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly!
1882
(?> [^()]+ ) # Non-parens without backtracking
1884
(??{ $bal }) # Group with matching parens
1889
my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
1890
my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
1891
my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
1892
my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
1896
my ( $class, %args ) = @_;
1897
my $self = { %args };
1898
return bless $self, $class;
1901
sub strip_comments {
1902
my ( $self, $query ) = @_;
1903
return unless $query;
1904
$query =~ s/$olc_re//go;
1905
$query =~ s/$mlc_re//go;
1906
if ( $query =~ m/$vlc_rf/i ) { # contains show + version
1907
$query =~ s/$vlc_re//go;
1913
my ( $self, $query, $length ) = @_;
1917
(?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)?
1918
(?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\)
1920
\s*,\s*\(.*?(ON\s+DUPLICATE|\Z)}
1921
{$1 /*... omitted ...*/$2}xsi;
1923
return $query unless $query =~ m/IN\s*\(\s*(?!select)/i;
1925
my $last_length = 0;
1926
my $query_length = length($query);
1929
&& $query_length > $length
1930
&& $query_length < ( $last_length || $query_length + 1 )
1932
$last_length = $query_length;
1934
(\bIN\s*\() # The opening of an IN list
1935
([^\)]+) # Contents of the list, assuming no item contains paren
1936
(?=\)) # Close of the list
1947
my ( $snippet ) = @_;
1948
my @vals = split(/,/, $snippet);
1949
return $snippet unless @vals > 20;
1950
my @keep = splice(@vals, 0, 20); # Remove and save the first 20 items
1959
my ( $self, $query ) = @_;
1961
$query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query
1962
&& return 'mysqldump';
1963
$query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/# # mk-table-checksum, etc query
1964
&& return 'maatkit';
1965
$query =~ m/\Aadministrator command: /
1967
$query =~ m/\A\s*(call\s+\S+)\(/i
1968
&& return lc($1); # Warning! $1 used, be careful.
1969
if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) {
1970
$query = $beginning; # Shorten multi-value INSERT statements ASAP
1973
$query =~ s/$olc_re//go;
1974
$query =~ s/$mlc_re//go;
1975
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
1978
$query =~ s/\\["']//g; # quoted strings
1979
$query =~ s/".*?"/?/sg; # quoted strings
1980
$query =~ s/'.*?'/?/sg; # quoted strings
1981
$query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;# Anything vaguely resembling numbers
1982
$query =~ s/[xb.+-]\?/?/g; # Clean up leftovers
1983
$query =~ s/\A\s+//; # Chop off leading whitespace
1984
chomp $query; # Kill trailing whitespace
1985
$query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace
1987
$query =~ s/\bnull\b/?/g; # Get rid of NULLs
1988
$query =~ s{ # Collapse IN and VALUES lists
1989
\b(in|values?)(?:[\s,]*\([\s?,]*\))+
1992
$query =~ s{ # Collapse UNION
1993
\b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+
1995
{$1 /*repeat$2*/}xg;
1996
$query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
1998
if ( $query =~ m/\bORDER BY /gi ) { # Find, anchor on ORDER BY clause
1999
1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query;
2006
my ( $self, $query ) = @_;
2008
$query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1";
2009
$query =~ m/\A\s*use\s+/ && return "USE";
2010
$query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
2011
$query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
2013
if ( $query =~ m/\Aadministrator command:/ ) {
2014
$query =~ s/administrator command:/ADMIN/;
2019
$query = $self->strip_comments($query);
2021
if ( $query =~ m/\A\s*SHOW\s+/i ) {
2022
MKDEBUG && _d($query);
2025
$query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
2026
$query =~ s/\s+COUNT[^)]+\)//g;
2028
$query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
2030
$query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s;
2031
$query =~ s/\s+/ /g;
2032
MKDEBUG && _d($query);
2036
eval $QueryParser::data_def_stmts;
2037
eval $QueryParser::tbl_ident;
2038
my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
2040
my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
2041
$obj = uc $obj if $obj;
2042
MKDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
2044
= $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i;
2045
MKDEBUG && _d('Matches db or table:', $db_or_tbl);
2046
return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl;
2049
my @verbs = $query =~ m/\b($verbs)\b/gio;
2052
grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs;
2055
if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) {
2056
MKDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]);
2057
my $union = grep { $_ eq 'UNION' } @verbs;
2058
@verbs = $union ? qw(SELECT UNION) : qw(SELECT);
2061
my $verb_str = join(q{ }, @verbs);
2065
sub __distill_tables {
2066
my ( $self, $query, $table, %args ) = @_;
2067
my $qp = $args{QueryParser} || $self->{QueryParser};
2068
die "I need a QueryParser argument" unless $qp;
2072
$_ =~ s/(_?)[0-9]+/$1?/g;
2074
} grep { defined $_ } $qp->get_tables($query);
2076
push @tables, $table if $table;
2080
grep { my $pass = $_ ne $last; $last = $_; $pass } @tables;
2087
my ( $self, $query, %args ) = @_;
2089
if ( $args{generic} ) {
2090
my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/;
2091
return '' unless $cmd;
2092
$query = (uc $cmd) . ($arg ? " $arg" : '');
2095
my ($verbs, $table) = $self->distill_verbs($query, %args);
2097
if ( $verbs && $verbs =~ m/^SHOW/ ) {
2103
map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
2107
my @tables = $self->__distill_tables($query, $table, %args);
2108
$query = join(q{ }, $verbs, @tables);
2113
$query = $args{trf}->($query, %args);
2119
sub convert_to_select {
2120
my ( $self, $query ) = @_;
2121
return unless $query;
2123
return if $query =~ m/=\s*\(\s*SELECT /i;
2127
update(?:\s+(?:low_priority|ignore))?\s+(.*?)
2129
(?:\s*where\b(.*?))?
2130
(limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)?
2133
{__update_to_select($1, $2, $3, $4)}exsi
2136
(?:insert(?:\s+ignore)?|replace)\s+
2137
.*?\binto\b(.*?)\(([^\)]+)\)\s*
2138
values?\s*(\(.*?\))\s*
2139
(?:\blimit\b|on\s+duplicate\s+key.*)?\s*
2142
{__insert_to_select($1, $2, $3)}exsi
2145
(?:insert(?:\s+ignore)?|replace)\s+
2146
(?:.*?\binto)\b(.*?)\s*
2148
(?:\blimit\b|on\s+duplicate\s+key.*)?\s*
2151
{__insert_to_select_with_set($1, $2)}exsi
2158
{__delete_to_select($1, $2)}exsi;
2159
$query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si;
2160
$query =~ s/\A.*?(?=\bSELECT\s*\b)//ism;
2164
sub convert_select_list {
2165
my ( $self, $query ) = @_;
2167
\A\s*select(.*?)\bfrom\b
2169
{$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi;
2173
sub __delete_to_select {
2174
my ( $delete, $join ) = @_;
2175
if ( $join =~ m/\bjoin\b/ ) {
2176
return "select 1 from $join";
2178
return "select * from $join";
2181
sub __insert_to_select {
2182
my ( $tbl, $cols, $vals ) = @_;
2183
MKDEBUG && _d('Args:', @_);
2184
my @cols = split(/,/, $cols);
2185
MKDEBUG && _d('Cols:', @cols);
2186
$vals =~ s/^\(|\)$//g; # Strip leading/trailing parens
2187
my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g;
2188
MKDEBUG && _d('Vals:', @vals);
2189
if ( @cols == @vals ) {
2190
return "select * from $tbl where "
2191
. join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols));
2194
return "select * from $tbl limit 1";
2198
sub __insert_to_select_with_set {
2199
my ( $from, $set ) = @_;
2200
$set =~ s/,/ and /g;
2201
return "select * from $from where $set ";
2204
sub __update_to_select {
2205
my ( $from, $set, $where, $limit ) = @_;
2206
return "select $set from $from "
2207
. ( $where ? "where $where" : '' )
2208
. ( $limit ? " $limit " : '' );
2211
sub wrap_in_derived {
2212
my ( $self, $query ) = @_;
2213
return unless $query;
2214
return $query =~ m/\A\s*select/i
2215
? "select 1 from ($query) as x limit 1"
2220
my ($package, undef, $line) = caller 0;
2221
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2222
map { defined $_ ? $_ : 'undef' }
2224
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2229
# ###########################################################################
2230
# End QueryRewriter package
2231
# ###########################################################################
2233
# ###########################################################################
2234
# QueryParser package 7452
2235
# This package is a copy without comments from the original. The original
2236
# with comments and its test file can be found in the SVN repository at,
2237
# trunk/common/QueryParser.pm
2238
# trunk/common/t/QueryParser.t
2239
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
2240
# ###########################################################################
2242
package QueryParser;
2245
use warnings FATAL => 'all';
2246
use English qw(-no_match_vars);
2248
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
2249
our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/;
2250
our $tbl_regex = qr{
2251
\b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names
2253
\(? # Optional paren around tables
2255
(?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )*
2258
our $has_derived = qr{
2263
our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i;
2265
our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i;
2273
my ( $self, $query ) = @_;
2274
return unless $query;
2275
MKDEBUG && _d('Getting tables for', $query);
2277
my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i;
2279
MKDEBUG && _d('Special table type:', $ddl_stmt);
2280
$query =~ s/IF\s+(?:NOT\s+)?EXISTS//i;
2281
if ( $query =~ m/$ddl_stmt DATABASE\b/i ) {
2282
MKDEBUG && _d('Query alters a database, not a table');
2285
if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) {
2286
my ($select) = $query =~ m/\b(SELECT\b.+)/is;
2287
MKDEBUG && _d('CREATE TABLE ... SELECT:', $select);
2288
return $self->get_tables($select);
2290
my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i;
2291
MKDEBUG && _d('Matches table:', $tbl);
2295
$query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
2297
if ( $query =~ /^\s*LOCK TABLES/i ) {
2298
MKDEBUG && _d('Special table type: LOCK TABLES');
2299
$query =~ s/^(\s*LOCK TABLES\s+)//;
2300
$query =~ s/\s+(?:READ|WRITE|LOCAL)+\s*//g;
2301
MKDEBUG && _d('Locked tables:', $query);
2302
$query = "FROM $query";
2305
$query =~ s/\\["']//g; # quoted strings
2306
$query =~ s/".*?"/?/sg; # quoted strings
2307
$query =~ s/'.*?'/?/sg; # quoted strings
2310
foreach my $tbls ( $query =~ m/$tbl_regex/gio ) {
2311
MKDEBUG && _d('Match tables:', $tbls);
2313
next if $tbls =~ m/\ASELECT\b/i;
2315
foreach my $tbl ( split(',', $tbls) ) {
2316
$tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio;
2318
if ( $tbl !~ m/[a-zA-Z]/ ) {
2319
MKDEBUG && _d('Skipping suspicious table name:', $tbl);
2329
sub has_derived_table {
2330
my ( $self, $query ) = @_;
2331
my $match = $query =~ m/$has_derived/;
2332
MKDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table');
2337
my ( $self, $query, $list ) = @_;
2343
return $result unless $query;
2345
$query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
2347
$query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig;
2350
my ($tbl_refs, $from) = $query =~ m{
2352
(FROM|INTO|UPDATE)\b\s* # Keyword before table refs
2355
(?:\s+|\z) # If the query does not end with the table
2356
(?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs
2361
if ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
2362
$tbl_refs =~ s/\([^\)]+\)\s*//;
2365
MKDEBUG && _d('tbl refs:', $tbl_refs);
2367
my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i;
2369
my $after_tbl = qr/(?:,|JOIN|ON|USING|\z)/i;
2371
$tbl_refs =~ s/ = /=/g;
2376
( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? )
2380
my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3);
2381
MKDEBUG && _d('Match table:', $tbl_ref);
2382
push @tbl_refs, $tbl_ref;
2383
$alias = $self->trim_identifier($alias);
2385
if ( $tbl_ref =~ m/^AS\s+\w+/i ) {
2386
MKDEBUG && _d('Subquery', $tbl_ref);
2387
$result->{TABLE}->{$alias} = undef;
2391
my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/;
2392
$db = $self->trim_identifier($db);
2393
$tbl = $self->trim_identifier($tbl);
2394
$result->{TABLE}->{$alias || $tbl} = $tbl;
2395
$result->{DATABASE}->{$tbl} = $db if $db;
2399
MKDEBUG && _d("No tables ref in", $query);
2411
my ( $self, $query ) = @_;
2412
return unless $query;
2413
$query = $self->clean_query($query);
2414
MKDEBUG && _d('Splitting', $query);
2416
my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i;
2418
my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query);
2421
if ( @split_statements == 1 ) {
2422
push @statements, $query;
2425
for ( my $i = 0; $i <= $#split_statements; $i += 2 ) {
2426
push @statements, $split_statements[$i].$split_statements[$i+1];
2428
if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) {
2429
$statements[-2] .= pop @statements;
2434
MKDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements);
2439
my ( $self, $query ) = @_;
2440
return unless $query;
2441
$query =~ s!/\*.*?\*/! !g; # Remove /* comment blocks */
2442
$query =~ s/^\s+//; # Remove leading spaces
2443
$query =~ s/\s+$//; # Remove trailing spaces
2444
$query =~ s/\s{2,}/ /g; # Remove extra spaces
2448
sub split_subquery {
2449
my ( $self, $query ) = @_;
2450
return unless $query;
2451
$query = $self->clean_query($query);
2455
my $sqno = 0; # subquery number
2457
while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) {
2460
MKDEBUG && _d($word, $sqno);
2461
if ( $word =~ m/^\(?SELECT\b/i ) {
2462
my $start_pos = $pos - length($word) - 1;
2465
MKDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos);
2466
$subqueries[$sqno] = {
2467
start_pos => $start_pos,
2471
lp => 1, # left parentheses
2472
rp => 0, # right parentheses
2477
MKDEBUG && _d('Main SELECT at pos 0');
2481
next unless $sqno; # next unless we're in a subquery
2482
MKDEBUG && _d('In subquery', $sqno);
2483
my $sq = $subqueries[$sqno];
2484
if ( $sq->{done} ) {
2485
MKDEBUG && _d('This subquery is done; SQL is for',
2486
($sqno - 1 ? "subquery $sqno" : "the main SELECT"));
2489
push @{$sq->{words}}, $word;
2490
my $lp = ($word =~ tr/\(//) || 0;
2491
my $rp = ($word =~ tr/\)//) || 0;
2492
MKDEBUG && _d('parentheses left', $lp, 'right', $rp);
2493
if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) {
2494
my $end_pos = $pos - 1;
2495
MKDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos);
2496
$sq->{end_pos} = $end_pos;
2497
$sq->{len} = $end_pos - $sq->{start_pos};
2502
for my $i ( 1..$#subqueries ) {
2503
my $sq = $subqueries[$i];
2505
$sq->{sql} = join(' ', @{$sq->{words}});
2507
$sq->{start_pos} + 1, # +1 for (
2508
$sq->{len} - 1, # -1 for )
2512
return $query, map { $_->{sql} } grep { defined $_ } @subqueries;
2516
my ( $self, $query, $qr ) = @_;
2517
my ($type, undef) = $qr->distill_verbs($query);
2519
if ( $type =~ m/^SELECT\b/ ) {
2522
elsif ( $type =~ m/^$data_manip_stmts\b/
2523
|| $type =~ m/^$data_def_stmts\b/ ) {
2534
my ( $self, $query ) = @_;
2536
return $cols unless $query;
2539
if ( $query =~ m/^SELECT/i ) {
2552
|SQL_CALC_FOUND_ROWS
2555
($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i;
2557
elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
2558
($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i;
2561
MKDEBUG && _d('Columns:', $cols_def);
2563
@$cols = split(',', $cols_def);
2576
my ( $self, $query ) = @_;
2577
return unless $query;
2581
$query = $self->clean_query($query);
2583
$parsed->{query} = $query,
2584
$parsed->{tables} = $self->get_aliases($query, 1);
2585
$parsed->{columns} = $self->get_columns($query);
2587
my ($type) = $query =~ m/^(\w+)/;
2588
$parsed->{type} = lc $type;
2591
$parsed->{sub_queries} = [];
2596
sub extract_tables {
2597
my ( $self, %args ) = @_;
2598
my $query = $args{query};
2599
my $default_db = $args{default_db};
2600
my $q = $self->{Quoter} || $args{Quoter};
2601
return unless $query;
2602
MKDEBUG && _d('Extracting tables');
2605
foreach my $db_tbl ( $self->get_tables($query) ) {
2606
next unless $db_tbl;
2607
next if $seen{$db_tbl}++; # Unique-ify for issue 337.
2608
my ( $db, $tbl ) = $q->split_unquote($db_tbl);
2609
push @tables, [ $db || $default_db, $tbl ];
2614
sub trim_identifier {
2615
my ($self, $str) = @_;
2616
return unless defined $str;
2624
my ($package, undef, $line) = caller 0;
2625
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2626
map { defined $_ ? $_ : 'undef' }
2628
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2633
# ###########################################################################
2634
# End QueryParser package
2635
# ###########################################################################
2637
# ###########################################################################
2638
# FileIterator package 7096
2639
# This package is a copy without comments from the original. The original
2640
# with comments and its test file can be found in the SVN repository at,
2641
# trunk/common/FileIterator.pm
2642
# trunk/common/t/FileIterator.t
2643
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
2644
# ###########################################################################
2645
package FileIterator;
2648
use warnings FATAL => 'all';
2650
use English qw(-no_match_vars);
2652
$Data::Dumper::Indent = 1;
2653
$Data::Dumper::Sortkeys = 1;
2654
$Data::Dumper::Quotekeys = 0;
2656
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
2659
my ( $class, %args ) = @_;
2663
return bless $self, $class;
2667
my ( $self, @filenames ) = @_;
2669
my @final_filenames;
2671
foreach my $fn ( @filenames ) {
2672
if ( !defined $fn ) {
2673
warn "Skipping undefined filename";
2677
if ( !-e $fn || !-r $fn ) {
2678
warn "$fn does not exist or is not readable";
2682
push @final_filenames, $fn;
2685
if ( !@filenames ) {
2686
push @final_filenames, '-';
2687
MKDEBUG && _d('Auto-adding "-" to the list of filenames');
2690
MKDEBUG && _d('Final filenames:', @final_filenames);
2692
while ( @final_filenames ) {
2693
my $fn = shift @final_filenames;
2694
MKDEBUG && _d('Filename:', $fn);
2695
if ( $fn eq '-' ) { # Magical STDIN filename.
2696
return (*STDIN, undef, undef);
2698
open my $fh, '<', $fn or warn "Cannot open $fn: $OS_ERROR";
2700
return ( $fh, $fn, -s $fn );
2703
return (); # Avoids $f being set to 0 in list context.
2708
my ($package, undef, $line) = caller 0;
2709
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
2710
map { defined $_ ? $_ : 'undef' }
2712
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
2717
# ###########################################################################
2718
# End FileIterator package
2719
# ###########################################################################
2721
# ###########################################################################
2722
# SQLParser package 7497
2723
# This package is a copy without comments from the original. The original
2724
# with comments and its test file can be found in the SVN repository at,
2725
# trunk/common/SQLParser.pm
2726
# trunk/common/t/SQLParser.t
2727
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
2728
# ###########################################################################
2734
use warnings FATAL => 'all';
2735
use English qw(-no_match_vars);
2736
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
2739
$Data::Dumper::Indent = 1;
2740
$Data::Dumper::Sortkeys = 1;
2741
$Data::Dumper::Quotekeys = 0;
2743
my $quoted_ident = qr/`[^`]+`/;
2744
my $unquoted_ident = qr/
2745
\@{0,2} # optional @ or @@ for variables
2746
\w+ # the ident name
2747
(?:\([^\)]*\))? # optional function params
2750
my $ident_alias = qr/
2751
\s+ # space before alias
2752
(?:(AS)\s+)? # optional AS keyword
2753
((?>$quoted_ident|$unquoted_ident)) # alais
2756
my $table_ident = qr/(?:
2757
((?:(?>$quoted_ident|$unquoted_ident)\.?){1,2}) # table
2758
(?:$ident_alias)? # optional alias
2761
my $column_ident = qr/(?:
2762
((?:(?>$quoted_ident|$unquoted_ident|\*)\.?){1,3}) # column
2763
(?:$ident_alias)? # optional alias
2767
my ( $class, %args ) = @_;
2771
return bless $self, $class;
2775
my ( $self, $query ) = @_;
2776
return unless $query;
2778
my $allowed_types = qr/(?:
2786
$query = $self->clean_query($query);
2789
if ( $query =~ s/^(\w+)\s+// ) {
2791
MKDEBUG && _d('Query type:', $type);
2792
die "Cannot parse " . uc($type) . " queries"
2793
unless $type =~ m/$allowed_types/i;
2796
die "Query does not begin with a word"; # shouldn't happen
2799
$query = $self->normalize_keyword_spaces($query);
2802
if ( $query =~ m/(\(SELECT )/i ) {
2803
MKDEBUG && _d('Removing subqueries');
2804
@subqueries = $self->remove_subqueries($query);
2805
$query = shift @subqueries;
2808
my $parse_func = "parse_$type";
2809
my $struct = $self->$parse_func($query);
2811
MKDEBUG && _d($parse_func, 'failed to parse query');
2814
$struct->{type} = $type;
2815
$self->_parse_clauses($struct);
2817
if ( @subqueries ) {
2818
MKDEBUG && _d('Parsing subqueries');
2819
foreach my $subquery ( @subqueries ) {
2820
my $subquery_struct = $self->parse($subquery->{query});
2821
@{$subquery_struct}{keys %$subquery} = values %$subquery;
2822
push @{$struct->{subqueries}}, $subquery_struct;
2826
MKDEBUG && _d('Query struct:', Dumper($struct));
2831
sub _parse_clauses {
2832
my ( $self, $struct ) = @_;
2833
foreach my $clause ( keys %{$struct->{clauses}} ) {
2834
if ( $clause =~ m/ / ) {
2835
(my $clause_no_space = $clause) =~ s/ /_/g;
2836
$struct->{clauses}->{$clause_no_space} = $struct->{clauses}->{$clause};
2837
delete $struct->{clauses}->{$clause};
2838
$clause = $clause_no_space;
2841
my $parse_func = "parse_$clause";
2842
$struct->{$clause} = $self->$parse_func($struct->{clauses}->{$clause});
2844
if ( $clause eq 'select' ) {
2845
MKDEBUG && _d('Parsing subquery clauses');
2846
$struct->{select}->{type} = 'select';
2847
$self->_parse_clauses($struct->{select});
2854
my ( $self, $query ) = @_;
2855
return unless $query;
2857
$query =~ s/^\s*--.*$//gm; # -- comments
2858
$query =~ s/\s+/ /g; # extra spaces/flatten
2859
$query =~ s!/\*.*?\*/!!g; # /* comments */
2860
$query =~ s/^\s+//; # leading spaces
2861
$query =~ s/\s+$//; # trailing spaces
2866
sub normalize_keyword_spaces {
2867
my ( $self, $query ) = @_;
2869
$query =~ s/\b(VALUE(?:S)?)\(/$1 (/i;
2870
$query =~ s/\bON\(/on (/gi;
2871
$query =~ s/\bUSING\(/using (/gi;
2873
$query =~ s/\(\s+SELECT\s+/(SELECT /gi;
2879
my ( $self, $query, $keywords, $first_clause, $clauses ) = @_;
2880
return unless $query;
2883
1 while $query =~ s/$keywords\s+/$struct->{keywords}->{lc $1}=1, ''/gie;
2885
my @clause = grep { defined $_ }
2886
($query =~ m/\G(.+?)(?:$clauses\s+|\Z)/gci);
2888
my $clause = $first_clause,
2889
my $value = shift @clause;
2890
$struct->{clauses}->{$clause} = $value;
2891
MKDEBUG && _d('Clause:', $clause, $value);
2894
$clause = shift @clause;
2895
$value = shift @clause;
2896
$struct->{clauses}->{lc $clause} = $value;
2897
MKDEBUG && _d('Clause:', $clause, $value);
2900
($struct->{unknown}) = ($query =~ m/\G(.+)/);
2906
my ( $self, $query ) = @_;
2907
if ( $query =~ s/FROM\s+//i ) {
2908
my $keywords = qr/(LOW_PRIORITY|QUICK|IGNORE)/i;
2909
my $clauses = qr/(FROM|WHERE|ORDER BY|LIMIT)/i;
2910
return $self->_parse_query($query, $keywords, 'from', $clauses);
2913
die "DELETE without FROM: $query";
2918
my ( $self, $query ) = @_;
2919
return unless $query;
2922
my $keywords = qr/(LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)/i;
2923
1 while $query =~ s/$keywords\s+/$struct->{keywords}->{lc $1}=1, ''/gie;
2925
if ( $query =~ m/ON DUPLICATE KEY UPDATE (.+)/i ) {
2927
die "No values after ON DUPLICATE KEY UPDATE: $query" unless $values;
2928
$struct->{clauses}->{on_duplicate} = $values;
2929
MKDEBUG && _d('Clause: on duplicate key update', $values);
2931
$query =~ s/\s+ON DUPLICATE KEY UPDATE.+//;
2934
if ( my @into = ($query =~ m/
2935
(?:INTO\s+)? # INTO, optional
2936
(.+?)\s+ # table ref
2937
(\([^\)]+\)\s+)? # column list, optional
2938
(VALUE.?|SET|SELECT)\s+ # start of next caluse
2941
my $tbl = shift @into; # table ref
2942
$struct->{clauses}->{into} = $tbl;
2943
MKDEBUG && _d('Clause: into', $tbl);
2945
my $cols = shift @into; # columns, maybe
2947
$cols =~ s/[\(\)]//g;
2948
$struct->{clauses}->{columns} = $cols;
2949
MKDEBUG && _d('Clause: columns', $cols);
2952
my $next_clause = lc(shift @into); # VALUES, SET or SELECT
2953
die "INSERT/REPLACE without clause after table: $query"
2954
unless $next_clause;
2955
$next_clause = 'values' if $next_clause eq 'value';
2956
my ($values) = ($query =~ m/\G(.+)/gci);
2957
die "INSERT/REPLACE without values: $query" unless $values;
2958
$struct->{clauses}->{$next_clause} = $values;
2959
MKDEBUG && _d('Clause:', $next_clause, $values);
2962
($struct->{unknown}) = ($query =~ m/\G(.+)/);
2968
*parse_replace = \&parse_insert;
2972
my ( $self, $query ) = @_;
2975
my $final_keywords = qr/(FOR UPDATE|LOCK IN SHARE MODE)/i;
2976
1 while $query =~ s/\s+$final_keywords/(push @keywords, $1), ''/gie;
2989
|SQL_CALC_FOUND_ROWS
3001
my $struct = $self->_parse_query($query, $keywords, 'columns', $clauses);
3003
map { s/ /_/g; $struct->{keywords}->{lc $_} = 1; } @keywords;
3009
my $keywords = qr/(LOW_PRIORITY|IGNORE)/i;
3010
my $clauses = qr/(SET|WHERE|ORDER BY|LIMIT)/i;
3011
return _parse_query(@_, $keywords, 'tables', $clauses);
3016
my ( $self, $from ) = @_;
3017
return unless $from;
3018
MKDEBUG && _d('Parsing FROM', $from);
3020
my $comma_join = qr/(?>\s*,\s*)/;
3021
my $ansi_join = qr/(?>
3023
(?:(?:INNER|CROSS|STRAIGHT_JOIN|LEFT|RIGHT|OUTER|NATURAL)\s+)*
3028
my @tbls; # all table refs, a hashref for each
3029
my $tbl_ref; # current table ref hashref
3030
my $join; # join info hahsref for current table ref
3031
foreach my $thing ( split /($comma_join|$ansi_join)/io, $from ) {
3032
die "Error parsing FROM clause" unless $thing;
3036
MKDEBUG && _d('Table thing:', $thing);
3038
if ( $thing =~ m/\s+(?:ON|USING)\s+/i ) {
3039
MKDEBUG && _d("JOIN condition");
3040
my ($tbl_ref_txt, $join_condition_verb, $join_condition_value)
3041
= $thing =~ m/^(.+?)\s+(ON|USING)\s+(.+)/i;
3043
$tbl_ref = $self->parse_table_reference($tbl_ref_txt);
3045
$join->{condition} = lc $join_condition_verb;
3046
if ( $join->{condition} eq 'on' ) {
3047
my $where = $self->parse_where($join_condition_value);
3048
$join->{where} = $where;
3051
$join_condition_value =~ s/^\s*\(//;
3052
$join_condition_value =~ s/\)\s*$//;
3053
$join->{columns} = $self->_parse_csv($join_condition_value);
3056
elsif ( $thing =~ m/(?:,|JOIN)/i ) {
3058
$tbl_ref->{join} = $join;
3060
push @tbls, $tbl_ref;
3061
MKDEBUG && _d("Complete table reference:", Dumper($tbl_ref));
3066
$join->{to} = $tbls[-1]->{tbl};
3067
if ( $thing eq ',' ) {
3068
$join->{type} = 'inner';
3072
my $type = $thing =~ m/^(.+?)\s+JOIN$/i ? lc $1 : 'inner';
3073
$join->{type} = $type;
3078
$tbl_ref = $self->parse_table_reference($thing);
3079
MKDEBUG && _d('Table reference:', Dumper($tbl_ref));
3085
$tbl_ref->{join} = $join;
3087
push @tbls, $tbl_ref;
3088
MKDEBUG && _d("Complete table reference:", Dumper($tbl_ref));
3094
sub parse_table_reference {
3095
my ( $self, $tbl_ref ) = @_;
3096
return unless $tbl_ref;
3097
MKDEBUG && _d('Parsing table reference:', $tbl_ref);
3102
(?:FORCE|USE|INGORE)\s
3107
$tbl{index_hint} = $1;
3108
MKDEBUG && _d('Index hint:', $tbl{index_hint});
3111
if ( $tbl_ref =~ m/$table_ident/ ) {
3112
my ($db_tbl, $as, $alias) = ($1, $2, $3); # XXX
3113
my $ident_struct = $self->parse_identifier('table', $db_tbl);
3114
$alias =~ s/`//g if $alias;
3115
@tbl{keys %$ident_struct} = values %$ident_struct;
3116
$tbl{explicit_alias} = 1 if $as;
3117
$tbl{alias} = $alias if $alias;
3120
die "Table ident match failed"; # shouldn't happen
3126
no warnings; # Why? See same line above.
3127
*parse_into = \&parse_from;
3128
*parse_tables = \&parse_from;
3132
my ( $self, $where ) = @_;
3133
return unless $where;
3134
MKDEBUG && _d("Parsing WHERE", $where);
3150
|(?:(?:\sNOT\s)?BETWEEN)
3157
(?:$op_symbol) # don't need spaces around the symbols, e.g.: col=1
3158
|(?:\s+$op_verb) # must have space before verb op, e.g.: col LIKE ...
3166
while ( $where =~ m/\b(and|or)\b/gi ) {
3167
my $pos = (pos $where) - (length $1); # pos at and|or, not after
3169
$pred = substr $where, $offset, ($pos-$offset);
3171
push @has_op, $pred =~ m/$op_pat/o ? 1 : 0;
3175
$pred = substr $where, $offset;
3177
push @has_op, $pred =~ m/$op_pat/o ? 1 : 0;
3178
MKDEBUG && _d("Predicate fragments:", Dumper(\@pred));
3179
MKDEBUG && _d("Predicate frags with operators:", @has_op);
3181
my $n = scalar @pred - 1;
3182
for my $i ( 1..$n ) {
3184
my $j = $i - 1; # preceding pred frag
3186
next if $pred[$j] !~ m/\s+between\s+/i && $self->_is_constant($pred[$i]);
3188
if ( !$has_op[$i] ) {
3189
$pred[$j] .= $pred[$i];
3193
MKDEBUG && _d("Predicate fragments joined:", Dumper(\@pred));
3195
for my $i ( 0..@pred ) {
3197
next unless defined $pred;
3198
my $n_single_quotes = ($pred =~ tr/'//);
3199
my $n_double_quotes = ($pred =~ tr/"//);
3200
if ( ($n_single_quotes % 2) || ($n_double_quotes % 2) ) {
3201
$pred[$i] .= $pred[$i + 1];
3202
$pred[$i + 1] = undef;
3205
MKDEBUG && _d("Predicate fragments balanced:", Dumper(\@pred));
3208
foreach my $pred ( @pred ) {
3209
next unless defined $pred;
3213
if ( $pred =~ s/^(and|or)\s+//i ) {
3216
my ($col, $op, $val) = $pred =~ m/^(.+?)$op_pat(.+)$/o;
3217
if ( !$col || !$op ) {
3218
if ( $self->_is_constant($pred) ) {
3222
die "Failed to parse WHERE condition: $pred";
3228
$col =~ s/^\(+//; # no unquoted column name begins with (
3237
if ( ($op || '') !~ m/IN/i && $val !~ m/^\w+\([^\)]+\)$/ ) {
3241
if ( $val =~ m/NULL|TRUE|FALSE/i ) {
3253
return \@predicates;
3257
my ( $self, $val ) = @_;
3258
return 0 unless defined $val;
3259
$val =~ s/^\s*(?:and|or)\s+//;
3261
$val =~ m/^\s*(?:TRUE|FALSE)\s*$/i || $val =~ m/^\s*-?\d+\s*$/ ? 1 : 0;
3265
my ( $self, $having ) = @_;
3269
sub parse_group_by {
3270
my ( $self, $group_by ) = @_;
3271
return unless $group_by;
3272
MKDEBUG && _d('Parsing GROUP BY', $group_by);
3274
my $with_rollup = $group_by =~ s/\s+WITH ROLLUP\s*//i;
3276
my $idents = $self->parse_identifiers( $self->_parse_csv($group_by) );
3278
$idents->{with_rollup} = 1 if $with_rollup;
3283
sub parse_order_by {
3284
my ( $self, $order_by ) = @_;
3285
return unless $order_by;
3286
MKDEBUG && _d('Parsing ORDER BY', $order_by);
3287
my $idents = $self->parse_identifiers( $self->_parse_csv($order_by) );
3292
my ( $self, $limit ) = @_;
3293
return unless $limit;
3297
if ( $limit =~ m/(\S+)\s+OFFSET\s+(\S+)/i ) {
3298
$struct->{explicit_offset} = 1;
3299
$struct->{row_count} = $1;
3300
$struct->{offset} = $2;
3303
my ($offset, $cnt) = $limit =~ m/(?:(\S+),\s+)?(\S+)/i;
3304
$struct->{row_count} = $cnt;
3305
$struct->{offset} = $offset if defined $offset;
3311
my ( $self, $values ) = @_;
3312
return unless $values;
3313
$values =~ s/^\s*\(//;
3314
$values =~ s/\s*\)//;
3315
my $vals = $self->_parse_csv(
3324
my ( $self, $set ) = @_;
3325
MKDEBUG && _d("Parse SET", $set);
3327
my $vals = $self->_parse_csv($set);
3328
return unless $vals && @$vals;
3331
foreach my $col_val ( @$vals ) {
3332
my ($col, $val) = $col_val =~ m/^([^=]+)\s*=\s*(.+)/;
3333
my $ident_struct = $self->parse_identifier('column', $col);
3338
MKDEBUG && _d("SET:", Dumper($set_struct));
3339
push @set, $set_struct;
3345
my ( $self, $vals, %args ) = @_;
3346
return unless $vals;
3349
if ( $args{quoted_values} ) {
3350
my $quote_char = '';
3352
foreach my $val ( split(',', $vals) ) {
3353
MKDEBUG && _d("Next value:", $val);
3354
if ( $quote_char ) {
3355
MKDEBUG && _d("Value is part of previous quoted value");
3356
$vals[-1] .= ",$val";
3358
if ( $val =~ m/[^\\]*$quote_char$/ ) {
3359
if ( $args{remove_quotes} ) {
3360
$vals[-1] =~ s/^\s*$quote_char//;
3361
$vals[-1] =~ s/$quote_char\s*$//;
3363
MKDEBUG && _d("Previous quoted value is complete:", $vals[-1]);
3372
if ( $val =~ m/^(['"])/ ) {
3373
MKDEBUG && _d("Value is quoted");
3374
$quote_char = $1; # XXX
3375
if ( $val =~ m/.$quote_char$/ ) {
3376
MKDEBUG && _d("Value is complete");
3378
if ( $args{remove_quotes} ) {
3379
$vals[-1] =~ s/^\s*$quote_char//;
3380
$vals[-1] =~ s/$quote_char\s*$//;
3384
MKDEBUG && _d("Quoted value is not complete");
3391
MKDEBUG && _d("Saving value", ($quote_char ? "fragment" : ""));
3396
@vals = map { s/^\s+//; s/\s+$//; $_ } split(',', $vals);
3402
no warnings; # Why? See same line above.
3403
*parse_on_duplicate = \&_parse_csv;
3407
my ( $self, $cols ) = @_;
3408
MKDEBUG && _d('Parsing columns list:', $cols);
3412
while (pos $cols < length $cols) {
3413
if ($cols =~ m/\G\s*$column_ident\s*(?>,|\Z)/gcxo) {
3414
my ($db_tbl_col, $as, $alias) = ($1, $2, $3); # XXX
3415
my $ident_struct = $self->parse_identifier('column', $db_tbl_col);
3416
$alias =~ s/`//g if $alias;
3419
($as ? (explicit_alias => 1) : ()),
3420
($alias ? (alias => $alias) : ()),
3422
push @cols, $col_struct;
3425
die "Column ident match failed"; # shouldn't happen
3432
sub remove_subqueries {
3433
my ( $self, $query ) = @_;
3436
while ( $query =~ m/(\(SELECT )/gi ) {
3437
my $pos = (pos $query) - (length $1);
3438
push @start_pos, $pos;
3441
@start_pos = reverse @start_pos;
3443
for my $i ( 0..$#start_pos ) {
3445
pos $query = $start_pos[$i];
3446
while ( $query =~ m/([\(\)])/cg ) {
3448
$closed += ($c eq '(' ? 1 : -1);
3449
last unless $closed;
3451
push @end_pos, pos $query;
3457
for my $i ( 0..$#start_pos ) {
3458
MKDEBUG && _d('Query:', $query);
3459
my $offset = $start_pos[$i];
3460
my $len = $end_pos[$i] - $start_pos[$i] - $len_adj;
3461
MKDEBUG && _d("Subquery $n start", $start_pos[$i],
3462
'orig end', $end_pos[$i], 'adj', $len_adj, 'adj end',
3463
$offset + $len, 'len', $len);
3466
my $token = '__SQ' . $n . '__';
3467
my $subquery = substr($query, $offset, $len, $token);
3468
MKDEBUG && _d("Subquery $n:", $subquery);
3470
my $outer_start = $start_pos[$i + 1];
3471
my $outer_end = $end_pos[$i + 1];
3472
if ( $outer_start && ($outer_start < $start_pos[$i])
3473
&& $outer_end && ($outer_end > $end_pos[$i]) ) {
3474
MKDEBUG && _d("Subquery $n nested in next subquery");
3475
$len_adj += $len - length $token;
3476
$struct->{nested} = $i + 1;
3479
MKDEBUG && _d("Subquery $n not nested");
3481
if ( $subqueries[-1] && $subqueries[-1]->{nested} ) {
3482
MKDEBUG && _d("Outermost subquery");
3486
if ( $query =~ m/(?:=|>|<|>=|<=|<>|!=|<=>)\s*$token/ ) {
3487
$struct->{context} = 'scalar';
3489
elsif ( $query =~ m/\b(?:IN|ANY|SOME|ALL|EXISTS)\s*$token/i ) {
3490
if ( $query !~ m/\($token\)/ ) {
3491
$query =~ s/$token/\($token\)/;
3492
$len_adj -= 2 if $struct->{nested};
3494
$struct->{context} = 'list';
3497
$struct->{context} = 'identifier';
3499
MKDEBUG && _d("Subquery $n context:", $struct->{context});
3501
$subquery =~ s/^\s*\(//;
3502
$subquery =~ s/\s*\)\s*$//;
3504
$struct->{query} = $subquery;
3505
push @subqueries, $struct;
3509
return $query, @subqueries;
3512
sub parse_identifiers {
3513
my ( $self, $idents ) = @_;
3514
return unless $idents;
3515
MKDEBUG && _d("Parsing identifiers");
3518
foreach my $ident ( @$idents ) {
3519
MKDEBUG && _d("Identifier:", $ident);
3522
if ( $ident =~ s/\s+(ASC|DESC)\s*$//i ) {
3523
$parts->{sort} = uc $1; # XXX
3526
if ( $ident =~ m/^\d+$/ ) { # Position like 5
3527
MKDEBUG && _d("Positional ident");
3528
$parts->{position} = $ident;
3530
elsif ( $ident =~ m/^\w+\(/ ) { # Function like MIN(col)
3531
MKDEBUG && _d("Expression ident");
3532
my ($func, $expr) = $ident =~ m/^(\w+)\(([^\)]*)\)/;
3533
$parts->{function} = uc $func;
3534
$parts->{expression} = $expr if $expr;
3536
else { # Ref like (table.)column
3537
MKDEBUG && _d("Table/column ident");
3538
my ($tbl, $col) = $self->split_unquote($ident);
3539
$parts->{table} = $tbl if $tbl;
3540
$parts->{column} = $col;
3542
push @ident_parts, $parts;
3545
return \@ident_parts;
3548
sub parse_identifier {
3549
my ( $self, $type, $ident ) = @_;
3550
return unless $type && $ident;
3551
MKDEBUG && _d("Parsing", $type, "identifier:", $ident);
3554
my @ident_parts = map { s/`//g; $_; } split /[.]/, $ident;
3555
if ( @ident_parts == 3 ) {
3556
@ident_struct{qw(db tbl col)} = @ident_parts;
3558
elsif ( @ident_parts == 2 ) {
3559
my @parts_for_type = $type eq 'column' ? qw(tbl col)
3560
: $type eq 'table' ? qw(db tbl)
3561
: die "Invalid identifier type: $type";
3562
@ident_struct{@parts_for_type} = @ident_parts;
3564
elsif ( @ident_parts == 1 ) {
3565
my $part = $type eq 'column' ? 'col' : 'tbl';
3566
@ident_struct{($part)} = @ident_parts;
3569
die "Invalid number of parts in $type reference: $ident";
3572
if ( $self->{SchemaQualifier} ) {
3573
if ( $type eq 'column' && !$ident_struct{tbl} ) {
3574
my $qcol = $self->{SchemaQualifier}->qualify_column(
3575
column => $ident_struct{col},
3577
$ident_struct{db} = $qcol->{db} if $qcol->{db};
3578
$ident_struct{tbl} = $qcol->{tbl} if $qcol->{tbl};
3580
elsif ( $type eq 'table' && !$ident_struct{db} ) {
3581
my $db = $self->{SchemaQualifier}->get_database_for_table(
3582
table => $ident_struct{tbl},
3584
$ident_struct{db} = $db if $db;
3588
MKDEBUG && _d($type, "identifier struct:", Dumper(\%ident_struct));
3589
return \%ident_struct;
3593
my ( $self, $db_tbl, $default_db ) = @_;
3595
my ( $db, $tbl ) = split(/[.]/, $db_tbl);
3604
my ( $self, $thing ) = @_;
3606
return 0 unless $thing;
3608
return 0 if $thing =~ m/\s*['"]/;
3610
return 0 if $thing =~ m/^\s*\d+(?:\.\d+)?\s*$/;
3612
return 0 if $thing =~ m/^\s*(?>
3617
return 1 if $thing =~ m/^\s*$column_ident\s*$/;
3622
sub set_SchemaQualifier {
3623
my ( $self, $sq ) = @_;
3624
$self->{SchemaQualifier} = $sq;
3629
my ($package, undef, $line) = caller 0;
3630
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
3631
map { defined $_ ? $_ : 'undef' }
3633
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
3639
# ###########################################################################
3640
# End SQLParser package
3641
# ###########################################################################
3643
# ###########################################################################
3644
# TableUsage package 7498
3645
# This package is a copy without comments from the original. The original
3646
# with comments and its test file can be found in the SVN repository at,
3647
# trunk/common/TableUsage.pm
3648
# trunk/common/t/TableUsage.t
3649
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
3650
# ###########################################################################
3656
use warnings FATAL => 'all';
3657
use English qw(-no_match_vars);
3660
$Data::Dumper::Indent = 1;
3661
$Data::Dumper::Sortkeys = 1;
3662
$Data::Dumper::Quotekeys = 0;
3664
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
3667
my ( $class, %args ) = @_;
3668
my @required_args = qw(QueryParser SQLParser);
3669
foreach my $arg ( @required_args ) {
3670
die "I need a $arg argument" unless $args{$arg};
3674
constant_data_value => 'DUAL',
3679
return bless $self, $class;
3682
sub get_table_usage {
3683
my ( $self, %args ) = @_;
3684
my @required_args = qw(query);
3685
foreach my $arg ( @required_args ) {
3686
die "I need a $arg argument" unless $args{$arg};
3688
my ($query) = @args{@required_args};
3689
MKDEBUG && _d('Getting table access for',
3690
substr($query, 0, 100), (length $query > 100 ? '...' : ''));
3692
my $cats; # arrayref of CAT hashrefs for each table
3696
$query_struct = $self->{SQLParser}->parse($query);
3698
if ( $EVAL_ERROR ) {
3699
MKDEBUG && _d('Failed to parse query with SQLParser:', $EVAL_ERROR);
3700
if ( $EVAL_ERROR =~ m/Cannot parse/ ) {
3701
$cats = $self->_get_tables_used_from_query_parser(%args);
3708
$cats = $self->_get_tables_used_from_query_struct(
3709
query_struct => $query_struct,
3714
MKDEBUG && _d('Query table access:', Dumper($cats));
3718
sub _get_tables_used_from_query_parser {
3719
my ( $self, %args ) = @_;
3720
my @required_args = qw(query);
3721
foreach my $arg ( @required_args ) {
3722
die "I need a $arg argument" unless $args{$arg};
3724
my ($query) = @args{@required_args};
3725
MKDEBUG && _d('Getting tables used from query parser');
3727
$query = $self->{QueryParser}->clean_query($query);
3728
my ($query_type) = $query =~ m/^\s*(\w+)\s+/;
3729
$query_type = uc $query_type;
3730
die "Query does not begin with a word" unless $query_type; # shouldn't happen
3732
if ( $query_type eq 'DROP' ) {
3733
my ($drop_what) = $query =~ m/^\s*DROP\s+(\w+)\s+/i;
3734
die "Invalid DROP query: $query" unless $drop_what;
3735
$query_type .= '_' . uc($drop_what);
3739
foreach my $table ( $self->{QueryParser}->get_tables($query) ) {
3741
push @{$tables_used[0]}, {
3743
context => $query_type,
3747
return \@tables_used;
3750
sub _get_tables_used_from_query_struct {
3751
my ( $self, %args ) = @_;
3752
my @required_args = qw(query_struct);
3753
foreach my $arg ( @required_args ) {
3754
die "I need a $arg argument" unless $args{$arg};
3756
my ($query_struct) = @args{@required_args};
3757
my $sp = $self->{SQLParser};
3759
MKDEBUG && _d('Getting table used from query struct');
3761
my $query_type = uc $query_struct->{type};
3762
my $tbl_refs = $query_type =~ m/(?:SELECT|DELETE)/ ? 'from'
3763
: $query_type =~ m/(?:INSERT|REPLACE)/ ? 'into'
3764
: $query_type =~ m/UPDATE/ ? 'tables'
3765
: die "Cannot find table references for $query_type queries";
3766
my $tables = $query_struct->{$tbl_refs};
3768
if ( !$tables || @$tables == 0 ) {
3769
MKDEBUG && _d("Query does not use any tables");
3771
[ { context => $query_type, table => $self->{constant_data_value} } ]
3776
if ( $query_struct->{where} ) {
3777
$where = $self->_get_tables_used_in_where(
3780
where => $query_struct->{where},
3785
if ( $query_type eq 'UPDATE' && @{$query_struct->{tables}} > 1 ) {
3786
MKDEBUG && _d("Multi-table UPDATE");
3789
foreach my $table ( @$tables ) {
3790
my $table = $self->_qualify_table_name(
3794
tbl => $table->{tbl},
3800
MKDEBUG && _d("Table usage from TLIST:", Dumper($table_usage));
3801
push @join_tables, $table_usage;
3803
if ( $where && $where->{joined_tables} ) {
3804
foreach my $table ( @{$where->{joined_tables}} ) {
3806
context => $query_type,
3809
MKDEBUG && _d("Table usage from WHERE (implicit join):",
3810
Dumper($table_usage));
3811
push @join_tables, $table_usage;
3816
if ( $where && $where->{filter_tables} ) {
3817
foreach my $table ( @{$where->{filter_tables}} ) {
3822
MKDEBUG && _d("Table usage from WHERE:", Dumper($table_usage));
3823
push @where_tables, $table_usage;
3827
my $set_tables = $self->_get_tables_used_in_set(
3830
set => $query_struct->{set},
3832
foreach my $table ( @$set_tables ) {
3834
{ # the written table
3835
context => 'UPDATE',
3836
table => $table->{table},
3838
{ # source of data written to the written table
3839
context => 'SELECT',
3840
table => $table->{value},
3843
MKDEBUG && _d("Table usage from UPDATE SET:", Dumper(\@table_usage));
3844
push @tables_used, [
3850
} # multi-table UPDATE
3852
if ( $query_type eq 'SELECT' ) {
3853
my $clist_tables = $self->_get_tables_used_in_columns(
3856
columns => $query_struct->{columns},
3858
foreach my $table ( @$clist_tables ) {
3860
context => 'SELECT',
3863
MKDEBUG && _d("Table usage from CLIST:", Dumper($table_usage));
3864
push @{$tables_used[0]}, $table_usage;
3868
if ( @$tables > 1 || $query_type ne 'SELECT' ) {
3869
my $default_context = @$tables > 1 ? 'TLIST' : $query_type;
3870
foreach my $table ( @$tables ) {
3871
my $qualified_table = $self->_qualify_table_name(
3875
tbl => $table->{tbl},
3878
my $context = $default_context;
3879
if ( $table->{join} && $table->{join}->{condition} ) {
3881
if ( $table->{join}->{condition} eq 'using' ) {
3882
MKDEBUG && _d("Table joined with USING condition");
3883
my $joined_table = $self->_qualify_table_name(
3886
tbl => $table->{join}->{to},
3888
$self->_change_context(
3890
table => $joined_table,
3891
tables_used => $tables_used[0],
3892
old_context => 'TLIST',
3893
new_context => 'JOIN',
3896
elsif ( $table->{join}->{condition} eq 'on' ) {
3897
MKDEBUG && _d("Table joined with ON condition");
3898
my $on_tables = $self->_get_tables_used_in_where(
3901
where => $table->{join}->{where},
3902
clause => 'JOIN condition', # just for debugging
3904
MKDEBUG && _d("JOIN ON tables:", Dumper($on_tables));
3905
foreach my $joined_table ( @{$on_tables->{joined_tables}} ) {
3906
$self->_change_context(
3908
table => $joined_table,
3909
tables_used => $tables_used[0],
3910
old_context => 'TLIST',
3911
new_context => 'JOIN',
3916
warn "Unknown JOIN condition: $table->{join}->{condition}";
3921
context => $context,
3922
table => $qualified_table,
3924
MKDEBUG && _d("Table usage from TLIST:", Dumper($table_usage));
3925
push @{$tables_used[0]}, $table_usage;
3929
if ( $where && $where->{joined_tables} ) {
3930
foreach my $joined_table ( @{$where->{joined_tables}} ) {
3931
MKDEBUG && _d("Table joined implicitly in WHERE:", $joined_table);
3932
$self->_change_context(
3934
table => $joined_table,
3935
tables_used => $tables_used[0],
3936
old_context => 'TLIST',
3937
new_context => 'JOIN',
3942
if ( $query_type =~ m/(?:INSERT|REPLACE)/ ) {
3943
if ( $query_struct->{select} ) {
3944
MKDEBUG && _d("Getting tables used in INSERT-SELECT");
3945
my $select_tables = $self->_get_tables_used_from_query_struct(
3947
query_struct => $query_struct->{select},
3949
push @{$tables_used[0]}, @{$select_tables->[0]};
3953
context => 'SELECT',
3954
table => $self->{constant_data_value},
3956
MKDEBUG && _d("Table usage from SET/VALUES:", Dumper($table_usage));
3957
push @{$tables_used[0]}, $table_usage;
3960
elsif ( $query_type eq 'UPDATE' ) {
3961
my $set_tables = $self->_get_tables_used_in_set(
3964
set => $query_struct->{set},
3966
foreach my $table ( @$set_tables ) {
3968
context => 'SELECT',
3969
table => $table->{value_is_table} ? $table->{table}
3970
: $self->{constant_data_value},
3972
MKDEBUG && _d("Table usage from SET:", Dumper($table_usage));
3973
push @{$tables_used[0]}, $table_usage;
3977
if ( $where && $where->{filter_tables} ) {
3978
foreach my $table ( @{$where->{filter_tables}} ) {
3983
MKDEBUG && _d("Table usage from WHERE:", Dumper($table_usage));
3984
push @{$tables_used[0]}, $table_usage;
3989
return \@tables_used;
3992
sub _get_tables_used_in_columns {
3993
my ( $self, %args ) = @_;
3994
my @required_args = qw(tables columns);
3995
foreach my $arg ( @required_args ) {
3996
die "I need a $arg argument" unless $args{$arg};
3998
my ($tables, $columns) = @args{@required_args};
4000
MKDEBUG && _d("Getting tables used in CLIST");
4003
if ( @$tables == 1 ) {
4004
MKDEBUG && _d("Single table SELECT:", $tables->[0]->{tbl});
4005
my $table = $self->_qualify_table_name(
4007
db => $tables->[0]->{db},
4008
tbl => $tables->[0]->{tbl},
4012
elsif ( @$columns == 1 && $columns->[0]->{col} eq '*' ) {
4013
if ( $columns->[0]->{tbl} ) {
4014
MKDEBUG && _d("SELECT all columns from one table");
4015
my $table = $self->_qualify_table_name(
4017
db => $columns->[0]->{db},
4018
tbl => $columns->[0]->{tbl},
4023
MKDEBUG && _d("SELECT all columns from all tables");
4024
foreach my $table ( @$tables ) {
4025
my $table = $self->_qualify_table_name(
4029
tbl => $table->{tbl},
4031
push @tables, $table;
4036
MKDEBUG && _d(scalar @$tables, "table SELECT");
4039
foreach my $column ( @$columns ) {
4040
next COLUMN unless $column->{tbl};
4041
my $table = $self->_qualify_table_name(
4043
db => $column->{db},
4044
tbl => $column->{tbl},
4046
push @tables, $table if $table && !$seen{$table}++;
4053
sub _get_tables_used_in_where {
4054
my ( $self, %args ) = @_;
4055
my @required_args = qw(tables where);
4056
foreach my $arg ( @required_args ) {
4057
die "I need a $arg argument" unless $args{$arg};
4059
my ($tables, $where) = @args{@required_args};
4060
my $sql_parser = $self->{SQLParser};
4062
MKDEBUG && _d("Getting tables used in", $args{clause} || 'WHERE');
4067
foreach my $cond ( @$where ) {
4068
MKDEBUG && _d("Condition:", Dumper($cond));
4069
my @tables; # tables used in this condition
4071
my $is_constant = 0;
4072
my $unknown_table = 0;
4074
foreach my $arg ( qw(left_arg right_arg) ) {
4075
if ( !defined $cond->{$arg} ) {
4076
MKDEBUG && _d($arg, "is a constant value");
4081
if ( $sql_parser->is_identifier($cond->{$arg}) ) {
4082
MKDEBUG && _d($arg, "is an identifier");
4083
my $ident_struct = $sql_parser->parse_identifier(
4088
if ( !$ident_struct->{tbl} ) {
4089
if ( @$tables == 1 ) {
4090
MKDEBUG && _d("Condition column is not table-qualified; ",
4091
"using query's only table:", $tables->[0]->{tbl});
4092
$ident_struct->{tbl} = $tables->[0]->{tbl};
4095
MKDEBUG && _d("Condition column is not table-qualified and",
4096
"query has multiple tables; cannot determine its table");
4097
if ( $cond->{$arg} !~ m/\w+\(/ # not a function
4098
&& $cond->{$arg} !~ m/^[\d.]+$/) { # not a number
4105
if ( !$ident_struct->{db} && @$tables == 1 && $tables->[0]->{db} ) {
4106
MKDEBUG && _d("Condition column is not database-qualified; ",
4107
"using its table's database:", $tables->[0]->{db});
4108
$ident_struct->{db} = $tables->[0]->{db};
4111
my $table = $self->_qualify_table_name(
4116
push @tables, $table;
4120
MKDEBUG && _d($arg, "is a value");
4125
if ( $is_constant || $n_vals == 2 ) {
4126
MKDEBUG && _d("Condition is a constant or two values");
4127
$filter_tables{$self->{constant_data_value}} = undef;
4130
if ( @tables == 1 ) {
4131
if ( $unknown_table ) {
4132
MKDEBUG && _d("Condition joins table",
4133
$tables[0], "to column from unknown table");
4134
$join_tables{$tables[0]} = undef;
4137
MKDEBUG && _d("Condition filters table", $tables[0]);
4138
$filter_tables{$tables[0]} = undef;
4141
elsif ( @tables == 2 ) {
4142
MKDEBUG && _d("Condition joins tables",
4143
$tables[0], "and", $tables[1]);
4144
$join_tables{$tables[0]} = undef;
4145
$join_tables{$tables[1]} = undef;
4151
filter_tables => [ sort keys %filter_tables ],
4152
joined_tables => [ sort keys %join_tables ],
4156
sub _get_tables_used_in_set {
4157
my ( $self, %args ) = @_;
4158
my @required_args = qw(tables set);
4159
foreach my $arg ( @required_args ) {
4160
die "I need a $arg argument" unless $args{$arg};
4162
my ($tables, $set) = @args{@required_args};
4163
my $sql_parser = $self->{SQLParser};
4165
MKDEBUG && _d("Getting tables used in SET");
4168
if ( @$tables == 1 ) {
4169
my $table = $self->_qualify_table_name(
4171
db => $tables->[0]->{db},
4172
tbl => $tables->[0]->{tbl},
4176
value => $self->{constant_data_value}
4180
foreach my $cond ( @$set ) {
4181
next unless $cond->{tbl};
4182
my $table = $self->_qualify_table_name(
4185
tbl => $cond->{tbl},
4188
my $value = $self->{constant_data_value};
4189
my $value_is_table = 0;
4190
if ( $sql_parser->is_identifier($cond->{value}) ) {
4191
my $ident_struct = $sql_parser->parse_identifier(
4195
$value_is_table = 1;
4196
$value = $self->_qualify_table_name(
4198
db => $ident_struct->{db},
4199
tbl => $ident_struct->{tbl},
4206
value_is_table => $value_is_table,
4214
sub _get_real_table_name {
4215
my ( $self, %args ) = @_;
4216
my @required_args = qw(tables name);
4217
foreach my $arg ( @required_args ) {
4218
die "I need a $arg argument" unless $args{$arg};
4220
my ($tables, $name) = @args{@required_args};
4222
foreach my $table ( @$tables ) {
4223
if ( $table->{tbl} eq $name
4224
|| ($table->{alias} || "") eq $name ) {
4225
MKDEBUG && _d("Real table name for", $name, "is", $table->{tbl});
4226
return $table->{tbl};
4229
MKDEBUG && _d("Table", $name, "does not exist in query");
4233
sub _qualify_table_name {
4234
my ( $self, %args) = @_;
4235
my @required_args = qw(tables tbl);
4236
foreach my $arg ( @required_args ) {
4237
die "I need a $arg argument" unless $args{$arg};
4239
my ($tables, $table) = @args{@required_args};
4241
MKDEBUG && _d("Qualifying table with database:", $table);
4243
my ($tbl, $db) = reverse split /[.]/, $table;
4245
$tbl = $self->_get_real_table_name(%args, name => $tbl);
4246
return unless $tbl; # shouldn't happen
4251
$db_tbl = "$db.$tbl";
4253
elsif ( $args{db} ) {
4254
$db_tbl = "$args{db}.$tbl";
4257
foreach my $tbl_info ( @$tables ) {
4258
if ( ($tbl_info->{tbl} eq $tbl) && $tbl_info->{db} ) {
4259
$db_tbl = "$tbl_info->{db}.$tbl";
4264
if ( !$db_tbl && $args{default_db} ) {
4265
$db_tbl = "$args{default_db}.$tbl";
4269
MKDEBUG && _d("Cannot determine database for table", $tbl);
4274
MKDEBUG && _d("Table qualified with database:", $db_tbl);
4278
sub _change_context {
4279
my ( $self, %args) = @_;
4280
my @required_args = qw(tables_used table old_context new_context tables);
4281
foreach my $arg ( @required_args ) {
4282
die "I need a $arg argument" unless $args{$arg};
4284
my ($tables_used, $table, $old_context, $new_context) = @args{@required_args};
4285
MKDEBUG && _d("Change context of table", $table, "from", $old_context,
4286
"to", $new_context);
4287
foreach my $used_table ( @$tables_used ) {
4288
if ( $used_table->{table} eq $table
4289
&& $used_table->{context} eq $old_context ) {
4290
$used_table->{context} = $new_context;
4294
MKDEBUG && _d("Table", $table, "is not used; cannot set its context");
4299
my ($package, undef, $line) = caller 0;
4300
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4301
map { defined $_ ? $_ : 'undef' }
4303
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4309
# ###########################################################################
4310
# End TableUsage package
4311
# ###########################################################################
4313
# ###########################################################################
4314
# Daemon package 6255
4315
# This package is a copy without comments from the original. The original
4316
# with comments and its test file can be found in the SVN repository at,
4317
# trunk/common/Daemon.pm
4318
# trunk/common/t/Daemon.t
4319
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
4320
# ###########################################################################
4325
use warnings FATAL => 'all';
4327
use POSIX qw(setsid);
4328
use English qw(-no_match_vars);
4330
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
4333
my ( $class, %args ) = @_;
4334
foreach my $arg ( qw(o) ) {
4335
die "I need a $arg argument" unless $args{$arg};
4340
log_file => $o->has('log') ? $o->get('log') : undef,
4341
PID_file => $o->has('pid') ? $o->get('pid') : undef,
4344
check_PID_file(undef, $self->{PID_file});
4346
MKDEBUG && _d('Daemonized child will log to', $self->{log_file});
4347
return bless $self, $class;
4353
MKDEBUG && _d('About to fork and daemonize');
4354
defined (my $pid = fork()) or die "Cannot fork: $OS_ERROR";
4356
MKDEBUG && _d('I am the parent and now I die');
4360
$self->{PID_owner} = $PID;
4363
POSIX::setsid() or die "Cannot start a new session: $OS_ERROR";
4364
chdir '/' or die "Cannot chdir to /: $OS_ERROR";
4366
$self->_make_PID_file();
4368
$OUTPUT_AUTOFLUSH = 1;
4372
open STDIN, '/dev/null'
4373
or die "Cannot reopen STDIN to /dev/null: $OS_ERROR";
4376
if ( $self->{log_file} ) {
4378
open STDOUT, '>>', $self->{log_file}
4379
or die "Cannot open log file $self->{log_file}: $OS_ERROR";
4382
open STDERR, ">&STDOUT"
4383
or die "Cannot dupe STDERR to STDOUT: $OS_ERROR";
4388
open STDOUT, '>', '/dev/null'
4389
or die "Cannot reopen STDOUT to /dev/null: $OS_ERROR";
4393
open STDERR, '>', '/dev/null'
4394
or die "Cannot reopen STDERR to /dev/null: $OS_ERROR";
4398
MKDEBUG && _d('I am the child and now I live daemonized');
4402
sub check_PID_file {
4403
my ( $self, $file ) = @_;
4404
my $PID_file = $self ? $self->{PID_file} : $file;
4405
MKDEBUG && _d('Checking PID file', $PID_file);
4406
if ( $PID_file && -f $PID_file ) {
4408
eval { chomp($pid = `cat $PID_file`); };
4409
die "Cannot cat $PID_file: $OS_ERROR" if $EVAL_ERROR;
4410
MKDEBUG && _d('PID file exists; it contains PID', $pid);
4412
my $pid_is_alive = kill 0, $pid;
4413
if ( $pid_is_alive ) {
4414
die "The PID file $PID_file already exists "
4415
. " and the PID that it contains, $pid, is running";
4418
warn "Overwriting PID file $PID_file because the PID that it "
4419
. "contains, $pid, is not running";
4423
die "The PID file $PID_file already exists but it does not "
4428
MKDEBUG && _d('No PID file');
4435
if ( exists $self->{child} ) {
4436
die "Do not call Daemon::make_PID_file() for daemonized scripts";
4438
$self->_make_PID_file();
4439
$self->{PID_owner} = $PID;
4443
sub _make_PID_file {
4446
my $PID_file = $self->{PID_file};
4448
MKDEBUG && _d('No PID file to create');
4452
$self->check_PID_file();
4454
open my $PID_FH, '>', $PID_file
4455
or die "Cannot open PID file $PID_file: $OS_ERROR";
4457
or die "Cannot print to PID file $PID_file: $OS_ERROR";
4459
or die "Cannot close PID file $PID_file: $OS_ERROR";
4461
MKDEBUG && _d('Created PID file:', $self->{PID_file});
4465
sub _remove_PID_file {
4467
if ( $self->{PID_file} && -f $self->{PID_file} ) {
4468
unlink $self->{PID_file}
4469
or warn "Cannot remove PID file $self->{PID_file}: $OS_ERROR";
4470
MKDEBUG && _d('Removed PID file');
4473
MKDEBUG && _d('No PID to remove');
4481
$self->_remove_PID_file() if ($self->{PID_owner} || 0) == $PID;
4487
my ($package, undef, $line) = caller 0;
4488
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4489
map { defined $_ ? $_ : 'undef' }
4491
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4496
# ###########################################################################
4497
# End Daemon package
4498
# ###########################################################################
4500
# ###########################################################################
4501
# Runtime package 7221
4502
# This package is a copy without comments from the original. The original
4503
# with comments and its test file can be found in the SVN repository at,
4504
# trunk/common/Runtime.pm
4505
# trunk/common/t/Runtime.t
4506
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
4507
# ###########################################################################
4512
use warnings FATAL => 'all';
4513
use English qw(-no_match_vars);
4514
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
4517
my ( $class, %args ) = @_;
4518
my @required_args = qw(now);
4519
foreach my $arg ( @required_args ) {
4520
die "I need a $arg argument" unless $args{$arg};
4523
if ( ($args{runtime} || 0) < 0 ) {
4524
die "runtime argument must be greater than zero"
4529
start_time => undef,
4535
return bless $self, $class;
4539
my ( $self, %args ) = @_;
4541
if ( $self->{stop} ) {
4542
MKDEBUG && _d("No time left because stop was called");
4546
my $now = $self->{now}->(%args);
4547
MKDEBUG && _d("Current time:", $now);
4549
if ( !defined $self->{start_time} ) {
4550
$self->{start_time} = $now;
4553
return unless defined $now;
4555
my $runtime = $self->{runtime};
4556
return unless defined $runtime;
4558
if ( !$self->{end_time} ) {
4559
$self->{end_time} = $now + $runtime;
4560
MKDEBUG && _d("End time:", $self->{end_time});
4563
$self->{time_left} = $self->{end_time} - $now;
4564
MKDEBUG && _d("Time left:", $self->{time_left});
4565
return $self->{time_left};
4569
my ( $self, %args ) = @_;
4570
my $time_left = $self->time_left(%args);
4571
return 1 if !defined $time_left; # run forever
4572
return $time_left <= 0 ? 0 : 1; # <=0s means runtime has elapsed
4576
my ( $self, %args ) = @_;
4578
my $start_time = $self->{start_time};
4579
return 0 unless $start_time;
4581
my $now = $self->{now}->(%args);
4582
MKDEBUG && _d("Current time:", $now);
4584
my $time_elapsed = $now - $start_time;
4585
MKDEBUG && _d("Time elapsed:", $time_elapsed);
4586
if ( $time_elapsed < 0 ) {
4587
warn "Current time $now is earlier than start time $start_time";
4589
return $time_elapsed;
4594
$self->{start_time} = undef;
4595
$self->{end_time} = undef;
4596
$self->{time_left} = undef;
4598
MKDEBUG && _d("Reset runtime");
4615
my ($package, undef, $line) = caller 0;
4616
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4617
map { defined $_ ? $_ : 'undef' }
4619
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4624
# ###########################################################################
4625
# End Runtime package
4626
# ###########################################################################
4628
# ###########################################################################
4629
# Progress package 7096
4630
# This package is a copy without comments from the original. The original
4631
# with comments and its test file can be found in the SVN repository at,
4632
# trunk/common/Progress.pm
4633
# trunk/common/t/Progress.t
4634
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
4635
# ###########################################################################
4639
use warnings FATAL => 'all';
4641
use English qw(-no_match_vars);
4643
$Data::Dumper::Indent = 1;
4644
$Data::Dumper::Sortkeys = 1;
4645
$Data::Dumper::Quotekeys = 0;
4647
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
4650
my ( $class, %args ) = @_;
4651
foreach my $arg (qw(jobsize)) {
4652
die "I need a $arg argument" unless defined $args{$arg};
4654
if ( (!$args{report} || !$args{interval}) ) {
4655
if ( $args{spec} && @{$args{spec}} == 2 ) {
4656
@args{qw(report interval)} = @{$args{spec}};
4659
die "I need either report and interval arguments, or a spec";
4663
my $name = $args{name} || "Progress";
4664
$args{start} ||= time();
4667
last_reported => $args{start},
4668
fraction => 0, # How complete the job is
4670
my ($fraction, $elapsed, $remaining, $eta) = @_;
4671
printf STDERR "$name: %3d%% %s remain\n",
4673
Transformers::secs_to_time($remaining),
4674
Transformers::ts($eta);
4678
return bless $self, $class;
4682
shift @_ if $_[0] eq 'Progress'; # Permit calling as Progress-> or Progress::
4684
if ( @$spec != 2 ) {
4685
die "spec array requires a two-part argument\n";
4687
if ( $spec->[0] !~ m/^(?:percentage|time|iterations)$/ ) {
4688
die "spec array's first element must be one of "
4689
. "percentage,time,iterations\n";
4691
if ( $spec->[1] !~ m/^\d+$/ ) {
4692
die "spec array's second element must be an integer\n";
4697
my ( $self, $callback ) = @_;
4698
$self->{callback} = $callback;
4702
my ( $self, $start ) = @_;
4703
$self->{start} = $self->{last_reported} = $start || time();
4707
my ( $self, $callback, $now ) = @_;
4708
my $jobsize = $self->{jobsize};
4710
$self->{iterations}++; # How many updates have happened;
4712
if ( $self->{report} eq 'time'
4713
&& $self->{interval} > $now - $self->{last_reported}
4717
elsif ( $self->{report} eq 'iterations'
4718
&& ($self->{iterations} - 1) % $self->{interval} > 0
4722
$self->{last_reported} = $now;
4724
my $completed = $callback->();
4725
$self->{updates}++; # How many times we have run the update callback
4727
return if $completed > $jobsize;
4729
my $fraction = $completed > 0 ? $completed / $jobsize : 0;
4731
if ( $self->{report} eq 'percentage'
4732
&& $self->fraction_modulo($self->{fraction})
4733
>= $self->fraction_modulo($fraction)
4735
$self->{fraction} = $fraction;
4738
$self->{fraction} = $fraction;
4740
my $elapsed = $now - $self->{start};
4743
if ( $completed > 0 && $completed <= $jobsize && $elapsed > 0 ) {
4744
my $rate = $completed / $elapsed;
4746
$remaining = ($jobsize - $completed) / $rate;
4747
$eta = $now + int($remaining);
4750
$self->{callback}->($fraction, $elapsed, $remaining, $eta, $completed);
4753
sub fraction_modulo {
4754
my ( $self, $num ) = @_;
4755
$num *= 100; # Convert from fraction to percentage
4756
return sprintf('%d',
4757
sprintf('%d', $num / $self->{interval}) * $self->{interval});
4761
my ($package, undef, $line) = caller 0;
4762
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4763
map { defined $_ ? $_ : 'undef' }
4765
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4770
# ###########################################################################
4771
# End Progress package
4772
# ###########################################################################
4774
# ###########################################################################
4775
# Pipeline package 7509
4776
# This package is a copy without comments from the original. The original
4777
# with comments and its test file can be found in the SVN repository at,
4778
# trunk/common/Pipeline.pm
4779
# trunk/common/t/Pipeline.t
4780
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
4781
# ###########################################################################
4786
use warnings FATAL => 'all';
4787
use English qw(-no_match_vars);
4788
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
4791
$Data::Dumper::Indent = 1;
4792
$Data::Dumper::Sortkeys = 1;
4793
$Data::Dumper::Quotekeys = 0;
4794
use Time::HiRes qw(time);
4797
my ( $class, %args ) = @_;
4798
my @required_args = qw();
4799
foreach my $arg ( @required_args ) {
4800
die "I need a $arg argument" unless defined $args{$arg};
4805
continue_on_error => 0,
4809
procs => [], # coderefs for pipeline processes
4810
names => [], # names for each ^ pipeline proc
4811
instrumentation => { # keyed on proc index in procs
4818
return bless $self, $class;
4822
my ( $self, %args ) = @_;
4823
my @required_args = qw(process name);
4824
foreach my $arg ( @required_args ) {
4825
die "I need a $arg argument" unless defined $args{$arg};
4827
my ($process, $name) = @args{@required_args};
4829
push @{$self->{procs}}, $process;
4830
push @{$self->{names}}, $name;
4831
if ( $self->{instrument} ) {
4832
$self->{instrumentation}->{$name} = { time => 0, calls => 0 };
4834
MKDEBUG && _d("Added pipeline process", $name);
4841
return @{$self->{names}};
4845
my ( $self, %args ) = @_;
4847
die "Cannot execute pipeline because no process have been added"
4848
unless scalar @{$self->{procs}};
4850
my $oktorun = $args{oktorun};
4851
die "I need an oktorun argument" unless $oktorun;
4852
die '$oktorun argument must be a reference' unless ref $oktorun;
4854
my $pipeline_data = $args{pipeline_data} || {};
4855
$pipeline_data->{oktorun} = $oktorun;
4857
my $stats = $args{stats}; # optional
4859
MKDEBUG && _d("Pipeline starting at", time);
4860
my $instrument = $self->{instrument};
4861
my $processes = $self->{procs};
4863
while ( $$oktorun ) {
4864
my $procno = 0; # so we can see which proc if one causes an error
4868
while ( $procno < scalar @{$self->{procs}} ) {
4869
my $call_start = $instrument ? time : 0;
4871
MKDEBUG && _d("Pipeline process", $self->{names}->[$procno]);
4872
$output = $processes->[$procno]->($pipeline_data);
4874
if ( $instrument ) {
4875
my $call_end = time;
4876
my $call_t = $call_end - $call_start;
4877
$self->{instrumentation}->{$self->{names}->[$procno]}->{time} += $call_t;
4878
$self->{instrumentation}->{$self->{names}->[$procno]}->{count}++;
4879
$self->{instrumentation}->{Pipeline}->{time} += $call_t;
4880
$self->{instrumentation}->{Pipeline}->{count}++;
4883
MKDEBUG && _d("Pipeline restarting early after",
4884
$self->{names}->[$procno]);
4886
$stats->{"pipeline_restarted_after_"
4887
.$self->{names}->[$procno]}++;
4889
last PIPELINE_PROCESS;
4894
if ( $EVAL_ERROR ) {
4895
warn "Pipeline process $procno ("
4896
. ($self->{names}->[$procno] || "")
4897
. ") caused an error: $EVAL_ERROR";
4898
die $EVAL_ERROR unless $self->{continue_on_error};
4902
MKDEBUG && _d("Pipeline stopped at", time);
4906
sub instrumentation {
4908
return $self->{instrumentation};
4913
foreach my $proc_name ( @{$self->{names}} ) {
4914
if ( exists $self->{instrumentation}->{$proc_name} ) {
4915
$self->{instrumentation}->{$proc_name}->{calls} = 0;
4916
$self->{instrumentation}->{$proc_name}->{time} = 0;
4919
$self->{instrumentation}->{Pipeline}->{calls} = 0;
4920
$self->{instrumentation}->{Pipeline}->{time} = 0;
4925
my ($package, undef, $line) = caller 0;
4926
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
4927
map { defined $_ ? $_ : 'undef' }
4929
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
4934
# ###########################################################################
4935
# End Pipeline package
4936
# ###########################################################################
4938
# ###########################################################################
4939
# Quoter package 6850
4940
# This package is a copy without comments from the original. The original
4941
# with comments and its test file can be found in the SVN repository at,
4942
# trunk/common/Quoter.pm
4943
# trunk/common/t/Quoter.t
4944
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
4945
# ###########################################################################
4950
use warnings FATAL => 'all';
4951
use English qw(-no_match_vars);
4953
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
4956
my ( $class, %args ) = @_;
4957
return bless {}, $class;
4961
my ( $self, @vals ) = @_;
4962
foreach my $val ( @vals ) {
4965
return join('.', map { '`' . $_ . '`' } @vals);
4969
my ( $self, $val ) = @_;
4971
return 'NULL' unless defined $val; # undef = NULL
4972
return "''" if $val eq ''; # blank string = ''
4973
return $val if $val =~ m/^0x[0-9a-fA-F]+$/; # hex data
4975
$val =~ s/(['\\])/\\$1/g;
4980
my ( $self, $db_tbl, $default_db ) = @_;
4982
my ( $db, $tbl ) = split(/[.]/, $db_tbl);
4991
my ( $self, $like ) = @_;
4992
return unless $like;
4993
$like =~ s/([%_])/\\$1/g;
4998
my ( $self, $default_db, $db_tbl ) = @_;
4999
return unless $db_tbl;
5000
my ($db, $tbl) = split(/[.]/, $db_tbl);
5005
$db = "`$db`" if $db && $db !~ m/^`/;
5006
$tbl = "`$tbl`" if $tbl && $tbl !~ m/^`/;
5007
return $db ? "$db.$tbl" : $tbl;
5012
# ###########################################################################
5013
# End Quoter package
5014
# ###########################################################################
5016
# ###########################################################################
5017
# TableParser package 7156
5018
# This package is a copy without comments from the original. The original
5019
# with comments and its test file can be found in the SVN repository at,
5020
# trunk/common/TableParser.pm
5021
# trunk/common/t/TableParser.t
5022
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
5023
# ###########################################################################
5025
package TableParser;
5028
use warnings FATAL => 'all';
5029
use English qw(-no_match_vars);
5031
$Data::Dumper::Indent = 1;
5032
$Data::Dumper::Sortkeys = 1;
5033
$Data::Dumper::Quotekeys = 0;
5035
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
5038
my ( $class, %args ) = @_;
5039
my @required_args = qw(Quoter);
5040
foreach my $arg ( @required_args ) {
5041
die "I need a $arg argument" unless $args{$arg};
5043
my $self = { %args };
5044
return bless $self, $class;
5048
my ( $self, $ddl, $opts ) = @_;
5050
if ( ref $ddl eq 'ARRAY' ) {
5051
if ( lc $ddl->[0] eq 'table' ) {
5061
if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
5062
die "Cannot parse table definition; is ANSI quoting "
5063
. "enabled or SQL_QUOTE_SHOW_CREATE disabled?";
5066
my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/;
5067
(undef, $name) = $self->{Quoter}->split_unquote($name) if $name;
5069
$ddl =~ s/(`[^`]+`)/\L$1/g;
5071
my $engine = $self->get_engine($ddl);
5073
my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
5074
my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
5075
MKDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
5078
@def_for{@cols} = @defs;
5081
my (%type_for, %is_nullable, %is_numeric, %is_autoinc);
5082
foreach my $col ( @cols ) {
5083
my $def = $def_for{$col};
5084
my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
5085
die "Can't determine column type for $def" unless $type;
5086
$type_for{$col} = $type;
5087
if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
5089
$is_numeric{$col} = 1;
5091
if ( $def !~ m/NOT NULL/ ) {
5093
$is_nullable{$col} = 1;
5095
$is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
5098
my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable);
5100
my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/;
5105
col_posn => { map { $cols[$_] => $_ } 0..$#cols },
5106
is_col => { map { $_ => 1 } @cols },
5107
null_cols => \@null,
5108
is_nullable => \%is_nullable,
5109
is_autoinc => \%is_autoinc,
5110
clustered_key => $clustered_key,
5113
numeric_cols => \@nums,
5114
is_numeric => \%is_numeric,
5116
type_for => \%type_for,
5117
charset => $charset,
5122
my ( $self, $tbl ) = @_;
5126
(($a ne 'PRIMARY') <=> ($b ne 'PRIMARY'))
5127
|| ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} )
5128
|| ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} )
5129
|| ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) )
5132
$tbl->{keys}->{$_}->{type} eq 'BTREE'
5134
sort keys %{$tbl->{keys}};
5136
MKDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes));
5140
sub find_best_index {
5141
my ( $self, $tbl, $index ) = @_;
5144
($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
5148
die "Index '$index' does not exist in table";
5151
($best) = $self->sort_indexes($tbl);
5154
MKDEBUG && _d('Best index found is', $best);
5158
sub find_possible_keys {
5159
my ( $self, $dbh, $database, $table, $quoter, $where ) = @_;
5160
return () unless $where;
5161
my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table)
5162
. ' WHERE ' . $where;
5163
MKDEBUG && _d($sql);
5164
my $expl = $dbh->selectrow_hashref($sql);
5165
$expl = { map { lc($_) => $expl->{$_} } keys %$expl };
5166
if ( $expl->{possible_keys} ) {
5167
MKDEBUG && _d('possible_keys =', $expl->{possible_keys});
5168
my @candidates = split(',', $expl->{possible_keys});
5169
my %possible = map { $_ => 1 } @candidates;
5170
if ( $expl->{key} ) {
5171
MKDEBUG && _d('MySQL chose', $expl->{key});
5172
unshift @candidates, grep { $possible{$_} } split(',', $expl->{key});
5173
MKDEBUG && _d('Before deduping:', join(', ', @candidates));
5175
@candidates = grep { !$seen{$_}++ } @candidates;
5177
MKDEBUG && _d('Final list:', join(', ', @candidates));
5181
MKDEBUG && _d('No keys in possible_keys');
5187
my ( $self, %args ) = @_;
5188
my @required_args = qw(dbh db tbl);
5189
foreach my $arg ( @required_args ) {
5190
die "I need a $arg argument" unless $args{$arg};
5192
my ($dbh, $db, $tbl) = @args{@required_args};
5193
my $q = $self->{Quoter};
5194
my $db_tbl = $q->quote($db, $tbl);
5195
MKDEBUG && _d('Checking', $db_tbl);
5197
my $sql = "SHOW TABLES FROM " . $q->quote($db)
5198
. ' LIKE ' . $q->literal_like($tbl);
5199
MKDEBUG && _d($sql);
5202
$row = $dbh->selectrow_arrayref($sql);
5204
if ( $EVAL_ERROR ) {
5205
MKDEBUG && _d($EVAL_ERROR);
5208
if ( !$row->[0] || $row->[0] ne $tbl ) {
5209
MKDEBUG && _d('Table does not exist');
5213
MKDEBUG && _d('Table exists; no privs to check');
5214
return 1 unless $args{all_privs};
5216
$sql = "SHOW FULL COLUMNS FROM $db_tbl";
5217
MKDEBUG && _d($sql);
5219
$row = $dbh->selectrow_hashref($sql);
5221
if ( $EVAL_ERROR ) {
5222
MKDEBUG && _d($EVAL_ERROR);
5225
if ( !scalar keys %$row ) {
5226
MKDEBUG && _d('Table has no columns:', Dumper($row));
5229
my $privs = $row->{privileges} || $row->{Privileges};
5231
$sql = "DELETE FROM $db_tbl LIMIT 0";
5232
MKDEBUG && _d($sql);
5236
my $can_delete = $EVAL_ERROR ? 0 : 1;
5238
MKDEBUG && _d('User privs on', $db_tbl, ':', $privs,
5239
($can_delete ? 'delete' : ''));
5241
if ( !($privs =~ m/select/ && $privs =~ m/insert/ && $privs =~ m/update/
5243
MKDEBUG && _d('User does not have all privs');
5247
MKDEBUG && _d('User has all privs');
5252
my ( $self, $ddl, $opts ) = @_;
5253
my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
5254
MKDEBUG && _d('Storage engine:', $engine);
5255
return $engine || undef;
5259
my ( $self, $ddl, $opts, $is_nullable ) = @_;
5260
my $engine = $self->get_engine($ddl);
5262
my $clustered_key = undef;
5265
foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) {
5267
next KEY if $key =~ m/FOREIGN/;
5270
MKDEBUG && _d('Parsed key:', $key_ddl);
5272
if ( $engine !~ m/MEMORY|HEAP/ ) {
5273
$key =~ s/USING HASH/USING BTREE/;
5276
my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
5277
my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
5278
$type = $type || $special || 'BTREE';
5279
if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000'
5280
&& $engine =~ m/HEAP|MEMORY/i )
5282
$type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP
5285
my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
5286
my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
5289
foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) {
5290
my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/;
5292
push @col_prefixes, $prefix;
5296
MKDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols));
5303
col_prefixes => \@col_prefixes,
5304
is_unique => $unique,
5305
is_nullable => scalar(grep { $is_nullable->{$_} } @cols),
5306
is_col => { map { $_ => 1 } @cols },
5310
if ( $engine =~ m/InnoDB/i && !$clustered_key ) {
5311
my $this_key = $keys->{$name};
5312
if ( $this_key->{name} eq 'PRIMARY' ) {
5313
$clustered_key = 'PRIMARY';
5315
elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) {
5316
$clustered_key = $this_key->{name};
5318
MKDEBUG && $clustered_key && _d('This key is the clustered key');
5322
return $keys, $clustered_key;
5326
my ( $self, $ddl, $opts ) = @_;
5330
$ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg )
5332
my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/;
5333
my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/;
5334
my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/;
5336
if ( $parent !~ m/\./ && $opts->{database} ) {
5337
$parent = "`$opts->{database}`.$parent";
5343
cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ],
5344
parent_tbl => $parent,
5345
parent_colnames=> $parent_cols,
5346
parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ],
5354
sub remove_auto_increment {
5355
my ( $self, $ddl ) = @_;
5356
$ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m;
5360
sub remove_secondary_indexes {
5361
my ( $self, $ddl ) = @_;
5362
my $sec_indexes_ddl;
5363
my $tbl_struct = $self->parse($ddl);
5365
if ( ($tbl_struct->{engine} || '') =~ m/InnoDB/i ) {
5366
my $clustered_key = $tbl_struct->{clustered_key};
5367
$clustered_key ||= '';
5369
my @sec_indexes = map {
5370
my $key_def = $_->{ddl};
5371
$key_def =~ s/([\(\)])/\\$1/g;
5372
$ddl =~ s/\s+$key_def//i;
5374
my $key_ddl = "ADD $_->{ddl}";
5375
$key_ddl .= ',' unless $key_ddl =~ m/,$/;
5378
grep { $_->{name} ne $clustered_key }
5379
values %{$tbl_struct->{keys}};
5380
MKDEBUG && _d('Secondary indexes:', Dumper(\@sec_indexes));
5382
if ( @sec_indexes ) {
5383
$sec_indexes_ddl = join(' ', @sec_indexes);
5384
$sec_indexes_ddl =~ s/,$//;
5387
$ddl =~ s/,(\n\) )/$1/s;
5390
MKDEBUG && _d('Not removing secondary indexes from',
5391
$tbl_struct->{engine}, 'table');
5394
return $ddl, $sec_indexes_ddl, $tbl_struct;
5398
my ($package, undef, $line) = caller 0;
5399
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
5400
map { defined $_ ? $_ : 'undef' }
5402
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
5407
# ###########################################################################
5408
# End TableParser package
5409
# ###########################################################################
5411
# ###########################################################################
5412
# MysqldumpParser package 7500
5413
# This package is a copy without comments from the original. The original
5414
# with comments and its test file can be found in the SVN repository at,
5415
# trunk/common/MysqldumpParser.pm
5416
# trunk/common/t/MysqldumpParser.t
5417
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
5418
# ###########################################################################
5419
package MysqldumpParser;
5423
use warnings FATAL => 'all';
5425
use English qw(-no_match_vars);
5427
$Data::Dumper::Indent = 1;
5428
$Data::Dumper::Sortkeys = 1;
5429
$Data::Dumper::Quotekeys = 0;
5431
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
5433
my $open_comment = qr{/\*!\d{5} };
5436
my ( $class, %args ) = @_;
5437
my @required_args = qw();
5438
foreach my $arg ( @required_args ) {
5439
die "I need a $arg argument" unless $args{$arg};
5444
return bless $self, $class;
5447
sub parse_create_tables {
5448
my ( $self, %args ) = @_;
5449
my @required_args = qw(file);
5450
foreach my $arg ( @required_args ) {
5451
die "I need a $arg argument" unless $args{$arg};
5453
my ($file) = @args{@required_args};
5455
MKDEBUG && _d('Parsing CREATE TABLE from', $file);
5456
open my $fh, '<', $file
5457
or die "Cannot open $file: $OS_ERROR";
5459
local $INPUT_RECORD_SEPARATOR = '';
5464
while (defined(my $chunk = <$fh>)) {
5465
MKDEBUG && _d('db:', $db, 'chunk:', $chunk);
5466
if ($chunk =~ m/Database: (\S+)/) {
5468
$db =~ s/^`//; # strip leading `
5469
$db =~ s/`$//; # and trailing `
5470
MKDEBUG && _d('New db:', $db);
5472
elsif ($chunk =~ m/CREATE TABLE/) {
5473
MKDEBUG && _d('Chunk has CREATE TABLE');
5475
if ($chunk =~ m/DROP VIEW IF EXISTS/) {
5476
MKDEBUG && _d('Table is a VIEW, skipping');
5481
= $chunk =~ m/^(?:$open_comment)?(CREATE TABLE.+?;)$/ms;
5482
if ( !$create_table ) {
5483
warn "Failed to parse CREATE TABLE from\n" . $chunk;
5486
$create_table =~ s/ \*\/;\Z/;/; # remove end of version comment
5488
push @{$schema{$db}}, $create_table;
5491
MKDEBUG && _d('Chunk has other data, ignoring');
5501
my ($package, undef, $line) = caller 0;
5502
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
5503
map { defined $_ ? $_ : 'undef' }
5505
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
5511
# ###########################################################################
5512
# End MysqldumpParser package
5513
# ###########################################################################
5515
# ###########################################################################
5516
# SchemaQualifier package 7499
5517
# This package is a copy without comments from the original. The original
5518
# with comments and its test file can be found in the SVN repository at,
5519
# trunk/common/SchemaQualifier.pm
5520
# trunk/common/t/SchemaQualifier.t
5521
# See http://code.google.com/p/maatkit/wiki/Developers for more information.
5522
# ###########################################################################
5523
package SchemaQualifier;
5527
use warnings FATAL => 'all';
5529
use English qw(-no_match_vars);
5531
$Data::Dumper::Indent = 1;
5532
$Data::Dumper::Sortkeys = 1;
5533
$Data::Dumper::Quotekeys = 0;
5535
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
5538
my ( $class, %args ) = @_;
5539
my @required_args = qw(TableParser Quoter);
5540
foreach my $arg ( @required_args ) {
5541
die "I need a $arg argument" unless $args{$arg};
5545
schema => {}, # db > tbl > col
5546
duplicate_column_name => {},
5547
duplicate_table_name => {},
5549
return bless $self, $class;
5554
return $self->{schema};
5557
sub get_duplicate_column_names {
5559
return keys %{$self->{duplicate_column_name}};
5562
sub get_duplicate_table_names {
5564
return keys %{$self->{duplicate_table_name}};
5567
sub set_schema_from_mysqldump {
5568
my ( $self, %args ) = @_;
5569
my @required_args = qw(dump);
5570
foreach my $arg ( @required_args ) {
5571
die "I need a $arg argument" unless $args{$arg};
5573
my ($dump) = @args{@required_args};
5575
my $schema = $self->{schema};
5576
my $tp = $self->{TableParser};
5581
foreach my $db (keys %$dump) {
5583
warn "Empty database from parsed mysqldump output";
5588
foreach my $table_def ( @{$dump->{$db}} ) {
5589
if ( !$table_def ) {
5590
warn "Empty CREATE TABLE for database $db parsed from mysqldump output";
5593
my $tbl_struct = $tp->parse($table_def);
5594
$schema->{$db}->{$tbl_struct->{name}} = $tbl_struct->{is_col};
5596
map { $column_name{$_}++ } @{$tbl_struct->{cols}};
5597
$table_name{$tbl_struct->{name}}++;
5601
map { $self->{duplicate_column_name}->{$_} = 1 }
5602
grep { $column_name{$_} > 1 }
5605
map { $self->{duplicate_table_name}->{$_} = 1 }
5606
grep { $table_name{$_} > 1 }
5609
MKDEBUG && _d('Schema:', Dumper($schema));
5613
sub qualify_column {
5614
my ( $self, %args ) = @_;
5615
my @required_args = qw(column);
5616
foreach my $arg ( @required_args ) {
5617
die "I need a $arg argument" unless $args{$arg};
5619
my ($column) = @args{@required_args};
5621
MKDEBUG && _d('Qualifying', $column);
5622
my ($col, $tbl, $db) = reverse map { s/`//g; $_ } split /[.]/, $column;
5623
MKDEBUG && _d('Column', $column, 'has db', $db, 'tbl', $tbl, 'col', $col);
5630
if ( !$qcol{tbl} ) {
5631
@qcol{qw(db tbl)} = $self->get_table_for_column(column => $qcol{col});
5633
elsif ( !$qcol{db} ) {
5634
$qcol{db} = $self->get_database_for_table(table => $qcol{tbl});
5637
MKDEBUG && _d('Column is already database-table qualified');
5643
sub get_table_for_column {
5644
my ( $self, %args ) = @_;
5645
my @required_args = qw(column);
5646
foreach my $arg ( @required_args ) {
5647
die "I need a $arg argument" unless $args{$arg};
5649
my ($col) = @args{@required_args};
5650
MKDEBUG && _d('Getting table for column', $col);
5652
if ( $self->{duplicate_column_name}->{$col} ) {
5653
MKDEBUG && _d('Column name is duplicate, cannot qualify it');
5657
my $schema = $self->{schema};
5658
foreach my $db ( keys %{$schema} ) {
5659
foreach my $tbl ( keys %{$schema->{$db}} ) {
5660
if ( $schema->{$db}->{$tbl}->{$col} ) {
5661
MKDEBUG && _d('Column is in database', $db, 'table', $tbl);
5667
MKDEBUG && _d('Failed to find column in any table');
5671
sub get_database_for_table {
5672
my ( $self, %args ) = @_;
5673
my @required_args = qw(table);
5674
foreach my $arg ( @required_args ) {
5675
die "I need a $arg argument" unless $args{$arg};
5677
my ($tbl) = @args{@required_args};
5678
MKDEBUG && _d('Getting database for table', $tbl);
5680
if ( $self->{duplicate_table_name}->{$tbl} ) {
5681
MKDEBUG && _d('Table name is duplicate, cannot qualify it');
5685
my $schema = $self->{schema};
5686
foreach my $db ( keys %{$schema} ) {
5687
if ( $schema->{$db}->{$tbl} ) {
5688
MKDEBUG && _d('Table is in database', $db);
5693
MKDEBUG && _d('Failed to find table in any database');
5698
my ($package, undef, $line) = caller 0;
5699
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
5700
map { defined $_ ? $_ : 'undef' }
5702
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
5708
# ###########################################################################
5709
# End SchemaQualifier package
5710
# ###########################################################################
5712
# ###########################################################################
5713
# This is a combination of modules and programs in one -- a runnable module.
5714
# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
5715
# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition.
5717
# Check at the end of this package for the call to main() which actually runs
5719
# ###########################################################################
5720
package mk_table_usage;
5722
use English qw(-no_match_vars);
5725
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
5727
use sigtrap 'handler', \&sig_int, 'normal-signals';
5729
Transformers->import(qw(make_checksum));
5731
# Global variables. Only really essential variables should be here.
5735
@ARGV = @_; # set global ARGV for this package
5736
$oktorun = 1; # reset between tests else pipeline won't run
5738
# ########################################################################
5739
# Get configuration information.
5740
# ########################################################################
5741
my $o = new OptionParser();
5745
my $dp = $o->DSNParser();
5746
$dp->prop('set-vars', $o->get('set-vars'));
5748
$o->usage_or_errors();
5750
# ########################################################################
5751
# Connect to MySQl for --explain-extended.
5752
# ########################################################################
5753
my $explain_ext_dbh;
5754
if ( my $dsn = $o->get('explain-extended') ) {
5755
$explain_ext_dbh = get_cxn(
5762
# ########################################################################
5763
# Make common modules.
5764
# ########################################################################
5765
my $qp = new QueryParser();
5766
my $qr = new QueryRewriter(QueryParser => $qp);
5767
my $sp = new SQLParser();
5768
my $tu = new TableUsage(
5769
constant_data_value => $o->get('constant-data-value'),
5773
my %common_modules = (
5777
QueryRewriter => $qr,
5780
# ########################################################################
5781
# Parse the --create-table-definitions files.
5782
# ########################################################################
5783
if ( my $files = $o->get('create-table-definitions') ) {
5784
my $q = new Quoter();
5785
my $tp = new TableParser(Quoter => $q);
5786
my $sq = new SchemaQualifier(TableParser => $tp, Quoter => $q);
5788
my $dump_parser = new MysqldumpParser();
5790
foreach my $file ( @$files ) {
5791
my $dump = $dump_parser->parse_create_tables(file => $file);
5792
if ( !$dump || !keys %$dump ) {
5793
warn "No CREATE TABLE statements were found in $file";
5796
$sq->set_schema_from_mysqldump(dump => $dump);
5798
$sp->set_SchemaQualifier($sq);
5801
# ########################################################################
5802
# Set up an array of callbacks.
5803
# ########################################################################
5804
my $pipeline_data = {
5805
# Add here any data to inject into the pipeline.
5806
# This hashref is $args in each pipeline process.
5808
my $pipeline = new Pipeline(
5810
continue_on_error => $o->get('continue-on-error'),
5818
# Stuff you'd like to do to make sure pipeline data is prepped
5819
# and ready to go...
5821
$args->{event} = undef; # remove event from previous pass
5823
if ( $o->got('query') ) {
5824
if ( $args->{query} ) {
5825
delete $args->{query}; # terminate
5828
$args->{query} = $o->get('query'); # analyze query once
5838
my $fi = new FileIterator();
5839
my $next_file = $fi->get_file_itr(@ARGV);
5840
my $input_fh; # the current input fh
5841
my $pr; # Progress obj for ^
5848
if ( $o->got('query') ) {
5849
MKDEBUG && _d("No input; using --query");
5853
# Only get the next file when there's no fh or no more events in
5854
# the current fh. This allows us to do collect-and-report cycles
5855
# (i.e. iterations) on huge files. This doesn't apply to infinite
5856
# inputs because they don't set more_events false.
5857
if ( !$args->{input_fh} || !$args->{more_events} ) {
5858
if ( $args->{input_fh} ) {
5859
close $args->{input_fh}
5860
or die "Cannot close input fh: $OS_ERROR";
5862
my ($fh, $filename, $filesize) = $next_file->();
5864
MKDEBUG && _d('Reading', $filename);
5866
# Create callback to read next event. Some inputs, like
5867
# Processlist, may use something else but most next_event.
5868
if ( my $read_time = $o->get('read-timeout') ) {
5870
= sub { return read_timeout($fh, $read_time); };
5873
$args->{next_event} = sub { return <$fh>; };
5875
$args->{input_fh} = $fh;
5876
$args->{tell} = sub { return tell $fh; };
5877
$args->{more_events} = 1;
5879
# Make a progress reporter, one per file.
5880
if ( $o->get('progress') && $filename && -e $filename ) {
5882
jobsize => $filesize,
5883
spec => $o->get('progress'),
5889
MKDEBUG && _d("No more input");
5890
# This will cause terminator proc to terminate the pipeline.
5891
$args->{input_fh} = undef;
5892
$args->{more_events} = 0;
5895
$pr->update($args->{tell}) if $pr;
5902
if ( $o->got('query') ) {
5907
if ( $args->{query} ) {
5908
$args->{event}->{arg} = $args->{query};
5915
# Only slowlogs are supported, but if we want parse other formats,
5916
# just tweak the code below to be like mk-query-digest.
5918
slowlog => ['SlowLogParser'],
5919
# binlog => ['BinaryLogParser'],
5920
# genlog => ['GeneralLogParser'],
5921
# tcpdump => ['TcpdumpParser','MySQLProtocolParser'],
5923
my $type = ['slowlog'];
5924
$type = $alias_for{$type->[0]} if $alias_for{$type->[0]};
5926
foreach my $module ( @$type ) {
5929
$parser = $module->new(
5933
if ( $EVAL_ERROR ) {
5934
die "Failed to load $module module: $EVAL_ERROR";
5938
name => ref $parser,
5941
if ( $args->{input_fh} ) {
5942
my $event = $parser->parse_event(
5943
event => $args->{event},
5944
next_event => $args->{next_event},
5945
tell => $args->{tell},
5946
oktorun => sub { $args->{more_events} = $_[0]; },
5949
$args->{event} = $event;
5952
MKDEBUG && _d("No more events, input EOF");
5953
return; # next input
5955
# No input, let pipeline run so the last report is printed.
5964
my $runtime = new Runtime(
5965
now => sub { return time },
5966
runtime => $o->get('run-time'),
5970
name => 'terminator',
5974
# Stop running if there's no more input.
5975
if ( !$args->{input_fh} && !$args->{query} ) {
5976
MKDEBUG && _d("No more input, terminating pipeline");
5978
# This shouldn't happen, but I want to know if it does.
5979
warn "Event in the pipeline but no current input: "
5983
$oktorun = 0; # 2. terminate pipeline
5984
return; # 1. exit pipeline early
5987
# Stop running if --run-time has elapsed.
5988
if ( !$runtime->have_time() ) {
5989
MKDEBUG && _d("No more time, terminating pipeline");
5990
$oktorun = 0; # 2. terminate pipeline
5991
return; # 1. exit pipeline early
5994
# There's input and time left so keep runnning...
5995
if ( $args->{event} ) {
5996
MKDEBUG && _d("Event in pipeline, continuing");
6000
MKDEBUG && _d("No event in pipeline, get next event");
6007
# ########################################################################
6008
# All pipeline processes after the terminator expect an event
6009
# (i.e. that $args->{event} exists and is a valid event).
6010
# ########################################################################
6012
if ( $o->get('filter') ) { # filter
6013
my $filter = $o->get('filter');
6014
if ( -f $filter && -r $filter ) {
6015
MKDEBUG && _d('Reading file', $filter, 'for --filter code');
6016
open my $fh, "<", $filter or die "Cannot open $filter: $OS_ERROR";
6017
$filter = do { local $/ = undef; <$fh> };
6021
$filter = "( $filter )"; # issue 565
6023
my $code = 'sub { my ( $args ) = @_; my $event = $args->{event}; '
6024
. "$filter && return \$args; };";
6025
MKDEBUG && _d('--filter code:', $code);
6026
my $sub = eval $code
6027
or die "Error compiling --filter code: $code\n$EVAL_ERROR";
6035
if ( $explain_ext_dbh ) { # explain extended
6036
my $default_db = $o->get('database');
6039
name => 'explain extended',
6042
my $query = $args->{event}->{arg};
6043
return unless $query;
6044
my $qualified_query;
6046
$qualified_query = qualify_query(
6048
dbh => $explain_ext_dbh,
6049
db => $args->{event}->{db} || $default_db,
6052
if ( $EVAL_ERROR ) {
6056
$args->{event}->{original_arg} = $query;
6057
$args->{event}->{arg} = $qualified_query;
6061
} # explain extended
6064
my $default_db = $o->get('database');
6065
my $id_attrib = $o->get('id-attribute');
6069
name => 'table usage',
6072
my $event = $args->{event};
6073
my $query = $event->{arg};
6074
return unless $query;
6078
if ( !exists $event->{$id_attrib}
6079
|| !defined $event->{$id_attrib}) {
6080
MKDEBUG && _d("Event", $id_attrib, "attrib doesn't exist",
6081
"or isn't defined, skipping");
6084
$query_id = $event->{$id_attrib};
6087
$query_id = "0x" . make_checksum(
6088
$qr->fingerprint($event->{original_arg} || $event->{arg}));
6091
my $table_usage = $tu->get_table_usage(
6093
default_db => $event->{db} || $default_db,
6096
# TODO: I think this will happen for SELECT NOW(); i.e. not
6097
# sure what TableUsage returns for such queries.
6098
if ( !$table_usage || @$table_usage == 0 ) {
6099
MKDEBUG && _d("Query does not use any tables");
6104
table_usage => $table_usage,
6105
query_id => $query_id,
6114
# ########################################################################
6115
# Daemonize now that everything is setup and ready to work.
6116
# ########################################################################
6118
if ( $o->get('daemonize') ) {
6119
$daemon = new Daemon(o=>$o);
6120
$daemon->daemonize();
6121
MKDEBUG && _d('I am a daemon now');
6123
elsif ( $o->get('pid') ) {
6124
# We're not daemoninzing, it just handles PID stuff.
6125
$daemon = new Daemon(o=>$o);
6126
$daemon->make_PID_file();
6129
# ########################################################################
6131
# ########################################################################
6133
# Pump the pipeline until either no more input, or we're interrupted by
6134
# CTRL-C, or--this shouldn't happen--the pipeline causes an error. All
6135
# work happens inside the pipeline via the procs we created above.
6136
my $exit_status = 0;
6139
oktorun => \$oktorun,
6140
pipeline_data => $pipeline_data,
6143
if ( $EVAL_ERROR ) {
6144
warn "The pipeline caused an error: $EVAL_ERROR";
6147
MKDEBUG && _d("Pipeline data:", Dumper($pipeline_data));
6149
$explain_ext_dbh->disconnect() if $explain_ext_dbh;
6151
return $exit_status;
6154
# ###########################################################################
6156
# ###########################################################################
6157
sub report_table_usage {
6159
my @required_args = qw(table_usage query_id);
6160
foreach my $arg ( @required_args ) {
6161
die "I need a $arg argument" unless $args{$arg};
6163
my ($table_usage, $query_id) = @args{@required_args};
6164
MKDEBUG && _d("Reporting table usage");
6166
my $target_tbl_num = 1;
6168
foreach my $table ( @$table_usage ) {
6169
print "Query_id: $query_id." . ($target_tbl_num++) . "\n";
6172
foreach my $usage ( @$table ) {
6173
die "Invalid table usage: " . Dumper($usage)
6174
unless $usage->{context} && $usage->{table};
6176
print "$usage->{context} $usage->{table}\n";
6186
my @required_args = qw(query dbh);
6187
foreach my $arg ( @required_args ) {
6188
die "I need a $arg argument" unless $args{$arg};
6190
my ($query, $dbh) = @args{@required_args};
6193
if ( my $db = $args{db} ) {
6195
MKDEBUG && _d($dbh, $sql);
6199
$sql = "EXPLAIN EXTENDED $query";
6200
MKDEBUG && _d($dbh, $sql);
6201
$dbh->do($sql); # don't need the result
6203
$sql = "SHOW WARNINGS";
6204
MKDEBUG && _d($dbh, $sql);
6205
my $warning = $dbh->selectrow_hashref($sql);
6206
if ( ($warning->{level} || "") !~ m/Note/i
6207
|| ($warning->{code} || 0) != 1003 ) {
6208
die "EXPLAIN EXTENDED failed:\n"
6209
. " Level: " . ($warning->{level} || "") . "\n"
6210
. " Code: " . ($warning->{code} || "") . "\n"
6211
. "Message: " . ($warning->{message} || "") . "\n";
6214
return $warning->{message};
6219
my @required_args = qw(dsn OptionParser DSNParser);
6220
foreach my $arg ( @required_args ) {
6221
die "I need a $arg argument" unless $args{$arg};
6223
my ($dsn, $o, $dp) = @args{@required_args};
6225
if ( $o->get('ask-pass') ) {
6226
$dsn->{p} = OptionParser::prompt_noecho("Enter password "
6227
. ($args{for} ? "for $args{for}: " : ": "));
6230
my $dbh = $dp->get_dbh($dp->get_cxn_params($dsn), $args{opts});
6231
$dbh->{FetchHashKeyName} = 'NAME_lc';
6236
my ( $signal ) = @_;
6238
print STDERR "# Caught SIG$signal.\n";
6242
print STDERR "# Exiting on SIG$signal.\n";
6248
my ($package, undef, $line) = caller 0;
6249
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
6250
map { defined $_ ? $_ : 'undef' }
6252
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
6255
# ############################################################################
6257
# ############################################################################
6258
if ( !caller ) { exit main(@ARGV); }
6260
1; # Because this is a module as well as a script.
6262
# #############################################################################
6264
# #############################################################################
6270
mk-table-usage - Read queries from a log and analyze how they use tables.
6274
Usage: mk-table-usage [OPTION...] [FILE...]
6276
mk-table-usage reads queries from slow query logs and analyzes how they use
6277
tables. If no FILE is specified, STDIN is read. Table usage for every query
6278
is printed to STDOUT.
6282
mk-table-use is very low risk because it only reads and examines queries from
6283
a log and executes C<EXPLAIN EXTENDED> if the L<"--explain-extended"> option
6286
At the time of this release, there are no known bugs that could cause serious
6289
The authoritative source for updated information is always the online issue
6290
tracking system. Issues that affect this tool will be marked as such. You can
6291
see a list of such issues at the following URL:
6292
L<http://www.maatkit.org/bugs/mk-table-usage>.
6294
See also L<"BUGS"> for more information on filing bugs and getting help.
6298
mk-table-usage reads queries from slow query logs and analyzes how they use
6299
tables. Table usage indicates more than just which tables are read from or
6300
written to by the query, it also indicates data flow: data in and data out.
6301
Data flow is determined by the contexts in which tables are used by the query.
6302
A single table can be used in several different contexts in the same query.
6303
The reported table usage for each query lists every context for every table.
6304
This CONTEXT-TABLE list tells how and where data flows, i.e. the query's table
6305
usage. The L<"OUTPUT"> section lists the possible contexts and describes how
6306
to read a table usage report.
6308
Since this tool analyzes table usage, it's important that queries use
6309
table-qualified columns. If a query uses only one table, then all columns
6310
must be from that table and there's no problem. But if a query uses
6311
multiple tables and the columns are not table-qualified, then that creates a
6312
problem that can only be solved by knowing the query's database and specifying
6313
L<"--explain-extended">. If the slow log does not specify the database
6314
used by the query, then you can specify a default database with L<"--database">.
6315
There is no other way to know or guess the database, so the query will be
6316
skipped. Secondly, if the database is known, then specifying
6317
L<"--explain-extended"> causes mk-table-usage to do C<EXPLAIN EXTENDED ...>
6318
C<SHOW WARNINGS> to get the fully qualified query as reported by MySQL
6319
(i.e. all identifiers are fully database- and/or table-qualified). For
6320
best results, you should specify L<"--explain-extended"> and
6321
L<"--database"> if you know that all queries use the same database.
6323
Each query is identified in the output by either an MD5 hex checksum
6324
of the query's fingerprint or the query's value for the specified
6325
L<"--id-attribute">. The query ID is for parsing and storing the table
6326
usage reports in a table that is keyed on the query ID. See L<"OUTPUT">
6327
for more information.
6331
The table usage report that is printed for each query looks similar to the
6334
Query_id: 0x1CD27577D202A339.1
6341
Query_id: 0x1CD27577D202A339.2
6348
Usage reports are separated by blank lines. The first line is always the
6349
query ID: a unique ID that can be used to parse the output and store the
6350
usage reports in a table keyed on this ID. The query ID has two parts
6351
separated by a period: the query ID and the target table number.
6353
If L<"--id-attribute"> is not specified, then query IDs are automatically
6354
created by making an MD5 hex checksum of the query's fingerprint
6355
(as shown above, e.g. C<0x1CD27577D202A339>); otherwise, the query ID is the
6356
query's value for the given attribute.
6358
The target table number starts at 1 and increments by 1 for each table that
6359
the query affects. Only multi-table UPDATE queries can affect
6360
multiple tables with a single query, so this number is 1 for all other types
6361
of queries. (Multi-table DELETE queries are not supported.)
6362
The example output above is from this query:
6364
UPDATE t1 AS a JOIN t2 AS b USING (id)
6365
SET a.foo="bar", b.foo="bat"
6368
The C<SET> clause indicates that two tables are updated: C<a> aliased as C<t1>,
6369
and C<b> aliased as C<t2>. So two usage reports are printed, one for each
6370
table, and this is indicated in the output by their common query ID but
6371
incrementing target table number.
6373
After the first line is a variable number of CONTEXT-TABLE lines. Possible
6380
SELECT means that data is taken out of the table for one of two reasons:
6381
to be returned to the user as part of a result set, or to be put into another
6382
table as part of an INSERT or UPDATE. In the first case, since only SELECT
6383
queries return result sets, a SELECT context is always listed for SELECT
6384
queries. In the second case, data from one table is used to insert or
6385
update rows in another table. For example, the UPDATE query in the example
6386
above has the usage:
6392
SET a.foo="bar", b.foo="bat"
6394
DUAL is used for any values that does not originate in a table, in this case the
6395
literal values "bar" and "bat". If that C<SET> clause were C<SET a.foo=b.foo>
6396
instead, then the complete usage would be:
6398
Query_id: 0x1CD27577D202A339.1
6405
The presence of a SELECT context after another context, such as UPDATE or
6406
INSERT, indicates where the UPDATE or INSERT retrieves its data. The example
6407
immediately above reflects an UPDATE query that updates rows in table C<t1>
6408
with data from table C<t2>.
6410
=item * Any other query type
6412
Any other query type, such as INSERT, UPDATE, DELETE, etc. may be a context.
6413
All these types indicate that the table is written or altered in some way.
6414
If a SELECT context follows one of these types, then data is read from the
6415
SELECT table and written to this table. This happens, for example, with
6416
INSERT..SELECT or UPDATE queries that set column values using values from
6417
tables instead of constant values.
6419
These query types are not supported:
6427
The JOIN context lists tables that are joined, either with an explicit JOIN in
6428
the FROM clause, or implicitly in the WHERE clause, such as C<t1.id = t2.id>.
6432
The WHERE context lists tables that are used in the WHERE clause to filter
6433
results. This does not include tables that are implicitly joined in the
6434
WHERE clause; those are listed as JOIN contexts. For example:
6436
WHERE t1.id > 100 AND t1.id < 200 AND t2.foo IS NOT NULL
6443
Only unique tables are listed; that is why table C<t1> is listed only once.
6447
The TLIST context lists tables that are accessed by the query but do not
6448
appear in any other context. These tables are usually an implicit
6449
full cartesian join, so they should be avoided. For example, the query
6450
C<SELECT * FROM t1, t2> results in:
6452
Query_id: 0xBDDEB6EDA41897A8.1
6458
First of all, there are two SELECT contexts, because C<SELECT *> selects
6459
rows from all tables; C<t1> and C<t2> in this case. Secondly, the tables
6460
are implicitly joined, but without any kind of join condition, which results
6461
in a full cartesian join as indicated by the TLIST context for each.
6467
mk-table-usage exits 1 on any kind of error, or 0 if no errors.
6471
This tool accepts additional command-line arguments. Refer to the
6472
L<"SYNOPSIS"> and usage information for details.
6478
Prompt for a password when connecting to MySQL.
6482
short form: -A; type: string
6484
Default character set. If the value is utf8, sets Perl's binmode on
6485
STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and
6486
runs SET NAMES UTF8 after connecting to MySQL. Any other value sets
6487
binmode on STDOUT without the utf8 layer, and runs SET NAMES after
6488
connecting to MySQL.
6494
Read this comma-separated list of config files; if specified, this must be the
6495
first option on the command line.
6497
=item --constant-data-value
6499
type: string; default: DUAL
6501
Value to print for constant data. Constant data means all data not
6502
from tables (or subqueries since subqueries are not supported). For example,
6503
real constant values like strings ("foo") and numbers (42), and data from
6504
functions like C<NOW()>. For example, in the query
6505
C<INSERT INTO t (c) VALUES ('a')>, the string 'a' is constant data, so the
6506
table usage report is:
6511
The first line indicates that data is inserted into table C<t> and the second
6512
line indicates that that data comes from some constant value.
6514
=item --[no]continue-on-error
6518
Continue parsing even if there is an error.
6520
=item --create-table-definitions
6524
Read C<CREATE TABLE> definitions from this list of comma-separated files.
6525
If you cannot use L<"--explain-extended"> to fully qualify table and column
6526
names, you can save the output of C<mysqldump --no-data> to one or more files
6527
and specify those files with this option. The tool will parse all
6528
C<CREATE TABLE> definitions from the files and use this information to
6529
qualify table and column names. If a column name is used in multiple tables,
6530
or table name is used in multiple databases, these duplicates cannot be
6535
Fork to the background and detach from the shell. POSIX
6536
operating systems only.
6540
short form: -D; type: string
6544
=item --defaults-file
6546
short form: -F; type: string
6548
Only read mysql options from the given file. You must give an absolute pathname.
6550
=item --explain-extended
6554
EXPLAIN EXTENDED queries on this host to fully qualify table and column names.
6560
Discard events for which this Perl code doesn't return true.
6562
This option is a string of Perl code or a file containing Perl code that gets
6563
compiled into a subroutine with one argument: $event. This is a hashref.
6564
If the given value is a readable file, then mk-query-digest reads the entire
6565
file and uses its contents as the code. The file should not contain
6566
a shebang (#!/usr/bin/perl) line.
6568
If the code returns true, the chain of callbacks continues; otherwise it ends.
6569
The code is the last statement in the subroutine other than C<return $event>.
6570
The subroutine template is:
6572
sub { $event = shift; filter && return $event; }
6574
Filters given on the command line are wrapped inside parentheses like like
6575
C<( filter )>. For complex, multi-line filters, you must put the code inside
6576
a file so it will not be wrapped inside parentheses. Either way, the filter
6577
must produce syntactically valid code given the template. For example, an
6578
if-else branch given on the command line would not be valid:
6580
--filter 'if () { } else { }' # WRONG
6582
Since it's given on the command line, the if-else branch would be wrapped inside
6583
parentheses which is not syntactically valid. So to accomplish something more
6584
complex like this would require putting the code in a file, for example
6587
my $event_ok; if (...) { $event_ok=1; } else { $event_ok=0; } $event_ok
6589
Then specify C<--filter filter.txt> to read the code from filter.txt.
6591
If the filter code won't compile, mk-query-digest will die with an error.
6592
If the filter code does compile, an error may still occur at runtime if the
6593
code tries to do something wrong (like pattern match an undefined value).
6594
mk-query-digest does not provide any safeguards so code carefully!
6596
An example filter that discards everything but SELECT statements:
6598
--filter '$event->{arg} =~ m/^select/i'
6600
This is compiled into a subroutine like the following:
6602
sub { $event = shift; ( $event->{arg} =~ m/^select/i ) && return $event; }
6604
It is permissible for the code to have side effects (to alter C<$event>).
6606
You can find an explanation of the structure of $event at
6607
L<http://code.google.com/p/maatkit/wiki/EventAttributes>.
6609
Here are more examples of filter code:
6613
=item Host/IP matches domain.com
6615
--filter '($event->{host} || $event->{ip} || "") =~ m/domain.com/'
6617
Sometimes MySQL logs the host where the IP is expected. Therefore, we
6620
=item User matches john
6622
--filter '($event->{user} || "") =~ m/john/'
6624
=item More than 1 warning
6626
--filter '($event->{Warning_count} || 0) > 1'
6628
=item Query does full table scan or full join
6630
--filter '(($event->{Full_scan} || "") eq "Yes") || (($event->{Full_join} || "") eq "Yes")'
6632
=item Query was not served from query cache
6634
--filter '($event->{QC_Hit} || "") eq "No"'
6636
=item Query is 1 MB or larger
6638
--filter '$event->{bytes} >= 1_048_576'
6642
Since L<"--filter"> allows you to alter C<$event>, you can use it to do other
6643
things, like create new attributes.
6652
short form: -h; type: string
6656
=item --id-attribute
6660
Identify each event using this attribute. If not ID attribute is given, then
6661
events are identified with the query's checksum: an MD5 hex checksum of the
6662
query's fingerprint.
6668
Print all output to this file when daemonized.
6672
short form: -p; type: string
6674
Password to use when connecting.
6680
Create the given PID file when running. The file contains the process
6681
ID of the daemonized instance. The PID file is removed when the
6682
daemonized instance exits. The program checks for the existence of the
6683
PID file when starting; if it exists and the process with the matching PID
6684
exists, the program exits.
6688
short form: -P; type: int
6690
Port number to use for connection.
6694
type: array; default: time,30
6696
Print progress reports to STDERR. The value is a comma-separated list with two
6697
parts. The first part can be percentage, time, or iterations; the second part
6698
specifies how often an update should be printed, in percentage, seconds, or
6699
number of iterations.
6705
Analyze only this given query. If you want to analyze the table usage of
6706
one simple query by providing on the command line instead of reading it
6707
from a slow log file, then specify that query with this option. The default
6708
L<"--id-attribute"> will be used which is the query's checksum.
6710
=item --read-timeout
6712
type: time; default: 0
6714
Wait this long for an event from the input; 0 to wait forever.
6716
This option sets the maximum time to wait for an event from the input. If an
6717
event is not received after the specified time, the script stops reading the
6718
input and prints its reports.
6720
This option requires the Perl POSIX module.
6726
How long to run before exiting. The default is to run forever (you can
6727
interrupt with CTRL-C).
6731
type: string; default: wait_timeout=10000
6733
Set these MySQL variables. Immediately after connecting to MySQL, this
6734
string will be appended to SET and executed.
6738
short form: -S; type: string
6740
Socket file to use for connection.
6744
short form: -u; type: string
6746
User for login if not current user.
6750
Show version and exit.
6756
These DSN options are used to create a DSN. Each option is given like
6757
C<option=value>. The options are case-sensitive, so P and p are not the
6758
same option. There cannot be whitespace before or after the C<=> and
6759
if the value contains whitespace it must be quoted. DSN options are
6760
comma-separated. See the L<maatkit> manpage for full details.
6766
dsn: charset; copy: yes
6768
Default character set.
6772
dsn: database; copy: yes
6774
Database that contains the query review table.
6778
dsn: mysql_read_default_file; copy: yes
6780
Only read default options from the given file
6784
dsn: host; copy: yes
6790
dsn: password; copy: yes
6792
Password to use when connecting.
6796
dsn: port; copy: yes
6798
Port number to use for connection.
6802
dsn: mysql_socket; copy: yes
6804
Socket file to use for connection.
6808
dsn: user; copy: yes
6810
User for login if not current user.
6816
You can download Maatkit from Google Code at
6817
L<http://code.google.com/p/maatkit/>, or you can get any of the tools
6818
easily with a command like the following:
6820
wget http://www.maatkit.org/get/toolname
6822
wget http://www.maatkit.org/trunk/toolname
6824
Where C<toolname> can be replaced with the name (or fragment of a name) of any
6825
of the Maatkit tools. Once downloaded, they're ready to run; no installation is
6826
needed. The first URL gets the latest released version of the tool, and the
6827
second gets the latest trunk code from Subversion.
6831
The environment variable C<MKDEBUG> enables verbose debugging output in all of
6836
=head1 SYSTEM REQUIREMENTS
6838
You need Perl and some core packages that ought to be installed in any
6839
reasonably new version of Perl.
6843
For a list of known bugs see L<http://www.maatkit.org/bugs/mk-table-usage>.
6845
Please use Google Code Issues and Groups to report bugs or request support:
6846
L<http://code.google.com/p/maatkit/>. You can also join #maatkit on Freenode to
6849
Please include the complete command-line used to reproduce the problem you are
6850
seeing, the version of all MySQL servers involved, the complete output of the
6851
tool when run with L<"--version">, and if possible, debugging output produced by
6852
running with the C<MKDEBUG=1> environment variable.
6854
=head1 COPYRIGHT, LICENSE AND WARRANTY
6856
This program is copyright 2009-@CURRENTYEAR@ Percona Inc.
6857
Feedback and improvements are welcome.
6859
THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
6860
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
6861
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
6863
This program is free software; you can redistribute it and/or modify it under
6864
the terms of the GNU General Public License as published by the Free Software
6865
Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
6866
systems, you can issue `man perlgpl' or `man perlartistic' to read these
6869
You should have received a copy of the GNU General Public License along with
6870
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
6871
Place, Suite 330, Boston, MA 02111-1307 USA.
6879
This manual page documents Ver @VERSION@ Distrib @DISTRIB@ $Revision: 7531 $.