~percona-toolkit-dev/percona-toolkit/cant-nibble-bug-918056

217.3.1 by Daniel Nichter
Add pt-fingerprint.
1
#!/usr/bin/env perl
2
3
# This program is part of Percona Toolkit: http://www.percona.com/software/
4
# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
5
# notices and disclaimers.
6
7
use strict;
8
use warnings FATAL => 'all';
395.1.4 by Brian Fraser
Final update-modules before the release
9
10
# This tool is "fat-packed": most of its dependent modules are embedded
11
# in this file.  Setting %INC to this file for each module makes Perl aware
12
# of this so it will not try to load the module from @INC.  See the tool's
13
# documentation for a full list of dependencies.
14
BEGIN {
15
   $INC{$_} = __FILE__ for map { (my $pkg = "$_.pm") =~ s!::!/!g; $pkg } (qw(
16
      OptionParser
17
      QueryParser
18
      QueryRewriter
19
   ));
20
}
217.3.1 by Daniel Nichter
Add pt-fingerprint.
21
22
# ###########################################################################
23
# OptionParser package
24
# This package is a copy without comments from the original.  The original
25
# with comments and its test file can be found in the Bazaar repository at,
26
#   lib/OptionParser.pm
27
#   t/lib/OptionParser.t
28
# See https://launchpad.net/percona-toolkit for more information.
29
# ###########################################################################
30
{
31
package OptionParser;
32
33
use strict;
34
use warnings FATAL => 'all';
35
use English qw(-no_match_vars);
36
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
37
38
use List::Util qw(max);
39
use Getopt::Long;
40
41
my $POD_link_re = '[LC]<"?([^">]+)"?>';
42
43
sub new {
44
   my ( $class, %args ) = @_;
45
   my @required_args = qw();
46
   foreach my $arg ( @required_args ) {
47
      die "I need a $arg argument" unless $args{$arg};
48
   }
49
50
   my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
51
   $program_name ||= $PROGRAM_NAME;
52
   my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.';
53
54
   my %attributes = (
55
      'type'       => 1,
56
      'short form' => 1,
57
      'group'      => 1,
58
      'default'    => 1,
59
      'cumulative' => 1,
60
      'negatable'  => 1,
61
   );
62
63
   my $self = {
64
      head1             => 'OPTIONS',        # These args are used internally
65
      skip_rules        => 0,                # to instantiate another Option-
66
      item              => '--(.*)',         # Parser obj that parses the
67
      attributes        => \%attributes,     # DSN OPTIONS section.  Tools
68
      parse_attributes  => \&_parse_attribs, # don't tinker with these args.
69
70
      %args,
71
72
      strict            => 1,  # disabled by a special rule
73
      program_name      => $program_name,
74
      opts              => {},
75
      got_opts          => 0,
76
      short_opts        => {},
77
      defaults          => {},
78
      groups            => {},
79
      allowed_groups    => {},
80
      errors            => [],
81
      rules             => [],  # desc of rules for --help
82
      mutex             => [],  # rule: opts are mutually exclusive
83
      atleast1          => [],  # rule: at least one opt is required
84
      disables          => {},  # rule: opt disables other opts 
85
      defaults_to       => {},  # rule: opt defaults to value of other opt
86
      DSNParser         => undef,
87
      default_files     => [
88
         "/etc/percona-toolkit/percona-toolkit.conf",
89
         "/etc/percona-toolkit/$program_name.conf",
90
         "$home/.percona-toolkit.conf",
91
         "$home/.$program_name.conf",
92
      ],
93
      types             => {
94
         string => 's', # standard Getopt type
95
         int    => 'i', # standard Getopt type
96
         float  => 'f', # standard Getopt type
97
         Hash   => 'H', # hash, formed from a comma-separated list
98
         hash   => 'h', # hash as above, but only if a value is given
99
         Array  => 'A', # array, similar to Hash
100
         array  => 'a', # array, similar to hash
101
         DSN    => 'd', # DSN
102
         size   => 'z', # size with kMG suffix (powers of 2^10)
103
         time   => 'm', # time, with an optional suffix of s/h/m/d
104
      },
105
   };
106
107
   return bless $self, $class;
108
}
109
110
sub get_specs {
111
   my ( $self, $file ) = @_;
112
   $file ||= $self->{file} || __FILE__;
113
   my @specs = $self->_pod_to_specs($file);
114
   $self->_parse_specs(@specs);
115
116
   open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
117
   my $contents = do { local $/ = undef; <$fh> };
118
   close $fh;
119
   if ( $contents =~ m/^=head1 DSN OPTIONS/m ) {
120
      PTDEBUG && _d('Parsing DSN OPTIONS');
121
      my $dsn_attribs = {
122
         dsn  => 1,
123
         copy => 1,
124
      };
125
      my $parse_dsn_attribs = sub {
126
         my ( $self, $option, $attribs ) = @_;
127
         map {
128
            my $val = $attribs->{$_};
129
            if ( $val ) {
130
               $val    = $val eq 'yes' ? 1
131
                       : $val eq 'no'  ? 0
132
                       :                 $val;
133
               $attribs->{$_} = $val;
134
            }
135
         } keys %$attribs;
136
         return {
137
            key => $option,
138
            %$attribs,
139
         };
140
      };
141
      my $dsn_o = new OptionParser(
142
         description       => 'DSN OPTIONS',
143
         head1             => 'DSN OPTIONS',
144
         dsn               => 0,         # XXX don't infinitely recurse!
145
         item              => '\* (.)',  # key opts are a single character
146
         skip_rules        => 1,         # no rules before opts
147
         attributes        => $dsn_attribs,
148
         parse_attributes  => $parse_dsn_attribs,
149
      );
150
      my @dsn_opts = map {
151
         my $opts = {
152
            key  => $_->{spec}->{key},
153
            dsn  => $_->{spec}->{dsn},
154
            copy => $_->{spec}->{copy},
155
            desc => $_->{desc},
156
         };
157
         $opts;
158
      } $dsn_o->_pod_to_specs($file);
159
      $self->{DSNParser} = DSNParser->new(opts => \@dsn_opts);
160
   }
161
162
   if ( $contents =~ m/^=head1 VERSION\n\n^(.+)$/m ) {
163
      $self->{version} = $1;
164
      PTDEBUG && _d($self->{version});
165
   }
166
167
   return;
168
}
169
170
sub DSNParser {
171
   my ( $self ) = @_;
172
   return $self->{DSNParser};
173
};
174
175
sub get_defaults_files {
176
   my ( $self ) = @_;
177
   return @{$self->{default_files}};
178
}
179
180
sub _pod_to_specs {
181
   my ( $self, $file ) = @_;
182
   $file ||= $self->{file} || __FILE__;
183
   open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR";
184
185
   my @specs = ();
186
   my @rules = ();
187
   my $para;
188
189
   local $INPUT_RECORD_SEPARATOR = '';
190
   while ( $para = <$fh> ) {
191
      next unless $para =~ m/^=head1 $self->{head1}/;
192
      last;
193
   }
194
195
   while ( $para = <$fh> ) {
196
      last if $para =~ m/^=over/;
197
      next if $self->{skip_rules};
198
      chomp $para;
199
      $para =~ s/\s+/ /g;
200
      $para =~ s/$POD_link_re/$1/go;
201
      PTDEBUG && _d('Option rule:', $para);
202
      push @rules, $para;
203
   }
204
205
   die "POD has no $self->{head1} section" unless $para;
206
207
   do {
208
      if ( my ($option) = $para =~ m/^=item $self->{item}/ ) {
209
         chomp $para;
210
         PTDEBUG && _d($para);
211
         my %attribs;
212
213
         $para = <$fh>; # read next paragraph, possibly attributes
214
215
         if ( $para =~ m/: / ) { # attributes
216
            $para =~ s/\s+\Z//g;
217
            %attribs = map {
218
                  my ( $attrib, $val) = split(/: /, $_);
219
                  die "Unrecognized attribute for --$option: $attrib"
220
                     unless $self->{attributes}->{$attrib};
221
                  ($attrib, $val);
222
               } split(/; /, $para);
223
            if ( $attribs{'short form'} ) {
224
               $attribs{'short form'} =~ s/-//;
225
            }
226
            $para = <$fh>; # read next paragraph, probably short help desc
227
         }
228
         else {
229
            PTDEBUG && _d('Option has no attributes');
230
         }
231
232
         $para =~ s/\s+\Z//g;
233
         $para =~ s/\s+/ /g;
234
         $para =~ s/$POD_link_re/$1/go;
235
236
         $para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s;
237
         PTDEBUG && _d('Short help:', $para);
238
239
         die "No description after option spec $option" if $para =~ m/^=item/;
240
241
         if ( my ($base_option) =  $option =~ m/^\[no\](.*)/ ) {
242
            $option = $base_option;
243
            $attribs{'negatable'} = 1;
244
         }
245
246
         push @specs, {
247
            spec  => $self->{parse_attributes}->($self, $option, \%attribs), 
248
            desc  => $para
249
               . (defined $attribs{default} ? " (default $attribs{default})" : ''),
250
            group => ($attribs{'group'} ? $attribs{'group'} : 'default'),
251
         };
252
      }
253
      while ( $para = <$fh> ) {
254
         last unless $para;
255
         if ( $para =~ m/^=head1/ ) {
256
            $para = undef; # Can't 'last' out of a do {} block.
257
            last;
258
         }
259
         last if $para =~ m/^=item /;
260
      }
261
   } while ( $para );
262
263
   die "No valid specs in $self->{head1}" unless @specs;
264
265
   close $fh;
266
   return @specs, @rules;
267
}
268
269
sub _parse_specs {
270
   my ( $self, @specs ) = @_;
271
   my %disables; # special rule that requires deferred checking
272
273
   foreach my $opt ( @specs ) {
274
      if ( ref $opt ) { # It's an option spec, not a rule.
275
         PTDEBUG && _d('Parsing opt spec:',
276
            map { ($_, '=>', $opt->{$_}) } keys %$opt);
277
278
         my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
279
         if ( !$long ) {
280
            die "Cannot parse long option from spec $opt->{spec}";
281
         }
282
         $opt->{long} = $long;
283
284
         die "Duplicate long option --$long" if exists $self->{opts}->{$long};
285
         $self->{opts}->{$long} = $opt;
286
287
         if ( length $long == 1 ) {
288
            PTDEBUG && _d('Long opt', $long, 'looks like short opt');
289
            $self->{short_opts}->{$long} = $long;
290
         }
291
292
         if ( $short ) {
293
            die "Duplicate short option -$short"
294
               if exists $self->{short_opts}->{$short};
295
            $self->{short_opts}->{$short} = $long;
296
            $opt->{short} = $short;
297
         }
298
         else {
299
            $opt->{short} = undef;
300
         }
301
435.5.1 by fraserb at gmail
Removed optional_value, made --version-check have default: off, updated the tools and documentation with the changes, and added the auto value to Pingback.pm
302
         $opt->{is_negatable}  = $opt->{spec} =~ m/!/        ? 1 : 0;
303
         $opt->{is_cumulative} = $opt->{spec} =~ m/\+/       ? 1 : 0;
304
         $opt->{is_required}   = $opt->{desc} =~ m/required/ ? 1 : 0;
217.3.1 by Daniel Nichter
Add pt-fingerprint.
305
306
         $opt->{group} ||= 'default';
307
         $self->{groups}->{ $opt->{group} }->{$long} = 1;
308
309
         $opt->{value} = undef;
310
         $opt->{got}   = 0;
311
312
         my ( $type ) = $opt->{spec} =~ m/=(.)/;
313
         $opt->{type} = $type;
314
         PTDEBUG && _d($long, 'type:', $type);
315
316
317
         $opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ );
318
319
         if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) {
320
            $self->{defaults}->{$long} = defined $def ? $def : 1;
321
            PTDEBUG && _d($long, 'default:', $def);
322
         }
323
324
         if ( $long eq 'config' ) {
325
            $self->{defaults}->{$long} = join(',', $self->get_defaults_files());
326
         }
327
328
         if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) {
329
            $disables{$long} = $dis;
330
            PTDEBUG && _d('Deferring check of disables rule for', $opt, $dis);
331
         }
332
333
         $self->{opts}->{$long} = $opt;
334
      }
335
      else { # It's an option rule, not a spec.
336
         PTDEBUG && _d('Parsing rule:', $opt); 
337
         push @{$self->{rules}}, $opt;
338
         my @participants = $self->_get_participants($opt);
339
         my $rule_ok = 0;
340
341
         if ( $opt =~ m/mutually exclusive|one and only one/ ) {
342
            $rule_ok = 1;
343
            push @{$self->{mutex}}, \@participants;
344
            PTDEBUG && _d(@participants, 'are mutually exclusive');
345
         }
346
         if ( $opt =~ m/at least one|one and only one/ ) {
347
            $rule_ok = 1;
348
            push @{$self->{atleast1}}, \@participants;
349
            PTDEBUG && _d(@participants, 'require at least one');
350
         }
351
         if ( $opt =~ m/default to/ ) {
352
            $rule_ok = 1;
353
            $self->{defaults_to}->{$participants[0]} = $participants[1];
354
            PTDEBUG && _d($participants[0], 'defaults to', $participants[1]);
355
         }
356
         if ( $opt =~ m/restricted to option groups/ ) {
357
            $rule_ok = 1;
358
            my ($groups) = $opt =~ m/groups ([\w\s\,]+)/;
359
            my @groups = split(',', $groups);
360
            %{$self->{allowed_groups}->{$participants[0]}} = map {
361
               s/\s+//;
362
               $_ => 1;
363
            } @groups;
364
         }
365
         if( $opt =~ m/accepts additional command-line arguments/ ) {
366
            $rule_ok = 1;
367
            $self->{strict} = 0;
368
            PTDEBUG && _d("Strict mode disabled by rule");
369
         }
370
371
         die "Unrecognized option rule: $opt" unless $rule_ok;
372
      }
373
   }
374
375
   foreach my $long ( keys %disables ) {
376
      my @participants = $self->_get_participants($disables{$long});
377
      $self->{disables}->{$long} = \@participants;
378
      PTDEBUG && _d('Option', $long, 'disables', @participants);
379
   }
380
381
   return; 
382
}
383
384
sub _get_participants {
385
   my ( $self, $str ) = @_;
386
   my @participants;
387
   foreach my $long ( $str =~ m/--(?:\[no\])?([\w-]+)/g ) {
388
      die "Option --$long does not exist while processing rule $str"
389
         unless exists $self->{opts}->{$long};
390
      push @participants, $long;
391
   }
392
   PTDEBUG && _d('Participants for', $str, ':', @participants);
393
   return @participants;
394
}
395
396
sub opts {
397
   my ( $self ) = @_;
398
   my %opts = %{$self->{opts}};
399
   return %opts;
400
}
401
402
sub short_opts {
403
   my ( $self ) = @_;
404
   my %short_opts = %{$self->{short_opts}};
405
   return %short_opts;
406
}
407
408
sub set_defaults {
409
   my ( $self, %defaults ) = @_;
410
   $self->{defaults} = {};
411
   foreach my $long ( keys %defaults ) {
412
      die "Cannot set default for nonexistent option $long"
413
         unless exists $self->{opts}->{$long};
414
      $self->{defaults}->{$long} = $defaults{$long};
415
      PTDEBUG && _d('Default val for', $long, ':', $defaults{$long});
416
   }
417
   return;
418
}
419
420
sub get_defaults {
421
   my ( $self ) = @_;
422
   return $self->{defaults};
423
}
424
425
sub get_groups {
426
   my ( $self ) = @_;
427
   return $self->{groups};
428
}
429
430
sub _set_option {
431
   my ( $self, $opt, $val ) = @_;
432
   my $long = exists $self->{opts}->{$opt}       ? $opt
433
            : exists $self->{short_opts}->{$opt} ? $self->{short_opts}->{$opt}
434
            : die "Getopt::Long gave a nonexistent option: $opt";
435
436
   $opt = $self->{opts}->{$long};
437
   if ( $opt->{is_cumulative} ) {
438
      $opt->{value}++;
439
   }
435.5.1 by fraserb at gmail
Removed optional_value, made --version-check have default: off, updated the tools and documentation with the changes, and added the auto value to Pingback.pm
440
   else {
217.3.1 by Daniel Nichter
Add pt-fingerprint.
441
      $opt->{value} = $val;
442
   }
443
   $opt->{got} = 1;
444
   PTDEBUG && _d('Got option', $long, '=', $val);
445
}
446
447
sub get_opts {
448
   my ( $self ) = @_; 
449
450
   foreach my $long ( keys %{$self->{opts}} ) {
451
      $self->{opts}->{$long}->{got} = 0;
452
      $self->{opts}->{$long}->{value}
453
         = exists $self->{defaults}->{$long}       ? $self->{defaults}->{$long}
454
         : $self->{opts}->{$long}->{is_cumulative} ? 0
455
         : undef;
456
   }
457
   $self->{got_opts} = 0;
458
459
   $self->{errors} = [];
460
461
   if ( @ARGV && $ARGV[0] eq "--config" ) {
462
      shift @ARGV;
463
      $self->_set_option('config', shift @ARGV);
464
   }
465
   if ( $self->has('config') ) {
466
      my @extra_args;
467
      foreach my $filename ( split(',', $self->get('config')) ) {
468
         eval {
469
            push @extra_args, $self->_read_config_file($filename);
470
         };
471
         if ( $EVAL_ERROR ) {
472
            if ( $self->got('config') ) {
473
               die $EVAL_ERROR;
474
            }
475
            elsif ( PTDEBUG ) {
476
               _d($EVAL_ERROR);
477
            }
478
         }
479
      }
480
      unshift @ARGV, @extra_args;
481
   }
482
483
   Getopt::Long::Configure('no_ignore_case', 'bundling');
484
   GetOptions(
485
      map    { $_->{spec} => sub { $self->_set_option(@_); } }
486
      grep   { $_->{long} ne 'config' } # --config is handled specially above.
487
      values %{$self->{opts}}
488
   ) or $self->save_error('Error parsing options');
489
490
   if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) {
491
      if ( $self->{version} ) {
492
         print $self->{version}, "\n";
493
      }
494
      else {
495
         print "Error parsing version.  See the VERSION section of the tool's documentation.\n";
496
      }
424.1.3 by Daniel Nichter
Update OptionParser in all tools.
497
      exit 1;
217.3.1 by Daniel Nichter
Add pt-fingerprint.
498
   }
499
500
   if ( @ARGV && $self->{strict} ) {
501
      $self->save_error("Unrecognized command-line options @ARGV");
502
   }
503
504
   foreach my $mutex ( @{$self->{mutex}} ) {
505
      my @set = grep { $self->{opts}->{$_}->{got} } @$mutex;
506
      if ( @set > 1 ) {
507
         my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
508
                      @{$mutex}[ 0 .. scalar(@$mutex) - 2] )
509
                 . ' and --'.$self->{opts}->{$mutex->[-1]}->{long}
510
                 . ' are mutually exclusive.';
511
         $self->save_error($err);
512
      }
513
   }
514
515
   foreach my $required ( @{$self->{atleast1}} ) {
516
      my @set = grep { $self->{opts}->{$_}->{got} } @$required;
517
      if ( @set == 0 ) {
518
         my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
519
                      @{$required}[ 0 .. scalar(@$required) - 2] )
520
                 .' or --'.$self->{opts}->{$required->[-1]}->{long};
521
         $self->save_error("Specify at least one of $err");
522
      }
523
   }
524
525
   $self->_check_opts( keys %{$self->{opts}} );
526
   $self->{got_opts} = 1;
527
   return;
528
}
529
530
sub _check_opts {
531
   my ( $self, @long ) = @_;
532
   my $long_last = scalar @long;
533
   while ( @long ) {
534
      foreach my $i ( 0..$#long ) {
535
         my $long = $long[$i];
536
         next unless $long;
537
         my $opt  = $self->{opts}->{$long};
538
         if ( $opt->{got} ) {
539
            if ( exists $self->{disables}->{$long} ) {
540
               my @disable_opts = @{$self->{disables}->{$long}};
541
               map { $self->{opts}->{$_}->{value} = undef; } @disable_opts;
542
               PTDEBUG && _d('Unset options', @disable_opts,
543
                  'because', $long,'disables them');
544
            }
545
546
            if ( exists $self->{allowed_groups}->{$long} ) {
547
548
               my @restricted_groups = grep {
549
                  !exists $self->{allowed_groups}->{$long}->{$_}
550
               } keys %{$self->{groups}};
551
552
               my @restricted_opts;
553
               foreach my $restricted_group ( @restricted_groups ) {
554
                  RESTRICTED_OPT:
555
                  foreach my $restricted_opt (
556
                     keys %{$self->{groups}->{$restricted_group}} )
557
                  {
558
                     next RESTRICTED_OPT if $restricted_opt eq $long;
559
                     push @restricted_opts, $restricted_opt
560
                        if $self->{opts}->{$restricted_opt}->{got};
561
                  }
562
               }
563
564
               if ( @restricted_opts ) {
565
                  my $err;
566
                  if ( @restricted_opts == 1 ) {
567
                     $err = "--$restricted_opts[0]";
568
                  }
569
                  else {
570
                     $err = join(', ',
571
                               map { "--$self->{opts}->{$_}->{long}" }
572
                               grep { $_ } 
573
                               @restricted_opts[0..scalar(@restricted_opts) - 2]
574
                            )
575
                          . ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long};
576
                  }
577
                  $self->save_error("--$long is not allowed with $err");
578
               }
579
            }
580
581
         }
582
         elsif ( $opt->{is_required} ) { 
583
            $self->save_error("Required option --$long must be specified");
584
         }
585
586
         $self->_validate_type($opt);
587
         if ( $opt->{parsed} ) {
588
            delete $long[$i];
589
         }
590
         else {
591
            PTDEBUG && _d('Temporarily failed to parse', $long);
592
         }
593
      }
594
595
      die "Failed to parse options, possibly due to circular dependencies"
596
         if @long == $long_last;
597
      $long_last = @long;
598
   }
599
600
   return;
601
}
602
603
sub _validate_type {
604
   my ( $self, $opt ) = @_;
605
   return unless $opt;
606
607
   if ( !$opt->{type} ) {
608
      $opt->{parsed} = 1;
609
      return;
610
   }
611
612
   my $val = $opt->{value};
613
614
   if ( $val && $opt->{type} eq 'm' ) {  # type time
615
      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a time value');
616
      my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
617
      if ( !$suffix ) {
618
         my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/;
619
         $suffix = $s || 's';
620
         PTDEBUG && _d('No suffix given; using', $suffix, 'for',
621
            $opt->{long}, '(value:', $val, ')');
622
      }
623
      if ( $suffix =~ m/[smhd]/ ) {
624
         $val = $suffix eq 's' ? $num            # Seconds
625
              : $suffix eq 'm' ? $num * 60       # Minutes
626
              : $suffix eq 'h' ? $num * 3600     # Hours
627
              :                  $num * 86400;   # Days
628
         $opt->{value} = ($prefix || '') . $val;
629
         PTDEBUG && _d('Setting option', $opt->{long}, 'to', $val);
630
      }
631
      else {
632
         $self->save_error("Invalid time suffix for --$opt->{long}");
633
      }
634
   }
635
   elsif ( $val && $opt->{type} eq 'd' ) {  # type DSN
636
      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN');
637
      my $prev = {};
638
      my $from_key = $self->{defaults_to}->{ $opt->{long} };
639
      if ( $from_key ) {
640
         PTDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN');
641
         if ( $self->{opts}->{$from_key}->{parsed} ) {
642
            $prev = $self->{opts}->{$from_key}->{value};
643
         }
644
         else {
645
            PTDEBUG && _d('Cannot parse', $opt->{long}, 'until',
646
               $from_key, 'parsed');
647
            return;
648
         }
649
      }
650
      my $defaults = $self->{DSNParser}->parse_options($self);
651
      $opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults);
652
   }
653
   elsif ( $val && $opt->{type} eq 'z' ) {  # type size
654
      PTDEBUG && _d('Parsing option', $opt->{long}, 'as a size value');
655
      $self->_parse_size($opt, $val);
656
   }
657
   elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) {
658
      $opt->{value} = { map { $_ => 1 } split(/(?<!\\),\s*/, ($val || '')) };
659
   }
660
   elsif ( $opt->{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) {
661
      $opt->{value} = [ split(/(?<!\\),\s*/, ($val || '')) ];
662
   }
663
   else {
664
      PTDEBUG && _d('Nothing to validate for option',
665
         $opt->{long}, 'type', $opt->{type}, 'value', $val);
666
   }
667
668
   $opt->{parsed} = 1;
669
   return;
670
}
671
672
sub get {
673
   my ( $self, $opt ) = @_;
674
   my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
675
   die "Option $opt does not exist"
676
      unless $long && exists $self->{opts}->{$long};
677
   return $self->{opts}->{$long}->{value};
678
}
679
680
sub got {
681
   my ( $self, $opt ) = @_;
682
   my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
683
   die "Option $opt does not exist"
684
      unless $long && exists $self->{opts}->{$long};
685
   return $self->{opts}->{$long}->{got};
686
}
687
688
sub has {
689
   my ( $self, $opt ) = @_;
690
   my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
691
   return defined $long ? exists $self->{opts}->{$long} : 0;
692
}
693
694
sub set {
695
   my ( $self, $opt, $val ) = @_;
696
   my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
697
   die "Option $opt does not exist"
698
      unless $long && exists $self->{opts}->{$long};
699
   $self->{opts}->{$long}->{value} = $val;
700
   return;
701
}
702
703
sub save_error {
704
   my ( $self, $error ) = @_;
705
   push @{$self->{errors}}, $error;
706
   return;
707
}
708
709
sub errors {
710
   my ( $self ) = @_;
711
   return $self->{errors};
712
}
713
714
sub usage {
715
   my ( $self ) = @_;
716
   warn "No usage string is set" unless $self->{usage}; # XXX
717
   return "Usage: " . ($self->{usage} || '') . "\n";
718
}
719
720
sub descr {
721
   my ( $self ) = @_;
722
   warn "No description string is set" unless $self->{description}; # XXX
723
   my $descr  = ($self->{description} || $self->{program_name} || '')
724
              . "  For more details, please use the --help option, "
725
              . "or try 'perldoc $PROGRAM_NAME' "
726
              . "for complete documentation.";
727
   $descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g)
728
      unless $ENV{DONT_BREAK_LINES};
729
   $descr =~ s/ +$//mg;
730
   return $descr;
731
}
732
733
sub usage_or_errors {
734
   my ( $self, $file, $return ) = @_;
735
   $file ||= $self->{file} || __FILE__;
736
737
   if ( !$self->{description} || !$self->{usage} ) {
738
      PTDEBUG && _d("Getting description and usage from SYNOPSIS in", $file);
739
      my %synop = $self->_parse_synopsis($file);
740
      $self->{description} ||= $synop{description};
741
      $self->{usage}       ||= $synop{usage};
742
      PTDEBUG && _d("Description:", $self->{description},
743
         "\nUsage:", $self->{usage});
744
   }
745
746
   if ( $self->{opts}->{help}->{got} ) {
747
      print $self->print_usage() or die "Cannot print usage: $OS_ERROR";
748
      exit 0 unless $return;
749
   }
750
   elsif ( scalar @{$self->{errors}} ) {
751
      print $self->print_errors() or die "Cannot print errors: $OS_ERROR";
424.1.3 by Daniel Nichter
Update OptionParser in all tools.
752
      exit 1 unless $return;
217.3.1 by Daniel Nichter
Add pt-fingerprint.
753
   }
754
755
   return;
756
}
757
758
sub print_errors {
759
   my ( $self ) = @_;
760
   my $usage = $self->usage() . "\n";
761
   if ( (my @errors = @{$self->{errors}}) ) {
762
      $usage .= join("\n  * ", 'Errors in command-line arguments:', @errors)
763
              . "\n";
764
   }
765
   return $usage . "\n" . $self->descr();
766
}
767
768
sub print_usage {
769
   my ( $self ) = @_;
770
   die "Run get_opts() before print_usage()" unless $self->{got_opts};
771
   my @opts = values %{$self->{opts}};
772
773
   my $maxl = max(
774
      map {
775
         length($_->{long})               # option long name
776
         + ($_->{is_negatable} ? 4 : 0)   # "[no]" if opt is negatable
777
         + ($_->{type} ? 2 : 0)           # "=x" where x is the opt type
778
      }
779
      @opts);
780
781
   my $maxs = max(0,
782
      map {
783
         length($_)
784
         + ($self->{opts}->{$_}->{is_negatable} ? 4 : 0)
785
         + ($self->{opts}->{$_}->{type} ? 2 : 0)
786
      }
787
      values %{$self->{short_opts}});
788
789
   my $lcol = max($maxl, ($maxs + 3));
790
   my $rcol = 80 - $lcol - 6;
791
   my $rpad = ' ' x ( 80 - $rcol );
792
793
   $maxs = max($lcol - 3, $maxs);
794
795
   my $usage = $self->descr() . "\n" . $self->usage();
796
797
   my @groups = reverse sort grep { $_ ne 'default'; } keys %{$self->{groups}};
798
   push @groups, 'default';
799
800
   foreach my $group ( reverse @groups ) {
801
      $usage .= "\n".($group eq 'default' ? 'Options' : $group).":\n\n";
802
      foreach my $opt (
803
         sort { $a->{long} cmp $b->{long} }
804
         grep { $_->{group} eq $group }
805
         @opts )
806
      {
807
         my $long  = $opt->{is_negatable} ? "[no]$opt->{long}" : $opt->{long};
808
         my $short = $opt->{short};
809
         my $desc  = $opt->{desc};
810
811
         $long .= $opt->{type} ? "=$opt->{type}" : "";
812
813
         if ( $opt->{type} && $opt->{type} eq 'm' ) {
814
            my ($s) = $desc =~ m/\(suffix (.)\)/;
815
            $s    ||= 's';
816
            $desc =~ s/\s+\(suffix .\)//;
817
            $desc .= ".  Optional suffix s=seconds, m=minutes, h=hours, "
818
                   . "d=days; if no suffix, $s is used.";
819
         }
472.1.2 by Brian Fraser
Update modules for all tools using DSNParser
820
         $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol}(?!\W))(?:\s+|(?<=\W)|$)/g);
217.3.1 by Daniel Nichter
Add pt-fingerprint.
821
         $desc =~ s/ +$//mg;
822
         if ( $short ) {
823
            $usage .= sprintf("  --%-${maxs}s -%s  %s\n", $long, $short, $desc);
824
         }
825
         else {
826
            $usage .= sprintf("  --%-${lcol}s  %s\n", $long, $desc);
827
         }
828
      }
829
   }
830
831
   $usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n";
832
833
   if ( (my @rules = @{$self->{rules}}) ) {
834
      $usage .= "\nRules:\n\n";
835
      $usage .= join("\n", map { "  $_" } @rules) . "\n";
836
   }
837
   if ( $self->{DSNParser} ) {
838
      $usage .= "\n" . $self->{DSNParser}->usage();
839
   }
840
   $usage .= "\nOptions and values after processing arguments:\n\n";
841
   foreach my $opt ( sort { $a->{long} cmp $b->{long} } @opts ) {
842
      my $val   = $opt->{value};
843
      my $type  = $opt->{type} || '';
844
      my $bool  = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
845
      $val      = $bool              ? ( $val ? 'TRUE' : 'FALSE' )
846
                : !defined $val      ? '(No value)'
847
                : $type eq 'd'       ? $self->{DSNParser}->as_string($val)
848
                : $type =~ m/H|h/    ? join(',', sort keys %$val)
849
                : $type =~ m/A|a/    ? join(',', @$val)
850
                :                    $val;
851
      $usage .= sprintf("  --%-${lcol}s  %s\n", $opt->{long}, $val);
852
   }
853
   return $usage;
854
}
855
856
sub prompt_noecho {
857
   shift @_ if ref $_[0] eq __PACKAGE__;
858
   my ( $prompt ) = @_;
859
   local $OUTPUT_AUTOFLUSH = 1;
860
   print $prompt
861
      or die "Cannot print: $OS_ERROR";
862
   my $response;
863
   eval {
864
      require Term::ReadKey;
865
      Term::ReadKey::ReadMode('noecho');
866
      chomp($response = <STDIN>);
867
      Term::ReadKey::ReadMode('normal');
868
      print "\n"
869
         or die "Cannot print: $OS_ERROR";
870
   };
871
   if ( $EVAL_ERROR ) {
872
      die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR";
873
   }
874
   return $response;
875
}
876
877
sub _read_config_file {
878
   my ( $self, $filename ) = @_;
879
   open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n";
880
   my @args;
881
   my $prefix = '--';
882
   my $parse  = 1;
883
884
   LINE:
885
   while ( my $line = <$fh> ) {
886
      chomp $line;
887
      next LINE if $line =~ m/^\s*(?:\#|\;|$)/;
888
      $line =~ s/\s+#.*$//g;
889
      $line =~ s/^\s+|\s+$//g;
890
      if ( $line eq '--' ) {
891
         $prefix = '';
892
         $parse  = 0;
893
         next LINE;
894
      }
895
      if ( $parse
896
         && (my($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/)
897
      ) {
898
         push @args, grep { defined $_ } ("$prefix$opt", $arg);
899
      }
900
      elsif ( $line =~ m/./ ) {
901
         push @args, $line;
902
      }
903
      else {
904
         die "Syntax error in file $filename at line $INPUT_LINE_NUMBER";
905
      }
906
   }
907
   close $fh;
908
   return @args;
909
}
910
911
sub read_para_after {
912
   my ( $self, $file, $regex ) = @_;
913
   open my $fh, "<", $file or die "Can't open $file: $OS_ERROR";
914
   local $INPUT_RECORD_SEPARATOR = '';
915
   my $para;
916
   while ( $para = <$fh> ) {
917
      next unless $para =~ m/^=pod$/m;
918
      last;
919
   }
920
   while ( $para = <$fh> ) {
921
      next unless $para =~ m/$regex/;
922
      last;
923
   }
924
   $para = <$fh>;
925
   chomp($para);
926
   close $fh or die "Can't close $file: $OS_ERROR";
927
   return $para;
928
}
929
930
sub clone {
931
   my ( $self ) = @_;
932
933
   my %clone = map {
934
      my $hashref  = $self->{$_};
935
      my $val_copy = {};
936
      foreach my $key ( keys %$hashref ) {
937
         my $ref = ref $hashref->{$key};
938
         $val_copy->{$key} = !$ref           ? $hashref->{$key}
939
                           : $ref eq 'HASH'  ? { %{$hashref->{$key}} }
940
                           : $ref eq 'ARRAY' ? [ @{$hashref->{$key}} ]
941
                           : $hashref->{$key};
942
      }
943
      $_ => $val_copy;
944
   } qw(opts short_opts defaults);
945
946
   foreach my $scalar ( qw(got_opts) ) {
947
      $clone{$scalar} = $self->{$scalar};
948
   }
949
950
   return bless \%clone;     
951
}
952
953
sub _parse_size {
954
   my ( $self, $opt, $val ) = @_;
955
956
   if ( lc($val || '') eq 'null' ) {
957
      PTDEBUG && _d('NULL size for', $opt->{long});
958
      $opt->{value} = 'null';
959
      return;
960
   }
961
962
   my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);
963
   my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
964
   if ( defined $num ) {
965
      if ( $factor ) {
966
         $num *= $factor_for{$factor};
967
         PTDEBUG && _d('Setting option', $opt->{y},
968
            'to num', $num, '* factor', $factor);
969
      }
970
      $opt->{value} = ($pre || '') . $num;
971
   }
972
   else {
973
      $self->save_error("Invalid size for --$opt->{long}: $val");
974
   }
975
   return;
976
}
977
978
sub _parse_attribs {
979
   my ( $self, $option, $attribs ) = @_;
980
   my $types = $self->{types};
981
   return $option
982
      . ($attribs->{'short form'} ? '|' . $attribs->{'short form'}   : '' )
983
      . ($attribs->{'negatable'}  ? '!'                              : '' )
984
      . ($attribs->{'cumulative'} ? '+'                              : '' )
435.5.1 by fraserb at gmail
Removed optional_value, made --version-check have default: off, updated the tools and documentation with the changes, and added the auto value to Pingback.pm
985
      . ($attribs->{'type'}       ? '=' . $types->{$attribs->{type}} : '' );
217.3.1 by Daniel Nichter
Add pt-fingerprint.
986
}
987
988
sub _parse_synopsis {
989
   my ( $self, $file ) = @_;
990
   $file ||= $self->{file} || __FILE__;
991
   PTDEBUG && _d("Parsing SYNOPSIS in", $file);
992
993
   local $INPUT_RECORD_SEPARATOR = '';  # read paragraphs
994
   open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
995
   my $para;
996
   1 while defined($para = <$fh>) && $para !~ m/^=head1 SYNOPSIS/;
997
   die "$file does not contain a SYNOPSIS section" unless $para;
998
   my @synop;
999
   for ( 1..2 ) {  # 1 for the usage, 2 for the description
1000
      my $para = <$fh>;
1001
      push @synop, $para;
1002
   }
1003
   close $fh;
1004
   PTDEBUG && _d("Raw SYNOPSIS text:", @synop);
1005
   my ($usage, $desc) = @synop;
1006
   die "The SYNOPSIS section in $file is not formatted properly"
1007
      unless $usage && $desc;
1008
1009
   $usage =~ s/^\s*Usage:\s+(.+)/$1/;
1010
   chomp $usage;
1011
1012
   $desc =~ s/\n/ /g;
1013
   $desc =~ s/\s{2,}/ /g;
1014
   $desc =~ s/\. ([A-Z][a-z])/.  $1/g;
1015
   $desc =~ s/\s+$//;
1016
1017
   return (
1018
      description => $desc,
1019
      usage       => $usage,
1020
   );
1021
};
1022
1023
sub _d {
1024
   my ($package, undef, $line) = caller 0;
1025
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1026
        map { defined $_ ? $_ : 'undef' }
1027
        @_;
1028
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1029
}
1030
1031
if ( PTDEBUG ) {
1032
   print '# ', $^X, ' ', $], "\n";
1033
   if ( my $uname = `uname -a` ) {
1034
      $uname =~ s/\s+/ /g;
1035
      print "# $uname\n";
1036
   }
1037
   print '# Arguments: ',
1038
      join(' ', map { my $a = "_[$_]_"; $a =~ s/\n/\n# /g; $a; } @ARGV), "\n";
1039
}
1040
1041
1;
1042
}
1043
# ###########################################################################
1044
# End OptionParser package
1045
# ###########################################################################
1046
1047
# ###########################################################################
1048
# QueryParser package
1049
# This package is a copy without comments from the original.  The original
1050
# with comments and its test file can be found in the Bazaar repository at,
1051
#   lib/QueryParser.pm
1052
#   t/lib/QueryParser.t
1053
# See https://launchpad.net/percona-toolkit for more information.
1054
# ###########################################################################
1055
{
1056
package QueryParser;
1057
1058
use strict;
1059
use warnings FATAL => 'all';
1060
use English qw(-no_match_vars);
1061
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1062
1063
our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/;
1064
our $tbl_regex = qr{
1065
         \b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names
1066
         \b\s*
1067
         \(?                                   # Optional paren around tables
1068
         ($tbl_ident
1069
            (?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )*
1070
         )
1071
      }xio;
1072
our $has_derived = qr{
1073
      \b(?:FROM|JOIN|,)
1074
      \s*\(\s*SELECT
1075
   }xi;
1076
1077
our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i;
1078
1079
our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i;
1080
1081
sub new {
1082
   my ( $class ) = @_;
1083
   bless {}, $class;
1084
}
1085
1086
sub get_tables {
1087
   my ( $self, $query ) = @_;
1088
   return unless $query;
1089
   PTDEBUG && _d('Getting tables for', $query);
1090
1091
   my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i;
1092
   if ( $ddl_stmt ) {
1093
      PTDEBUG && _d('Special table type:', $ddl_stmt);
1094
      $query =~ s/IF\s+(?:NOT\s+)?EXISTS//i;
1095
      if ( $query =~ m/$ddl_stmt DATABASE\b/i ) {
1096
         PTDEBUG && _d('Query alters a database, not a table');
1097
         return ();
1098
      }
1099
      if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) {
1100
         my ($select) = $query =~ m/\b(SELECT\b.+)/is;
1101
         PTDEBUG && _d('CREATE TABLE ... SELECT:', $select);
1102
         return $self->get_tables($select);
1103
      }
1104
      my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i;
1105
      PTDEBUG && _d('Matches table:', $tbl);
1106
      return ($tbl);
1107
   }
1108
1109
   $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
1110
362.4.1 by Brian Fraser
Fix for 1037211: QueryParser fails to distill LOCK TABLES in lowercase
1111
   if ( $query =~ s/^\s*LOCK TABLES\s+//i ) {
217.3.1 by Daniel Nichter
Add pt-fingerprint.
1112
      PTDEBUG && _d('Special table type: LOCK TABLES');
362.4.1 by Brian Fraser
Fix for 1037211: QueryParser fails to distill LOCK TABLES in lowercase
1113
      $query =~ s/\s+(?:READ(?:\s+LOCAL)?|WRITE)\s*//gi;
217.3.1 by Daniel Nichter
Add pt-fingerprint.
1114
      PTDEBUG && _d('Locked tables:', $query);
1115
      $query = "FROM $query";
1116
   }
1117
1118
   $query =~ s/\\["']//g;                # quoted strings
1119
   $query =~ s/".*?"/?/sg;               # quoted strings
1120
   $query =~ s/'.*?'/?/sg;               # quoted strings
1121
1122
   my @tables;
1123
   foreach my $tbls ( $query =~ m/$tbl_regex/gio ) {
1124
      PTDEBUG && _d('Match tables:', $tbls);
1125
1126
      next if $tbls =~ m/\ASELECT\b/i;
1127
1128
      foreach my $tbl ( split(',', $tbls) ) {
1129
         $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio;
1130
1131
         if ( $tbl !~ m/[a-zA-Z]/ ) {
1132
            PTDEBUG && _d('Skipping suspicious table name:', $tbl);
1133
            next;
1134
         }
1135
1136
         push @tables, $tbl;
1137
      }
1138
   }
1139
   return @tables;
1140
}
1141
1142
sub has_derived_table {
1143
   my ( $self, $query ) = @_;
1144
   my $match = $query =~ m/$has_derived/;
1145
   PTDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table');
1146
   return $match;
1147
}
1148
1149
sub get_aliases {
1150
   my ( $self, $query, $list ) = @_;
1151
1152
   my $result = {
1153
      DATABASE => {},
1154
      TABLE    => {},
1155
   };
1156
   return $result unless $query;
1157
1158
   $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
1159
1160
   $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig;
1161
1162
   my @tbl_refs;
1163
   my ($tbl_refs, $from) = $query =~ m{
1164
      (
1165
         (FROM|INTO|UPDATE)\b\s*   # Keyword before table refs
1166
         .+?                       # Table refs
1167
      )
1168
      (?:\s+|\z)                   # If the query does not end with the table
1169
      (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs
1170
   }ix;
1171
1172
   if ( $tbl_refs ) {
1173
1174
      if ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
1175
         $tbl_refs =~ s/\([^\)]+\)\s*//;
1176
      }
1177
1178
      PTDEBUG && _d('tbl refs:', $tbl_refs);
1179
1180
      my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i;
1181
1182
      my $after_tbl  = qr/(?:,|JOIN|ON|USING|\z)/i;
1183
1184
      $tbl_refs =~ s/ = /=/g;
1185
1186
      while (
1187
         $tbl_refs =~ m{
1188
            $before_tbl\b\s*
1189
               ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? )
1190
            \s*$after_tbl
1191
         }xgio )
1192
      {
1193
         my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3);
1194
         PTDEBUG && _d('Match table:', $tbl_ref);
1195
         push @tbl_refs, $tbl_ref;
1196
         $alias = $self->trim_identifier($alias);
1197
1198
         if ( $tbl_ref =~ m/^AS\s+\w+/i ) {
1199
            PTDEBUG && _d('Subquery', $tbl_ref);
1200
            $result->{TABLE}->{$alias} = undef;
1201
            next;
1202
         }
1203
1204
         my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/;
1205
         $db  = $self->trim_identifier($db);
1206
         $tbl = $self->trim_identifier($tbl);
1207
         $result->{TABLE}->{$alias || $tbl} = $tbl;
1208
         $result->{DATABASE}->{$tbl}        = $db if $db;
1209
      }
1210
   }
1211
   else {
1212
      PTDEBUG && _d("No tables ref in", $query);
1213
   }
1214
1215
   if ( $list ) {
1216
      return \@tbl_refs;
1217
   }
1218
   else {
1219
      return $result;
1220
   }
1221
}
1222
1223
sub split {
1224
   my ( $self, $query ) = @_;
1225
   return unless $query;
1226
   $query = $self->clean_query($query);
1227
   PTDEBUG && _d('Splitting', $query);
1228
1229
   my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i;
1230
1231
   my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query);
1232
1233
   my @statements;
1234
   if ( @split_statements == 1 ) {
1235
      push @statements, $query;
1236
   }
1237
   else {
1238
      for ( my $i = 0; $i <= $#split_statements; $i += 2 ) {
1239
         push @statements, $split_statements[$i].$split_statements[$i+1];
1240
1241
         if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) {
1242
            $statements[-2] .= pop @statements;
1243
         }
1244
      }
1245
   }
1246
1247
   PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements);
1248
   return @statements;
1249
}
1250
1251
sub clean_query {
1252
   my ( $self, $query ) = @_;
1253
   return unless $query;
1254
   $query =~ s!/\*.*?\*/! !g;  # Remove /* comment blocks */
1255
   $query =~ s/^\s+//;         # Remove leading spaces
1256
   $query =~ s/\s+$//;         # Remove trailing spaces
1257
   $query =~ s/\s{2,}/ /g;     # Remove extra spaces
1258
   return $query;
1259
}
1260
1261
sub split_subquery {
1262
   my ( $self, $query ) = @_;
1263
   return unless $query;
1264
   $query = $self->clean_query($query);
1265
   $query =~ s/;$//;
1266
1267
   my @subqueries;
1268
   my $sqno = 0;  # subquery number
1269
   my $pos  = 0;
1270
   while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) {
1271
      $pos = pos($query);
1272
      my $word = $1;
1273
      PTDEBUG && _d($word, $sqno);
1274
      if ( $word =~ m/^\(?SELECT\b/i ) {
1275
         my $start_pos = $pos - length($word) - 1;
1276
         if ( $start_pos ) {
1277
            $sqno++;
1278
            PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos);
1279
            $subqueries[$sqno] = {
1280
               start_pos => $start_pos,
1281
               end_pos   => 0,
1282
               len       => 0,
1283
               words     => [$word],
1284
               lp        => 1, # left parentheses
1285
               rp        => 0, # right parentheses
1286
               done      => 0,
1287
            };
1288
         }
1289
         else {
1290
            PTDEBUG && _d('Main SELECT at pos 0');
1291
         }
1292
      }
1293
      else {
1294
         next unless $sqno;  # next unless we're in a subquery
1295
         PTDEBUG && _d('In subquery', $sqno);
1296
         my $sq = $subqueries[$sqno];
1297
         if ( $sq->{done} ) {
1298
            PTDEBUG && _d('This subquery is done; SQL is for',
1299
               ($sqno - 1 ? "subquery $sqno" : "the main SELECT"));
1300
            next;
1301
         }
1302
         push @{$sq->{words}}, $word;
1303
         my $lp = ($word =~ tr/\(//) || 0;
1304
         my $rp = ($word =~ tr/\)//) || 0;
1305
         PTDEBUG && _d('parentheses left', $lp, 'right', $rp);
1306
         if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) {
1307
            my $end_pos = $pos - 1;
1308
            PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos);
1309
            $sq->{end_pos} = $end_pos;
1310
            $sq->{len}     = $end_pos - $sq->{start_pos};
1311
         }
1312
      }
1313
   }
1314
1315
   for my $i ( 1..$#subqueries ) {
1316
      my $sq = $subqueries[$i];
1317
      next unless $sq;
1318
      $sq->{sql} = join(' ', @{$sq->{words}});
1319
      substr $query,
1320
         $sq->{start_pos} + 1,  # +1 for (
1321
         $sq->{len} - 1,        # -1 for )
1322
         "__subquery_$i";
1323
   }
1324
1325
   return $query, map { $_->{sql} } grep { defined $_ } @subqueries;
1326
}
1327
1328
sub query_type {
1329
   my ( $self, $query, $qr ) = @_;
1330
   my ($type, undef) = $qr->distill_verbs($query);
1331
   my $rw;
1332
   if ( $type =~ m/^SELECT\b/ ) {
1333
      $rw = 'read';
1334
   }
1335
   elsif ( $type =~ m/^$data_manip_stmts\b/
1336
           || $type =~ m/^$data_def_stmts\b/  ) {
1337
      $rw = 'write'
1338
   }
1339
1340
   return {
1341
      type => $type,
1342
      rw   => $rw,
1343
   }
1344
}
1345
1346
sub get_columns {
1347
   my ( $self, $query ) = @_;
1348
   my $cols = [];
1349
   return $cols unless $query;
1350
   my $cols_def;
1351
1352
   if ( $query =~ m/^SELECT/i ) {
1353
      $query =~ s/
1354
         ^SELECT\s+
1355
           (?:ALL
1356
              |DISTINCT
1357
              |DISTINCTROW
1358
              |HIGH_PRIORITY
1359
              |STRAIGHT_JOIN
1360
              |SQL_SMALL_RESULT
1361
              |SQL_BIG_RESULT
1362
              |SQL_BUFFER_RESULT
1363
              |SQL_CACHE
1364
              |SQL_NO_CACHE
1365
              |SQL_CALC_FOUND_ROWS
1366
           )\s+
1367
      /SELECT /xgi;
1368
      ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i;
1369
   }
1370
   elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
1371
      ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i;
1372
   }
1373
1374
   PTDEBUG && _d('Columns:', $cols_def);
1375
   if ( $cols_def ) {
1376
      @$cols = split(',', $cols_def);
1377
      map {
1378
         my $col = $_;
1379
         $col = s/^\s+//g;
1380
         $col = s/\s+$//g;
1381
         $col;
1382
      } @$cols;
1383
   }
1384
1385
   return $cols;
1386
}
1387
1388
sub parse {
1389
   my ( $self, $query ) = @_;
1390
   return unless $query;
1391
   my $parsed = {};
1392
1393
   $query =~ s/\n/ /g;
1394
   $query = $self->clean_query($query);
1395
1396
   $parsed->{query}   = $query,
1397
   $parsed->{tables}  = $self->get_aliases($query, 1);
1398
   $parsed->{columns} = $self->get_columns($query);
1399
1400
   my ($type) = $query =~ m/^(\w+)/;
1401
   $parsed->{type} = lc $type;
1402
1403
1404
   $parsed->{sub_queries} = [];
1405
1406
   return $parsed;
1407
}
1408
1409
sub extract_tables {
1410
   my ( $self, %args ) = @_;
1411
   my $query      = $args{query};
1412
   my $default_db = $args{default_db};
1413
   my $q          = $self->{Quoter} || $args{Quoter};
1414
   return unless $query;
1415
   PTDEBUG && _d('Extracting tables');
1416
   my @tables;
1417
   my %seen;
1418
   foreach my $db_tbl ( $self->get_tables($query) ) {
1419
      next unless $db_tbl;
1420
      next if $seen{$db_tbl}++; # Unique-ify for issue 337.
1421
      my ( $db, $tbl ) = $q->split_unquote($db_tbl);
1422
      push @tables, [ $db || $default_db, $tbl ];
1423
   }
1424
   return @tables;
1425
}
1426
1427
sub trim_identifier {
1428
   my ($self, $str) = @_;
1429
   return unless defined $str;
1430
   $str =~ s/`//g;
1431
   $str =~ s/^\s+//;
1432
   $str =~ s/\s+$//;
1433
   return $str;
1434
}
1435
1436
sub _d {
1437
   my ($package, undef, $line) = caller 0;
1438
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1439
        map { defined $_ ? $_ : 'undef' }
1440
        @_;
1441
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1442
}
1443
1444
1;
1445
}
1446
# ###########################################################################
1447
# End QueryParser package
1448
# ###########################################################################
1449
1450
# ###########################################################################
1451
# QueryRewriter package
1452
# This package is a copy without comments from the original.  The original
1453
# with comments and its test file can be found in the Bazaar repository at,
1454
#   lib/QueryRewriter.pm
1455
#   t/lib/QueryRewriter.t
1456
# See https://launchpad.net/percona-toolkit for more information.
1457
# ###########################################################################
1458
{
1459
package QueryRewriter;
1460
1461
use strict;
1462
use warnings FATAL => 'all';
1463
use English qw(-no_match_vars);
1464
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
1465
1466
our $verbs   = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT
1467
                  |UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi;
1468
my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly!
1469
my $bal;
1470
$bal         = qr/
1471
                  \(
1472
                  (?:
1473
                     (?> [^()]+ )    # Non-parens without backtracking
1474
                     |
1475
                     (??{ $bal })    # Group with matching parens
1476
                  )*
1477
                  \)
1478
                 /x;
1479
1480
my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/;  # One-line comments
1481
my $mlc_re = qr#/\*[^!].*?\*/#sm;                  # But not /*!version */
1482
my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm;             # For SHOW + /*!version */
1483
my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm;     # Variation for SHOW
1484
1485
1486
sub new {
1487
   my ( $class, %args ) = @_;
1488
   my $self = { %args };
1489
   return bless $self, $class;
1490
}
1491
1492
sub strip_comments {
1493
   my ( $self, $query ) = @_;
1494
   return unless $query;
1495
   $query =~ s/$olc_re//go;
1496
   $query =~ s/$mlc_re//go;
1497
   if ( $query =~ m/$vlc_rf/i ) { # contains show + version
1498
      $query =~ s/$vlc_re//go;
1499
   }
1500
   return $query;
1501
}
1502
1503
sub shorten {
1504
   my ( $self, $query, $length ) = @_;
1505
   $query =~ s{
1506
      \A(
1507
         (?:INSERT|REPLACE)
1508
         (?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)?
1509
         (?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\)
1510
      )
1511
      \s*,\s*\(.*?(ON\s+DUPLICATE|\Z)}
1512
      {$1 /*... omitted ...*/$2}xsi;
1513
1514
   return $query unless $query =~ m/IN\s*\(\s*(?!select)/i;
1515
1516
   my $last_length  = 0;
1517
   my $query_length = length($query);
1518
   while (
1519
      $length          > 0
1520
      && $query_length > $length
1521
      && $query_length < ( $last_length || $query_length + 1 )
1522
   ) {
1523
      $last_length = $query_length;
1524
      $query =~ s{
1525
         (\bIN\s*\()    # The opening of an IN list
1526
         ([^\)]+)       # Contents of the list, assuming no item contains paren
1527
         (?=\))           # Close of the list
1528
      }
1529
      {
1530
         $1 . __shorten($2)
1531
      }gexsi;
1532
   }
1533
1534
   return $query;
1535
}
1536
1537
sub __shorten {
1538
   my ( $snippet ) = @_;
1539
   my @vals = split(/,/, $snippet);
1540
   return $snippet unless @vals > 20;
1541
   my @keep = splice(@vals, 0, 20);  # Remove and save the first 20 items
1542
   return
1543
      join(',', @keep)
1544
      . "/*... omitted "
1545
      . scalar(@vals)
1546
      . " items ...*/";
1547
}
1548
1549
sub fingerprint {
1550
   my ( $self, $query ) = @_;
1551
1552
   $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query
1553
      && return 'mysqldump';
1554
   $query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/#     # pt-table-checksum, etc query
1555
      && return 'percona-toolkit';
1556
   $query =~ m/\Aadministrator command: /
1557
      && return $query;
1558
   $query =~ m/\A\s*(call\s+\S+)\(/i
1559
      && return lc($1); # Warning! $1 used, be careful.
1560
   if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) {
1561
      $query = $beginning; # Shorten multi-value INSERT statements ASAP
1562
   }
1563
  
1564
   $query =~ s/$olc_re//go;
1565
   $query =~ s/$mlc_re//go;
1566
   $query =~ s/\Ause \S+\Z/use ?/i       # Abstract the DB in USE
1567
      && return $query;
1568
1569
   $query =~ s/\\["']//g;                # quoted strings
1570
   $query =~ s/".*?"/?/sg;               # quoted strings
1571
   $query =~ s/'.*?'/?/sg;               # quoted strings
217.3.3 by Daniel Nichter
Rename preserve_embedded_numbers to match_embedded_numbers, and fingerprint_md5 to match_md5_checksums. Add corresponding options to pt-fingerprint.
1572
1573
   if ( $self->{match_md5_checksums} ) { 
1574
      $query =~ s/([._-])[a-f0-9]{32}/$1?/g;
1575
   }
1576
1577
   if ( !$self->{match_embedded_numbers} ) {
1578
      $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
1579
   }
1580
   else {
1581
      $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
1582
   }
1583
1584
   if ( $self->{match_md5_checksums} ) {
1585
      $query =~ s/[xb+-]\?/?/g;                
1586
   }
1587
   else {
1588
      $query =~ s/[xb.+-]\?/?/g;
1589
   }
1590
217.3.1 by Daniel Nichter
Add pt-fingerprint.
1591
   $query =~ s/\A\s+//;                  # Chop off leading whitespace
1592
   chomp $query;                         # Kill trailing whitespace
1593
   $query =~ tr[ \n\t\r\f][ ]s;          # Collapse whitespace
1594
   $query = lc $query;
1595
   $query =~ s/\bnull\b/?/g;             # Get rid of NULLs
1596
   $query =~ s{                          # Collapse IN and VALUES lists
1597
               \b(in|values?)(?:[\s,]*\([\s?,]*\))+
1598
              }
1599
              {$1(?+)}gx;
1600
   $query =~ s{                          # Collapse UNION
1601
               \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+
1602
              }
1603
              {$1 /*repeat$2*/}xg;
1604
   $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
1605
1606
   if ( $query =~ m/\bORDER BY /gi ) {  # Find, anchor on ORDER BY clause
1607
      1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query;
1608
   }
1609
1610
   return $query;
1611
}
1612
1613
sub distill_verbs {
1614
   my ( $self, $query ) = @_;
1615
1616
   $query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1";
1617
   $query =~ m/\A\s*use\s+/          && return "USE";
1618
   $query =~ m/\A\s*UNLOCK TABLES/i  && return "UNLOCK";
1619
   $query =~ m/\A\s*xa\s+(\S+)/i     && return "XA_$1";
1620
1621
   if ( $query =~ m/\Aadministrator command:/ ) {
1622
      $query =~ s/administrator command:/ADMIN/;
1623
      $query = uc $query;
1624
      return $query;
1625
   }
1626
1627
   $query = $self->strip_comments($query);
1628
1629
   if ( $query =~ m/\A\s*SHOW\s+/i ) {
1630
      PTDEBUG && _d($query);
1631
1632
      $query = uc $query;
1633
      $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
1634
      $query =~ s/\s+COUNT[^)]+\)//g;
1635
1636
      $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
1637
1638
      $query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s;
1639
      $query =~ s/\s+/ /g;
1640
      PTDEBUG && _d($query);
1641
      return $query;
1642
   }
1643
1644
   eval $QueryParser::data_def_stmts;
1645
   eval $QueryParser::tbl_ident;
1646
   my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
1647
   if ( $dds) {
1648
      my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
1649
      $obj = uc $obj if $obj;
1650
      PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
1651
      my ($db_or_tbl)
1652
         = $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i;
1653
      PTDEBUG && _d('Matches db or table:', $db_or_tbl);
1654
      return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl;
1655
   }
1656
1657
   my @verbs = $query =~ m/\b($verbs)\b/gio;
1658
   @verbs    = do {
1659
      my $last = '';
1660
      grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs;
1661
   };
1662
1663
   if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) {
1664
      PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]);
1665
      my $union = grep { $_ eq 'UNION' } @verbs;
1666
      @verbs    = $union ? qw(SELECT UNION) : qw(SELECT);
1667
   }
1668
1669
   my $verb_str = join(q{ }, @verbs);
1670
   return $verb_str;
1671
}
1672
1673
sub __distill_tables {
1674
   my ( $self, $query, $table, %args ) = @_;
1675
   my $qp = $args{QueryParser} || $self->{QueryParser};
1676
   die "I need a QueryParser argument" unless $qp;
1677
1678
   my @tables = map {
1679
      $_ =~ s/`//g;
1680
      $_ =~ s/(_?)[0-9]+/$1?/g;
1681
      $_;
1682
   } grep { defined $_ } $qp->get_tables($query);
1683
1684
   push @tables, $table if $table;
1685
1686
   @tables = do {
1687
      my $last = '';
1688
      grep { my $pass = $_ ne $last; $last = $_; $pass } @tables;
1689
   };
1690
1691
   return @tables;
1692
}
1693
1694
sub distill {
1695
   my ( $self, $query, %args ) = @_;
1696
1697
   if ( $args{generic} ) {
1698
      my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/;
1699
      return '' unless $cmd;
1700
      $query = (uc $cmd) . ($arg ? " $arg" : '');
1701
   }
1702
   else {
1703
      my ($verbs, $table)  = $self->distill_verbs($query, %args);
1704
1705
      if ( $verbs && $verbs =~ m/^SHOW/ ) {
1706
         my %alias_for = qw(
1707
            SCHEMA   DATABASE
1708
            KEYS     INDEX
1709
            INDEXES  INDEX
1710
         );
1711
         map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
1712
         $query = $verbs;
1713
      }
1714
      else {
1715
         my @tables = $self->__distill_tables($query, $table, %args);
1716
         $query     = join(q{ }, $verbs, @tables); 
1717
      } 
1718
   }
1719
1720
   if ( $args{trf} ) {
1721
      $query = $args{trf}->($query, %args);
1722
   }
1723
1724
   return $query;
1725
}
1726
1727
sub convert_to_select {
1728
   my ( $self, $query ) = @_;
1729
   return unless $query;
1730
1731
   return if $query =~ m/=\s*\(\s*SELECT /i;
1732
1733
   $query =~ s{
1734
                 \A.*?
1735
                 update(?:\s+(?:low_priority|ignore))?\s+(.*?)
1736
                 \s+set\b(.*?)
1737
                 (?:\s*where\b(.*?))?
1738
                 (limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)?
1739
                 \Z
1740
              }
1741
              {__update_to_select($1, $2, $3, $4)}exsi
1742
      || $query =~ s{
1743
                    \A.*?
1744
                    (?:insert(?:\s+ignore)?|replace)\s+
1745
                    .*?\binto\b(.*?)\(([^\)]+)\)\s*
1746
                    values?\s*(\(.*?\))\s*
1747
                    (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
1748
                    \Z
1749
                 }
1750
                 {__insert_to_select($1, $2, $3)}exsi
1751
      || $query =~ s{
1752
                    \A.*?
1753
                    (?:insert(?:\s+ignore)?|replace)\s+
1754
                    (?:.*?\binto)\b(.*?)\s*
1755
                    set\s+(.*?)\s*
1756
                    (?:\blimit\b|on\s+duplicate\s+key.*)?\s*
1757
                    \Z
1758
                 }
1759
                 {__insert_to_select_with_set($1, $2)}exsi
1760
      || $query =~ s{
1761
                    \A.*?
1762
                    delete\s+(.*?)
1763
                    \bfrom\b(.*)
1764
                    \Z
1765
                 }
1766
                 {__delete_to_select($1, $2)}exsi;
1767
   $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si;
1768
   $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism;
1769
   return $query;
1770
}
1771
1772
sub convert_select_list {
1773
   my ( $self, $query ) = @_;
1774
   $query =~ s{
1775
               \A\s*select(.*?)\bfrom\b
1776
              }
1777
              {$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi;
1778
   return $query;
1779
}
1780
1781
sub __delete_to_select {
1782
   my ( $delete, $join ) = @_;
1783
   if ( $join =~ m/\bjoin\b/ ) {
1784
      return "select 1 from $join";
1785
   }
1786
   return "select * from $join";
1787
}
1788
1789
sub __insert_to_select {
1790
   my ( $tbl, $cols, $vals ) = @_;
1791
   PTDEBUG && _d('Args:', @_);
1792
   my @cols = split(/,/, $cols);
1793
   PTDEBUG && _d('Cols:', @cols);
1794
   $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens
1795
   my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g;
1796
   PTDEBUG && _d('Vals:', @vals);
1797
   if ( @cols == @vals ) {
1798
      return "select * from $tbl where "
1799
         . join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols));
1800
   }
1801
   else {
1802
      return "select * from $tbl limit 1";
1803
   }
1804
}
1805
1806
sub __insert_to_select_with_set {
1807
   my ( $from, $set ) = @_;
1808
   $set =~ s/,/ and /g;
1809
   return "select * from $from where $set ";
1810
}
1811
1812
sub __update_to_select {
1813
   my ( $from, $set, $where, $limit ) = @_;
1814
   return "select $set from $from "
1815
      . ( $where ? "where $where" : '' )
1816
      . ( $limit ? " $limit "      : '' );
1817
}
1818
1819
sub wrap_in_derived {
1820
   my ( $self, $query ) = @_;
1821
   return unless $query;
1822
   return $query =~ m/\A\s*select/i
1823
      ? "select 1 from ($query) as x limit 1"
1824
      : $query;
1825
}
1826
1827
sub _d {
1828
   my ($package, undef, $line) = caller 0;
1829
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
1830
        map { defined $_ ? $_ : 'undef' }
1831
        @_;
1832
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
1833
}
1834
1835
1;
1836
}
1837
# ###########################################################################
1838
# End QueryRewriter package
1839
# ###########################################################################
1840
1841
# ###########################################################################
1842
# This is a combination of modules and programs in one -- a runnable module.
1843
# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
1844
# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition.
1845
#
1846
# Check at the end of this package for the call to main() which actually runs
1847
# the program.
1848
# ###########################################################################
1849
package pt_fingerprint;
1850
1851
use English qw(-no_match_vars);
1852
use Data::Dumper;
1853
$Data::Dumper::Indent = 1;
1854
$OUTPUT_AUTOFLUSH     = 1;
1855
94.16.1 by fraserb at gmail
Replace the last instances of MKDEBUG
1856
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
217.3.1 by Daniel Nichter
Add pt-fingerprint.
1857
1858
sub main {
1859
   @ARGV    = @_;  # set global ARGV for this package
1860
1861
   # ##########################################################################
1862
   # Get configuration information.
1863
   # ##########################################################################
1864
   my $o = new OptionParser();
1865
   $o->get_specs();
1866
   $o->get_opts();
1867
   $o->usage_or_errors();
1868
1869
   my $qp = new QueryParser();
217.3.3 by Daniel Nichter
Rename preserve_embedded_numbers to match_embedded_numbers, and fingerprint_md5 to match_md5_checksums. Add corresponding options to pt-fingerprint.
1870
   my $qr = new QueryRewriter(
1871
      QueryParser            => $qp,
1872
      match_md5_checksums    => $o->get('match-md5-checksums'),
1873
      match_embedded_numbers => $o->get('match-embedded-numbers'),
1874
   );
217.3.1 by Daniel Nichter
Add pt-fingerprint.
1875
1876
   if ( $o->got('query') ) {
1877
      print $qr->fingerprint($o->get('query')), "\n";
1878
   }
1879
   else {
1880
      local $INPUT_RECORD_SEPARATOR = ";\n";
1881
      while ( <> ) {
1882
         my $query = $_;
1883
         chomp $query;
217.3.4 by Daniel Nichter
Test pt-fingerprint.
1884
         $query =~ s/^#.+$//mg;
1885
         $query =~ s/^\s+//;
1886
         next unless $query =~ m/^\w/;
217.3.1 by Daniel Nichter
Add pt-fingerprint.
1887
         print $qr->fingerprint($query), "\n";
1888
      }
1889
   }
1890
}
1891
1892
# ############################################################################
1893
# Run the program.
1894
# ############################################################################
1895
if ( !caller ) { exit main(@ARGV); }
1896
1897
1; # Because this is a module as well as a script.
1898
1899
# #############################################################################
1900
# Documentation.
1901
# #############################################################################
1902
1903
=pod
1904
1905
=head1 NAME
1906
1907
pt-fingerprint - Convert queries into fingerprints.
1908
1909
=head1 SYNOPSIS
1910
1911
Usage: pt-fingerprint [OPTIONS] [FILES]
1912
1913
pt-fingerprint converts queries into fingerprints.  With the --query
1914
option, converts the option's value into a fingerprint.  With no options, treats
1915
command-line arguments as FILEs and reads and converts semicolon-separated
1916
queries from the FILEs. When FILE is -, it read standard input.
1917
1918
Convert a single query:
1919
1920
  pt-fingerprint --query "select a, b, c from users where id = 500"
1921
1922
Convert a file full of queries:
1923
1924
  pt-fingerprint /path/to/file.txt
1925
1926
=head1 RISKS
1927
1928
The following section is included to inform users about the potential risks,
1929
whether known or unknown, of using this tool.  The two main categories of risks
1930
are those created by the nature of the tool (e.g. read-only tools vs. read-write
1931
tools) and those created by bugs.
1932
1933
The pt-fingerprint tool simply reads data and transforms it, so risks are
1934
minimal.
1935
1936
See also L<"BUGS"> for more information on filing bugs and getting help.
1937
1938
=head1 DESCRIPTION
1939
1940
A query fingerprint is the abstracted form of a query, which makes it possible
1941
to group similar queries together.  Abstracting a query removes literal values,
1942
normalizes whitespace, and so on.  For example, consider these two queries:
1943
1944
  SELECT name, password FROM user WHERE id='12823';
1945
  select name,   password from user
1946
     where id=5;
1947
1948
Both of those queries will fingerprint to
1949
1950
  select name, password from user where id=?
1951
1952
Once the query's fingerprint is known, we can then talk about a query as though
1953
it represents all similar queries.
1954
1955
Query fingerprinting accommodates a great many special cases, which have proven
1956
necessary in the real world.  For example, an IN list with 5 literals is really
1957
equivalent to one with 4 literals, so lists of literals are collapsed to a
1958
single one.  If you want to understand more about how and why all of these cases
1959
are handled, please review the test cases in the Subversion repository.  If you
1960
find something that is not fingerprinted properly, please submit a bug report
1961
with a reproducible test case.  Here is a list of transformations during
1962
fingerprinting, which might not be exhaustive:
1963
1964
=over
1965
1966
=item *
1967
1968
Group all SELECT queries from mysqldump together, even if they are against
1969
different tables.  Ditto for all of pt-table-checksum's checksum queries.
1970
1971
=item *
1972
1973
Shorten multi-value INSERT statements to a single VALUES() list.
1974
1975
=item *
1976
1977
Strip comments.
1978
1979
=item *
1980
1981
Abstract the databases in USE statements, so all USE statements are grouped
1982
together.
1983
1984
=item *
1985
1986
Replace all literals, such as quoted strings.  For efficiency, the code that
1987
replaces literal numbers is somewhat non-selective, and might replace some
1988
things as numbers when they really are not.  Hexadecimal literals are also
1989
replaced.  NULL is treated as a literal.  Numbers embedded in identifiers are
1990
also replaced, so tables named similarly will be fingerprinted to the same
1991
values (e.g. users_2009 and users_2010 will fingerprint identically).
1992
1993
=item *
1994
1995
Collapse all whitespace into a single space.
1996
1997
=item *
1998
1999
Lowercase the entire query.
2000
2001
=item *
2002
2003
Replace all literals inside of IN() and VALUES() lists with a single
2004
placeholder, regardless of cardinality.
2005
2006
=item *
2007
2008
Collapse multiple identical UNION queries into a single one.
2009
2010
=back
2011
2012
=head1 OPTIONS
2013
2014
This tool accepts additional command-line arguments.  Refer to the
2015
L<"SYNOPSIS"> and usage information for details.
2016
2017
=over
2018
2019
=item --config
2020
2021
type: Array
2022
2023
Read this comma-separated list of config files; if specified, this must be the
2024
first option on the command line.
2025
2026
=item --help
2027
2028
Show help and exit.
2029
217.3.3 by Daniel Nichter
Rename preserve_embedded_numbers to match_embedded_numbers, and fingerprint_md5 to match_md5_checksums. Add corresponding options to pt-fingerprint.
2030
=item --match-embedded-numbers
2031
2032
Match numbers embedded in words and replace as single values.  This option
2033
causes the tool to be more careful about matching numbers so that words
2034
with numbers, like C<catch22> are matched and replaced as a single C<?>
2035
placeholder.  Otherwise the default number matching pattern will replace
2036
C<catch22> as C<catch?>.
2037
2038
This is helpful if database or table names contain numbers.
2039
2040
=item --match-md5-checksums
2041
2042
Match MD5 checksums and replace as single values.  This option causes
2043
the tool to be more careful about matching numbers so that MD5 checksums
2044
like C<fbc5e685a5d3d45aa1d0347fdb7c4d35> are matched and replaced as a
2045
single C<?> placeholder.  Otherwise, the default number matching pattern will
2046
replace C<fbc5e685a5d3d45aa1d0347fdb7c4d35> as C<fbc?>.
2047
217.3.1 by Daniel Nichter
Add pt-fingerprint.
2048
=item --query
2049
2050
type: string
2051
2052
The query to convert into a fingerprint.
2053
2054
=item --version
2055
2056
Show version and exit.
2057
2058
=back
2059
2060
=head1 ENVIRONMENT
2061
2062
The environment variable C<PTDEBUG> enables verbose debugging output to STDERR.
2063
To enable debugging and capture all output to a file, run the tool like:
2064
2065
   PTDEBUG=1 pt-fingerprint ... > FILE 2>&1
2066
2067
Be careful: debugging output is voluminous and can generate several megabytes
2068
of output.
2069
2070
=head1 SYSTEM REQUIREMENTS
2071
2072
You need Perl, DBI, DBD::mysql, and some core packages that ought to be
2073
installed in any reasonably new version of Perl.
2074
2075
=head1 BUGS
2076
2077
For a list of known bugs, see L<http://www.percona.com/bugs/pt-fingerprint>.
2078
2079
Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>.
2080
Include the following information in your bug report:
2081
2082
=over
2083
2084
=item * Complete command-line used to run the tool
2085
2086
=item * Tool L<"--version">
2087
2088
=item * MySQL version of all servers involved
2089
2090
=item * Output from the tool including STDERR
2091
2092
=item * Input files (log/dump/config files, etc.)
2093
2094
=back
2095
2096
If possible, include debugging output by running the tool with C<PTDEBUG>;
2097
see L<"ENVIRONMENT">.
2098
2099
=head1 DOWNLOADING
2100
2101
Visit L<http://www.percona.com/software/percona-toolkit/> to download the
2102
latest release of Percona Toolkit.  Or, get the latest release from the
2103
command line:
2104
2105
   wget percona.com/get/percona-toolkit.tar.gz
2106
2107
   wget percona.com/get/percona-toolkit.rpm
2108
2109
   wget percona.com/get/percona-toolkit.deb
2110
2111
You can also get individual tools from the latest release:
2112
2113
   wget percona.com/get/TOOL
2114
2115
Replace C<TOOL> with the name of any tool.
2116
2117
=head1 AUTHORS
2118
2119
Baron Schwartz and Daniel Nichter
2120
2121
=head1 ABOUT PERCONA TOOLKIT
2122
2123
This tool is part of Percona Toolkit, a collection of advanced command-line
2124
tools developed by Percona for MySQL support and consulting.  Percona Toolkit
2125
was forked from two projects in June, 2011: Maatkit and Aspersa.  Those
2126
projects were created by Baron Schwartz and developed primarily by him and
2127
Daniel Nichter, both of whom are employed by Percona.  Visit
2128
L<http://www.percona.com/software/> for more software developed by Percona.
2129
2130
=head1 COPYRIGHT, LICENSE, AND WARRANTY
2131
2132
This program is copyright 2011-2012 Percona Inc.
2133
Feedback and improvements are welcome.
2134
2135
THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
2136
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
2137
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
2138
2139
This program is free software; you can redistribute it and/or modify it under
2140
the terms of the GNU General Public License as published by the Free Software
2141
Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
2142
systems, you can issue `man perlgpl' or `man perlartistic' to read these
2143
licenses.
2144
2145
You should have received a copy of the GNU General Public License along with
2146
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
2147
Place, Suite 330, Boston, MA  02111-1307  USA.
2148
2149
=head1 VERSION
2150
464.1.8 by Daniel Nichter
Check and update tool versions, release notes, user docs, etc. before build.
2151
pt-fingerprint 2.1.7
217.3.1 by Daniel Nichter
Add pt-fingerprint.
2152
2153
=cut