~percona-toolkit-dev/percona-toolkit/release-2.2.3

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.
# ###########################################################################
# BinaryLogParser package
# ###########################################################################
{
# Package: BinaryLogParser
# BinaryLogParser parses binary log files converted to text by mysqlbinlog.
package BinaryLogParser;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;

use Data::Dumper;
$Data::Dumper::Indent    = 1;
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

my $binlog_line_1 = qr/at (\d+)$/m;
my $binlog_line_2 = qr/^#(\d{6}\s+\d{1,2}:\d\d:\d\d)\s+server\s+id\s+(\d+)\s+end_log_pos\s+(\d+)\s+(\S+)\s*([^\n]*)$/m;
my $binlog_line_2_rest = qr/thread_id=(\d+)\s+exec_time=(\d+)\s+error_code=(\d+)/m;

# Sub: new
#
# Parameters:
#   %args - Arguments
#
# Returns:
#   BinaryLogParser object
sub new {
   my ( $class, %args ) = @_;
   my $self = {
      delim     => undef,
      delim_len => 0,
   };
   return bless $self, $class;
}


# Sub: parse_event
#   Parse binary log events returned by input callback.  This sub implements
#   a standard interface that the *Parser.pm modules share.  Each such
#   module has a sub named "parse_event" that is expected to return one event
#   each time it's called.  Events are received from an input callback,
#   $args{next_event}.  The events are chunks of text that this sub parses
#   into a hashref representing the attributes and values of the event.
#
# Parameters:
#   %args - Arguments
#
# Required Arguments:
#   next_event - Coderef that returns the text of the next event from the input
#   tell       - Coderef that tells the file position being read in the input
#
# Optional Arguments:
#   oktorun - Coderef to tell caller that there are no more events
#
# Returns:
#   Hashref representing one event and its attributes and values
sub parse_event {
   my ( $self, %args ) = @_;
   my @required_args = qw(next_event tell);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($next_event, $tell) = @args{@required_args};

   local $INPUT_RECORD_SEPARATOR = ";\n#";
   my $pos_in_log = $tell->();
   my $stmt;
   my ($delim, $delim_len) = ($self->{delim}, $self->{delim_len});

   EVENT:
   while ( defined($stmt = $next_event->()) ) {
      my @properties = ('pos_in_log', $pos_in_log);
      my ($ts, $sid, $end, $type, $rest);
      $pos_in_log = $tell->();
      $stmt =~ s/;\n#?\Z//;

      my ( $got_offset, $got_hdr );
      my $pos = 0;
      my $len = length($stmt);
      my $found_arg = 0;
      LINE:
      while ( $stmt =~ m/^(.*)$/mg ) { # /g requires scalar match.
         $pos     = pos($stmt);  # Be careful not to mess this up!
         my $line = $1;          # Necessary for /g and pos() to work.
         $line    =~ s/$delim// if $delim;
         PTDEBUG && _d($line);

         if ( $line =~ m/^\/\*.+\*\/;/ ) {
            PTDEBUG && _d('Comment line');
            next LINE;
         }
 
         if ( $line =~ m/^DELIMITER/m ) {
            my ( $del ) = $line =~ m/^DELIMITER (\S*)$/m;
            if ( $del ) {
               $self->{delim_len} = $delim_len = length $del;
               $self->{delim}     = $delim     = quotemeta $del;
               PTDEBUG && _d('delimiter:', $delim);
            }
            else {
               # Because of the line $stmt =~ s/;\n#?\Z//; above, setting
               # the delimiter back to normal like "DELIMITER ;" appear as
               # "DELIMITER ".
               PTDEBUG && _d('Delimiter reset to ;');
               $self->{delim}     = $delim     = undef;
               $self->{delim_len} = $delim_len = 0;
            }
            next LINE;
         }

         next LINE if $line =~ m/End of log file/;

         # Match the beginning of an event in the binary log.
         if ( !$got_offset && (my ( $offset ) = $line =~ m/$binlog_line_1/m) ) {
            PTDEBUG && _d('Got the at offset line');
            push @properties, 'offset', $offset;
            $got_offset++;
         }

         # Match the 2nd line of binary log header, after "# at OFFSET".
         elsif ( !$got_hdr && $line =~ m/^#(\d{6}\s+\d{1,2}:\d\d:\d\d)/ ) {
            ($ts, $sid, $end, $type, $rest) = $line =~ m/$binlog_line_2/m;
            PTDEBUG && _d('Got the header line; type:', $type, 'rest:', $rest);
            push @properties, 'cmd', 'Query', 'ts', $ts, 'server_id', $sid,
               'end_log_pos', $end;
            $got_hdr++;
         }

         # Handle meta-data lines.
         elsif ( $line =~ m/^(?:#|use |SET)/i ) {

            # Include the current default database given by 'use <db>;'  Again
            # as per the code in sql/log.cc this is case-sensitive.
            if ( my ( $db ) = $line =~ m/^use ([^;]+)/ ) {
               PTDEBUG && _d("Got a default database:", $db);
               push @properties, 'db', $db;
            }

            # Some things you might see in the log output, as printed by
            # sql/log.cc (this time the SET is uppercaes, and again it is
            # case-sensitive).
            # SET timestamp=foo;
            # SET timestamp=foo,insert_id=123;
            # SET insert_id=123;
            elsif ( my ($setting) = $line =~ m/^SET\s+([^;]*)/ ) {
               PTDEBUG && _d("Got some setting:", $setting);
               push @properties, map { s/\s+//; lc } split(/,|\s*=\s*/, $setting);
            }

         }
         else {
            # This isn't a meta-data line.  It's the first line of the
            # whole query. Grab from here to the end of the string and
            # put that into the 'arg' for the event.  Then we are done.
            # Note that if this line really IS the query but we skip in
            # the 'if' above because it looks like meta-data, later
            # we'll remedy that.
            PTDEBUG && _d("Got the query/arg line at pos", $pos);
            $found_arg++;
            if ( $got_offset && $got_hdr ) {
               if ( $type eq 'Xid' ) {
                  my ($xid) = $rest =~ m/(\d+)/;
                  push @properties, 'Xid', $xid;
               }
               elsif ( $type eq 'Query' ) {
                  my ($i, $t, $c) = $rest =~ m/$binlog_line_2_rest/m;
                  push @properties, 'Thread_id', $i, 'Query_time', $t,
                                    'error_code', $c;
               }
               elsif ( $type eq 'Start:' ) {
                  # These are lines like "#090722  7:21:41 server id 12345
                  # end_log_pos 98 Start: binlog v 4, server v 5.0.82-log
                  # created 090722  7:21:41 at startup".  They may or may
                  # not have a statement after them (ROLLBACK can follow
                  # this line), so we do not want to skip these types.
                  PTDEBUG && _d("Binlog start");
               }
               else {
                  PTDEBUG && _d('Unknown event type:', $type);
                  next EVENT;
               }
            }
            else {
               PTDEBUG && _d("It's not a query/arg, it's just some SQL fluff");
               push @properties, 'cmd', 'Query', 'ts', undef;
            }

            # Removing delimiters alters the length of $stmt, so we account
            # for this in our substr() offset.  If $pos is equal to the length
            # of $stmt, then this $line is the whole $arg (i.e. one line
            # query).  In this case, we go back the $delim_len that was
            # removed from this $line.  Otherwise, there are more lines to
            # this arg so a delimiter has not yet been removed (it remains
            # somewhere in $arg, at least at the end).  Therefore, we do not
            # go back any extra.
            my $delim_len = ($pos == length($stmt) ? $delim_len : 0);
            my $arg = substr($stmt, $pos - length($line) - $delim_len);

            $arg =~ s/$delim// if $delim; # Remove the delimiter.

            # Sometimes DELIMITER appears at the end of an arg, so we have
            # to catch it again.  Queries in this arg before this new
            # DELIMITER should have the old delim, which is why we still
            # remove it in the previous line.
            if ( $arg =~ m/^DELIMITER/m ) {
               my ( $del ) = $arg =~ m/^DELIMITER (\S*)$/m;
               if ( $del ) {
                  $self->{delim_len} = $delim_len = length $del;
                  $self->{delim}     = $delim     = quotemeta $del;
                  PTDEBUG && _d('delimiter:', $delim);
               }
               else {
                  PTDEBUG && _d('Delimiter reset to ;');
                  $del       = ';';
                  $self->{delim}     = $delim     = undef;
                  $self->{delim_len} = $delim_len = 0;
               }

               $arg =~ s/^DELIMITER.*$//m;  # Remove DELIMITER from arg.
            }

            $arg =~ s/;$//gm;  # Ensure ending ; are gone.
            $arg =~ s/\s+$//;  # Remove trailing spaces and newlines.

            push @properties, 'arg', $arg, 'bytes', length($arg);
            last LINE;
         }
      } # LINE

      if ( $found_arg ) {
         # Don't dump $event; want to see full dump of all properties, and after
         # it's been cast into a hash, duplicated keys will be gone.
         PTDEBUG && _d('Properties of event:', Dumper(\@properties));
         my $event = { @properties };
         if ( $args{stats} ) {
            $args{stats}->{events_read}++;
            $args{stats}->{events_parsed}++;
         }
         return $event;
      }
      else {
         PTDEBUG && _d('Event had no arg');
      }
   } # EVENT

   $args{oktorun}->(0) if $args{oktorun};
   return;
}

sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

1;
}
# ###########################################################################
# End BinaryLogParser package
# ###########################################################################