~percona-toolkit-dev/percona-toolkit/docu-ptc-rbr-limitation

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.
# ###########################################################################
# SlowLogParser package
# ###########################################################################
{
# Package: SlowLogParser
# SlowLogParser parses MySQL slow logs.
package SlowLogParser;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;

use Data::Dumper;
$Data::Dumper::Indent    = 1;
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

sub new {
   my ( $class ) = @_;
   my $self = {
      pending => [],
   };
   return bless $self, $class;
}

my $slow_log_ts_line = qr/^# Time: ([0-9: ]{15})/;
my $slow_log_uh_line = qr/# User\@Host: ([^\[]+|\[[^[]+\]).*?@ (\S*) \[(.*)\]/;
# These can appear in the log file when it's opened -- for example, when someone
# runs FLUSH LOGS or the server starts.
# /usr/sbin/mysqld, Version: 5.0.67-0ubuntu6-log ((Ubuntu)). started with:
# Tcp port: 3306  Unix socket: /var/run/mysqld/mysqld.sock
# Time                 Id Command    Argument
# These lines vary depending on OS and whether it's embedded.
my $slow_log_hd_line = qr{
      ^(?:
      T[cC][pP]\s[pP]ort:\s+\d+ # case differs on windows/unix
      |
      [/A-Z].*mysqld,\sVersion.*(?:started\swith:|embedded\slibrary)
      |
      Time\s+Id\s+Command
      ).*\n
   }xm;

# This method accepts an open slow log filehandle and callback functions.
# It reads events from the filehandle and calls the callbacks with each event.
# It may find more than one event per call.  $misc is some placeholder for the
# future and for compatibility with other query sources.
#
# Each event is a hashref of attribute => value pairs like:
#  my $event = {
#     ts  => '',    # Timestamp
#     id  => '',    # Connection ID
#     arg => '',    # Argument to the command
#     other attributes...
#  };
#
# Returns the number of events it finds.
#
# NOTE: If you change anything inside this subroutine, you need to profile
# the result.  Sometimes a line of code has been changed from an alternate
# form for performance reasons -- sometimes as much as 20x better performance.
sub parse_event {
   my ( $self, %args ) = @_;
   my @required_args = qw(next_event tell);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($next_event, $tell) = @args{@required_args};

   # Read a whole stmt at a time.  But, to make things even more fun, sometimes
   # part of the log entry might continue past the separator.  In these cases we
   # peek ahead (see code below.)  We do it this way because in the general
   # case, reading line-by-line is too slow, and the special-case code is
   # acceptable.  And additionally, the line terminator doesn't work for all
   # cases; the header lines might follow a statement, causing the paragraph
   # slurp to grab more than one statement at a time.
   my $pending = $self->{pending};
   local $INPUT_RECORD_SEPARATOR = ";\n#";
   my $trimlen    = length($INPUT_RECORD_SEPARATOR);
   my $pos_in_log = $tell->();
   my $stmt;

   EVENT:
   while (
         defined($stmt = shift @$pending)
      or defined($stmt = $next_event->())
   ) {
      my @properties = ('cmd', 'Query', 'pos_in_log', $pos_in_log);
      $pos_in_log = $tell->();

      # If there were such lines in the file, we may have slurped > 1 event.
      # Delete the lines and re-split if there were deletes.  This causes the
      # pos_in_log to be inaccurate, but that's really okay.
      if ( $stmt =~ s/$slow_log_hd_line//go ){ # Throw away header lines in log
         my @chunks = split(/$INPUT_RECORD_SEPARATOR/o, $stmt);
         if ( @chunks > 1 ) {
            PTDEBUG && _d("Found multiple chunks");
            $stmt = shift @chunks;
            unshift @$pending, @chunks;
         }
      }

      # There might not be a leading '#' because $INPUT_RECORD_SEPARATOR will
      # have gobbled that up.  And the end may have all/part of the separator.
      $stmt = '#' . $stmt unless $stmt =~ m/\A#/;
      $stmt =~ s/;\n#?\Z//;

      # The beginning of a slow-query-log event should be something like
      # # Time: 071015 21:43:52
      # Or, it might look like this, sometimes at the end of the Time: line:
      # # User@Host: root[root] @ localhost []

      # The following line contains variables intended to be sure we do
      # particular things once and only once, for those regexes that will
      # match only one line per event, so we don't keep trying to re-match
      # regexes.
      my ($got_ts, $got_uh, $got_ac, $got_db, $got_set, $got_embed);
      my $pos = 0;
      my $len = length($stmt);
      my $found_arg = 0;
      LINE:
      while ( $stmt =~ m/^(.*)$/mg ) { # /g is important, requires scalar match.
         $pos     = pos($stmt);  # Be careful not to mess this up!
         my $line = $1;          # Necessary for /g and pos() to work.
         PTDEBUG && _d($line);

         # Handle meta-data lines.  These are case-sensitive.  If they appear in
         # the log with a different case, they are from a user query, not from
         # something printed out by sql/log.cc.
         if ($line =~ m/^(?:#|use |SET (?:last_insert_id|insert_id|timestamp))/o) {

            # Maybe it's the beginning of the slow query log event.  XXX
            # something to know: Perl profiling reports this line as the hot
            # spot for any of the conditions in the whole if/elsif/elsif
            # construct.  So if this line looks "hot" then profile each
            # condition separately.
            if ( !$got_ts && (my ( $time ) = $line =~ m/$slow_log_ts_line/o)) {
               PTDEBUG && _d("Got ts", $time);
               push @properties, 'ts', $time;
               ++$got_ts;
               # The User@Host might be concatenated onto the end of the Time.
               if ( !$got_uh
                  && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
               ) {
                  PTDEBUG && _d("Got user, host, ip", $user, $host, $ip);
                  push @properties, 'user', $user, 'host', $host, 'ip', $ip;
                  ++$got_uh;
               }
            }

            # Maybe it's the user/host line of a slow query log
            # # User@Host: root[root] @ localhost []
            elsif ( !$got_uh
                  && ( my ( $user, $host, $ip ) = $line =~ m/$slow_log_uh_line/o )
            ) {
               PTDEBUG && _d("Got user, host, ip", $user, $host, $ip);
               push @properties, 'user', $user, 'host', $host, 'ip', $ip;
               ++$got_uh;
            }

            # A line that looks like meta-data but is not:
            # # administrator command: Quit;
            elsif (!$got_ac && $line =~ m/^# (?:administrator command:.*)$/) {
               PTDEBUG && _d("Got admin command");
               $line =~ s/^#\s+//;  # string leading "# ".
               push @properties, 'cmd', 'Admin', 'arg', $line;
               push @properties, 'bytes', length($properties[-1]);
               ++$found_arg;
               ++$got_ac;
            }

            # Maybe it's the timing line of a slow query log, or another line
            # such as that... they typically look like this:
            # # Query_time: 2  Lock_time: 0  Rows_sent: 1  Rows_examined: 0
            elsif ( $line =~ m/^# +[A-Z][A-Za-z_]+: \S+/ ) { # Make the test cheap!
               PTDEBUG && _d("Got some line with properties");

               # http://code.google.com/p/maatkit/issues/detail?id=1104
               if ( $line =~ m/Schema:\s+\w+: / ) {
                  PTDEBUG && _d('Removing empty Schema attrib');
                  $line =~ s/Schema:\s+//;
                  PTDEBUG && _d($line);
               }

               # I tried using split, but coping with the above bug makes it
               # slower than a complex regex match.
               my @temp = $line =~ m/(\w+):\s+(\S+|\Z)/g;
               push @properties, @temp;
            }

            # Include the current default database given by 'use <db>;'  Again
            # as per the code in sql/log.cc this is case-sensitive.
            elsif ( !$got_db && (my ( $db ) = $line =~ m/^use ([^;]+)/ ) ) {
               PTDEBUG && _d("Got a default database:", $db);
               push @properties, 'db', $db;
               ++$got_db;
            }

            # Some things you might see in the log output, as printed by
            # sql/log.cc (this time the SET is uppercaes, and again it is
            # case-sensitive).
            # SET timestamp=foo;
            # SET timestamp=foo,insert_id=123;
            # SET insert_id=123;
            elsif (!$got_set && (my ($setting) = $line =~ m/^SET\s+([^;]*)/)) {
               # Note: this assumes settings won't be complex things like
               # SQL_MODE, which as of 5.0.51 appears to be true (see sql/log.cc,
               # function MYSQL_LOG::write(THD, char*, uint, time_t)).
               PTDEBUG && _d("Got some setting:", $setting);
               push @properties, split(/,|\s*=\s*/, $setting);
               ++$got_set;
            }

            # Handle pathological special cases. The "# administrator command"
            # is one example: it can come AFTER lines that are not commented,
            # so it looks like it belongs to the next event, and it won't be
            # in $stmt. Profiling shows this is an expensive if() so we do
            # this only if we've seen the user/host line.
            if ( !$found_arg && $pos == $len ) {
               PTDEBUG && _d("Did not find arg, looking for special cases");
               local $INPUT_RECORD_SEPARATOR = ";\n";
               if ( defined(my $l = $next_event->()) ) {
                  chomp $l;
                  $l =~ s/^\s+//;
                  PTDEBUG && _d("Found admin statement", $l);
                  push @properties, 'cmd', 'Admin', 'arg', $l;
                  push @properties, 'bytes', length($properties[-1]);
                  $found_arg++;
               }
               else {
                  # Unrecoverable -- who knows what happened.  This is possible,
                  # for example, if someone does something like "head -c 10000
                  # /path/to/slow.log | mk-log-parser".  Or if there was a
                  # server crash and the file has no newline.
                  PTDEBUG && _d("I can't figure out what to do with this line");
                  next EVENT;
               }
            }
         }
         else {
            # This isn't a meta-data line.  It's the first line of the
            # whole query. Grab from here to the end of the string and
            # put that into the 'arg' for the event.  Then we are done.
            # Note that if this line really IS the query but we skip in
            # the 'if' above because it looks like meta-data, later
            # we'll remedy that.
            PTDEBUG && _d("Got the query/arg line");
            my $arg = substr($stmt, $pos - length($line));
            push @properties, 'arg', $arg, 'bytes', length($arg);
            # Handle embedded attributes.
            if ( $args{misc} && $args{misc}->{embed}
               && ( my ($e) = $arg =~ m/($args{misc}->{embed})/)
            ) {
               push @properties, $e =~ m/$args{misc}->{capture}/g;
            }
            last LINE;
         }
      }

      # Don't dump $event; want to see full dump of all properties, and after
      # it's been cast into a hash, duplicated keys will be gone.
      PTDEBUG && _d('Properties of event:', Dumper(\@properties));
      my $event = { @properties };
      if ( $args{stats} ) {
         $args{stats}->{events_read}++;
         $args{stats}->{events_parsed}++;
      }
      return $event;
   } # EVENT

   @$pending = ();
   $args{oktorun}->(0) if $args{oktorun};
   return;
}

sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

1;
}
# ###########################################################################
# End SlowLogParser package
# ###########################################################################