~percona-toolkit-dev/percona-toolkit/release-2.2.2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# This program is copyright 2010-2011 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.
# ###########################################################################
# ExplainAnalyzer package
# ###########################################################################
{
# Package: ExplainAnalyzer
# ExplainAnalyzer contains utility methods for getting and manipulating
# EXPLAIN data.  It also has methods to save and retrieve information,
# so it actually has state itself if used in this way -- it is not a data-less
# collection of methods.
package ExplainAnalyzer;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;

use Data::Dumper;
$Data::Dumper::Indent    = 1;
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

sub new {
   my ( $class, %args ) = @_;
   foreach my $arg ( qw(QueryRewriter QueryParser) ) {
      die "I need a $arg argument" unless defined $args{$arg};
   }
   my $self = {
      %args,
   };
   return bless $self, $class;
}

# Gets an EXPLAIN plan for a query.  The arguments are:
#  dbh   The $dbh, which should already have the correct default database.  This
#        module does not run USE to select a default database.
#  query The query text.
# The return value is an arrayref of hash references gotten from EXPLAIN.  If
# the query is not a SELECT, we try to convert it into one.
sub explain_query {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(dbh query) ) {
      die "I need a $arg argument" unless defined $args{$arg};
   }
   my ($query, $dbh) = @args{qw(query dbh)};
   $query = $self->{QueryRewriter}->convert_to_select($query);
   if ( $query !~ m/^\s*select/i ) {
      PTDEBUG && _d("Cannot EXPLAIN non-SELECT query:",
         (length $query <= 100 ? $query : substr($query, 0, 100) . "..."));
      return;
   }
   my $sql = "EXPLAIN $query";
   PTDEBUG && _d($dbh, $sql);
   my $explain = $dbh->selectall_arrayref($sql, { Slice => {} });
   PTDEBUG && _d("Result of EXPLAIN:", Dumper($explain));
   return $explain;
}

# Normalizes raw EXPLAIN into a format that's easier to work with.  For example,
# the Extra column is parsed into a hash.  Accepts the output of explain_query()
# as input.
sub normalize {
   my ( $self, $explain ) = @_;
   my @result; # Don't modify the input.

   foreach my $row ( @$explain ) {
      $row = { %$row }; # Make a copy -- don't modify the input.

      # Several of the columns are really arrays of values in many cases.  For
      # example, the "key" column has an array when there is an index merge.
      foreach my $col ( qw(key possible_keys key_len ref) ) {
         $row->{$col} = [ split(/,/, $row->{$col} || '') ];
      }

      # Handle the Extra column.  Parse it into a hash by splitting on
      # semicolons.  There are many special cases to handle.
      $row->{Extra} = {
         map {
            my $var = $_;

            # Index merge query plans have an array of indexes to split up.
            if ( my ($key, $vals) = $var =~ m/(Using union)\(([^)]+)\)/ ) {
               $key => [ split(/,/, $vals) ];
            }

            # The default is just "this key/characteristic/flag exists."
            else {
               $var => 1;
            }
         }
         split(/; /, $row->{Extra} || '') # Split on semicolons.
      };

      push @result, $row;
   }

   return \@result;
}

# Trims down alternate indexes to those that were truly alternates (were not
# actually used).  For example, if key = 'foo' and possible_keys = 'foo,bar',
# then foo isn't an alternate index, only bar is.  The arguments are arrayrefs,
# and the return value is an arrayref too.
sub get_alternate_indexes {
   my ( $self, $keys, $possible_keys ) = @_;
   my %used = map { $_ => 1 } @$keys;
   return [ grep { !$used{$_} } @$possible_keys ];
}

# Returns a data structure that shows which indexes were used and considered for
# a given query and EXPLAIN plan.  Input parameters are:
#  query    The SQL of the query.
#  db       The default database.  When a table's database is not explicitly
#           qualified in the SQL itself, it defaults to this (optional) value.
#  explain  The normalized EXPLAIN plan: the output from $self->normalize().
# The return value is an arrayref of hashrefs, one per row in the query.  Each
# hashref has the following structure:
#  db    =>    The database of the table in question
#  tbl   =>    The table that was accessed
#  idx   =>    An arrayref of indexes accessed in this table
#  alt   =>    An arrayref of indexes considered but not accessed
sub get_index_usage {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(query explain) ) {
      die "I need a $arg argument" unless defined $args{$arg};
   }
   my ($query, $explain) = @args{qw(query explain)};
   my @result;

   # First we must get a lookup data structure to translate the possibly aliased
   # names back into real table names.
   my $lookup = $self->{QueryParser}->get_aliases($query);

   foreach my $row ( @$explain ) {

      # Filter out any row that doesn't access a (real) table.  However, a row
      # that accesses a table but not an index is still interesting, so we do
      # not filter that out.
      next if !defined $row->{table}
         # Tables named like <union1,2> are just internal temp tables, not real
         # tables that we can analyze.
         || $row->{table} =~ m/^<(derived|union)\d/;

      my $table = $lookup->{TABLE}->{$row->{table}} || $row->{table};
      my $db    = $lookup->{DATABASE}->{$table}     || $args{db};
      push @result, {
         db  => $db,
         tbl => $table,
         idx => $row->{key},
         alt => $self->get_alternate_indexes(
                  $row->{key}, $row->{possible_keys}),
      };
   }

   PTDEBUG && _d("Index usage for",
      (length $query <= 100 ? $query : substr($query, 0, 100) . "..."),
      ":", Dumper(\@result));
   return \@result;
}

# This method retrieves information about how a query uses indexes, if it
# has been saved through save_usage_for().  It is basically a cache for
# remembering "oh, I've seen exactly this query before.  No need to re-EXPLAIN
# and all that stuff."  The information returned is in the same form as that of
# get_index_usage().  If no usage has been saved for the arguments, the return
# value is undef.  The arguments are:
# - The query's checksum (not the fingerprint's checksum)
# - The database connection's default database.  If a query is run against two
#   different databases, it might use different tables and indexes.
sub get_usage_for {
   my ( $self, $checksum, $db ) = @_;
   die "I need a checksum and db" unless defined $checksum && defined $db;
   my $usage;
   if ( exists $self->{usage}->{$db} # Don't auto-vivify
     && exists $self->{usage}->{$db}->{$checksum} )
   {
      $usage = $self->{usage}->{$db}->{$checksum};
   }
   PTDEBUG && _d("Usage for",
      (length $checksum <= 100 ? $checksum : substr($checksum, 0, 100) . "..."),
      "on", $db, ":", Dumper($usage));
   return $usage;
}

# This methods saves the query's index usage patterns for later retrieval with
# get_usage_for().  See that method for an explanation of the arguments.
sub save_usage_for {
   my ( $self, $checksum, $db, $usage ) = @_;
   die "I need a checksum and db" unless defined $checksum && defined $db;
   $self->{usage}->{$db}->{$checksum} = $usage;
}

# Sub: fingerprint
#   Create a fingerprint of EXPLAIN data from <normalize()>.
#
# Parameters:
#   %args - Arguments
#
# Required Arguments:
#   explain - Hashref of normalized EXPLAIN data
#
# Returns:
#   Fingerprint string
sub fingerprint {
   my ( $self, %args ) = @_;
   my @required_args = qw(explain);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless defined $args{$arg};
   }
   my ($explain) = @args{@required_args};
}

sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

1;
}
# ###########################################################################
# End ExplainAnalyzer package
# ###########################################################################