~percona-toolkit-dev/percona-toolkit/cant-nibble-bug-918056

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# This program is copyright 2009-2011 Percona Inc.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.
# ###########################################################################
# HTTPProtocolParser package
# ###########################################################################
{
# Package: HTTPProtocolParser
# HTTPProtocolParser parses HTTP traffic from tcpdump files.
package HTTPProtocolParser;
use base 'ProtocolParser';

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;

# server is the "host:port" of the sever being watched.  It's auto-guessed if
# not specified.
sub new {
   my ( $class, %args ) = @_;
   my $self = $class->SUPER::new(
      %args,
      port => 80,
   );
   return $self;
}

# Handles a packet from the server given the state of the session.  Returns an
# event if one was ready to be created, otherwise returns nothing.
sub _packet_from_server {
   my ( $self, $packet, $session, $misc ) = @_;
   die "I need a packet"  unless $packet;
   die "I need a session" unless $session;

   PTDEBUG && _d('Packet is from server; client state:', $session->{state}); 

   # If there's no session state, then we're catching a server response
   # mid-stream.
   if ( !$session->{state} ) {
      PTDEBUG && _d('Ignoring mid-stream server response');
      return;
   }

   if ( $session->{out_of_order} ) {
      # We're waiting for the header so we can get the content length.
      # Once we know this, we can determine how many out of order packets
      # we need to complete the request, then order them and re-process.
      my ($line1, $content);
      if ( !$session->{have_header} ) {
         ($line1, $content) = $self->_parse_header(
            $session, $packet->{data}, $packet->{data_len});
      }
      if ( $line1 ) {
         $session->{have_header} = 1;
         $packet->{content_len}  = length $content;
         PTDEBUG && _d('Got out of order header with',
            $packet->{content_len}, 'bytes of content');
      }
      my $have_len = $packet->{content_len} || $packet->{data_len};
      map { $have_len += $_->{data_len} }
         @{$session->{packets}};
      $session->{have_all_packets}
         = 1 if $session->{attribs}->{bytes}
                && $have_len >= $session->{attribs}->{bytes};
      PTDEBUG && _d('Have', $have_len, 'of', $session->{attribs}->{bytes});
      return;
   }

   # Assume that the server is returning only one value. 
   # TODO: make it handle multiple.
   if ( $session->{state} eq 'awaiting reply' ) {

      # Save this early because we may return early if the packets
      # are being received out of order.  Also, save it only once
      # in case we re-process packets if they're out of order.
      $session->{start_reply} = $packet->{ts} unless $session->{start_reply};

      # Get first line of header and first chunk of contents/data.
      my ($line1, $content) = $self->_parse_header($session, $packet->{data},
            $packet->{data_len});

      # The reponse, when in order, is text header followed by data.
      # If there's no line1, then we didn't get the text header first
      # which means we're getting the response in out of order packets.
      if ( !$line1 ) {
         $session->{out_of_order}     = 1;  # alert parent
         $session->{have_all_packets} = 0;
         return;
      }

      # First line should be: version  code phrase
      # E.g.:                 HTTP/1.1  200 OK
      my ($version, $code, $phrase) = $line1 =~ m/(\S+)/g;
      $session->{attribs}->{Status_code} = $code;
      PTDEBUG && _d('Status code for last', $session->{attribs}->{arg},
         'request:', $session->{attribs}->{Status_code});

      my $content_len = $content ? length $content : 0;
      PTDEBUG && _d('Got', $content_len, 'bytes of content');
      if ( $session->{attribs}->{bytes}
           && $content_len < $session->{attribs}->{bytes} ) {
         $session->{data_len}  = $session->{attribs}->{bytes};
         $session->{buff}      = $content;
         $session->{buff_left} = $session->{attribs}->{bytes} - $content_len;
         PTDEBUG && _d('Contents not complete,', $session->{buff_left},
            'bytes left');
         $session->{state} = 'recving content';
         return;
      }
   }
   elsif ( $session->{state} eq 'recving content' ) {
      if ( $session->{buff} ) {
         PTDEBUG && _d('Receiving content,', $session->{buff_left},
            'bytes left');
         return;
      }
      PTDEBUG && _d('Contents received');
   }
   else {
      # TODO:
      warn "Server response in unknown state"; 
      return;
   }

   PTDEBUG && _d('Creating event, deleting session');
   $session->{end_reply} = $session->{ts_max} || $packet->{ts};
   my $event = $self->make_event($session, $packet);
   delete $self->{sessions}->{$session->{client}}; # http is stateless!
   return $event;
}

# Handles a packet from the client given the state of the session.
sub _packet_from_client {
   my ( $self, $packet, $session, $misc ) = @_;
   die "I need a packet"  unless $packet;
   die "I need a session" unless $session;

   PTDEBUG && _d('Packet is from client; state:', $session->{state});

   my $event;
   if ( ($session->{state} || '') =~ m/awaiting / ) {
      PTDEBUG && _d('More client headers:', $packet->{data});
      return;
   }

   if ( !$session->{state} ) {
      $session->{state} = 'awaiting reply';
      my ($line1, undef) = $self->_parse_header($session, $packet->{data}, $packet->{data_len});
      # First line should be: request page      version
      # E.g.:                 GET     /foo.html HTTP/1.1
      my ($request, $page, $version) = $line1 =~ m/(\S+)/g;
      if ( !$request || !$page ) {
         PTDEBUG && _d("Didn't get a request or page:", $request, $page);
         return;
      }
      $request = lc $request;
      my $vh   = $session->{attribs}->{Virtual_host} || '';
      my $arg = "$request $vh$page";
      PTDEBUG && _d('arg:', $arg);

      if ( $request eq 'get' || $request eq 'post' ) {
         @{$session->{attribs}}{qw(arg)} = ($arg);
      }
      else {
         PTDEBUG && _d("Don't know how to handle a", $request, "request");
         return;
      }

      $session->{start_request}         = $packet->{ts};
      $session->{attribs}->{host}       = $packet->{src_host};
      $session->{attribs}->{pos_in_log} = $packet->{pos_in_log};
      $session->{attribs}->{ts}         = $packet->{ts};
   }
   else {
      # TODO:
      die "Probably multiple GETs from client before a server response?"; 
   }

   return $event;
}

sub _parse_header {
   my ( $self, $session, $data, $len, $no_recurse ) = @_;
   die "I need data" unless $data;
   my ($header, $content)    = split(/\r\n\r\n/, $data);
   my ($line1, $header_vals) = $header  =~ m/\A(\S+ \S+ .+?)\r\n(.+)?/s;
   PTDEBUG && _d('HTTP header:', $line1);
   return unless $line1;

   if ( !$header_vals ) {
      PTDEBUG && _d('No header vals');
      return $line1, undef;
   }
   my @headers;
   foreach my $val ( split(/\r\n/, $header_vals) ) {
      last unless $val;
      # Capture and save any useful header values.
      PTDEBUG && _d('HTTP header:', $val);
      if ( $val =~ m/^Content-Length/i ) {
         ($session->{attribs}->{bytes}) = $val =~ /: (\d+)/;
         PTDEBUG && _d('Saved Content-Length:', $session->{attribs}->{bytes});
      }
      if ( $val =~ m/Content-Encoding/i ) {
         ($session->{compressed}) = $val =~ /: (\w+)/;
         PTDEBUG && _d('Saved Content-Encoding:', $session->{compressed});
      }
      if ( $val =~ m/^Host/i ) {
         # The "host" attribute is already taken, so we call this "domain".
         ($session->{attribs}->{Virtual_host}) = $val =~ /: (\S+)/;
         PTDEBUG && _d('Saved Host:', ($session->{attribs}->{Virtual_host}));
      }
   }
   return $line1, $content;
}

sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

1;
}
# ###########################################################################
# End HTTPProtocolParser package
# ###########################################################################