# This program is copyright 2008-2011 Percona Inc.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# LogSplitter package $Revision: 7177 $
# ###########################################################################

# Package: LogSplitter
# LogSplitter splits MySQL query logs by sessions.
package LogSplitter;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant MKDEBUG => $ENV{MKDEBUG} || 0;

use Data::Dumper;
$Data::Dumper::Indent    = 1;
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

# File-scoped run flag shared by split() and _save_event(): split() resets it
# to true on entry and stops reading events as soon as it becomes false.
my $oktorun = 1;

# Constructs a LogSplitter object.
#
# Required arguments (new() dies if any of them is false):
#   attribute     - name of the event attribute that identifies a session
#   base_dir      - directory under which numbered session dirs are created;
#                   a trailing '/' is appended if missing
#   parser        - object whose parse_event() is called by split()
#   session_files - number of merged session files and, with split_random,
#                   the number of round-robin session slots
#
# Optional arguments override the defaults in the hash below.  When
# split_random is true the attribute is replaced by '_sessionno', which
# split() assigns to every event in round-robin order.
#
# Returns the blessed object.
sub new {
   my ( $class, %args ) = @_;
   foreach my $arg ( qw(attribute base_dir parser session_files) ) {
      die "I need a $arg argument" unless $args{$arg};
   }

   # TODO: this is probably problematic on Windows
   $args{base_dir} .= '/' if substr($args{base_dir}, -1, 1) ne '/';

   if ( $args{split_random} ) {
      MKDEBUG && _d('Split random');
      $args{attribute} = '_sessionno';  # set round-robin 1..session_files
   }

   my $self = {
      # %args will override these default args if given explicitly.
      # NOTE(review): the merge_sessions, session_files, quiet and verbose
      # defaults follow the upstream Maatkit module; confirm against callers.
      base_file_name    => 'session',
      max_dirs          => 1_000,
      max_files_per_dir => 5_000,
      max_sessions      => 5_000_000,  # max_dirs * max_files_per_dir
      merge_sessions    => 1,
      session_files     => 64,
      quiet             => 0,
      verbose           => 0,
      max_open_files    => 1_000,
      close_lru_files   => 100,
      # Override default args above.
      %args,
      # These args cannot be overridden.
      n_dirs_total       => 0,  # total number of dirs created
      n_files_total      => 0,  # total number of session files created
      n_files_this_dir   => -1, # number of session files in current dir
      session_fhs        => [], # filehandles for each session
      n_open_fhs         => 0,  # current number of open session filehandles
      n_events_total     => 0,  # total number of events in log
      n_events_saved     => 0,  # total number of events saved
      n_sessions_skipped => 0,  # total number of sessions skipped
      n_sessions_saved   => 0,  # number of sessions saved
      sessions           => {}, # sessions data store
      created_dirs       => [], # dirs created by _get_next_session_file()
   };

   MKDEBUG && _d('new LogSplitter final args:', Dumper($self));
   return bless $self, $class;
}

# Splits the given logs into per-session files.
#
# Parameters:
#   @logs - log file names; '-' means STDIN, and STDIN is read implicitly
#           when no logs are given.  Undefined entries and entries that are
#           not plain files are skipped (the latter with a warning).
#
# Every event returned by the parser's parse_event() is counted, tagged with
# a round-robin _sessionno when split_random is set, passed through the
# optional callbacks (a callback returning false drops the event) and handed
# to _save_event().  Reading stops early when _save_event() clears $oktorun.
# Afterwards the open session filehandles are closed, the session files are
# merged if merge_sessions is true, and the summary is printed unless quiet.
#
# Returns nothing.
sub split {
   my ( $self, @logs ) = @_;
   $oktorun = 1; # True as long as we haven't created too many
                 # session files or too many dirs and files

   my $callbacks = $self->{callbacks};

   my $next_sessionno;
   if ( $self->{split_random} ) {
      # round-robin iterator
      $next_sessionno = make_rr_iter(1, $self->{session_files});
   }

   if ( @logs == 0 ) {
      MKDEBUG && _d('Implicitly reading STDIN because no logs were given');
      push @logs, '-';
   }

   # Split all the log files.
   my $lp = $self->{parser};
   LOG:
   foreach my $log ( @logs ) {
      last unless $oktorun;
      next unless defined $log;

      if ( !-f $log && $log ne '-' ) {
         warn "Skipping $log because it is not a file";
         next LOG;
      }
      my $fh;
      if ( $log eq '-' ) {
         $fh = *STDIN;
      }
      else {
         if ( !open $fh, "<", $log ) {
            warn "Cannot open $log: $OS_ERROR\n";
            next LOG;
         }
      }

      MKDEBUG && _d('Splitting', $log);
      my $event           = {};
      my $more_events     = 1;
      # The parser calls this with a false value once the log is exhausted.
      my $more_events_sub = sub { $more_events = $_[0]; };
      EVENT:
      while ( $oktorun ) {
         $event = $lp->parse_event(
            next_event => sub { return <$fh>;    },
            tell       => sub { return tell $fh; },
            oktorun    => $more_events_sub,
         );
         if ( $event ) {
            $self->{n_events_total}++;
            if ( $self->{split_random} ) {
               $event->{_sessionno} = $next_sessionno->();
            }
            if ( $callbacks ) {
               foreach my $callback ( @$callbacks ) {
                  $event = $callback->($event);
                  last unless $event;
               }
            }
            $self->_save_event($event) if $event;
         }
         if ( !$more_events ) {
            MKDEBUG && _d('Done parsing', $log);
            close $fh;
            next LOG;
         }
         last LOG unless $oktorun;
      }
   }

   # Close session filehandles.  Handles of inactive sessions were already
   # closed by _close_lru_session(), so only active ones are closed here;
   # they are marked inactive so a later event reopens them in append mode
   # instead of printing to a closed handle.
   while ( my $fh = pop @{ $self->{session_fhs} } ) {
      my $session = $self->{sessions}->{ $fh->{session_id} };
      next unless $session && $session->{active};
      close $fh->{fh}
         or warn "Cannot close fh for session $fh->{session_id}: "
            . "$OS_ERROR\n";
      $session->{active} = 0;
   }
   $self->{n_open_fhs} = 0;

   $self->_merge_session_files() if $self->{merge_sessions};
   $self->print_split_summary() unless $self->{quiet};

   return;
}

# Writes one event to the session file of the session it belongs to.
#
# Parameters:
#   $event - hashref; its session attribute, arg, cmd, db and Schema keys
#            are read here or in _get_session_ds()
#
# A session seen for the first time gets a new session file; a session whose
# filehandle was closed by _close_lru_session() is reopened in append mode.
# If no further session file can be created, $oktorun is cleared so split()
# stops, and the event is not saved.
#
# Returns nothing.
sub _save_event {
   my ( $self, $event ) = @_;
   my ($session, $session_id) = $self->_get_session_ds($event);
   return unless $session;

   if ( !defined $session->{fh} ) {
      # Incremented first because _get_next_session_file() uses this counter
      # to number the new file.
      $self->{n_sessions_saved}++;
      MKDEBUG && _d('New session:', $session_id, ',',
         $self->{n_sessions_saved}, 'of', $self->{max_sessions});

      my $session_file = $self->_get_next_session_file();
      if ( !$session_file ) {
         # The session was never written: undo the count and drop its
         # entry so nothing later sees a session without a session_file.
         $self->{n_sessions_saved}--;
         delete $self->{sessions}->{ $session_id };
         $oktorun = 0;
         MKDEBUG && _d('Not oktorun because no _get_next_session_file');
         return;
      }

      # Close Last Recently Used session fhs if opening if this new
      # session fh will cause us to have too many open files.
      if ( $self->{n_open_fhs} >= $self->{max_open_files} ) {
         $self->_close_lru_session();
      }

      # Open a fh for this session file.
      open my $fh, '>', $session_file
         or die "Cannot open session file $session_file: $OS_ERROR";
      $session->{fh} = $fh;
      $self->{n_open_fhs}++;

      # Save fh and session file in case we need to open/close it later.
      $session->{active}       = 1;
      $session->{session_file} = $session_file;

      push @{$self->{session_fhs}}, { fh => $fh, session_id => $session_id };

      MKDEBUG && _d('Created', $session_file, 'for session',
         $self->{attribute}, '=', $session_id);

      # This special comment lets mk-log-player know when a session begins.
      print $fh "-- START SESSION $session_id\n\n";
   }
   elsif ( !$session->{active} ) {
      # Reopen the existing but inactive session.  This happens when
      # a new session (above) had to close LRU session fhs.

      # Again, close Last Recently Used session fhs if reopening if this
      # session's fh will cause us to have too many open files.
      if ( $self->{n_open_fhs} >= $self->{max_open_files} ) {
         $self->_close_lru_session();
      }

      # Reopen this session's fh.
      open $session->{fh}, '>>', $session->{session_file}
         or die "Cannot reopen session file "
            . "$session->{session_file}: $OS_ERROR";

      # Mark this session as active again.
      $session->{active} = 1;
      $self->{n_open_fhs}++;

      MKDEBUG && _d('Reopend', $session->{session_file}, 'for session',
         $self->{attribute}, '=', $session_id);
   }
   else {
      MKDEBUG && _d('Event belongs to active session', $session_id);
   }

   my $session_fh = $session->{fh};

   # Print USE db if 1) we haven't done so yet or 2) the db has changed.
   my $db = $event->{db} || $event->{Schema};
   if ( $db && ( !defined $session->{db} || $session->{db} ne $db ) ) {
      print $session_fh "use $db\n\n";
      $session->{db} = $db;
   }

   print $session_fh $self->flatten($event->{arg}), "\n\n";
   $self->{n_events_saved}++;

   return;
}

# Returns shortcut to session data store and id for the given event.
# The returned session will be undef if no more sessions are allowed.
#
# Parameters:
#   $event - hashref; the key named by $self->{attribute} is the session id
#
# Returns the list ($session, $session_id), or an empty list when the event
# cannot be saved at all: its session attribute is false, it has no arg, or
# its cmd is 'Admin'.
#
# NOTE(review): n_sessions_skipped is incremented once per event of a
# skipped session, not once per distinct session.
sub _get_session_ds {
   my ( $self, $event ) = @_;

   my $attrib = $self->{attribute};
   if ( !$event->{ $attrib } ) {
      MKDEBUG && _d('No attribute', $attrib, 'in event:', Dumper($event));
      return;
   }

   # This could indicate a problem in parser not parsing
   # a log event correctly thereby leaving $event->{arg} undefined.
   # Or, it could simply be an event like:
   #   USE db;
   #   SET NAMES utf8;
   return unless $event->{arg};

   # Don't print admin commands like quit or ping because these
   # cannot be played.
   return if ($event->{cmd} || '') eq 'Admin';

   my $session;
   my $session_id = $event->{ $attrib };

   # The following is necessary to prevent Perl from auto-vivifying
   # a lot of empty hashes for new sessions that are ignored due to
   # already having max_sessions.
   if ( $self->{n_sessions_saved} < $self->{max_sessions} ) {
      # Will auto-vivify if necessary.
      $session = $self->{sessions}->{ $session_id } ||= {};
   }
   elsif ( exists $self->{sessions}->{ $session_id } ) {
      # Use only existing sessions.
      $session = $self->{sessions}->{ $session_id };
   }
   else {
      $self->{n_sessions_skipped} += 1;
      MKDEBUG && _d('Skipping new session', $session_id,
         'because max_sessions is reached');
   }

   return $session, $session_id;
}

# Closes up to close_lru_files open session filehandles and marks their
# sessions inactive, so _save_event() reopens them in append mode when they
# receive another event.
#
# session_fhs is in creation order, so "least recently used" is approximated
# by walking that list cyclically from a saved cursor ($self->{lru_cursor})
# and closing the sessions that are still active.  Only active sessions are
# touched, which keeps n_open_fhs accurate even after sessions have been
# reopened, and the walk never indexes past the end of the list.
#
# Returns nothing.
sub _close_lru_session {
   my ( $self ) = @_;
   my $session_fhs = $self->{session_fhs};
   my $n_fhs       = scalar @$session_fhs;
   return unless $n_fhs;

   my $cursor = $self->{lru_cursor} || 0;
   $cursor    = 0 if $cursor >= $n_fhs;

   MKDEBUG && _d('Closing up to', $self->{close_lru_files},
      'session fhs starting at', $cursor,
      '(', $self->{n_sessions_saved}, 'sessions',
      $self->{n_open_fhs}, 'open fhs)');

   my $n_closed  = 0;
   my $n_visited = 0;
   while ( $n_closed < $self->{close_lru_files} && $n_visited < $n_fhs ) {
      my $entry   = $session_fhs->[$cursor];
      my $session = $self->{sessions}->{ $entry->{session_id} };
      if ( $session && $session->{active} ) {
         close $entry->{fh}
            or warn "Cannot close fh for session $entry->{session_id}: "
               . "$OS_ERROR\n";
         $session->{active} = 0;
         $self->{n_open_fhs}--;
         $n_closed++;
      }
      $cursor = ($cursor + 1) % $n_fhs;
      $n_visited++;
   }
   $self->{lru_cursor} = $cursor;

   return;
}

# Returns the next session file name on success, or nothing (undef in scalar
# context) on failure.  It fails only when the current directory is full and
# max_dirs directories have already been used.
#
# Parameters:
#   $n - optional session number for the file name; defaults to
#        $self->{n_sessions_saved}
#
# Directories are named base_dir/1, base_dir/2, ... and each one receives up
# to max_files_per_dir session files.  Dies if a directory cannot be created.
sub _get_next_session_file {
   my ( $self, $n ) = @_;

   # n_files_this_dir will only be < 0 for the first dir and file
   # because n_file is set to -1 in new().  This is a hack
   # to cause the first dir and file to be created automatically.
   my $need_new_dir
      =  $self->{n_files_this_dir} < 0
      || $self->{n_files_this_dir} >= $self->{max_files_per_dir};

   if ( $need_new_dir ) {
      # Checking the limit only here lets the last allowed directory be
      # filled completely instead of receiving just one file.
      return if $self->{n_dirs_total} >= $self->{max_dirs};

      $self->{n_dirs_total}++;
      $self->{n_files_this_dir} = 0;
      my $new_dir = "$self->{base_dir}$self->{n_dirs_total}";
      if ( !-d $new_dir ) {
         # Built-in mkdir: no shell involved, and $OS_ERROR is meaningful.
         mkdir $new_dir
            or die "Cannot create new directory $new_dir: $OS_ERROR";
         MKDEBUG && _d('Created new base_dir', $new_dir);
         push @{$self->{created_dirs}}, $new_dir;
      }
      elsif ( MKDEBUG ) {
         _d($new_dir, 'already exists');
      }
   }
   else {
      MKDEBUG && _d('No dir created; n_files_this_dir:',
         $self->{n_files_this_dir}, 'n_files_total:',
         $self->{n_files_total});
   }

   $self->{n_files_total}++;
   $self->{n_files_this_dir}++;
   my $dir_n        = $self->{n_dirs_total} . '/';
   my $session_n    = sprintf '%d', $n || $self->{n_sessions_saved};
   my $session_file = $self->{base_dir}
                    . $dir_n
                    . $self->{base_file_name}."-$session_n.txt";
   MKDEBUG && _d('Next session file', $session_file);
   return $session_file;
}

# Flattens multiple new-line and spaces to single new-lines and spaces
# and remove /* comment */ blocks.
#
# Parameters:
#   $query - query text
#
# Returns the flattened query, or nothing if $query is false.
sub flatten {
   my ( $self, $query ) = @_;
   return unless $query;
   # /s lets . match newlines so comments spanning several lines go too.
   $query =~ s{/\*.*?\*/}{ }gs;
   # NOTE(review): leading-whitespace trim follows the upstream module; confirm.
   $query =~ s/^\s+//;
   $query =~ s/\s{2,}/ /g;
   return $query;
}

# Merges the per-session files into session_files multi-session files named
# base_dir/sessions-N.txt, distributing them round-robin, then removes the
# directories recorded in created_dirs.
#
# With split_random each session file is moved onto its target; otherwise
# its contents are appended to the target.  Failures are reported with warn
# and do not stop the merge.
#
# Returns nothing.
sub _merge_session_files {
   my ( $self ) = @_;

   print "Merging session files...\n" unless $self->{quiet};

   # Core modules, loaded here because only this sub needs them.  They
   # replace the shell commands mv, cat and rm -rf, so paths containing
   # spaces or shell metacharacters are safe and failures are detectable.
   require File::Copy;
   require File::Path;

   my @multi_session_files
      = map { $self->{base_dir} . "sessions-$_.txt" }
        1..$self->{session_files};

   # A session entry without a session_file was never written; skip it.
   my @single_session_files
      = grep { defined $_ }
        map  { $_->{session_file} }
        values %{$self->{sessions}};

   my $i = make_rr_iter(0, $#multi_session_files);  # round-robin iterator
   foreach my $single_session_file ( @single_session_files ) {
      my $multi_session_file = $multi_session_files[ $i->() ];
      if ( $self->{split_random} ) {
         File::Copy::move($single_session_file, $multi_session_file)
            or warn "Failed to move $single_session_file to "
               . "$multi_session_file: $OS_ERROR";
      }
      else {
         my $out_fh;
         if ( !open $out_fh, '>>', $multi_session_file ) {
            warn "Cannot open $multi_session_file for appending: $OS_ERROR";
            next;
         }
         # copy() accepts a filehandle as destination and leaves it open.
         File::Copy::copy($single_session_file, $out_fh)
            or warn "Failed to append $single_session_file to "
               . "$multi_session_file: $OS_ERROR";
         close $out_fh
            or warn "Cannot close $multi_session_file: $OS_ERROR";
      }
   }

   foreach my $created_dir ( @{$self->{created_dirs}} ) {
      eval { File::Path::rmtree($created_dir); };
      if ( $EVAL_ERROR ) {
         warn "Failed to remove $created_dir: $EVAL_ERROR";
      }
      elsif ( -d $created_dir ) {
         warn "Failed to remove $created_dir";
      }
   }

   return;
}

# Makes a round-robin iterator.
#
# Parameters:
#   $start - first value returned
#   $end   - last value returned before wrapping back to $start
#
# Returns a closure; successive calls yield $start, $start+1, ..., $end,
# $start, ... without end.
sub make_rr_iter {
   my ( $start, $end ) = @_;
   my $current = $start;
   return sub {
      $current = $start if $current > $end ;
      $current++;  # For next iteration.
      return $current - 1;
   };
}

# Prints the split summary to STDOUT: total sessions (saved plus skipped),
# sessions saved, total events and events saved, one "label value" row each.
#
# Returns nothing.
sub print_split_summary {
   my ( $self ) = @_;
   print "Split summary:\n";
   my $fmt = "%-20s %-10s\n";
   printf $fmt, 'Total sessions',
      $self->{n_sessions_saved} + $self->{n_sessions_skipped};
   printf $fmt, 'Sessions saved',
      $self->{n_sessions_saved};
   printf $fmt, 'Total events', $self->{n_events_total};
   printf $fmt, 'Events saved', $self->{n_events_saved};
   return;
}

# Prints a debug line to STDERR in the form
#   # Package:line PID arg1 arg2 ...
# Undefined arguments are printed as 'undef', and newlines inside an
# argument are followed by '# ' so continuation lines stay commented.
sub _d {
   my ($package, undef, $line) = caller 0;
   # Work on a copy instead of assigning back to @_.
   my @args = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
              map { defined $_ ? $_ : 'undef' }
              @_;
   print STDERR "# $package:$line $PID ", join(' ', @args), "\n";
}

# A module must end with a true value to be loadable with require/use.
1;

# ###########################################################################
# End LogSplitter package
# ###########################################################################