1
# This program is copyright 2011 Percona Inc.
2
# This program is copyright 2010 Baron Schwartz.
3
# Feedback and improvements are welcome.
5
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
6
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
7
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
9
# This program is free software; you can redistribute it and/or modify it under
10
# the terms of the GNU General Public License as published by the Free Software
11
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
12
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
15
# You should have received a copy of the GNU General Public License along with
16
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17
# Place, Suite 330, Boston, MA 02111-1307 USA.
18
# ###########################################################################
19
# SysLogParser package $Revision: 5831 $
20
# ###########################################################################
22
# Package: SysLogParser
23
# SysLogParser parses events from syslogs.
28
use warnings FATAL => 'all';
29
use English qw(-no_match_vars);
30
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
32
# This regex matches the message number, line number, and content of a syslog
34
# 2008 Jan 9 16:16:34 hostname postgres[30059]: [13-2] ...content...
35
my $syslog_regex = qr{\A.*\w+\[\d+\]: \[(\d+)-(\d+)\] (.*)\Z};
37
# This class generates currying functions that wrap around a standard
38
# log-parser's next_event() and tell() function pointers. The wrappers behave
39
# the same way, except that they'll return entire syslog events, instead of
40
# lines at a time. To use it, do the following:
43
# my ($self, %args) = @_;
44
# my ($next_event, $tell, $is_syslog) = SysLogParser::make_closures(%args);
45
# # ... write your code to use the $next_event and $tell here...
48
# If the log isn't in syslog format, $is_syslog will be false and you'll get
49
# back simple wrappers around the $next_event and $tell functions. (They still
50
# have to be wrapped, because to find out whether the log is in syslog format,
51
# the first line has to be examined.)
55
return bless $self, $class;
58
# This method is here so that SysLogParser can be used and tested in its own
59
# right. However, its ability to generate wrapper functions probably means that
60
# it should be used as a translation layer, not directly. You can use this code
61
# as an example of how to integrate this into other packages.
63
my ( $self, %args ) = @_;
64
my ( $next_event, $tell, $is_syslog ) = $self->generate_wrappers(%args);
65
return $next_event->();
68
# This is an example of how a class can seamlessly put a syslog translation
69
# layer underneath itself.
70
sub generate_wrappers {
71
my ( $self, %args ) = @_;
73
# Reset everything, just in case some cruft was left over from a previous use
74
# of this object. The object has stateful closures. If this isn't done,
75
# then they'll keep reading from old filehandles. The sanity check is based
76
# on the memory address of the closure!
77
if ( ($self->{sanity} || '') ne "$args{next_event}" ){
78
MKDEBUG && _d("Clearing and recreating internal state");
79
@{$self}{qw(next_event tell is_syslog)} = $self->make_closures(%args);
80
$self->{sanity} = "$args{next_event}";
83
# Return the wrapper functions!
84
return @{$self}{qw(next_event tell is_syslog)};
87
# Make the closures! The $args{misc}->{new_event_test} is an optional
88
# subroutine reference, which tells the wrapper when to consider a line part of
89
# a new event, in syslog format, even when it's technically the same syslog
90
# event. See the test for samples/pg-syslog-002.txt for an example. This
91
# argument should be passed in via the call to parse_event(). Ditto for
92
# 'line_filter', which is some processing code to run on every line of content
95
my ( $self, %args ) = @_;
97
# The following variables will be referred to in the manufactured
98
# subroutines, making them proper closures.
99
my $next_event = $args{'next_event'};
100
my $tell = $args{'tell'};
101
my $new_event_test = $args{'misc'}->{'new_event_test'};
102
my $line_filter = $args{'misc'}->{'line_filter'};
104
# The first thing to do is get a line from the log and see if it's from
106
my $test_line = $next_event->();
107
MKDEBUG && _d('Read first sample/test line:', $test_line);
109
# If it's syslog, we have to generate a moderately elaborate wrapper
111
if ( defined $test_line && $test_line =~ m/$syslog_regex/o ) {
113
# Within syslog-parsing subroutines, we'll use LLSP (low-level syslog
114
# parser) as a MKDEBUG line prefix.
115
MKDEBUG && _d('This looks like a syslog line, MKDEBUG prefix=LLSP');
117
# Grab the interesting bits out of the test line, and save the result.
118
my ($msg_nr, $line_nr, $content) = $test_line =~ m/$syslog_regex/o;
119
my @pending = ($test_line);
120
my $last_msg_nr = $msg_nr;
123
# Generate the subroutine for getting a full log message without syslog
124
# breaking it across multiple lines.
125
my $new_next_event = sub {
126
MKDEBUG && _d('LLSP: next_event()');
128
# Keeping the pos_in_log variable right is a bit tricky! In general,
129
# we have to tell() the filehandle before trying to read from it,
130
# getting the position before the data we've just read. The simple
131
# rule is that when we push something onto @pending, which we almost
132
# always do, then $pos_in_log should point to the beginning of that
133
# saved content in the file.
134
MKDEBUG && _d('LLSP: Current virtual $fh position:', $pos_in_log);
137
# @arg_lines is where we store up the content we're about to return.
138
# It contains $content; @pending contains a single saved $line.
141
# Here we actually examine lines until we have found a complete event.
145
defined($line = shift @pending)
147
# Save $new_pos, because when we hit EOF we can't $tell->()
149
eval { $new_pos = -1; $new_pos = $tell->() };
150
defined($line = $next_event->());
153
MKDEBUG && _d('LLSP: Line:', $line);
156
($msg_nr, $line_nr, $content) = $line =~ m/$syslog_regex/o;
158
die "Can't parse line: $line";
161
# The message number has changed -- thus, new message.
162
elsif ( $msg_nr != $last_msg_nr ) {
163
MKDEBUG && _d('LLSP: $msg_nr', $last_msg_nr, '=>', $msg_nr);
164
$last_msg_nr = $msg_nr;
168
# Or, the caller gave us a custom new_event_test and it is true --
169
# thus, also new message.
170
elsif ( @arg_lines && $new_event_test && $new_event_test->($content) ) {
171
MKDEBUG && _d('LLSP: $new_event_test matches');
175
# Otherwise it's part of the current message; put it onto the list
176
# of lines pending. We have to translate characters that syslog has
177
# munged. Some translate TAB into the literal characters '^I' and
178
# some, rsyslog on Debian anyway, seem to translate all whitespace
179
# control characters into an octal string representing the character
181
# Example: #011FROM pg_catalog.pg_class c
182
$content =~ s/#(\d{3})/chr(oct($1))/ge;
183
$content =~ s/\^I/\t/g;
184
if ( $line_filter ) {
185
MKDEBUG && _d('LLSP: applying $line_filter');
186
$content = $line_filter->($content);
189
push @arg_lines, $content;
191
MKDEBUG && _d('LLSP: Exited while-loop after finding a complete entry');
193
# Mash the pending stuff together to return it.
194
my $psql_log_event = @arg_lines ? join('', @arg_lines) : undef;
195
MKDEBUG && _d('LLSP: Final log entry:', $psql_log_event);
197
# Save the new content into @pending for the next time. $pos_in_log
198
# must also be updated to whatever $new_pos is.
199
if ( defined $line ) {
200
MKDEBUG && _d('LLSP: Saving $line:', $line);
202
MKDEBUG && _d('LLSP: $pos_in_log:', $pos_in_log, '=>', $new_pos);
203
$pos_in_log = $new_pos;
206
# We hit the end of the file.
207
MKDEBUG && _d('LLSP: EOF reached');
212
return $psql_log_event;
215
# Create the closure for $tell->();
217
MKDEBUG && _d('LLSP: tell()', $pos_in_log);
221
return ($new_next_event, $new_tell, 1);
224
# This is either at EOF already, or it's not syslog format.
227
# Within plain-log-parsing subroutines, we'll use PLAIN as a MKDEBUG
229
MKDEBUG && _d('Plain log, or we are at EOF; MKDEBUG prefix=PLAIN');
231
# The @pending array is really only needed to return the one line we
232
# already read as a test. Too bad we can't just push it back onto the
233
# log. TODO: maybe we can test whether the filehandle is seekable and
234
# seek back to the start, then just return the unwrapped functions?
235
my @pending = defined $test_line ? ($test_line) : ();
237
my $new_next_event = sub {
238
MKDEBUG && _d('PLAIN: next_event(); @pending:', scalar @pending);
239
return @pending ? shift @pending : $next_event->();
242
MKDEBUG && _d('PLAIN: tell(); @pending:', scalar @pending);
243
return @pending ? 0 : $tell->();
245
return ($new_next_event, $new_tell, 0);
250
my ($package, undef, $line) = caller 0;
251
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
252
map { defined $_ ? $_ : 'undef' }
254
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
259
# ###########################################################################
260
# End SysLogParser package
261
# ###########################################################################