4
# Copyright 1999-2004 The Apache Software Foundation
6
# Licensed under the Apache License, Version 2.0 (the "License");
7
# you may not use this file except in compliance with the License.
8
# You may obtain a copy of the License at
10
# http://www.apache.org/licenses/LICENSE-2.0
12
# Unless required by applicable law or agreed to in writing, software
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
# See the License for the specific language governing permissions and
16
# limitations under the License.
19
# $Id: tomcat_trend.pl 417697 2006-06-28 08:27:04Z rjung $
21
# Author: Glenn Nielsen
23
# Script for analyzing mod_jk.log data when logging tomcat request data using
24
# the JkRequestLogFormat Apache mod_jk configuration.
26
# Generates statistics for request latency and errors. Archives the generated
27
# data to files for later use in long term trend graphs and reports.
29
# tomcat_trend.pl <directory containing mod_jk.log> <directory for archiving statistics>
32
use Statistics::Descriptive;
37
# Month abbreviations in calendar order.  The array index of each name is
# the zero-based month number, which is the form timelocal() is given
# elsewhere in this script (via $MON{...}).
our @Months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

# Lookup from month abbreviation to zero-based month number.  Both the
# capitalised ("Jan") and the all-uppercase ("JAN") spellings are accepted;
# any other spelling is absent from the hash.  The table is derived from
# @Months so the two structures cannot drift apart.
our %MON = map { ( uc( $Months[$_] ) => $_, $Months[$_] => $_ ) } 0 .. $#Months;
55
# Command-line handling.  The second argument is the directory that receives
# the archived statistics files.
# NOTE(review): $logdir is validated below but is not assigned anywhere in
# the visible lines -- presumably it is taken from $ARGV[0] immediately
# before this statement; confirm.
# NOTE(review): the bare integer lines in this section look like line-number
# residue from an extraction step rather than script statements; confirm
# against the upstream file before running.
$archivedir = $ARGV[1];
57
# Both directory arguments are mandatory (an empty string counts as missing).
die "Usage: $0 logdir archivedir"
58
unless( length($logdir) && length($archivedir) );
60
# NOTE(review): the guard for this die (presumably an `unless( -d ... )` on
# the log directory) is not visible here.
die "Log Directory $logdir doesn't exist"
63
# The archive directory must already exist.
die "Archive Directory $archivedir doesn't exist"
64
unless( -d $archivedir);
66
# Work out where processing should resume ($startdate) and where it must
# stop ($curdate).
# Get start date from global.data if it exists
68
if( -e "$archivedir/global.data" ) {
69
# Get the start date from the last entry in global.data
70
# NOTE(review): $archivedir is interpolated unquoted into a shell command;
# a path containing whitespace or shell metacharacters would be split or
# interpreted by the shell.
@tail = `tail -1 $archivedir/global.data`;
71
# The first whitespace-separated field of that line is used as an epoch time.
$startdate = (split /\s+/,$tail[0])[0];
72
($day, $mon, $year) = (localtime($startdate))[3..5];
81
# Resume at local midnight of the day after the last archived entry.
# NOTE(review): $day+1 is passed straight to timelocal here; on the last day
# of a month that is out of range unless the lines missing from this view
# handle month/year rollover -- confirm.
$startdate = timelocal(0,0,0,$day+1,$mon,$year);
85
# $curdate is local midnight at the start of today.
($day, $mon, $year) = (localtime(time))[3..5];
86
$curdate = timelocal(0,0,0,$day,$mon,$year);
87
print "Today: " . scalar(localtime($curdate)) . "\n";
89
# Get the log file names and the date each one starts
90
# NOTE(review): the listing goes through the shell with $logdir interpolated
# unquoted.  Each element of @logs keeps its trailing newline unless it is
# chomped in the loop-header lines that are not visible here -- confirm.
@logs = `ls -1 $logdir/mod_jk.log*`;
94
# Compressed rotations are skipped rather than decompressed.
next if ( $logfile =~ /\.(bz2|gz|zip)$/ );
95
# Only the first line of the log is read, to find when the file starts.
@head = `head -1 $logfile`;
96
# Fields 1..4 of that line are taken as month name, day of month, hh:mm:ss
# and year; field 0 is ignored.
($mon, $day, $time, $year) = (split /\s+/,$head[0])[1..4];
97
($hour, $min, $sec) = split /:/,$time;
99
# %MON maps the month abbreviation to a month number; 1900 is subtracted
# from the year before the call.
$logtime = timelocal($sec,$min,$hour,$day,$MON{$mon},$year-1900);
100
# Index the log files by the epoch time of their first entry.  Two files
# with the same first-entry time would overwrite each other here.
$modjklog{$logtime} = $logfile;
103
# Set the startdate if this is the first time processing the logs
104
# If we have a startdate, remove log files we don't need to process
105
# Log files are visited oldest first (numeric sort on first-entry time).
foreach $logtime ( sort {$a <=> $b} keys %modjklog ) {
106
# If logs haven't been processed before ($startdate is not a plain integer),
# set startdate to local midnight of the day this log file starts
108
if( $startdate !~ /^\d+$/ ) {
109
$startdate = $logtime;
110
($day, $mon, $year) = (localtime($startdate))[3..5];
111
$startdate = timelocal(0,0,0,$day,$mon,$year);
114
# NOTE(review): the body of this branch is not visible; presumably the loop
# stops at the first log that starts after $startdate -- confirm.
if( $logtime > $startdate ) {
117
# Save the previous log file since start date may start here
118
$prevlogfile = $modjklog{$logtime};
119
$prevlogtime = $logtime;
120
# Remove log files we don't need to process
121
delete $modjklog{$logtime};
124
# Add back in the previous log file where we need to start processing
125
if( defined $prevlogtime ) {
126
$modjklog{$prevlogtime} = $prevlogfile;
129
print "StartDate: " . scalar(localtime($startdate)) . "\n";
130
# $processdate is the moving cursor compared against each log line's
# timestamp by the processing loop.
$processdate = $startdate;
132
# Main pass: walk the remaining log files oldest first and read each one
# line by line.
foreach $key ( sort {$a <=> $b} keys %modjklog ) {
133
$logtime = $processdate;
134
$logfile = $modjklog{$key};
135
print "Processing log: $logfile\n";
136
# A log whose first entry is from today (or later) is not processed.  The
# "Processing log" message above is printed before this check.
last if( $key >= $curdate );
137
# Indirect-object form of FileHandle->new; the "<" prefix opens for reading.
$fh = new FileHandle "<$logfile";
138
# NOTE(review): the condition guarding this die (presumably a check that
# $fh is defined) is not visible here.
die "Open of logfile $logfile failed: $!"
140
while( $line = $fh->getline) {
142
# Fields 1..4 are taken as month name, day of month, hh:mm:ss and year.
($mon, $day, $time, $year) = (split /\s+/,$line)[1..4];
143
($hour, $min, $sec) = split /:/,$time;
145
# Lines whose day/hour/minute/second do not start with a digit are reported
# as unknown, except those containing ".c " (presumably mod_jk source-file
# debug lines -- confirm).
# NOTE(review): $origline is not assigned in the visible lines; presumably a
# copy of $line is saved just after the read.
if( $day !~ /^\d+/ || $hour !~ /^\d+/ || $min!~ /^\d+/ || $sec !~ /^\d+/ ) {
146
print "Unknown log entry: $origline\n" unless $origline =~ /\.c /;
149
# Epoch time of this log line.
$logtime = timelocal($sec,$min,$hour,$day,$MON{$mon},$year-1900);
151
# Only lines newer than the processing cursor are considered.  This section
# detects 5-minute and day boundaries, flushes the finished period, and
# resets the accumulators for the new one.
if( $logtime > $processdate ) {
153
# Strip off the leading date and time
# (the match is greedy, so everything up to the last "] " on the line goes)
154
$line =~ s/^\[.*\] //;
156
# See if this is a new 5 minute period
157
# Interval number = epoch seconds divided by 300, truncated.
$interval = int($logtime/300);
158
if( $interval != $previnterval ) {
159
# Nothing to flush before the first interval has been seen.
if( defined $previnterval ) {
160
# The third argument is the start of the finished interval in epoch seconds.
&IntervalStats(\%Global,\%Interval,$previnterval*300);
163
# Reset the per-interval accumulators and re-point %Interval at them.
undef @IntervalLatency;
164
undef %IntervalWorkers;
165
$Interval{tomcat_full} = 0;
166
$Interval{client_gone} = 0;
167
$Interval{latency} = \@IntervalLatency;
168
$Interval{workers} = \%IntervalWorkers;
169
$previnterval = $interval;
172
# See if this is a new day
173
# The comparison is on day of month only.
if( $day != $prevday ) {
174
# Nothing to write out before the first day has been seen.
if( defined $prevday ) {
175
&DailyStats($processdate,\%Global);
178
# Reset the per-day accumulators and re-point %Global at them.
undef %GlobalWorkers;
179
undef @GlobalLatency;
180
$Global{tomcat_full} = 0;
181
$Global{client_gone} = 0;
182
$Global{interval} = "";
183
$Global{latency} = \@GlobalLatency;
184
$Global{workers} = \%GlobalWorkers;
185
$Global{errors} = "";
187
# Advance the processing cursor to this line's time.
$processdate = $logtime;
190
# Stop processing if logtime is today
191
last if( $logtime >= $curdate );
193
# Classify the line.  A line containing a digit, ")", an optional "]" and
# then ": " is treated as a mod_jk error message; the request-log handling
# follows further down.
# NOTE(review): the `else` that separates the two cases is not visible here.
if( $line =~ /\d\)\]{0,1}: / ) {
194
# Handle a mod_jk error
195
# Failures receiving from Tomcat are counted for both the day and the interval.
if( $line =~ /(jk_tcp_socket_recvfull failed|ERROR: Receiving from tomcat failed)/ ) {
196
$Global{tomcat_full}++;
197
$Interval{tomcat_full}++;
198
# Failures writing to the client are counted likewise.
} elsif( $line =~ /(ajp_process_callback - write failed|ERROR sending data to client. Connection aborted or network problems|Client connection aborted or network problems)/ ) {
199
$Global{client_gone}++;
200
$Interval{client_gone}++;
204
# Handle a mod_jk request log entry
205
# Remove a leading "[...] " group, then the quoted request line, then
# everything from the first "?" or ";" through the last double quote.
$line =~ s/^\[.*\] //;
206
$line =~ s/\"(GET|POST|OPTIONS|HEAD)[^\"]*\" //;
207
$line =~ s/[\?\;].*\"//;
209
# What remains is split on whitespace into worker, host, page, HTTP status
# and latency.
($work, $host, $page, $status, $latency) = split /\s+/,$line;
210
# Collapse "//" to "/" and turn "./" into "/" in the page path.
$page =~ s/\/\//\//g;
211
$page =~ s/\.\//\//g;
212
# Require every field to be present, an all-digit status and a latency of
# the form digits.digits; anything else is reported as unknown (again
# excluding lines containing ".c ").
if( length($work) <= 0 || length($host) <= 0 ||
213
length($page) <= 0 || $status !~ /^\d+$/ || $latency !~ /^\d+\.\d+$/ ) {
214
print "Unknown log entry: $origline\n" unless $origline =~ /\.c /;
218
# Throw out abnormally long requests and log them as an error
219
# The threshold is 1800, in the same units as the logged latency
# (the message below calls them seconds).
if( $latency >= 1800 ) {
220
$Global{errors} .= "Error: $page has an HTTP status of $status and an ";
221
$Global{errors} .= "abnormally long request latency of $latency seconds\n";
225
# Save the data by day for Global, Worker, and Host
# The per-worker, per-host and per-page containers are package variables
# reached through symbolic references (the variable name is built from the
# worker/host/page strings), so this code cannot run under
# `use strict 'refs'`.
# NOTE(review): a worker or host string that happens to equal the name of
# another package variable would alias that variable -- confirm this cannot
# occur with real worker names.
226
push @{$Global{latency}},$latency;
227
$workers = $Global{workers};
228
# First request seen for this worker today: create its record.
if( !defined $$workers{$work} ) {
231
undef %{"$work-hosts"};
232
${"$work"}{latency} = \@{"$work"};
233
${"$work"}{hosts} = \%{"$work-hosts"};
234
${"$work"}{interval} = "";
235
$$workers{$work} = \%{"$work"};
237
$worker = $$workers{$work};
238
push @{$$worker{latency}},$latency;
240
# First request seen for this host under this worker: create its record.
if( !defined $$worker{hosts}{$host} ) {
241
undef @{"$work-$host"};
242
undef %{"$work-$host"};
243
undef %{"$work-$host-pages"};
244
${"$work-$host"}{latency} = \@{"$work-$host"};
245
${"$work-$host"}{pages} = \%{"$work-$host-pages"};
246
${"$work-$host"}{interval} = "";
247
$$worker{hosts}{$host} = \%{"$work-$host"};
249
$hoster = $$worker{hosts}{$host};
250
push @{$$hoster{latency}},$latency;
252
# First request seen for this page on this host: create its latency list.
if( !defined $$hoster{pages}{$page} ) {
253
undef @{"$work-$host-$page"};
254
$$hoster{pages}{$page} = \@{"$work-$host-$page"};
256
push @{$$hoster{pages}{$page}},$latency;
258
# Save the data by 5 minute interval for Global, Worker, and Host
# The containers are package variables reached through symbolic references;
# their names carry an "int-" prefix.  No per-page data is kept at interval
# level.
259
push @{$Interval{latency}},$latency;
260
$workers = $Interval{workers};
261
# First request seen for this worker in the current interval: reset and
# register its record.
if( !defined $$workers{"$work"} ) {
262
undef @{"int-$work"};
263
undef %{"int-$work"};
264
undef %{"int-$work-hosts"};
265
${"int-$work"}{latency} = \@{"int-$work"};
266
${"int-$work"}{hosts} = \%{"int-$work-hosts"};
267
$$workers{$work} = \%{"int-$work"};
269
$worker = $$workers{$work};
270
push @{$$worker{latency}},$latency;
272
# First request seen for this host under this worker in the current interval.
if( !defined $$worker{hosts}{$host} ) {
273
undef @{"int-$work-$host"};
274
undef %{"int-$work-$host"};
275
${"int-$work-$host"}{latency} = \@{"int-$work-$host"};
276
$$worker{hosts}{$host} = \%{"int-$work-$host"};
278
$hoster = $$worker{hosts}{$host};
279
push @{$$hoster{latency}},$latency;
286
# If the last log file ends before the switch to the current day,
287
# output the last day's data
288
# ($logtime holds the time of the last line processed; the main loop exits
# early once a line from today is reached.)
if( $logtime < $curdate ) {
289
# Flush the interval still open, then write out the day.
&IntervalStats(\%Global,\%Interval,$previnterval*300);
290
&DailyStats($processdate,\%Global);
295
# IntervalStats(global_ref, interval_ref, interval_start)
#
# Appends one line of latency statistics for a finished interval to the
# "interval" text of the global record, of each worker record, and of each
# host record under each worker.
#
# The callers in this file pass \%Global, \%Interval and the interval
# number multiplied by 300.
# NOTE(review): $global and $data are dereferenced below but not assigned in
# the visible lines; presumably they are unpacked from $_[0] and $_[1] --
# confirm.
# NOTE(review): $count, $median, $mean, $stddev, $min, $max, $work, $host,
# $worker and $hoster are not declared with `my` in the visible lines, so
# they would be the same package variables the main script uses.
sub IntervalStats($$$) {
298
# Third argument: written as the first column of every line appended below.
my $interval = $_[2];
300
# Global line: the six latency statistics plus the two error counters.
($count,$median,$mean,$stddev,$min,$max) = &CalcStats($$data{latency});
301
$$global{interval} .= "$interval $count $median $mean $stddev $min $max $$data{client_gone} $$data{tomcat_full}\n";
303
# Per worker: statistics come from the interval record, the text is appended
# to the matching record in the global structure.
foreach $work ( keys %{$$data{workers}} ) {
304
$worker = $$data{workers}{$work};
305
$gworker = $$global{workers}{$work};
306
($count,$median,$mean,$stddev,$min,$max) = &CalcStats($$worker{latency});
307
$$gworker{interval} .= "$interval $count $median $mean $stddev $min $max\n";
308
# Per host under this worker, same pattern.
foreach $host ( keys %{$$worker{hosts}} ) {
309
$hoster = $$worker{hosts}{$host};
310
$ghoster = $$gworker{hosts}{$host};
311
($count,$median,$mean,$stddev,$min,$max) = &CalcStats($$hoster{latency});
312
$$ghoster{interval} .= "$interval $count $median $mean $stddev $min $max\n";
321
# NOTE(review): the `sub` header and argument unpacking for this body are not
# visible.  It is presumably DailyStats, which the main script calls with a
# date and \%Global -- confirm that $date and $data are those two arguments.
#
# Writes one day's results: a summary line ("global" file) and the
# accumulated interval text ("daily" file) for the whole site, for each
# worker, and for each host; plus a per-page report ("request" file) for
# each host.
&SaveStats($data,$date,"","global");
322
&SaveFile($$data{interval},$date,"","daily");
323
foreach $work ( keys %{$$data{workers}} ) {
324
$worker = $$data{workers}{$work};
325
# Worker files use the worker name as the sub-directory argument.
&SaveStats($worker,$date,$work,"global");
326
&SaveFile($$worker{interval},$date,$work,"daily");
327
foreach $host ( keys %{$$worker{hosts}} ) {
328
$hoster = $$worker{hosts}{$host};
329
# Host files use "worker/host" as the sub-directory argument.
&SaveStats($hoster,$date,"$work/$host","global");
330
&SaveFile($$hoster{interval},$date,"$work/$host","daily");
332
# One line of statistics per page, in sorted page order.
# NOTE(review): $pagedata is only appended to in the visible lines;
# presumably it is cleared for each host on a line missing from this view.
foreach $page ( sort keys %{$$hoster{pages}} ) {
333
$pager = $$hoster{pages}{$page};
334
($count,$median,$mean,$stddev,$min,$max) = &CalcStats($pager);
335
$pagedata .= "$page $count $median $mean $stddev $min $max\n";
337
# The accumulated error text of the top-level record is appended to the
# page report.
$pagedata .= $$data{errors};
338
&SaveFile($pagedata,$date,"$work/$host","request");
346
# NOTE(review): the `sub` header for this body is not visible.  It is
# presumably CalcStats, which callers in this file invoke with an array
# reference of latencies; $data is presumably that reference -- confirm.
#
# Loads the values into a Statistics::Descriptive::Full object and returns
# the list (count, median, mean, standard deviation, min, max).
$stats = Statistics::Descriptive::Full->new();
347
$stats->add_data(@{$data});
348
$median = $stats->median();
349
$mean = $stats->mean();
350
$stddev = $stats->standard_deviation();
351
$max = $stats->max();
352
$min = $stats->min();
353
$count = $stats->count();
354
# Return order differs from the order computed above: count first, min
# before max.
return ($count,$median,$mean,$stddev,$min,$max);
357
# SaveStats(data_ref, date, dir, file)
#
# Appends one summary line (date followed by the latency statistics of
# data_ref's "latency" list) to "<dir>/<file>.data".
#
# NOTE(review): the argument unpacking is not visible; the parameter order
# above is taken from the call sites in this file (record reference, date,
# sub-directory, base file name) -- confirm.  The closing of the DATA
# handle and the end of the line are also outside the visible lines.
sub SaveStats($$$$) {
363
# A non-empty sub-directory is placed under the archive directory.
# NOTE(review): what happens for an empty $dir, and whether the directory is
# created when absent, is on lines not visible here.
if( length($dir) > 0 ) {
364
$dir = "$archivedir/$dir";
370
$outfile = "$dir/${file}.data";
372
($count,$median,$mean,$stddev,$min,$max) = &CalcStats($$data{latency});
374
# Opened in append mode so each run adds to the existing history.
open DATA, ">>$outfile" or die $!;
375
print DATA "$date $count $median $mean $stddev $min $max";
376
# The two error counters are written only for records that carry a
# tomcat_full entry.
print DATA " $$data{client_gone} $$data{tomcat_full}" if defined $$data{tomcat_full};
386
# NOTE(review): the `sub` header and argument unpacking for this body are not
# visible, and the body continues past the last visible line.  It is
# presumably SaveFile, which callers in this file invoke as
# (text, date, sub-directory, base file name) -- confirm.
#
# Builds a date-stamped file name and opens "<dir>/<name>.data" for append.
my ($day, $mon, $year);
388
($day, $mon, $year) = (localtime($date))[3..5];
391
# NOTE(review): localtime yields a zero-based month and a year counted from
# 1900; the adjustment for the file name is presumably on the lines missing
# between these two statements -- confirm.
# Zero-pad month and day to two digits.
$mon = "0$mon" if $mon < 10;
392
$day = "0$day" if $day < 10;
393
# Prefix the base name with year-month-day.
$file = "$year-$mon-$day-$file";
395
# A non-empty sub-directory is placed under the archive directory.
if( length($dir) > 0 ) {
396
$dir = "$archivedir/$dir";
402
$outfile = "$dir/${file}.data";
404
# Opened in append mode.
open DATA, ">>$outfile" or die $!;