2
#-----------------------------------------------------------------------------
3
# Allows you to get one unique output log file, sorted on date,
4
# built from particular sources.
5
# This tool is part of AWStats log analyzer but can be used
6
# alone for any other log analyzer.
7
# See COPYING.TXT file about AWStats GNU General Public License.
8
#-----------------------------------------------------------------------------
9
# $Revision: 1.31 $ - $Author: eldy $ - $Date: 2004/11/27 14:32:00 $
11
use strict; no strict "refs";
14
#-----------------------------------------------------------------------------
16
#-----------------------------------------------------------------------------
18
# ENABLETHREAD --> COMMENT THIS BLOCK TO USE A THREADED VERSION
21
my $NbOfDNSLookupAsked = 0;
24
my %TmpDNSLookup = ();
26
# ENABLETHREAD --> UNCOMMENT THIS BLOCK TO USE A THREADED VERSION
29
#my $NbOfDNSLookupAsked : shared = 0;
30
#my %threadarray : shared = ();
31
#my %MyDNSTable : shared = ();
32
#my %TmpDNSLookup : shared = ();
35
# ---------- Init variables --------
36
use vars qw/ $REVISION $VERSION /;
37
$REVISION='$Revision: 1.31 $'; $REVISION =~ /\s(.*)\s/; $REVISION=$1;
38
$VERSION="1.2 (build $REVISION)";
40
use vars qw/ $NBOFLINESFORBENCHMARK /;
41
$NBOFLINESFORBENCHMARK=8192;
45
$Debug $ShowSteps $AddFileNum
46
$MaxNbOfThread $DNSLookup $DNSCache $DirCgi $DirData $DNSLookupAlreadyDone
47
$NbOfLinesShowsteps $AFINET $QueueCursor
60
$DNSLookupAlreadyDone=0;
61
$NbOfLinesShowsteps=0;
64
# ---------- Init arrays --------
69
# ---------- Init hash arrays --------
71
%linerecord %timerecord %corrupted
72
%QueueHostsToResolve %QueueRecords
74
%linerecord = %timerecord = %corrupted = ();
75
%QueueHostsToResolve = %QueueRecords = ();
77
# ---------- External Program variables ----------
78
# For gzip compression
80
my $zcat_file = '\.gz$';
83
my $bzcat_file = '\.bz2$';
87
#-----------------------------------------------------------------------------
89
#-----------------------------------------------------------------------------
91
#------------------------------------------------------------------------------
92
# Function: Write an error message and exit
93
# Parameters: $message
97
#------------------------------------------------------------------------------
99
print "Error: $_[0].\n";
103
#------------------------------------------------------------------------------
104
# Function: Write a debug message
105
# Parameters: $message
109
#------------------------------------------------------------------------------
111
my $level = $_[1] || 1;
112
if ($Debug >= $level) {
113
my $debugstring = $_[0];
114
print "DEBUG $level - ".localtime(time())." : $debugstring\n";
118
#------------------------------------------------------------------------------
119
# Function: Write a warning message
120
# Parameters: $message
124
#------------------------------------------------------------------------------
126
my $messagestring=shift;
127
if ($Debug) { debug("$messagestring",1); }
128
print "$messagestring\n";
131
#-----------------------------------------------------------------------------
132
# Function: Return 1 if string contains only ascii chars
135
#-----------------------------------------------------------------------------
138
if ($Debug) { debug("IsAscii($string)",5); }
139
if ($string =~ /^[\w\+\-\/\\\.%,;:=\"\'&?!\s]+$/) {
140
if ($Debug) { debug(" Yes",5); }
141
return 1; # Only alphanum chars (and _) or + - / \ . % , ; : = " ' & ? space \t
143
if ($Debug) { debug(" No",5); }
147
#-----------------------------------------------------------------------------
148
# Function: Return 1 if host matches an entry of @SkipDNSLookupFor (no DNS lookup needed)
151
#-----------------------------------------------------------------------------
153
foreach my $match (@SkipDNSLookupFor) { if ($_[0] =~ /$match/i) { return 1; } }
154
0; # Not in @SkipDNSLookupFor
157
#-----------------------------------------------------------------------------
158
# Function: Function that waits for DNS lookup (can be threaded)
161
#-----------------------------------------------------------------------------
164
$NbOfDNSLookupAsked++;
165
use Socket; $AFINET=AF_INET;
167
$tid=$MaxNbOfThread?eval("threads->self->tid()"):0;
168
if ($Debug) { debug(" ***** Thread id $tid: MakeDNSlookup started (for $ipaddress)",4); }
169
my $lookupresult=gethostbyaddr(pack("C4",split(/\./,$ipaddress)),$AFINET); # This is very slow, may took 20 seconds
170
if (! $lookupresult || $lookupresult =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ || ! IsAscii($lookupresult)) {
171
$TmpDNSLookup{$ipaddress}='*';
174
$TmpDNSLookup{$ipaddress}=$lookupresult;
176
if ($Debug) { debug(" ***** Thread id $tid: MakeDNSlookup done ($ipaddress resolved into $TmpDNSLookup{$ipaddress})",4); }
177
delete $threadarray{$ipaddress};
181
#-----------------------------------------------------------------------------
182
# Function: WriteRecordsReadyInQueue
185
#-----------------------------------------------------------------------------
186
sub WriteRecordsReadyInQueue {
# Prints the records found at the head of the queue (position $QueueCursor)
# for as long as the host attached to the head record is settled.
# Parameter: $logfilechosen - value printed in front of each record when
#            $AddFileNum is set.
# Globals read: %QueueHostsToResolve, %QueueRecords, %MyDNSTable,
#               %TmpDNSLookup, $QueueCursor, $AddFileNum, $Debug.
# NOTE(review): some lines of this function (closing braces, else branches
# and presumably the cursor advance) are not visible in this excerpt.
187
my $logfilechosen=shift;
188
if ($Debug) { debug("Check head of queue to write records ready to flush (QueueCursor=$QueueCursor, QueueSize=".(scalar keys %QueueRecords).")",4); }
189
# A head record is ready when its queue entry is '*' (nothing to replace)
# or when its host has an entry in %MyDNSTable or in %TmpDNSLookup.
while ( $QueueHostsToResolve{$QueueCursor} && ( ($QueueHostsToResolve{$QueueCursor} eq '*') || ($MyDNSTable{$QueueHostsToResolve{$QueueCursor}}) || ($TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}}) ) ) {
190
# $QueueCursor point to a ready record
191
if ($QueueHostsToResolve{$QueueCursor} eq '*') {
192
if ($Debug) { debug(" First elem in queue is ready. No change on it. We pull it.",4); }
195
# Host found in %MyDNSTable: a value of '*' leaves the record unchanged.
if ($MyDNSTable{$QueueHostsToResolve{$QueueCursor}}) {
196
if ($MyDNSTable{$QueueHostsToResolve{$QueueCursor}} ne '*') {
197
# NOTE(review): the host is interpolated as a regex pattern without \Q...\E,
# so each '.' of an IP address matches any character, and only the first
# match in the record is replaced (no /g) - confirm this is intended.
$QueueRecords{$QueueCursor}=~s/$QueueHostsToResolve{$QueueCursor}/$MyDNSTable{$QueueHostsToResolve{$QueueCursor}}/;
198
if ($Debug) { debug(" First elem in queue has been resolved (found in MyDNSTable $MyDNSTable{$QueueHostsToResolve{$QueueCursor}}). We pull it.",4); }
201
# Otherwise use the result stored in %TmpDNSLookup ('*' again means
# the record is left unchanged).
elsif ($TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}}) {
202
if ($TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}} ne '*') {
203
# NOTE(review): same unescaped pattern interpolation as for %MyDNSTable above.
$QueueRecords{$QueueCursor}=~s/$QueueHostsToResolve{$QueueCursor}/$TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}}/;
204
if ($Debug) { debug(" First elem in queue has been resolved (found in TmpDNSLookup $TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}}). We pull it.",4); }
208
# Record is ready, we output it.
209
if ($AddFileNum) { print "$logfilechosen $QueueRecords{$QueueCursor}\n"; }
210
else { print "$QueueRecords{$QueueCursor}\n"; }
211
# Drop the printed record and its host entry from both queues.
delete $QueueRecords{$QueueCursor};
212
delete $QueueHostsToResolve{$QueueCursor};
218
#-----------------------------------------------------------------------------
219
# Function: Check if threads are enabled or not
222
#-----------------------------------------------------------------------------
223
sub Check_Thread_Use {
224
if ($] >= 5.008) { for (0..@ARGV-1) { if ($ARGV[$_] =~ /^-dnslookup[:=](\d{1,2})/i) {
226
if (!eval ('require "threads.pm";')) { &error("Failed to load perl module 'threads' required for multi-threaded DNS lookup".($@?": $@":"")); }
227
if (!eval ('require "threads/shared.pm";')) { &error("Failed to load perl module 'threads::shared' required for multi-threaded DNS lookup".($@?": $@":"")); }
229
else { &error("Multi-thread is disabled in default version of this script.\nYou must manually edit the file '$0' to comment/uncomment all\nlines marked with 'ENABLETHREAD' string to enable multi-threading"); }
235
#-----------------------------------------------------------------------------
237
#-----------------------------------------------------------------------------
238
($DIR=$0) =~ s/([^\/\\]*)$//; ($PROG=$1) =~ s/\.([^\.]*)$//; $Extension=$1;
240
# Get parameters (Note: $MaxNbOfThread is already known
243
if ($ARGV[$_] =~ /^-/) {
244
if ($ARGV[$_] =~ /debug=(\d)/i) { $Debug=$1; }
245
elsif ($ARGV[$_] =~ /dnscache=/i) { $DNSLookup||=2; $DNSCache=$ARGV[$_]; $DNSCache =~ s/-dnscache=//; }
246
elsif ($ARGV[$_] =~ /dnslookup[:=](\d{1,2})/i) { $DNSLookup||=1; $MaxNbOfThread=$1; }
247
elsif ($ARGV[$_] =~ /dnslookup/i) { $DNSLookup||=1; }
248
elsif ($ARGV[$_] =~ /showsteps/i) { $ShowSteps=1; }
249
elsif ($ARGV[$_] =~ /addfilenum/i) { $AddFileNum=1; }
250
else { print "Unknown argument $ARGV[$_] ignored\n"; }
253
push @ParamFile, $ARGV[$_];
257
if ($Debug) { $|=1; }
260
debug(ucfirst($PROG)." - $VERSION - Perl $^X $]",1);
261
debug("DNSLookup=$DNSLookup");
262
debug("DNSCache=$DNSCache");
263
debug("MaxNbOfThread=$MaxNbOfThread");
266
# Disallow MaxNbOfThread and Perl < 5.8
267
if ($] < 5.008 && $MaxNbOfThread) {
268
error("Multi-threaded DNS lookup is only supported with Perl 5.8 or higher (not $]). Use -dnslookup option instead");
271
# Warning, there is a memory leak in ActiveState perl version (in delete functions)
272
if ($^X =~ /activestate/i || $^X =~ /activeperl/i) {
277
if (scalar @ParamFile == 0) {
278
print "----- $PROG $VERSION (c) Laurent Destailleur -----\n";
279
print "$PROG allows you to get one unique output log file, sorted on date,\n";
280
print "built from particular sources:\n";
281
print " - It can read several input log files,\n";
282
print " - It can read .gz/.bz2 log files,\n";
283
print " - It can also makes a fast reverse DNS lookup to replace\n";
284
print " all IP addresses into host names in resulting log file.\n";
285
print "$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software\n";
286
print "distributed with a GNU General Public License (See COPYING.txt file).\n";
287
print "$PROG is part of AWStats but can be used alone as a log merger\n";
288
print "or resolver before using any other log analyzer.\n";
291
print " $PROG.$Extension [options] file\n";
292
print " $PROG.$Extension [options] file1 ... filen\n";
293
print " $PROG.$Extension [options] *.*\n";
294
print " perl $PROG.$Extension [options] *.* > newfile\n";
296
print " -dnslookup make a reverse DNS lookup on IP adresses\n";
297
print " -dnslookup=n same with a n parallel threads instead of serial requests\n";
298
print " -dnscache=file make DNS lookup from cache file first before network lookup\n";
299
print " -showsteps print on stderr benchmark information every $NBOFLINESFORBENCHMARK lines\n";
300
print " -addfilenum if used with several files, file number can be added in first\n";
301
print " field of output file. This can be used to add a cluster id\n";
302
print " when log files come from several load balanced computers.\n";
305
print "This runs $PROG in command line to open one or several\n";
306
print "server log files to merge them (sorted on date) and/or to make a reverse\n";
307
print "DNS lookup (if asked). The result log file is sent on standard output.\n";
308
print "Note: $PROG is not a 'sort' tool to sort one file. It's a\n";
309
print "software able to output sorted log records (with a reverse DNS lookup\n";
310
print "included or not) even if log records are dispatched in several files.\n";
311
print "Each of thoose files must be already independently sorted itself\n";
312
print "(but that is the case in all web server log files). So you can use it\n";
313
print "for load balanced log files or to group several old log files.\n";
315
print "Don't forget that the main goal of logresolvemerge is to send log records to\n";
316
print "a log analyzer in a sorted order without merging files on disk (NO NEED\n";
317
print "OF DISK SPACE AT ALL) and without loading files into memory (NO NEED\n";
318
print "OF MORE MEMORY). Choose of output records is done on the fly.\n";
320
print "So logresolvemerge is particularly usefull when you want to output several\n";
321
print "and/or large log files in a fast process, with no use of disk or\n";
322
print "more memory, and in a chronological order through a pipe (to be used by a log\n";
323
print "analyzer).\n";
325
print "Note: If input records are not 'exactly' sorted but 'nearly' sorted (this\n";
326
print "occurs with heavy servers), this is not a problem, the output will also\n";
327
print "be 'nearly' sorted but a few log analyzers (like AWStats) knowns how to deal\n";
328
print "with such logs.\n";
330
print "WARNING: If log files are old MAC text files (lines ended with CR char), you\n";
331
print "can't run this tool on Win or Unix platforms.\n";
333
print "WARNING: Because of important memory holes in ActiveState Perl version, use\n";
334
print "another Perl interpreter if you need to process large lof files.\n";
336
print "Now supports/detects:\n";
337
print " Automatic detection of log format\n";
338
print " Files can be .gz/.bz2 files if zcat/bzcat tools are available in PATH.\n";
339
print " Multithreaded reverse DNS lookup (several parallel requests) with Perl 5.8+.\n";
340
print "New versions and FAQ at http://awstats.sourceforge.net\n";
346
# Split the current time into components and zero-pad each one to two digits.
# $nowtime is not set in the visible lines - presumably time(); verify above.
my ($nowsec,$nowmin,$nowhour,$nowday,$nowmonth,$nowyear) = localtime($nowtime);
347
# NOTE(review): localtime() returns the year as an offset from 1900, so the
# else branch applies for any date from 2000 on; the "< 100" branch would
# turn a pre-2000 value such as 99 into 2099 - confirm intent.
if ($nowyear < 100) { $nowyear+=2000; } else { $nowyear+=1900; }
348
# Two-digit year: the four-digit year with its first two characters removed.
my $nowsmallyear=$nowyear;$nowsmallyear =~ s/^..//;
349
# localtime() months are 0-based, hence the pre-increment before padding.
if (++$nowmonth < 10) { $nowmonth = "0$nowmonth"; }
350
if ($nowday < 10) { $nowday = "0$nowday"; }
351
if ($nowhour < 10) { $nowhour = "0$nowhour"; }
352
if ($nowmin < 10) { $nowmin = "0$nowmin"; }
353
if ($nowsec < 10) { $nowsec = "0$nowsec"; }
354
# Get tomorrow's time (used to discard records with a corrupted date, i.e. a date in the future)
355
# Same decomposition as above, for $nowtime plus 86400 seconds (one day).
my ($tomorrowsec,$tomorrowmin,$tomorrowhour,$tomorrowday,$tomorrowmonth,$tomorrowyear) = localtime($nowtime+86400);
356
# NOTE(review): same "< 100" year handling as for $nowyear above.
if ($tomorrowyear < 100) { $tomorrowyear+=2000; } else { $tomorrowyear+=1900; }
357
my $tomorrowsmallyear=$tomorrowyear;$tomorrowsmallyear =~ s/^..//;
358
if (++$tomorrowmonth < 10) { $tomorrowmonth = "0$tomorrowmonth"; }
359
if ($tomorrowday < 10) { $tomorrowday = "0$tomorrowday"; }
360
if ($tomorrowhour < 10) { $tomorrowhour = "0$tomorrowhour"; }
361
if ($tomorrowmin < 10) { $tomorrowmin = "0$tomorrowmin"; }
362
if ($tomorrowsec < 10) { $tomorrowsec = "0$tomorrowsec"; }
363
# Tomorrow's timestamp as one YYYYMMDDHHMMSS string.
my $timetomorrow=$tomorrowyear.$tomorrowmonth.$tomorrowday.$tomorrowhour.$tomorrowmin.$tomorrowsec;
365
# Init other parameters
366
$NBOFLINESFORBENCHMARK--;
367
if ($ENV{"GATEWAY_INTERFACE"}) { $DirCgi=''; }
368
if ($DirCgi && !($DirCgi =~ /\/$/) && !($DirCgi =~ /\\$/)) { $DirCgi .= '/'; }
369
if (! $DirData || $DirData eq '.') { $DirData=$DIR; } # If not defined or choosed to "." value then DirData is current dir
370
if (! $DirData) { $DirData='.'; } # If current dir not defined then we put it to "."
373
#my %monthlib = ( "01","$Message[60]","02","$Message[61]","03","$Message[62]","04","$Message[63]","05","$Message[64]","06","$Message[65]","07","$Message[66]","08","$Message[67]","09","$Message[68]","10","$Message[69]","11","$Message[70]","12","$Message[71]" );
374
# monthnum must be in english because it's used to translate log date in apache log files which are always in english
375
my %monthnum = ( "Jan","01","jan","01","Feb","02","feb","02","Mar","03","mar","03","Apr","04","apr","04","May","05","may","05","Jun","06","jun","06","Jul","07","jul","07","Aug","08","aug","08","Sep","09","sep","09","Oct","10","oct","10","Nov","11","nov","11","Dec","12","dec","12" );
378
if ($Debug) { debug("Load DNS Cache file $DNSCache",2); }
379
open(CACHE, "<$DNSCache") or error("Can't open cache file $DNSCache");
381
my ($time, $ip, $name) = split;
383
$name="$ip" if $name eq '*';
384
$MyDNSTable{$ip}=$name;
390
#-----------------------------------------------------------------------------
391
# PROCESSING CURRENT LOG(s)
392
#-----------------------------------------------------------------------------
395
my $NbOfLinesParsed=0;
397
my $starttime=time();
399
# Define the LogFileToDo list
401
foreach my $key (0..(@ParamFile-1)) {
402
if ($ParamFile[$key] !~ /\*/ && $ParamFile[$key] !~ /\?/) {
403
if ($Debug) { debug("Log file $ParamFile[$key] is added to LogFileToDo with number $cpt."); }
405
# Check for supported compression
406
if ($ParamFile[$key] =~ /$zcat_file/) {
407
if ($Debug) { debug("GZIP compression detected for Log file $ParamFile[$key]."); }
408
# Modify the name to include the zcat command
409
$ParamFile[$key] = $zcat . ' ' . $ParamFile[$key] . ' |';
411
elsif ($ParamFile[$key] =~ /$bzcat_file/) {
412
if ($Debug) { debug("BZ2 compression detected for Log file $ParamFile[$key]."); }
413
# Modify the name to include the bzcat command
414
$ParamFile[$key] = $bzcat . ' ' . $ParamFile[$key] . ' |';
417
$LogFileToDo{$cpt}=@ParamFile[$key];
421
my $DirFile=$ParamFile[$key]; $DirFile =~ s/([^\/\\]*)$//;
422
$ParamFile[$key] = $1;
423
if ($DirFile eq '') { $DirFile = '.'; }
424
$ParamFile[$key] =~ s/\./\\\./g;
425
$ParamFile[$key] =~ s/\*/\.\*/g;
426
$ParamFile[$key] =~ s/\?/\./g;
427
if ($Debug) { debug("Search for file \"$ParamFile[$key]\" into \"$DirFile\""); }
428
opendir(DIR,"$DirFile");
429
my @filearray = sort readdir DIR;
431
foreach my $i (0..$#filearray) {
432
if ("$filearray[$i]" =~ /^$ParamFile[$key]$/ && "$filearray[$i]" ne "." && "$filearray[$i]" ne "..") {
433
if ($Debug) { debug("Log file $filearray[$i] is added to LogFileToDo with number $cpt."); }
434
$LogFileToDo{$cpt}="$DirFile/$filearray[$i]";
441
# If no files to process
442
if (scalar keys %LogFileToDo == 0) {
443
error("No input log file found");
447
if ($Debug) { debug("Start of processing ".(scalar keys %LogFileToDo)." log file(s), $MaxNbOfThread threads max"); }
448
foreach my $logfilenb (keys %LogFileToDo) {
449
if ($Debug) { debug("Open log file number $logfilenb: \"$LogFileToDo{$logfilenb}\""); }
450
open("LOG$logfilenb","$LogFileToDo{$logfilenb}") || error("Couldn't open log file \"$LogFileToDo{$logfilenb}\" : $!");
451
binmode "LOG$logfilenb"; # To avoid pb of corrupted text log files with binary chars.
457
# BEGIN Read new record (for each log file or only for log file with record just processed)
458
#------------------------------------------------------------------------------------------
459
foreach my $logfilenb (keys %LogFileToDo) {
460
if (($logfilechosen == 0) || ($logfilechosen == $logfilenb)) {
461
if ($Debug) { debug("Search next record in file number $logfilenb",3); }
462
# Read chosen log file until we find a record with a good date or reach end of file
464
my $LOG="LOG$logfilenb";
465
$_=<$LOG>; # Read new line
466
if (! $_) { # No more records in log file number $logfilenb
467
if ($Debug) { debug(" No more records in file number $logfilenb",2); }
468
delete $LogFileToDo{$logfilenb};
475
if (/^#/) { next; } # Ignore comment lines (ISS writes such comments)
476
if (/^!!/) { next; } # Ignore comment lines (Webstar writes such comments)
477
if (/^$/) { next; } # Ignore blank lines (With ISS: happens sometimes, with Apache: possible when editing log file)
479
$linerecord{$logfilenb}=$_;
482
#----------------------------------------------------------------------
484
# Split DD/Month/YYYY:HH:MM:SS or YYYY-MM-DD HH:MM:SS or MM/DD/YY\tHH:MM:SS
485
my $year=0; my $month=0; my $day=0; my $hour=0; my $minute=0; my $second=0;
486
if ($_ =~ /(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d):(\d\d)/) { $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6; }
487
elsif ($_ =~ /\[(\d\d)[\/:\s](\w+)[\/:\s](\d\d\d\d)[\/:\s](\d\d)[\/:\s](\d\d)[\/:\s](\d\d) /) { $year=$3; $month=$2; $day=$1; $hour=$4; $minute=$5; $second=$6; }
488
elsif ($_ =~ /\[\w+ (\w+) (\d\d) (\d\d)[\/:\s](\d\d)[\/:\s](\d\d) (\d\d\d\d)\]/) { $year=$6; $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5; }
490
if ($monthnum{$month}) { $month=$monthnum{$month}; } # Change lib month in num month if necessary
492
# Create $timerecord like YYYYMMDDHHMMSS
493
$timerecord{$logfilenb}=int("$year$month$day$hour$minute$second");
494
if ($timerecord{$logfilenb}<10000000000000) {
495
if ($Debug) { debug(" This record is corrupted (no date found)",3); }
496
$corrupted{$logfilenb}++;
499
if ($Debug) { debug(" This is next record for file $logfilenb : timerecord=$timerecord{$logfilenb}",3); }
504
# END Read new lines for each log file. After this, following var are filled
505
# $timerecord{$logfilenb}
507
# We choose which record of which log file to process
508
if ($Debug) { debug("Choose which record of which log file to process",3); }
510
my $timeref="99999999999999";
511
foreach my $logfilenb (keys %LogFileToDo) {
512
if ($Debug) { debug(" timerecord for file $logfilenb is $timerecord{$logfilenb}",4); }
513
if ($timerecord{$logfilenb} < $timeref) { $logfilechosen=$logfilenb; $timeref=$timerecord{$logfilenb} }
515
if ($logfilechosen <= 0) { last; } # No more record to process
517
if ($Debug) { debug(" We choosed to qualify record of file number $logfilechosen",3); }
518
if ($Debug) { debug(" Record is $linerecord{$logfilechosen}",3); }
520
# Record is approved. We found a new line to parse in file number $logfilechosen
521
#-------------------------------------------------------------------------------
524
if ((++$NbOfLinesShowsteps & $NBOFLINESFORBENCHMARK) == 0) {
525
my $delay=(time()-$starttime)||1;
526
print STDERR "$NbOfLinesParsed lines processed (".(1000*$delay)." ms, ".int($NbOfLinesShowsteps/$delay)." lines/seconds)\n";
531
#--------------------
534
if ($DNSLookup) { # DNS lookup is 1 or 2
535
if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; } # IPv4
536
elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; } # IPv6
538
# Check in static DNS cache file
539
if ($MyDNSTable{$Host}) {
540
if ($Debug) { debug(" DNS lookup asked for $Host and found in static DNS cache file: $MyDNSTable{$Host}",4); }
542
elsif ($DNSLookup==1) {
543
# Check in session cache (dynamic DNS cache file + session DNS cache)
544
if (! $threadarray{$Host} && ! $TmpDNSLookup{$Host}) {
545
if (@SkipDNSLookupFor && &SkipDNSLookup($Host)) {
546
$TmpDNSLookup{$Host}='*';
547
if ($Debug) { debug(" No need of reverse DNS lookup for $Host, skipped at user request.",4); }
551
# Create or not a new thread
552
if ($MaxNbOfThread) {
553
if (! $threadarray{$Host}) { # No thread already launched for $Host
554
while ((scalar keys %threadarray) >= $MaxNbOfThread) {
555
if ($Debug) { debug(" $MaxNbOfThread thread running reached, so we wait",4); }
558
$threadarray{$Host}=1; # Semaphore to tell thread for $Host is active
559
# my $t = new Thread \&MakeDNSLookup, $Host;
560
my $t = threads->create(sub { MakeDNSLookup($Host) });
561
if (! $t) { error("Failed to create new thread"); }
562
if ($Debug) { debug(" Reverse DNS lookup for $Host queued in thread ".$t->tid,4); }
563
$t->detach(); # We don't need to keep return code
566
if ($Debug) { debug(" Reverse DNS lookup for $Host already queued in a thread"); }
568
# Here, this is the only way, $TmpDNSLookup{$Host} can be not defined
570
&MakeDNSLookup($Host);
571
if ($Debug) { debug(" Reverse DNS lookup for $Host done: $TmpDNSLookup{$Host}",4); }
575
$TmpDNSLookup{$Host}='*';
576
if ($Debug) { debug(" Reverse DNS lookup for $Host not available for IPv6",4); }
580
if ($Debug) { debug(" Reverse DNS lookup already queued or done for $Host: $TmpDNSLookup{$Host}",4); }
584
if ($Debug) { debug(" DNS lookup by static DNS cache file asked for $Host but not found.",4); }
588
if ($Debug) { debug(" DNS lookup asked for $Host but this is not an IP address.",4); }
589
$DNSLookupAlreadyDone=$LogFileToDo{$logfilechosen};
593
if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; } # IPv4
594
elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; } # IPv6
595
if ($Debug) { debug(" No DNS lookup asked.",4); }
598
# Put record in record queue
599
if ($Debug) { debug("Add record $NbOfLinesParsed in record queue (with host to resolve = ".($Host?$Host:'*').")",4); }
600
$QueueRecords{$NbOfLinesParsed}=$linerecord{$logfilechosen};
602
# Put record in host queue
603
# If there is a host to resolve, we add line to queue with value of host to resolve
604
# $Host is '' (no ip found) or is ip
606
$QueueHostsToResolve{$NbOfLinesParsed}='*';
609
$QueueHostsToResolve{$NbOfLinesParsed}=$Host?$Host:'*';
612
$QueueHostsToResolve{$NbOfLinesParsed}=$MyDNSTable{$Host}?$Host:'*';
615
# Print all records in head of queue that are ready
616
&WriteRecordsReadyInQueue($logfilechosen);
618
} # End of processing new record. Loop on next one.
620
if ($Debug) { debug("End of processing log file(s)"); }
622
# Close all log files
623
foreach my $logfilenb (keys %LogFileToDo) {
624
if ($Debug) { debug("Close log file number $logfilenb"); }
625
close("LOG$logfilenb") || error("Command for pipe '$LogFileToDo{$logfilenb}' failed");
628
while ( $QueueHostsToResolve{$QueueCursor} && $QueueHostsToResolve{$QueueCursor} ne '*' && ! $MyDNSTable{$QueueHostsToResolve{$QueueCursor}} && ! $TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}} ) {
630
# Print all records in head of queue that are ready
631
&WriteRecordsReadyInQueue($logfilechosen);
634
# Waiting queue is empty
635
if ($MaxNbOfThread) {
636
foreach my $t (threads->list()) {
637
if ($Debug) { debug("Join thread $t"); }
643
if ($DNSLookup==1 && $DNSLookupAlreadyDone) {
644
warning("Warning: $PROG has detected that some host names were already resolved in your logfile $DNSLookupAlreadyDone.\nIf DNS lookup was already made by the logger (web server) in ALL your log files, you should not use -dnslookup option to increase $PROG speed.");
648
debug("Total nb of read lines: $NbOfLinesRead");
649
debug("Total nb of parsed lines: $NbOfLinesParsed");
650
debug("Total nb of DNS lookup asked: $NbOfDNSLookupAsked");
654
# open(CACHE, ">$DNSCache") or die;
655
# foreach (keys %TmpDNSLookup) {
656
# $TmpDNSLookup{$_}="*" if $TmpDNSLookup{$_} eq "ip";
657
# print CACHE "0\t$_\t$TmpDNSLookup{$_}\n";
662
0; # Do not remove this line
2
#-----------------------------------------------------------------------------
3
# Allows you to get one unique output log file, sorted on date,
4
# built from particular sources.
5
# This tool is part of AWStats log analyzer but can be used
6
# alone for any other log analyzer.
7
# See COPYING.TXT file about AWStats GNU General Public License.
8
#-----------------------------------------------------------------------------
9
# $Revision: 1.34 $ - $Author: eldy $ - $Date: 2005/12/04 21:10:46 $
11
use strict; no strict "refs";
14
#-----------------------------------------------------------------------------
16
#-----------------------------------------------------------------------------
18
# ENABLETHREAD --> COMMENT THIS BLOCK TO USE A THREADED VERSION
21
my $NbOfDNSLookupAsked = 0;
24
my %TmpDNSLookup = ();
26
# ENABLETHREAD --> UNCOMMENT THIS BLOCK TO USE A THREADED VERSION
29
#my $NbOfDNSLookupAsked : shared = 0;
30
#my %threadarray : shared = ();
31
#my %MyDNSTable : shared = ();
32
#my %TmpDNSLookup : shared = ();
35
# ---------- Init variables --------
36
use vars qw/ $REVISION $VERSION /;
37
$REVISION='$Revision: 1.34 $'; $REVISION =~ /\s(.*)\s/; $REVISION=$1;
38
$VERSION="1.2 (build $REVISION)";
40
use vars qw/ $NBOFLINESFORBENCHMARK /;
41
$NBOFLINESFORBENCHMARK=8192;
45
$Debug $ShowSteps $AddFileNum $AddFileName
46
$MaxNbOfThread $DNSLookup $DNSCache $DirCgi $DirData $DNSLookupAlreadyDone
47
$NbOfLinesShowsteps $AFINET $QueueCursor
61
$DNSLookupAlreadyDone=0;
62
$NbOfLinesShowsteps=0;
65
# ---------- Init arrays --------
70
# ---------- Init hash arrays --------
72
%LogFileToDo %linerecord %timerecord %corrupted
73
%QueueHostsToResolve %QueueRecords
75
%LogFileToDo = %linerecord = %timerecord = %corrupted = ();
76
%QueueHostsToResolve = %QueueRecords = ();
78
# DRA2: the order of timerecords are kept here, each index in the array is the filerecordnumber, which
79
# DRA2: is used as the key for the other hashes
83
@timerecordorder = ();
85
# ---------- External Program variables ----------
86
# For gzip compression
88
my $zcat_file = '\.gz$';
91
my $bzcat_file = '\.bz2$';
95
#-----------------------------------------------------------------------------
97
#-----------------------------------------------------------------------------
99
#------------------------------------------------------------------------------
100
# Function: Write an error message and exit
101
# Parameters: $message
105
#------------------------------------------------------------------------------
107
print "Error: $_[0].\n";
111
#------------------------------------------------------------------------------
112
# Function: Write a debug message
113
# Parameters: $message
117
#------------------------------------------------------------------------------
119
my $level = $_[1] || 1;
120
if ($Debug >= $level) {
121
my $debugstring = $_[0];
122
print "DEBUG $level - ".localtime(time())." : $debugstring\n";
126
#------------------------------------------------------------------------------
127
# Function: Write a warning message
128
# Parameters: $message
132
#------------------------------------------------------------------------------
134
my $messagestring=shift;
135
if ($Debug) { debug("$messagestring",1); }
136
print "$messagestring\n";
139
#-----------------------------------------------------------------------------
140
# Function: Return 1 if string contains only ascii chars
143
#-----------------------------------------------------------------------------
146
if ($Debug) { debug("IsAscii($string)",5); }
147
if ($string =~ /^[\w\+\-\/\\\.%,;:=\"\'&?!\s]+$/) {
148
if ($Debug) { debug(" Yes",5); }
149
return 1; # Only alphanum chars (and _) or + - / \ . % , ; : = " ' & ? space \t
151
if ($Debug) { debug(" No",5); }
155
#-----------------------------------------------------------------------------
156
# DRA Function: Return 1 if DNS lookup should be skipped
159
#-----------------------------------------------------------------------------
161
foreach my $match (@SkipDNSLookupFor) { if ($_[0] =~ /$match/i) { return 1; } }
162
0; # Not in @SkipDNSLookupFor
165
#-----------------------------------------------------------------------------
166
# Function: Function that waits for DNS lookup (can be threaded)
169
#-----------------------------------------------------------------------------
172
$NbOfDNSLookupAsked++;
173
use Socket; $AFINET=AF_INET;
175
$tid=$MaxNbOfThread?eval("threads->self->tid()"):0;
176
if ($Debug) { debug(" ***** Thread id $tid: MakeDNSlookup started (for $ipaddress)",4); }
177
my $lookupresult=gethostbyaddr(pack("C4",split(/\./,$ipaddress)),$AFINET); # This is very slow, may took 20 seconds
178
if (! $lookupresult || $lookupresult =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ || ! IsAscii($lookupresult)) {
179
$TmpDNSLookup{$ipaddress}='*';
182
$TmpDNSLookup{$ipaddress}=$lookupresult;
184
if ($Debug) { debug(" ***** Thread id $tid: MakeDNSlookup done ($ipaddress resolved into $TmpDNSLookup{$ipaddress})",4); }
185
delete $threadarray{$ipaddress};
189
#-----------------------------------------------------------------------------
190
# Function: WriteRecordsReadyInQueue
193
#-----------------------------------------------------------------------------
194
sub WriteRecordsReadyInQueue {
195
my $logfilechosen=shift;
196
if ($Debug) { debug("Check head of queue to write records ready to flush (QueueCursor=$QueueCursor, QueueSize=".(scalar keys %QueueRecords).")",4); }
197
while ( $QueueHostsToResolve{$QueueCursor} && ( ($QueueHostsToResolve{$QueueCursor} eq '*') || ($MyDNSTable{$QueueHostsToResolve{$QueueCursor}}) || ($TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}}) ) ) {
198
# $QueueCursor point to a ready record
199
if ($QueueHostsToResolve{$QueueCursor} eq '*') {
200
if ($Debug) { debug(" First elem in queue is ready. No change on it. We pull it.",4); }
203
if ($MyDNSTable{$QueueHostsToResolve{$QueueCursor}}) {
204
if ($MyDNSTable{$QueueHostsToResolve{$QueueCursor}} ne '*') {
205
# Replace the IP address found in the record by its name from MyDNSTable.
# \Q...\E quotes the address so that its dots only match literal dots
# (unquoted, "1.2.3.4" would also match text such as "1x2y3z4").
$QueueRecords{$QueueCursor}=~s/\Q$QueueHostsToResolve{$QueueCursor}\E/$MyDNSTable{$QueueHostsToResolve{$QueueCursor}}/;
206
if ($Debug) { debug(" First elem in queue has been resolved (found in MyDNSTable $MyDNSTable{$QueueHostsToResolve{$QueueCursor}}). We pull it.",4); }
209
elsif ($TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}}) {
210
if ($TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}} ne '*') {
211
# Replace the IP address found in the record by its name from TmpDNSLookup.
# \Q...\E quotes the address so that its dots only match literal dots
# (unquoted, "1.2.3.4" would also match text such as "1x2y3z4").
$QueueRecords{$QueueCursor}=~s/\Q$QueueHostsToResolve{$QueueCursor}\E/$TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}}/;
212
if ($Debug) { debug(" First elem in queue has been resolved (found in TmpDNSLookup $TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}}). We pull it.",4); }
216
# Record is ready, we output it.
217
if ($AddFileNum) { print "$logfilechosen "; }
218
if ($AddFileName) { print "$LogFileToDo{$logfilechosen} "; }
219
print "$QueueRecords{$QueueCursor}\n";
220
delete $QueueRecords{$QueueCursor};
221
delete $QueueHostsToResolve{$QueueCursor};
227
#-----------------------------------------------------------------------------
228
# Function: Check if threads are enabled or not
231
#-----------------------------------------------------------------------------
232
sub Check_Thread_Use {
233
if ($] >= 5.008) { for (0..@ARGV-1) { if ($ARGV[$_] =~ /^-dnslookup[:=](\d{1,2})/i) {
235
if (!eval ('require "threads.pm";')) { &error("Failed to load perl module 'threads' required for multi-threaded DNS lookup".($@?": $@":"")); }
236
if (!eval ('require "threads/shared.pm";')) { &error("Failed to load perl module 'threads::shared' required for multi-threaded DNS lookup".($@?": $@":"")); }
238
else { &error("Multi-thread is disabled in default version of this script.\nYou must manually edit the file '$0' to comment/uncomment all\nlines marked with 'ENABLETHREAD' string to enable multi-threading"); }
244
#-----------------------------------------------------------------------------
246
#-----------------------------------------------------------------------------
247
# Split the script path $0 into $DIR (leading directory part, including its
# trailing separator), $PROG (file name without its last extension) and
# $Extension (text after the last dot of the file name, '' if there is none).
($DIR=$0) =~ s/([^\/\\]*)$//; $PROG=$1;
# $1 is only read when the extension match succeeded: after a failed match it
# still holds the capture of the previous regex (the whole file name).
$Extension = ($PROG =~ s/\.([^\.]*)$//) ? $1 : '';
249
# Get parameters (Note: $MaxNbOfThread is already known)
252
if ($ARGV[$_] =~ /^-/) {
253
if ($ARGV[$_] =~ /debug=(\d)/i) { $Debug=$1; }
254
elsif ($ARGV[$_] =~ /dnscache=/i) { $DNSLookup||=2; $DNSCache=$ARGV[$_]; $DNSCache =~ s/-dnscache=//; }
255
elsif ($ARGV[$_] =~ /dnslookup[:=](\d{1,2})/i) { $DNSLookup||=1; $MaxNbOfThread=$1; }
256
elsif ($ARGV[$_] =~ /dnslookup/i) { $DNSLookup||=1; }
257
elsif ($ARGV[$_] =~ /showsteps/i) { $ShowSteps=1; }
258
elsif ($ARGV[$_] =~ /addfilenum/i) { $AddFileNum=1; }
259
elsif ($ARGV[$_] =~ /addfilename/i) { $AddFileName=1; }
260
else { print "Unknown argument $ARGV[$_] ignored\n"; }
263
push @ParamFile, $ARGV[$_];
267
if ($Debug) { $|=1; }
270
debug(ucfirst($PROG)." - $VERSION - Perl $^X $]",1);
271
debug("DNSLookup=$DNSLookup");
272
debug("DNSCache=$DNSCache");
273
debug("MaxNbOfThread=$MaxNbOfThread");
276
# Disallow MaxNbOfThread and Perl < 5.8
277
if ($] < 5.008 && $MaxNbOfThread) {
278
error("Multi-threaded DNS lookup is only supported with Perl 5.8 or higher (not $]). Use -dnslookup option instead");
281
# Warning, there is a memory hole in ActiveState perl version (in delete functions)
282
if ($^X =~ /activestate/i || $^X =~ /activeperl/i) {
287
if (scalar @ParamFile == 0) {
288
print "----- $PROG $VERSION (c) Laurent Destailleur -----\n";
289
print "$PROG allows you to get one unique output log file, sorted on date,\n";
290
print "built from particular sources:\n";
291
print " - It can read several input log files,\n";
292
print " - It can read .gz/.bz2 log files,\n";
293
print " - It can also makes a fast reverse DNS lookup to replace\n";
294
print " all IP addresses into host names in resulting log file.\n";
295
print "$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software\n";
296
print "distributed with a GNU General Public License (See COPYING.txt file).\n";
297
print "$PROG is part of AWStats but can be used alone as a log merger\n";
298
print "or resolver before using any other log analyzer.\n";
301
print " $PROG.$Extension [options] file\n";
302
print " $PROG.$Extension [options] file1 ... filen\n";
303
print " $PROG.$Extension [options] *.*\n";
304
print " perl $PROG.$Extension [options] *.* > newfile\n";
306
print " -dnslookup make a reverse DNS lookup on IP adresses\n";
307
print " -dnslookup=n same with a n parallel threads instead of serial requests\n";
308
print " -dnscache=file make DNS lookup from cache file first before network lookup\n";
309
print " -showsteps print on stderr benchmark information every $NBOFLINESFORBENCHMARK lines\n";
310
print " -addfilenum if used with several files, file number can be added in first\n";
311
print " -addfilename if used with several files, file name can be added in first\n";
312
print " field of output file. This can be used to add a cluster id\n";
313
print " when log files come from several load balanced computers.\n";
316
print "This runs $PROG in command line to open one or several\n";
317
print "server log files to merge them (sorted on date) and/or to make a reverse\n";
318
print "DNS lookup (if asked). The result log file is sent on standard output.\n";
319
print "Note: $PROG is not a 'sort' tool to sort one file. It's a\n";
320
print "software able to output sorted log records (with a reverse DNS lookup\n";
321
print "included or not) even if log records are dispatched in several files.\n";
322
print "Each of thoose files must be already independently sorted itself\n";
323
print "(but that is the case in all web server log files). So you can use it\n";
324
print "for load balanced log files or to group several old log files.\n";
326
print "Don't forget that the main goal of logresolvemerge is to send log records to\n";
327
print "a log analyzer in a sorted order without merging files on disk (NO NEED\n";
328
print "OF DISK SPACE AT ALL) and without loading files into memory (NO NEED\n";
329
print "OF MORE MEMORY). Choose of output records is done on the fly.\n";
331
print "So logresolvemerge is particularly usefull when you want to output several\n";
332
print "and/or large log files in a fast process, with no use of disk or\n";
333
print "more memory, and in a chronological order through a pipe (to be used by a log\n";
334
print "analyzer).\n";
336
print "Note: If input records are not 'exactly' sorted but 'nearly' sorted (this\n";
337
print "occurs with heavy servers), this is not a problem, the output will also\n";
338
print "be 'nearly' sorted but a few log analyzers (like AWStats) knowns how to deal\n";
339
print "with such logs.\n";
341
print "WARNING: If log files are old MAC text files (lines ended with CR char), you\n";
342
print "can't run this tool on Win or Unix platforms.\n";
344
print "WARNING: Because of important memory holes in ActiveState Perl version, use\n";
345
print "another Perl interpreter if you need to process large lof files.\n";
347
print "Now supports/detects:\n";
348
print " Automatic detection of log format\n";
349
print " Files can be .gz/.bz2 files if zcat/bzcat tools are available in PATH.\n";
350
print " Multithreaded reverse DNS lookup (several parallel requests) with Perl 5.8+.\n";
351
print "New versions and FAQ at http://awstats.sourceforge.net\n";
357
# Break $nowtime into date/time fields, each padded to two digits.
my ($nowsec,$nowmin,$nowhour,$nowday,$nowmonth,$nowyear) = localtime($nowtime);
# Year values below 100 are shifted into 20xx, all others are offset by 1900.
$nowyear += ($nowyear < 100) ? 2000 : 1900;
(my $nowsmallyear = $nowyear) =~ s/^..//;	# Year without its first two digits
$nowmonth++;								# Month number is incremented before padding
foreach my $field (\$nowmonth, \$nowday, \$nowhour, \$nowmin, \$nowsec) {
	if (${$field} < 10) { ${$field} = "0".${$field}; }
}
# Get tomorrow time (will be used to discard some record with corrupted date (future date))
my ($tomorrowsec,$tomorrowmin,$tomorrowhour,$tomorrowday,$tomorrowmonth,$tomorrowyear) = localtime($nowtime+86400);
$tomorrowyear += ($tomorrowyear < 100) ? 2000 : 1900;
(my $tomorrowsmallyear = $tomorrowyear) =~ s/^..//;
$tomorrowmonth++;
foreach my $field (\$tomorrowmonth, \$tomorrowday, \$tomorrowhour, \$tomorrowmin, \$tomorrowsec) {
	if (${$field} < 10) { ${$field} = "0".${$field}; }
}
# Timestamp of tomorrow formatted as YYYYMMDDHHMMSS
my $timetomorrow=join('', $tomorrowyear, $tomorrowmonth, $tomorrowday, $tomorrowhour, $tomorrowmin, $tomorrowsec);
376
# Init other parameters
# The benchmark constant is decremented once here; the main loop combines it
# with the line counter through a bitwise AND.
$NBOFLINESFORBENCHMARK--;
$DirCgi = '' if $ENV{"GATEWAY_INTERFACE"};
# A non-empty $DirCgi must end with a path separator (/ or \)
$DirCgi .= '/' if $DirCgi && $DirCgi !~ /[\/\\]$/;
# If not defined or set to "." then DirData is the directory of this script
unless ($DirData && $DirData ne '.') { $DirData = $DIR; }
# If the script directory is not defined either, fall back to "."
$DirData ||= '.';
384
#my %monthlib = ( "01","$Message[60]","02","$Message[61]","03","$Message[62]","04","$Message[63]","05","$Message[64]","06","$Message[65]","07","$Message[66]","08","$Message[67]","09","$Message[68]","10","$Message[69]","11","$Message[70]","12","$Message[71]" );
385
# monthnum must be in english because it's used to translate log date in apache log files which are always in english
386
# Maps an English month abbreviation (capitalized or all lowercase) to its
# two-digit month number, "01" to "12".
my %monthnum = do {
	my $idx = 0;
	map { my $num = sprintf("%02d", ++$idx); ($_ => $num, lc($_) => $num) }
		qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
};
389
if ($Debug) { debug("Load DNS Cache file $DNSCache",2); }
390
# Open the DNS cache file for reading. The 3-argument form of open keeps the
# mode separate from the file name, so characters such as '<', '>', '|' or
# surrounding spaces in $DNSCache cannot change how the file is opened.
open(CACHE, '<', $DNSCache) or error("Can't open cache file $DNSCache: $!");
392
my ($time, $ip, $name) = split;
394
$name="$ip" if $name eq '*';
395
$MyDNSTable{$ip}=$name;
401
#-----------------------------------------------------------------------------
402
# PROCESSING CURRENT LOG(s)
403
#-----------------------------------------------------------------------------
405
my $NbOfLinesParsed=0;
407
my $starttime=time();
409
# Define the LogFileToDo list
411
foreach my $key (0..(@ParamFile-1)) {
412
if ($ParamFile[$key] !~ /\*/ && $ParamFile[$key] !~ /\?/) {
414
if ($Debug) { debug("DBG1 Log file $ParamFile[$key] is added to LogFileToDo with number $cpt."); }
415
# Check for supported compression
416
if ($ParamFile[$key] =~ /$zcat_file/) {
417
if ($Debug) { debug("GZIP compression detected for Log file $ParamFile[$key]."); }
418
# Modify the name to include the zcat command
419
$ParamFile[$key] = $zcat . ' ' . $ParamFile[$key] . ' |';
421
elsif ($ParamFile[$key] =~ /$bzcat_file/) {
422
if ($Debug) { debug("BZ2 compression detected for Log file $ParamFile[$key]."); }
423
# Modify the name to include the bzcat command
424
$ParamFile[$key] = $bzcat . ' ' . $ParamFile[$key] . ' |';
427
# Register the (possibly rewritten) file name under number $cpt.
# A single element is read with the scalar form $ParamFile[$key], not with a
# one-element array slice.
$LogFileToDo{$cpt}=$ParamFile[$key];
432
my $DirFile=$ParamFile[$key]; $DirFile =~ s/([^\/\\]*)$//;
433
$ParamFile[$key] = $1;
434
if ($DirFile eq '') { $DirFile = '.'; }
435
$ParamFile[$key] =~ s/\./\\\./g;
436
$ParamFile[$key] =~ s/\*/\.\*/g;
437
$ParamFile[$key] =~ s/\?/\./g;
438
if ($Debug) { debug("Search for file \"$ParamFile[$key]\" into \"$DirFile\""); }
439
opendir(DIR,"$DirFile");
440
my @filearray = sort readdir DIR;
442
foreach my $i (0..$#filearray) {
443
if ("$filearray[$i]" =~ /^$ParamFile[$key]$/ && "$filearray[$i]" ne "." && "$filearray[$i]" ne "..") {
445
if ($Debug) { debug("DBG2 Log file $filearray[$i] is added to LogFileToDo with number $cpt."); }
446
# Check for supported compression
447
if ($filearray[$i] =~ /$zcat_file/) {
448
if ($Debug) { debug("GZIP compression detected for Log file $filearray[$i]."); }
449
# Modify the name to include the zcat command
450
$LogFileToDo{$cpt}=$zcat . ' ' . "$DirFile/$filearray[$i]" . ' |';
452
elsif ($filearray[$i] =~ /$bzcat_file/) {
453
if ($Debug) { debug("BZ2 compression detected for Log file $filearray[$i]."); }
454
# Modify the name to include the bzcat command
455
$LogFileToDo{$cpt}=$bzcat . ' ' . "$DirFile/$filearray[$i]" . ' |';
458
$LogFileToDo{$cpt}="$DirFile/$filearray[$i]";
467
# If no files to process
468
if (scalar keys %LogFileToDo == 0) {
469
error("No input log file found");
473
if ($Debug) { debug("Start of processing ".(scalar keys %LogFileToDo)." log file(s), $MaxNbOfThread threads max"); }
474
foreach my $logfilenb (keys %LogFileToDo) {
475
if ($Debug) { debug("Open log file number $logfilenb: \"$LogFileToDo{$logfilenb}\""); }
476
open("LOG$logfilenb","$LogFileToDo{$logfilenb}") || error("Couldn't open log file \"$LogFileToDo{$logfilenb}\" : $!");
477
binmode "LOG$logfilenb"; # To avoid pb of corrupted text log files with binary chars.
483
# BEGIN Read new record
484
# For each log file if logfilechosen is 0
485
# If not, we go directly to log file instead of iterating over all keys for a match
486
#----------------------------------------------------------------------------------
488
if($logfilechosen == 0) {
489
@readlist = keys %LogFileToDo;
491
@readlist = ($logfilechosen);
493
foreach my $logfilenb (@readlist)
495
if ($Debug) { debug("Search next record in file number $logfilenb",3); }
496
# Read chosen log file until we find a record with a good date or reach the end of file
498
my $LOG="LOG$logfilenb";
499
$_=<$LOG>; # Read new line
500
if (! $_) { # No more records in log file number $logfilenb
501
if ($Debug) { debug(" No more records in file number $logfilenb",2); }
502
delete $LogFileToDo{$logfilenb};
509
if (/^#/) { next; } # Ignore comment lines (ISS writes such comments)
510
if (/^!!/) { next; } # Ignore comment lines (Webstar writes such comments)
511
if (/^$/) { next; } # Ignore blank lines (With ISS: happens sometimes, with Apache: possible when editing log file)
513
$linerecord{$logfilenb}=$_;
516
#----------------------------------------------------------------------
518
# Split DD/Month/YYYY:HH:MM:SS or YYYY-MM-DD HH:MM:SS or MM/DD/YY\tHH:MM:SS
519
my $year=0; my $month=0; my $day=0; my $hour=0; my $minute=0; my $second=0;
520
if ($_ =~ /(\d\d\d\d)-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)/) { $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6; }
521
elsif ($_ =~ /\[(\d\d)[\/:\s](\w+)[\/:\s](\d\d\d\d)[\/:\s](\d\d)[\/:\s](\d\d)[\/:\s](\d\d) /) { $year=$3; $month=$2; $day=$1; $hour=$4; $minute=$5; $second=$6; }
522
elsif ($_ =~ /\[\w+ (\w+) (\d\d) (\d\d)[\/:\s](\d\d)[\/:\s](\d\d) (\d\d\d\d)\]/) { $year=$6; $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5; }
524
if ($monthnum{$month}) { $month=$monthnum{$month}; } # Change lib month in num month if necessary
526
# Create $timerecord like YYYYMMDDHHMMSS
527
$timerecord{$logfilenb}=int("$year$month$day$hour$minute$second");
528
if ($timerecord{$logfilenb}<10000000000000) {
529
if ($Debug) { debug(" This record is corrupted (no date found)",3); }
530
$corrupted{$logfilenb}++;
533
if ($Debug) { debug(" This is next record for file $logfilenb : timerecord=$timerecord{$logfilenb}",3); }
535
# Sort and insert into timerecordorder, oldest at end/back of array
536
# At the beginning, timerecordorder is empty. Then, because the first pass is
537
# a loop on each file to read each first line, the timerecordorder size is
538
# number of input files.
539
# After, each new loop, read only one new line, so timerecordorder size increase
540
# by one but decrease just after by the pop command later.
542
for(my $c=$#timerecordorder; $c>=0 ; $c--) {
543
if($timerecord{$logfilenb} <= $timerecord{$timerecordorder[$c]})
545
# Is older or equal than index at $c, add after
546
$timerecordorder[$c + 1]=$logfilenb;
550
$timerecordorder[$c + 1]=$timerecordorder[$c];
554
$timerecordorder[0] = $logfilenb;
560
# END Read new lines for each log file. After this, following var are filled
561
# $timerecord{$logfilenb}
562
# @timerecordorder array
564
# We choose which record of which log file to process
565
if ($Debug) { debug("Choose which record of which log file to process",3); }
566
$logfilechosen=pop(@timerecordorder);
567
if(!defined($logfilechosen)) { last; } # No more record to process
570
if ($Debug) { debug(" We choosed to qualify record of file number $logfilechosen",3); }
571
if ($Debug) { debug(" Record is $linerecord{$logfilechosen}",3); }
573
# Record is approved. We found a new line to parse in file number $logfilechosen
574
#-------------------------------------------------------------------------------
577
if ((++$NbOfLinesShowsteps & $NBOFLINESFORBENCHMARK) == 0) {
578
my $delay=(time()-$starttime)||1;
579
print STDERR "$NbOfLinesParsed lines processed (".(1000*$delay)." ms, ".int($NbOfLinesShowsteps/$delay)." lines/seconds)\n";
584
#--------------------
587
if ($DNSLookup) { # DNS lookup is 1 or 2
588
if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; } # IPv4
589
elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; } # IPv6
591
# Check in static DNS cache file
592
if ($MyDNSTable{$Host}) {
593
if ($Debug) { debug(" DNS lookup asked for $Host and found in static DNS cache file: $MyDNSTable{$Host}",4); }
595
elsif ($DNSLookup==1) {
596
# Check in session cache (dynamic DNS cache file + session DNS cache)
597
if (! $threadarray{$Host} && ! $TmpDNSLookup{$Host}) {
598
if (@SkipDNSLookupFor && &SkipDNSLookup($Host)) {
599
$TmpDNSLookup{$Host}='*';
600
if ($Debug) { debug(" No need of reverse DNS lookup for $Host, skipped at user request.",4); }
604
# Create or not a new thread
605
if ($MaxNbOfThread) {
606
if (! $threadarray{$Host}) { # No thread already launched for $Host
607
while ((scalar keys %threadarray) >= $MaxNbOfThread) {
608
if ($Debug) { debug(" $MaxNbOfThread thread running reached, so we wait",4); }
611
$threadarray{$Host}=1; # Semaphore to tell thread for $Host is active
612
# my $t = new Thread \&MakeDNSLookup, $Host;
613
my $t = threads->create(sub { MakeDNSLookup($Host) });
614
if (! $t) { error("Failed to create new thread"); }
615
if ($Debug) { debug(" Reverse DNS lookup for $Host queued in thread ".$t->tid,4); }
616
$t->detach(); # We don't need to keep return code
619
if ($Debug) { debug(" Reverse DNS lookup for $Host already queued in a thread"); }
621
# Here, this is the only way, $TmpDNSLookup{$Host} can be not defined
623
&MakeDNSLookup($Host);
624
if ($Debug) { debug(" Reverse DNS lookup for $Host done: $TmpDNSLookup{$Host}",4); }
628
$TmpDNSLookup{$Host}='*';
629
if ($Debug) { debug(" Reverse DNS lookup for $Host not available for IPv6",4); }
633
if ($Debug) { debug(" Reverse DNS lookup already queued or done for $Host: $TmpDNSLookup{$Host}",4); }
637
if ($Debug) { debug(" DNS lookup by static DNS cache file asked for $Host but not found.",4); }
641
if ($Debug) { debug(" DNS lookup asked for $Host but this is not an IP address.",4); }
642
$DNSLookupAlreadyDone=$LogFileToDo{$logfilechosen};
646
if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; } # IPv4
647
elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; } # IPv6
648
if ($Debug) { debug(" No DNS lookup asked.",4); }
651
# Put record in record queue
652
if ($Debug) { debug("Add record $NbOfLinesParsed in record queue (with host to resolve = ".($Host?$Host:'*').")",4); }
653
$QueueRecords{$NbOfLinesParsed}=$linerecord{$logfilechosen};
655
# Put record in host queue
656
# If there is a host to resolve, we add line to queue with value of host to resolve
657
# $Host is '' (no ip found) or is ip
659
$QueueHostsToResolve{$NbOfLinesParsed}='*';
662
$QueueHostsToResolve{$NbOfLinesParsed}=$Host?$Host:'*';
665
$QueueHostsToResolve{$NbOfLinesParsed}=$MyDNSTable{$Host}?$Host:'*';
668
# Print all records in head of queue that are ready
669
&WriteRecordsReadyInQueue($logfilechosen);
671
} # End of processing new record. Loop on next one.
673
if ($Debug) { debug("End of processing log file(s)"); }
675
# Close all log files
676
foreach my $logfilenb (keys %LogFileToDo) {
677
if ($Debug) { debug("Close log file number $logfilenb"); }
678
close("LOG$logfilenb") || error("Command for pipe '$LogFileToDo{$logfilenb}' failed");
681
while ( $QueueHostsToResolve{$QueueCursor} && $QueueHostsToResolve{$QueueCursor} ne '*' && ! $MyDNSTable{$QueueHostsToResolve{$QueueCursor}} && ! $TmpDNSLookup{$QueueHostsToResolve{$QueueCursor}} ) {
683
# Print all records in head of queue that are ready
684
&WriteRecordsReadyInQueue($logfilechosen);
687
# Waiting queue is empty
688
if ($MaxNbOfThread) {
689
foreach my $t (threads->list()) {
690
if ($Debug) { debug("Join thread $t"); }
696
if ($DNSLookup==1 && $DNSLookupAlreadyDone) {
697
warning("Warning: $PROG has detected that some host names were already resolved in your logfile $DNSLookupAlreadyDone.\nIf DNS lookup was already made by the logger (web server) in ALL your log files, you should not use -dnslookup option to increase $PROG speed.");
701
debug("Total nb of read lines: $NbOfLinesRead");
702
debug("Total nb of parsed lines: $NbOfLinesParsed");
703
debug("Total nb of DNS lookup asked: $NbOfDNSLookupAsked");
707
# open(CACHE, ">$DNSCache") or die;
708
# foreach (keys %TmpDNSLookup) {
709
# $TmpDNSLookup{$_}="*" if $TmpDNSLookup{$_} eq "ip";
710
# print CACHE "0\t$_\t$TmpDNSLookup{$_}\n";
715
0; # Do not remove this line