2
#-----------------------------------------------------------------------------
3
# Export lib data values to text files to allow using the AWStats robots,
4
# os, browsers, search_engines database with other log analyzers
5
#-----------------------------------------------------------------------------
6
# $Revision: 1.4 $ - $Author: eldy $ - $Date: 2003/12/05 23:53:37 $
8
#use warnings; # Must be used in test mode only. This slightly reduces processing speed
9
#use diagnostics; # Must be used in test mode only. This greatly reduces processing speed
10
use strict;no strict "refs";
14
#-----------------------------------------------------------------------------
16
#-----------------------------------------------------------------------------
17
use vars qw/ $REVISION $VERSION /;
18
my $REVISION='$Revision: 1.4 $'; $REVISION =~ /\s(.*)\s/; $REVISION=$1;
19
my $VERSION="5.1 (build $REVISION)";
21
# ---------- Init variables -------
26
$DEBUGFORCED=0; # Force debug level to log lesser level into debug.log file (Keep this value to 0)
33
$DIR=$PROG=$Extension='';
37
$LevelForRobotsDetection $LevelForBrowsersDetection $LevelForOSDetection $LevelForRefererAnalyze
38
$LevelForSearchEnginesDetection $LevelForKeywordsDetection
40
($LevelForRobotsDetection, $LevelForBrowsersDetection, $LevelForOSDetection, $LevelForRefererAnalyze,
41
$LevelForSearchEnginesDetection, $LevelForKeywordsDetection)=
44
$DirLock $DirCgi $DirData $DirIcons $DirLang $AWScript $ArchiveFileName
45
$AllowAccessFromWebToFollowingIPAddresses $HTMLHeadSection $HTMLEndSection $LinksToWhoIs $LinksToIPWhoIs
46
$LogFile $LogFormat $LogSeparator $Logo $LogoLink $StyleSheet $WrapperScript $SiteDomain
48
($DirLock, $DirCgi, $DirData, $DirIcons, $DirLang, $AWScript, $ArchiveFileName,
49
$AllowAccessFromWebToFollowingIPAddresses, $HTMLHeadSection, $HTMLEndSection, $LinksToWhoIs, $LinksToIPWhoIs,
50
$LogFile, $LogFormat, $LogSeparator, $Logo, $LogoLink, $StyleSheet, $WrapperScript, $SiteDomain)=
51
("","","","","","","","","","","","","","","","","","","","");
53
$QueryString $LibToExport $ExportFormat
55
($QueryString, $LibToExport, $ExportFormat)=
57
# ---------- Init arrays --------
59
@RobotsSearchIDOrder_list1 @RobotsSearchIDOrder_list2 @RobotsSearchIDOrder_listgen
60
@SearchEnginesSearchIDOrder_list1 @SearchEnginesSearchIDOrder_list2 @SearchEnginesSearchIDOrder_listgen
61
@BrowsersSearchIDOrder @OSSearchIDOrder @WordsToExtractSearchUrl @WordsToCleanSearchUrl
63
@RobotsSearchIDOrder @SearchEnginesSearchIDOrder
65
@RobotsSearchIDOrder = @SearchEnginesSearchIDOrder = ();
66
# ---------- Init hash arrays --------
68
%BrowsersHashIDLib %BrowsersHashIcon %BrowsersHereAreGrabbers
70
%MimeHashLib %MimeHashIcon %MimeHashFamily
73
%SearchEnginesHashID %SearchEnginesHashLib %SearchEnginesKnownUrl %NotSearchEnginesKeys
74
%WormsHashID %WormsHashLib
79
#-----------------------------------------------------------------------------
81
#-----------------------------------------------------------------------------
83
#------------------------------------------------------------------------------
84
# Function: Write error message and exit
85
# Parameters: $message $secondmessage $thirdmessage $donotshowsetupinfo
86
# Input: $LogSeparator $LogFormat
89
#------------------------------------------------------------------------------
91
my $message=shift||"";
92
my $secondmessage=shift||"";
93
my $thirdmessage=shift||"";
94
my $donotshowsetupinfo=shift||0;
95
if ($Debug) { debug("$message $secondmessage $thirdmessage",1); }
101
#------------------------------------------------------------------------------
102
# Function: Write debug message and exit
103
# Parameters: $string $level
104
# Input: $Debug = required level $DEBUGFORCED = required level forced
107
#------------------------------------------------------------------------------
109
my $level = $_[1] || 1;
110
if ($level <= $DEBUGFORCED) {
111
my $debugstring = $_[0];
112
if (! $DebugResetDone) { open(DEBUGFORCEDFILE,"debug.log"); close DEBUGFORCEDFILE; chmod 0666,"debug.log"; $DebugResetDone=1; }
113
open(DEBUGFORCEDFILE,">>debug.log");
114
print DEBUGFORCEDFILE localtime(time)." - $$ - DEBUG $level - $debugstring\n";
115
close DEBUGFORCEDFILE;
117
if ($level <= $Debug) {
118
my $debugstring = $_[0];
119
print localtime(time)." - DEBUG $level - $debugstring\n";
124
#------------------------------------------------------------------------------
125
# Function: Load the reference databases
128
# Output: Arrays and Hash tables are defined
130
#------------------------------------------------------------------------------
132
# Check lib files in common possible directories :
133
# Windows : "${DIR}lib" (lib in same dir as awstats.pl)
134
# Debian package : "/usr/share/awstats/lib"
135
# Other possible directories : "./lib"
138
$lib=~ s/^.*[\\\/]//;
139
$dir =~ s/[^\\\/]+$//; $dir =~ s/[\\\/]+$//;
140
debug("Lib: $lib, Dir: $dir");
141
my @PossibleLibDir=("$dir","{DIR}lib","/usr/share/awstats/lib","./lib");
144
my @FileListToLoad=();
145
push @FileListToLoad, "$lib";
146
foreach my $file (@FileListToLoad) {
147
foreach my $dir (@PossibleLibDir) {
149
if ($searchdir && (!($searchdir =~ /\/$/)) && (!($searchdir =~ /\\$/)) ) { $searchdir .= "/"; }
150
if (! $FilePath{$file}) {
151
if (-s "${searchdir}${file}") {
152
$FilePath{$file}="${searchdir}${file}";
153
if ($Debug) { debug("Call to Read_Ref_Data [FilePath{$file}=\"$FilePath{$file}\"]"); }
154
# push @INC, "${searchdir}"; require "${file}";
155
require "$FilePath{$file}";
159
if (! $FilePath{$file}) {
160
my $filetext=$file; $filetext =~ s/\.pm$//; $filetext =~ s/_/ /g;
161
&error("Error: Can't read file \"$file\".\nCheck if file is in ".($PossibleLibDir[0])." directory and is readable.");
166
#------------------------------------------------------------------------------
167
# Function: Unregex a string
171
# Return: Unregexed string
172
#------------------------------------------------------------------------------
179
#------------------------------------------------------------------------------
180
# Function: Unregex a keyword code extractor
184
# Return: Unregexed string
185
#------------------------------------------------------------------------------
186
sub unregexkeywordcode {
	# Parameters:	$ss           = keyword extractor pattern; alternatives are
	#				                separated by '|' and may carry parentheses
	#				$firstoneonly = if true, return only the first alternative
	# Return:		the alternatives with all parentheses removed, joined by ','
	#				(or only the first one; undef if there is none)
	my $ss=shift||'';			# Pattern is the FIRST argument (callers pass it first)
	my $firstoneonly=shift||0;	# Optional flag is the second argument
	# Strip parentheses on a copy of each alternative so nothing is altered through map's alias
	my @ll=map { (my $code=$_) =~ s/[\(\)]//g; $code; } split(/\|/,$ss);
	if ($firstoneonly) { return $ll[0]; }
	return join(',',@ll);
}
197
#------------------------------------------------------------------------------
199
#------------------------------------------------------------------------------
200
($DIR=$0) =~ s/([^\/\\]*)$//; ($PROG=$1) =~ s/\.([^\.]*)$//; $Extension=$1;
202
my @AllowedArgs=('-lib','-exportformat','-debug');
206
# TODO Check if ARGV is an AllowedArg
207
if ($_ > 0) { $QueryString .= "&"; }
208
my $NewLinkParams=$ARGV[$_]; $NewLinkParams =~ s/^-+//; $NewLinkParams =~ s/\s/%20/g;
209
$QueryString .= "$NewLinkParams";
211
$ExportFormat="text";
212
if ($QueryString =~ /lib=([^\s&]+)/i) { $LibToExport="$1"; }
213
if ($QueryString =~ /exportformat=([^\s&]+)/i) { $ExportFormat="$1"; }
214
if ($QueryString =~ /debug=(\d+)/i) { $Debug=$1; }
217
debug("$PROG - $VERSION - Perl $^X $]",1);
218
debug("QUERY_STRING=$QueryString",2);
221
if (! $LibToExport || ! $ExportFormat) {
222
print "----- $PROG $VERSION (c) Laurent Destailleur -----\n";
223
print "$PROG is a tool to export AWStats lib (Robots, Os, Browsers, search\n";
224
print "engines database) to text files. This allow you to use AWStats lib with some\n";
225
print "other log analyzers (to enhance their capabilities or to make comparison).\n";
226
print "$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software distributed\n";
227
print "with a GNU General Public License (See LICENSE file for details).\n";
229
print "Syntax: $PROG.$Extension -lib=/awstatslibpath/libfile.pm [-exportformat=format]\n";
231
print "Where format can be:\n";
232
print " text (default)\n";
233
print " webalizer\n";
239
&Read_Ref_Data($LibToExport);
242
my $libisexportable=0;
247
if ($LibToExport =~ /browsers/) {
248
foreach my $key (@BrowsersSearchIDOrder) {
249
if ($ExportFormat eq 'text') {
250
print "$key\t$BrowsersHashIDLib{$key}\n";
252
if ($ExportFormat eq 'webalizer') {
253
print "GroupAgent\t$key\n";
255
if ($ExportFormat eq 'analog') {
256
print "Analog does not support self-defined browsers.\nUse 'text' export format if you want an export list of AWStats Browsers.\n";
263
if ($LibToExport =~ /mime/) {
264
if ($ExportFormat eq 'analog') {
265
foreach my $key (sort keys %MimeHashFamily) {
266
if ($MimeHashFamily{$key} =~ /(text|page|script|document)/) { print "PAGEINCLUDE *.$key\n"; }
269
foreach my $key (sort keys %MimeHashFamily) {
270
if ($ExportFormat eq 'text') {
271
print "$key\t$MimeHashLib{$MimeHashFamily{$key}}\n";
273
if ($ExportFormat eq 'webalizer') {
274
print "Webalizer does not support self-defined mime types.\nUse 'text' export format if you want an export list of AWStats Mime types.\n";
277
if ($ExportFormat eq 'analog') {
278
print "TYPEALIAS .$key \"$key [$MimeHashLib{$MimeHashFamily{$key}}]\"\n";
284
if ($LibToExport =~ /operating_systems/) {
285
foreach my $key (sort keys %OSHashLib) {
286
if ($ExportFormat eq 'text') {
287
print "Feature not ready yet\n";
290
if ($ExportFormat eq 'webalizer') {
291
print "Webalizer does not support self-defined added OS.\nUse 'text' export format if you want an export list of AWStats OS.\n";
294
if ($ExportFormat eq 'analog') {
295
print "Analog does not support self-defined added OS.\nUse 'text' export format if you want an export list of AWStats OS.\n";
302
if ($LibToExport =~ /robots/) {
306
# Init RobotsSearchIDOrder required for update process
308
foreach (1..2) { push @list,"list$_"; }
309
push @list,"listgen";
310
foreach my $key (@list) {
311
push @RobotsSearchIDOrder,@{"RobotsSearchIDOrder_$key"};
314
foreach my $key (@RobotsSearchIDOrder) {
315
if ($ExportFormat eq 'text') {
316
print "$key\t$RobotsHashIDLib{$key}\n";
318
if ($ExportFormat eq 'webalizer') {
319
print "GroupAgent\t$key\n";
321
if ($ExportFormat eq 'analog') {
322
print "ROBOTINCLUDE REGEXPI:$key\n";
328
if ($LibToExport =~ /search_engines/) {
331
# Init SearchEnginesIDOrder required for update process
333
foreach (1..2) { push @list,"list$_"; }
334
push @list,"listgen"; # Always added
335
foreach my $key (@list) {
336
push @SearchEnginesSearchIDOrder,@{"SearchEnginesSearchIDOrder_$key"};
339
foreach my $key (@SearchEnginesSearchIDOrder) {
340
if ($ExportFormat eq 'text') {
341
print "$key\t$SearchEnginesKnownUrl{$SearchEnginesHashID{$key}}\t$SearchEnginesHashLib{$SearchEnginesHashID{$key}}\n";
343
if ($ExportFormat eq 'webalizer') {
344
my $urlkeywordsyntax=$SearchEnginesKnownUrl{$SearchEnginesHashID{$key}};
345
my $urlkeywordsyntax=&unregexkeywordcode($urlkeywordsyntax,1);
346
if (! $urlkeywordsyntax) { next; } # This has no keywordextractcode
347
my $newkey=&unregex($key);
348
if ($newkey =~ /[\[\]\(\)\|\?\*\+]/) { next; } # This was a regex value that i can't clean
349
print "SearchEngine\t$newkey\t$urlkeywordsyntax\n";
350
print "GroupReferrer\t$newkey\t$SearchEnginesHashLib{$SearchEnginesHashID{$key}}\n";
352
if ($ExportFormat eq 'analog') {
353
my $urlkeywordsyntax=$SearchEnginesKnownUrl{$SearchEnginesHashID{$key}};
354
$urlkeywordsyntax=~s/=$//;
355
$urlkeywordsyntax=&unregexkeywordcode($urlkeywordsyntax);
356
if (! $urlkeywordsyntax) { next; } # This has no keywordextractcode
357
my $newkey=&unregex($key);
358
if ($newkey =~ /[\[\]\(\)\|\?\*\+]/) { next; } # This was a regex value that i can't clean
359
print "SEARCHENGINE http://*$newkey*/* $urlkeywordsyntax\n";
365
if (! $libisexportable) {
366
print "Export for AWStats lib '$LibToExport' is not supported in this tool version.\n";
370
0; # Do not remove this line