2
webalizer - a web server log analysis program
4
Copyright (C) 1997-2011 Bradford L. Barrett
6
This program is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2 of the License, or
9
(at your option) any later version, and provided that the above
10
copyright and permission notice is included with all distributed
11
copies of this or derived software.
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
18
You should have received a copy of the GNU General Public License
19
along with this program; if not, write to the Free Software
20
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
24
/*********************************************/
25
/* STANDARD INCLUDES */
26
/*********************************************/
28
/* Fix broken Zlib 64 bitness */
29
#if _FILE_OFFSET_BITS == 64
30
#ifndef _LARGEFILE64_SOURCE
31
#define _LARGEFILE64_SOURCE 1
40
#include <unistd.h> /* normal stuff */
43
#include <sys/utsname.h>
52
/* ensure sys/types */
54
#include <sys/types.h>
57
/* Need socket header? */
58
#ifdef HAVE_SYS_SOCKET_H
59
#include <sys/socket.h>
62
/* some systems need this */
69
#include <netinet/in.h>
70
#include <arpa/inet.h>
80
int bz2_rewind(void **, char *, char *);
83
#include "webalizer.h" /* main header */
89
#include "webalizer_lang.h" /* lang. support */
91
#include "dns_resolv.h"
94
/* internal function prototypes */
96
void clear_month(); /* clear monthly stuff */
97
char *unescape(char *); /* unescape URLs (main also applies it to referrer and user name) */
98
void print_opts(char *); /* print options (help, -h) */
99
void print_version(); /* print version info (-V) */
100
int isurlchar(unsigned char, int); /* valid URL char fnc. */
101
void get_config(char *); /* Read a config file */
102
static char *save_opt(char *); /* save conf option */
103
void srch_string(char *); /* srch str analysis */
104
char *get_domain(char *); /* return domain name */
105
void agent_mangle(char *); /* reformat user agent */
106
char *our_gzgets(void *, char *, int); /* our gzgets (line reader main uses for compressed logs) */
107
int ouricmp(char *, char *); /* case ins. compare */
108
int isipaddr(char *); /* is IP address test */
110
/*********************************************/
111
/* GLOBAL VARIABLES */
112
/*********************************************/
114
/* Program identification strings; main prints version and editlvl in its startup banner. */
char *version = "2.23"; /* program version */
115
char *editlvl = "05"; /* edit level */
116
char *moddate = "14-Apr-2011"; /* modification date */
117
char *copyright = "Copyright 1997-2011 by Bradford L. Barrett";
119
/* Run-time behaviour flags. Option letters in the comments refer to the */
/* getopt() switch in main, which overrides these defaults. */
int verbose = 2; /* 2=verbose,1=err, 0=none (-q sets 1, -Q sets 0, -v sets 2) */
120
int debug_mode = 0; /* debug mode flag (-d; also set by -v) */
121
int time_me = 0; /* timing display flag (-T) */
122
int local_time = 1; /* 1=localtime 0=GMT (UTC) */
123
int hist_gap = 0; /* 1=error w/hist, save bkp */
124
int ignore_hist = 0; /* history flag (1=skip, -i) */
125
int ignore_state = 0; /* state flag (1=skip, -b) */
126
int default_index= 1; /* default index. (1=yes): main adds "index." to index_alias */
127
int hourly_graph = 1; /* hourly graph display (-G turns off) */
128
int hourly_stats = 1; /* hourly stats table (-H turns off) */
129
int daily_graph = 1; /* daily graph display */
130
int daily_stats = 1; /* daily stats table */
131
int ctry_graph = 1; /* country graph display (-Y turns off) */
132
int shade_groups = 1; /* Group shading 0=no 1=yes */
133
int hlite_groups = 1; /* Group hlite 0=no 1=yes */
134
int mangle_agent = 0; /* mangle user agents (-M level; 0=no) */
135
int incremental = 0; /* incremental mode 1=yes (-p) */
136
int use_https = 0; /* use 'https://' on URLs */
137
int htaccess = 0; /* create .htaccess? (0=no) */
138
int stripcgi = 1; /* strip url cgi (0=no) */
139
int normalize = 1; /* normalize CLF URL (0=no, -Z) */
140
int trimsquid = 0; /* trim squid urls (0=no) */
141
int searchcasei = 1; /* case insensitive search */
142
int visit_timeout= 1800; /* visit timeout (seconds, -m) */
143
int graph_legend = 1; /* graph legend (1=yes, -L turns off) */
144
int graph_lines = 2; /* graph lines (0=none, -l); main caps this at 20 */
145
int fold_seq_err = 0; /* fold seq err (0=no, -f) */
146
int log_type = LOG_CLF; /* log type (default=CLF); -F picks ftp/squid/w3c by first letter */
147
int group_domains= 0; /* Group domains 0=none (-g) */
148
int hide_sites = 0; /* Hide ind. sites (0=no, -X) */
149
int link_referrer= 0; /* Link referrers (0=no) */
150
/* File names, DNS/GeoDB settings and index-page layout options. */
char *hname = NULL; /* hostname for reports (-n) */
151
char *state_fname = "webalizer.current"; /* run state file name */
152
char *hist_fname = "webalizer.hist"; /* name of history file */
153
char *html_ext = "html"; /* HTML file suffix (-x) */
154
char *dump_ext = "tab"; /* Dump file suffix */
155
char *conf_fname = NULL; /* name of config file */
156
char *log_fname = NULL; /* log file name; NULL means read the log from STDIN */
157
char *out_dir = NULL; /* output directory (-o) */
158
char *blank_str = ""; /* blank string */
159
char *geodb_fname = NULL; /* GeoDB database filename (-J) */
160
char *dns_cache = NULL; /* DNS cache file name (-D) */
161
int dns_children = 0; /* DNS children (0=don't do, -N) */
162
int cache_ips = 0; /* CacheIPs in DB (0=no) */
163
int cache_ttl = 7; /* DNS Cache TTL (days); main forces it into 1..100 */
164
int geodb = 0; /* Use GeoDB (0=no, -j) */
165
int graph_mths = 12; /* # months in index graph (-k); main clamps to 12..GRAPHMAX */
166
int index_mths = 12; /* # months in index table (-K); main clamps to 12..HISTSIZE */
167
int year_hdrs = 1; /* index year separators */
168
int year_totals = 1; /* index year subtotals */
169
int use_flags = 0; /* Show flags in ctry table (set together with flag_dir by -z) */
170
char *flag_dir = "flags"; /* location of flag icons */
173
/* GeoIP lookup settings. main clears geoip again in its GeoDB setup code. */
int geoip = 0; /* Use GeoIP (0=no, -w) */
174
char *geoip_db = NULL; /* GeoIP database filename (-W) */
175
GeoIP *geo_fp = NULL; /* GeoIP database handle */
178
/* Sizes of the "top N" report tables. main zeroes ntop_entry/ntop_exit for */
/* FTP logs, limits them to the larger of ntop_urls and ntop_urlsK, and */
/* caps ntop_ctrys at the number of countries defined in ctry[]. */
int ntop_sites = 30; /* top n sites to display (-S) */
179
int ntop_sitesK = 10; /* top n sites (by kbytes) */
180
int ntop_urls = 30; /* top n url's to display (-U) */
181
int ntop_urlsK = 10; /* top n url's (by kbytes) */
182
int ntop_entry = 10; /* top n entry url's (-e) */
183
int ntop_exit = 10; /* top n exit url's (-E) */
184
int ntop_refs = 30; /* top n referrers "" (-R) */
185
int ntop_agents = 15; /* top n user agents "" (-A) */
186
int ntop_ctrys = 30; /* top n countries "" (-C) */
187
int ntop_search = 20; /* top n search strings */
188
int ntop_users = 20; /* top n users to display */
190
/* "List all ..." switches, one per report type (0=no). */
int all_sites = 0; /* List All sites (0=no) */
191
int all_urls = 0; /* List All URLs (0=no) */
192
int all_refs = 0; /* List All Referrers */
193
int all_agents = 0; /* List All User Agents */
194
int all_search = 0; /* List All Search Strings */
195
int all_users = 0; /* List All Usernames */
197
/* Tab-delimited dump file switches, one per report type, plus dump options. */
int dump_sites = 0; /* Dump tab delimited sites */
198
int dump_urls = 0; /* Dump tab delimited URLs */
199
int dump_refs = 0; /* Dump tab delimited Referrers */
200
int dump_agents = 0; /* Dump tab delimited User Agents */
201
int dump_users = 0; /* Dump tab delimited Usernames */
202
int dump_search = 0; /* Dump tab delimited Search strings */
203
int dump_header = 0; /* Dump header as first rec */
204
char *dump_path = NULL; /* Path for dump files */
206
/* Date/time of the record stream position; main copies the rec_* values */
/* of each accepted record into these as it advances. */
int cur_year=0, cur_month=0, /* year/month/day/hour */
207
cur_day=0, cur_hour=0, /* tracking variables */
208
cur_min=0, cur_sec=0;
210
u_int64_t cur_tstamp=0; /* Timestamp of the last record processed */
211
u_int64_t rec_tstamp=0; /* current record's timestamp (seconds since epoch) */
212
u_int64_t req_tstamp=0; /* cur_tstamp as saved just before rec_tstamp is recomputed */
213
u_int64_t epoch; /* used for timestamp adj.; main sets it to jdate(1,1,1970) */
215
int check_dup=0; /* check for dup flag */
216
int gz_log=COMP_NONE; /* compressed log? COMP_NONE, COMP_GZIP or COMP_BZIP */
218
/* Running totals for the month being processed. */
double t_xfer=0.0; /* monthly total xfer value */
219
double t_ixfer=0.0; /* monthly total in xfer */
220
double t_oxfer=0.0; /* monthly total out xfer */
221
u_int64_t t_hit=0,t_file=0,t_site=0, /* monthly total vars */
222
t_url=0,t_ref=0,t_agent=0,
223
t_page=0, t_visit=0, t_user=0; /* main sets t_visit from tot_visit(sm_htab) on a month change */
225
/* Per-day (31 slots) and per-hour (24 slots) totals. main writes the */
/* daily arrays with index cur_day-1. */
double tm_xfer[31]; /* daily transfer totals */
226
double tm_ixfer[31]; /* daily in xfer totals */
227
double tm_oxfer[31]; /* daily out xfer totals */
229
u_int64_t tm_hit[31], tm_file[31], /* daily total arrays */
230
tm_site[31], tm_page[31],
/* NOTE(review): the declarator list above ends in a comma; its remaining */
/* entries (main uses tm_visit) are not present here - confirm. */
233
u_int64_t dt_site; /* daily 'sites' total; stored into tm_site[] and zeroed on a day change */
235
u_int64_t ht_hit=0, mh_hit=0; /* hourly hits totals; mh_hit keeps the largest ht_hit seen at an hour change */
237
u_int64_t th_hit[24], th_file[24], /* hourly total arrays */
/* NOTE(review): this declarator list also appears to be cut short - confirm. */
244
int f_day,l_day; /* first/last day vars */
246
/* System info, log input handles and the shared record buffers. */
struct utsname system_info; /* system info structure (sysname/release/machine shown in banner) */
248
u_int64_t ul_bogus =0; /* Dummy counter for groups */
250
struct log_struct log_rec; /* expanded log storage (fields of the record being processed) */
252
void *zlog_fp; /* compressed logfile ptr (from gzopen or BZ2_bzopen) */
253
FILE *log_fp; /* regular logfile pointer */
255
char buffer[BUFSIZE]; /* log file record buffer */
256
char tmp_buf[BUFSIZE]; /* copy of buffer saved before parsing; also scratch for the config path */
258
CLISTPTR *top_ctrys = NULL; /* Top countries table (calloc'd in main, ntop_ctrys entries) */
260
#define GZ_BUFSIZE 16384 /* our_getfs buffer size (NOTE(review): presumably our_gzgets - confirm) */
261
char f_buf[GZ_BUFSIZE]; /* our_getfs buffer */
262
char *f_cp=f_buf+GZ_BUFSIZE; /* pointer into the buffer; starts one past the end of f_buf */
263
int f_end=0; /* count to end of buffer */
265
/* Default graph colours, stored as "#rrggbb" strings in writable arrays. */
char hit_color[] = "#00805c"; /* graph hit color */
266
char file_color[] = "#0040ff"; /* graph file color */
267
char site_color[] = "#ff8000"; /* graph site color */
268
char kbyte_color[] = "#ff0000"; /* graph kbyte color */
269
char page_color[] = "#00e0ff"; /* graph page color */
270
char visit_color[] = "#ffff00"; /* graph visit color */
271
char misc_color[] = "#00e0ff"; /* graph misc color */
272
char pie_color1[] = "#800080"; /* pie additional color 1 */
273
char pie_color2[] = "#80ffc0"; /* pie additional color 2 */
274
char pie_color3[] = "#ff00ff"; /* pie additional color 3 */
275
char pie_color4[] = "#ffc080"; /* pie additional color 4 */
277
/*********************************************/
278
/* MAIN - start here */
279
/*********************************************/
281
int main(int argc, char *argv[])
283
int i; /* generic counter */
284
char *cp1, *cp2, *cp3; /* generic char pointers */
285
char host_buf[MAXHOST+1]; /* used to save hostname */
287
NLISTPTR lptr; /* generic list pointer */
289
extern char *optarg; /* used for command line */
290
extern int optind; /* parsing routine 'getopt' */
293
time_t start_time, end_time; /* program timers */
294
float temp_time; /* temporary time storage */
296
int rec_year,rec_month=1,rec_day,rec_hour,rec_min,rec_sec;
298
int good_rec =0; /* 1 if we had a good record */
299
u_int64_t total_rec =0; /* Total Records Processed */
300
u_int64_t total_ignore=0; /* Total Records Ignored */
301
u_int64_t total_bad =0; /* Total Bad Records */
303
int max_ctry; /* max countries defined */
305
/* month names used for parsing logfile (shouldn't be lang specific) */
306
char *log_month[12]={ "jan", "feb", "mar",
309
"oct", "nov", "dec"};
311
/* stat struct for files */
312
struct stat log_stat;
314
/* Assume that LC_CTYPE is what the user wants for non-ASCII chars */
315
setlocale(LC_CTYPE,"");
317
/* initialize epoch */
318
epoch=jdate(1,1,1970); /* used for timestamp adj. */
320
sprintf(tmp_buf,"%s/webalizer.conf",ETCDIR);
321
/* check for default config file */
322
if (!access("webalizer.conf",F_OK))
323
get_config("webalizer.conf");
324
else if (!access(tmp_buf,F_OK))
327
/* get command line options */
328
opterr = 0; /* disable parser errors */
329
while ((i=getopt(argc,argv,"a:A:bc:C:dD:e:E:fF:g:GhHiI:jJ:k:K:l:Lm:M:n:N:o:O:pP:qQr:R:s:S:t:Tu:U:vVwW:x:XYz:Z"))!=EOF)
333
case 'a': add_nlist(optarg,&hidden_agents); break; /* Hide agents */
334
case 'A': ntop_agents=atoi(optarg); break; /* Top agents */
335
case 'b': ignore_state=1; break; /* Ignore state file */
336
case 'c': get_config(optarg); break; /* Config file */
337
case 'C': ntop_ctrys=atoi(optarg); break; /* Top countries */
338
case 'd': debug_mode=1; break; /* Debug */
339
case 'D': dns_cache=optarg; break; /* DNS Cache filename */
340
case 'e': ntop_entry=atoi(optarg); break; /* Top entry pages */
341
case 'E': ntop_exit=atoi(optarg); break; /* Top exit pages */
342
case 'f': fold_seq_err=1; break; /* Fold sequence errs */
343
case 'F': log_type=(tolower(optarg[0])=='f')?
344
LOG_FTP:(tolower(optarg[0])=='s')?
345
LOG_SQUID:(tolower(optarg[0])=='w')?
346
LOG_W3C:LOG_CLF; break; /* define log type */
347
case 'g': group_domains=atoi(optarg); break; /* GroupDomains (0=no) */
348
case 'G': hourly_graph=0; break; /* no hourly graph */
349
case 'h': print_opts(argv[0]); break; /* help */
350
case 'H': hourly_stats=0; break; /* no hourly stats */
351
case 'i': ignore_hist=1; break; /* Ignore history */
352
case 'I': add_nlist(optarg,&index_alias); break; /* Index alias */
353
case 'j': geodb=1; break; /* Enable GeoDB */
354
case 'J': geodb_fname=optarg; break; /* GeoDB db filename */
355
case 'k': graph_mths=atoi(optarg); break; /* # months idx graph */
356
case 'K': index_mths=atoi(optarg); break; /* # months idx table */
357
case 'l': graph_lines=atoi(optarg); break; /* Graph Lines */
358
case 'L': graph_legend=0; break; /* Graph Legends */
359
case 'm': visit_timeout=atoi(optarg); break; /* Visit Timeout */
360
case 'M': mangle_agent=atoi(optarg); break; /* mangle user agents */
361
case 'n': hname=optarg; break; /* Hostname */
362
case 'N': dns_children=atoi(optarg); break; /* # of DNS children */
363
case 'o': out_dir=optarg; break; /* Output directory */
364
case 'O': add_nlist(optarg,&omit_page); break; /* pages not counted */
365
case 'p': incremental=1; break; /* Incremental run */
366
case 'P': add_nlist(optarg,&page_type); break; /* page view types */
367
case 'q': verbose=1; break; /* Quiet (verbose=1) */
368
case 'Q': verbose=0; break; /* Really Quiet */
369
case 'r': add_nlist(optarg,&hidden_refs); break; /* Hide referrer */
370
case 'R': ntop_refs=atoi(optarg); break; /* Top referrers */
371
case 's': add_nlist(optarg,&hidden_sites); break; /* Hide site */
372
case 'S': ntop_sites=atoi(optarg); break; /* Top sites */
373
case 't': msg_title=optarg; break; /* Report title */
374
case 'T': time_me=1; break; /* TimeMe */
375
case 'u': add_nlist(optarg,&hidden_urls); break; /* hide URL */
376
case 'U': ntop_urls=atoi(optarg); break; /* Top urls */
377
case 'v': verbose=2; debug_mode=1; break; /* Verbose */
378
case 'V': print_version(); break; /* Version */
380
case 'w': geoip=1; break; /* Enable GeoIP */
381
case 'W': geoip_db=optarg; break; /* GeoIP database name */
383
case 'x': html_ext=optarg; break; /* HTML file extension */
384
case 'X': hide_sites=1; break; /* Hide ind. sites */
385
case 'Y': ctry_graph=0; break; /* Supress ctry graph */
386
case 'Z': normalize=0; break; /* Dont normalize URLs */
387
case 'z': use_flags=1; flag_dir=optarg; break; /* Ctry flag dir */
391
if (argc - optind != 0) log_fname = argv[optind];
392
if ( log_fname && (log_fname[0]=='-')) log_fname=NULL; /* force STDIN? */
394
/* check for gzipped file - .gz */
395
if (log_fname) if (!strcmp((log_fname+strlen(log_fname)-3),".gz"))
399
/* check for bzip file - .bz2 */
400
if (log_fname) if (!strcmp((log_fname+strlen(log_fname)-4),".bz2"))
404
/* setup our internal variables */
405
init_counters(); /* initalize (zero) main counters */
406
memset(hist, 0, sizeof(hist)); /* initalize (zero) history array */
408
/* add default index. alias if needed */
409
if (default_index) add_nlist("index.",&index_alias);
411
if (page_type==NULL) /* check if page types present */
413
if ((log_type==LOG_CLF)||(log_type==LOG_SQUID)||(log_type==LOG_W3C))
415
add_nlist("htm*" ,&page_type); /* if no page types specified, we */
416
add_nlist("cgi" ,&page_type); /* use the default ones here... */
417
if (!isinlist(page_type,html_ext)) add_nlist(html_ext,&page_type);
419
else add_nlist("txt" ,&page_type); /* FTP logs default to .txt */
422
for (max_ctry=0;ctry[max_ctry].desc;max_ctry++);
423
if (ntop_ctrys > max_ctry) ntop_ctrys = max_ctry; /* force upper limit */
424
if (graph_lines> 20) graph_lines= 20; /* keep graphs sane! */
425
if (graph_mths<12) graph_mths=12;
426
if (graph_mths>GRAPHMAX) graph_mths=GRAPHMAX;
427
if (index_mths<12) index_mths=12;
428
if (index_mths>HISTSIZE) index_mths=HISTSIZE;
430
if (log_type == LOG_FTP)
432
/* disable stuff for ftp logs */
433
ntop_entry=ntop_exit=0;
438
if (search_list==NULL)
440
/* If no search engines defined, define some :) */
441
add_glist(".google. q=" ,&search_list);
442
add_glist("yahoo.com p=" ,&search_list);
443
add_glist("altavista.com q=" ,&search_list);
444
add_glist("aolsearch. query=" ,&search_list);
445
add_glist("ask.co q=" ,&search_list);
446
add_glist("eureka.com q=" ,&search_list);
447
add_glist("lycos.com query=" ,&search_list);
448
add_glist("hotbot.com MT=" ,&search_list);
449
add_glist("msn.com q=" ,&search_list);
450
add_glist("infoseek.com qt=" ,&search_list);
451
add_glist("webcrawler searchText=" ,&search_list);
452
add_glist("excite search=" ,&search_list);
453
add_glist("netscape.com query=" ,&search_list);
454
add_glist("mamma.com query=" ,&search_list);
455
add_glist("alltheweb.com q=" ,&search_list);
456
add_glist("northernlight.com qr=" ,&search_list);
460
/* ensure entry/exits don't exceed urls */
461
i=(ntop_urls>ntop_urlsK)?ntop_urls:ntop_urlsK;
462
if (ntop_entry>i) ntop_entry=i;
463
if (ntop_exit>i) ntop_exit=i;
465
for (i=0;i<MAXHASH;i++)
467
sm_htab[i]=sd_htab[i]=NULL; /* initalize hash tables */
474
/* Be polite and announce yourself... */
478
printf("Webalizer V%s-%s (%s %s %s) %s\n", version,editlvl,
479
system_info.sysname, system_info.release,
480
system_info.machine,language);
484
if (strstr(argv[0],"webazolver")!=0)
485
/* DNS support not present, aborting... */
486
{ printf("%s\n",msg_dns_abrt); exit(1); }
488
/* Force sane values for cache TTL */
489
if (cache_ttl<1) cache_ttl=1;
490
if (cache_ttl>100) cache_ttl=100;
497
if ( !(lstat(log_fname, &log_stat)) )
499
/* check if the file a symlink */
500
if ( S_ISLNK(log_stat.st_mode) )
503
fprintf(stderr,"%s %s (symlink)\n",msg_log_err,log_fname);
510
/* open compressed file */
512
if (gz_log==COMP_BZIP)
513
zlog_fp = BZ2_bzopen(log_fname,"rb");
516
zlog_fp = gzopen(log_fname, "rb");
519
/* Error: Can't open log file ... */
520
fprintf(stderr, "%s %s (%d)\n",msg_log_err,log_fname,ENOENT);
526
/* open regular file */
527
log_fp = fopen(log_fname,"r");
530
/* Error: Can't open log file ... */
531
fprintf(stderr, "%s %s\n",msg_log_err,log_fname);
537
/* Using logfile ... */
540
printf("%s %s (",msg_log_use,log_fname?log_fname:"STDIN");
541
if (gz_log==COMP_GZIP) printf("gzip-");
543
if (gz_log==COMP_BZIP) printf("bzip-");
547
/* display log file type hint */
548
case LOG_CLF: printf("clf)\n"); break;
549
case LOG_FTP: printf("ftp)\n"); break;
550
case LOG_SQUID: printf("squid)\n"); break;
551
case LOG_W3C: printf("w3c)\n"); break;
555
/* switch directories if needed */
558
if (chdir(out_dir) != 0)
560
/* Error: Can't change directory to ... */
561
fprintf(stderr, "%s %s\n",msg_dir_err,out_dir);
567
if (strstr(argv[0],"webazolver")!=0)
569
if (!dns_children) dns_children=5; /* default dns children if needed */
572
/* No cache file specified, aborting... */
573
fprintf(stderr,"%s\n",msg_dns_nocf); /* Must have a cache file */
578
if (dns_cache && dns_children) /* run-time resolution */
580
if (dns_children > MAXCHILD) dns_children=MAXCHILD;
581
/* DNS Lookup (#children): */
582
if (verbose>1) printf("%s (%d): ",msg_dns_rslv,dns_children);
584
(gz_log)?dns_resolver(zlog_fp):dns_resolver(log_fp);
586
(gz_log==COMP_BZIP)?bz2_rewind(&zlog_fp, log_fname, "rb"):
588
(gz_log==COMP_GZIP)?gzrewind(zlog_fp):
589
(log_fname)?rewind(log_fp):exit(0);
592
if (strstr(argv[0],"webazolver")!=0) exit(0); /* webazolver exits here */
596
if (!open_cache()) { dns_cache=NULL; dns_db=NULL; }
599
/* Using DNS cache file <filename> */
600
if (verbose>1) printf("%s %s\n",msg_dns_usec,dns_cache);
607
geo_db=geodb_open(geodb_fname);
610
if (verbose) printf("%s: %s\n",msg_geo_open,
611
(geodb_fname)?geodb_fname:msg_geo_dflt);
612
if (verbose) printf("GeoDB %s\n",msg_geo_nolu);
615
else if (verbose>1) printf("%s %s\n",
616
msg_geo_use,geodb_ver(geo_db,buffer));
618
if (geoip) geoip=0; /* Disable GeoIP if using GeoDB */
624
/* open GeoIP database */
628
geo_fp=GeoIP_open(geoip_db, GEOIP_MEMORY_CACHE);
630
geo_fp=GeoIP_new(GEOIP_MEMORY_CACHE);
632
/* Did we open one? */
635
/* couldn't open.. warn user */
636
if (verbose) printf("GeoIP %s\n",msg_geo_nolu);
639
else if (verbose>1) printf("%s %s (%s)\n",msg_geo_use,
640
GeoIPDBDescription[(int)geo_fp->databaseType],
641
(geoip_db==NULL)?msg_geo_dflt:geo_fp->file_path);
643
#endif /* USE_GEOIP */
645
/* Creating output in ... */
647
printf("%s %s\n",msg_dir_use,out_dir?out_dir:msg_cur_dir);
652
if (uname(&system_info)) hname="localhost";
653
else hname=system_info.nodename;
656
/* Hostname for reports is ... */
657
if (strlen(hname)) if (verbose>1) printf("%s '%s'\n",msg_hostname,hname);
659
/* get past history */
660
if (ignore_hist) { if (verbose>1) printf("%s\n",msg_ign_hist); }
663
if (incremental) /* incremental processing? */
665
if ((i=restore_state())) /* restore internal data structs */
667
/* Error: Unable to restore run data (error num) */
668
/* if (verbose) fprintf(stderr,"%s (%d)\n",msg_bad_data,i); */
669
fprintf(stderr,"%s (%d)\n",msg_bad_data,i);
674
/* Allocate memory for our TOP countries array */
676
{ if ( (top_ctrys=calloc(ntop_ctrys,sizeof(CLISTPTR))) == NULL)
677
/* Can't get memory, Top Countries disabled! */
678
{if (verbose) fprintf(stderr,"%s\n",msg_nomem_tc); ntop_ctrys=0;}}
680
/* get processing start time */
681
start_time = time(NULL);
683
/*********************************************/
684
/* MAIN PROCESS LOOP - read through log file */
685
/*********************************************/
687
while ( (gz_log)?(our_gzgets(zlog_fp,buffer,BUFSIZE) != Z_NULL):
688
(fgets(buffer,BUFSIZE,log_fname?log_fp:stdin) != NULL))
691
if (strlen(buffer) == (BUFSIZE-1))
695
fprintf(stderr,"%s",msg_big_rec);
696
if (debug_mode) fprintf(stderr,":\n%s",buffer);
697
else fprintf(stderr,"\n");
700
total_bad++; /* bump bad record counter */
702
/* get the rest of the record */
703
while ( (gz_log)?(our_gzgets(zlog_fp,buffer,BUFSIZE)!=Z_NULL):
704
(fgets(buffer,BUFSIZE,log_fname?log_fp:stdin)!=NULL))
706
if (strlen(buffer) < BUFSIZE-1)
708
if (debug_mode && verbose) fprintf(stderr,"%s\n",buffer);
711
if (debug_mode && verbose) fprintf(stderr,"%s",buffer);
713
continue; /* go get next record if any */
716
/* got a record... */
717
strcpy(tmp_buf, buffer); /* save buffer in case of error */
718
if (parse_record(buffer)) /* parse the record */
720
/*********************************************/
721
/* PASSED MINIMAL CHECKS, DO A LITTLE MORE */
722
/*********************************************/
724
/* convert month name to lowercase */
726
log_rec.datetime[i]=tolower(log_rec.datetime[i]);
728
/* lowercase sitename/IPv6 addresses */
729
cp1=log_rec.hostname;
730
while (*cp1++!='\0') *cp1=tolower(*cp1);
732
/* get year/month/day/hour/min/sec values */
735
if (strncmp(log_month[i],&log_rec.datetime[4],3)==0)
736
{ rec_month = i+1; break; }
739
rec_year=atoi(&log_rec.datetime[8]); /* get year number (int) */
740
rec_day =atoi(&log_rec.datetime[1]); /* get day number */
741
rec_hour=atoi(&log_rec.datetime[13]); /* get hour number */
742
rec_min =atoi(&log_rec.datetime[16]); /* get minute number */
743
rec_sec =atoi(&log_rec.datetime[19]); /* get second number */
745
/* Kludge for Netscape server time (0-24?) error */
746
if (rec_hour>23) rec_hour=0;
748
/* minimal sanity check on date */
749
if ((i>=12)||(rec_min>59)||(rec_sec>60)||(rec_year<1990))
751
total_bad++; /* if a bad date, bump counter */
754
fprintf(stderr,"%s: %s [%llu]",
755
msg_bad_date,log_rec.datetime,total_rec);
756
if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf);
757
else fprintf(stderr,"\n");
759
continue; /* and ignore this record */
762
/*********************************************/
763
/* GOOD RECORD, CHECK INCREMENTAL/TIMESTAMPS */
764
/*********************************************/
766
/* Flag as a good one */
769
/* get current records timestamp (seconds since epoch) */
770
req_tstamp=cur_tstamp;
771
rec_tstamp=((jdate(rec_day,rec_month,rec_year)-epoch)*86400)+
772
(rec_hour*3600)+(rec_min*60)+rec_sec;
774
/* Do we need to check for duplicate records? (incremental mode) */
777
/* check if less than/equal to last record processed */
778
if ( rec_tstamp <= cur_tstamp )
780
/* if it is, assume we have already processed and ignore it */
786
/* if it isn't.. disable any more checks this run */
788
/* now check if it's a new month */
789
if ( (cur_month != rec_month) || (cur_year != rec_year) )
792
cur_sec = rec_sec; /* set current counters */
796
cur_month = rec_month;
798
cur_tstamp= rec_tstamp;
799
f_day=l_day=rec_day; /* reset first and last day */
804
/* check for out of sequence records */
805
if (rec_tstamp/3600 < cur_tstamp/3600)
807
if (!fold_seq_err && ((rec_tstamp+SLOP_VAL)/3600<cur_tstamp/3600) )
808
{ total_ignore++; continue; }
811
rec_sec = cur_sec; /* if folding sequence */
812
rec_min = cur_min; /* errors, just make it */
813
rec_hour = cur_hour; /* look like the last */
814
rec_day = cur_day; /* good records timestamp */
815
rec_month = cur_month;
817
rec_tstamp= cur_tstamp;
820
cur_tstamp=rec_tstamp; /* update current timestamp */
822
/*********************************************/
823
/* DO SOME PRE-PROCESS FORMATTING */
824
/*********************************************/
827
unescape(log_rec.url);
830
cp1 = cp2 = log_rec.url;
831
/* handle null '-' case here... */
832
if (*++cp1 == '-') strcpy(log_rec.url,"/INVALID-URL");
835
/* strip actual URL out of request */
836
while ( (*cp1 != ' ') && (*cp1 != '\0') ) cp1++;
839
/* scan to begin of actual URL field */
840
while ((*cp1 == ' ') && (*cp1 != '\0')) cp1++;
841
/* remove duplicate / if needed */
842
while (( *cp1=='/') && (*(cp1+1)=='/')) cp1++;
843
while (( *cp1!='\0')&&(*cp1!='"')) *cp2++=*cp1++;
848
/* strip query portion of cgi scripts */
851
if (!isurlchar(*cp1, stripcgi)) { *cp1 = '\0'; break; }
853
if (log_rec.url[0]=='\0')
854
{ log_rec.url[0]='/'; log_rec.url[1]='\0'; }
857
if (log_type==LOG_CLF && log_rec.resp_code!=RC_NOTFOUND && normalize)
859
if ( ((cp2=strstr(log_rec.url,"://"))!=NULL)&&(cp2<log_rec.url+6) )
862
/* see if a '/' is present after it */
863
if ( (cp2=strchr(cp1,(int)'/'))==NULL) cp1--;
865
/* Ok, now shift url string */
866
cp2=log_rec.url; while (*cp1!='\0') *cp2++=*cp1++; *cp2='\0';
868
/* extra sanity checks on URL string */
869
while ((cp2=strstr(log_rec.url,"/./")))
870
{ cp1=cp2+2; while (*cp1!='\0') *cp2++=*cp1++; *cp2='\0'; }
871
if (log_rec.url[0]!='/')
873
if ( log_rec.resp_code==RC_OK ||
874
log_rec.resp_code==RC_PARTIALCONTENT ||
875
log_rec.resp_code==RC_NOMOD)
878
fprintf(stderr,"Converted URL '%s' to '/'\n",log_rec.url);
885
fprintf(stderr,"Invalid URL: '%s'\n",log_rec.url);
886
strcpy(log_rec.url,"/INVALID-URL");
889
while ( log_rec.url[ (i=strlen(log_rec.url)-1) ] == '?' )
890
log_rec.url[i]='\0'; /* drop trailing ?s if any */
894
/* check for service (ie: http://) and lowercase if found */
895
if (((cp2=strstr(log_rec.url,"://"))!= NULL)&&(cp2<log_rec.url+6))
900
if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
906
/* strip off index.html (or any aliases) */
910
if ((cp1=strstr(log_rec.url,lptr->string))!=NULL)
914
if ( !stripcgi && (cp2=strchr(cp1,'?'))!=NULL )
915
{ while(*cp2) *cp1++=*cp2++; *cp1='\0'; }
923
/* unescape referrer */
924
unescape(log_rec.refer);
926
/* fix referrer field */
929
if ( (*cp2 != '\0') && (*cp2 == '"') )
931
while ( *cp1 != '\0' )
934
if (((unsigned char)*cp1<32&&(unsigned char)*cp1>0) ||
935
*cp1==127 || (unsigned char)*cp1=='<') *cp1=0;
941
/* get query portion of cgi referrals */
947
if (!isurlchar(*cp1, 1))
949
/* Save query portion in log.rec.srchstr */
950
strncpy(log_rec.srchstr,(char *)cp1,MAXSRCH);
956
/* handle null referrer */
957
if (log_rec.refer[0]=='\0')
958
{ log_rec.refer[0]='-'; log_rec.refer[1]='\0'; }
961
/* if HTTP request, lowercase http://sitename/ portion */
963
if ( (*cp1=='h') || (*cp1=='H'))
965
while ( (*cp1!='/') && (*cp1!='\0'))
967
if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
970
/* now do hostname */
971
if ( (*cp1=='/') && ( *(cp1+1)=='/')) {cp1++; cp1++;}
972
while ( (*cp1!='/') && (*cp1!='\0'))
974
if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
979
/* Do we need to mangle? */
980
if (mangle_agent) agent_mangle(log_rec.agent);
982
/* if necessary, shrink referrer to fit storage */
983
if (strlen(log_rec.refer)>=MAXREFH)
985
if (verbose) fprintf(stderr,"%s [%llu]\n",
986
msg_big_ref,total_rec);
987
log_rec.refer[MAXREFH-1]='\0';
990
/* if necessary, shrink URL to fit storage */
991
if (strlen(log_rec.url)>=MAXURLH)
993
if (verbose) fprintf(stderr,"%s [%llu]\n",
994
msg_big_req,total_rec);
995
log_rec.url[MAXURLH-1]='\0';
998
/* fix user agent field */
1001
if ( (*cp2 != '\0') && ((*cp2 == '"')||(*cp2 == '(')) )
1003
while (*cp1 != '\0') { cp3 = cp2; *cp2++ = *cp1++; }
1006
cp1 = log_rec.agent; /* CHANGE !!! */
1007
while (*cp1 != 0) /* get rid of more common _bad_ chars ;) */
1009
if ( ((unsigned char)*cp1 < 32) ||
1010
((unsigned char)*cp1==127) ||
1011
(*cp1=='<') || (*cp1=='>') )
1012
{ *cp1='\0'; break; }
1016
/* fix username if needed */
1017
if (log_rec.ident[0]==0)
1018
{ log_rec.ident[0]='-'; log_rec.ident[1]='\0'; }
1022
while ((unsigned char)*cp3>=32 && *cp3!='"') cp3++;
1025
/* unescape user name */
1026
unescape(log_rec.ident);
1028
/********************************************/
1029
/* PROCESS RECORD */
1030
/********************************************/
1032
/* first time through? */
1035
/* if yes, init our date vars */
1036
cur_month=rec_month; cur_year=rec_year;
1037
cur_day=rec_day; cur_hour=rec_hour;
1038
cur_min=rec_min; cur_sec=rec_sec;
1042
/* adjust last day processed if different */
1043
if (rec_day > l_day) l_day = rec_day;
1045
/* update min/sec stuff */
1046
if (cur_sec != rec_sec) cur_sec = rec_sec;
1047
if (cur_min != rec_min) cur_min = rec_min;
1049
/* check for hour change */
1050
if (cur_hour != rec_hour)
1052
/* if yes, init hourly stuff */
1053
if (ht_hit > mh_hit) mh_hit = ht_hit;
1055
cur_hour = rec_hour;
1058
/* check for day change */
1059
if (cur_day != rec_day)
1061
/* if yes, init daily stuff */
1062
tm_site[cur_day-1]=dt_site; dt_site=0;
1063
tm_visit[cur_day-1]=tot_visit(sd_htab);
1068
/* check for month change */
1069
if ( (cur_month != rec_month) || (cur_year != rec_year) )
1071
/* if yes, do monthly stuff */
1072
t_visit=tot_visit(sm_htab);
1073
month_update_exit(req_tstamp); /* process exit pages */
1075
write_month_html(); /* generate HTML for month */
1077
cur_month = rec_month; /* update our flags */
1078
cur_year = rec_year;
1079
f_day=l_day=rec_day;
1082
/* save hostname for later */
1083
strncpy(host_buf, log_rec.hostname, sizeof(log_rec.hostname));
1086
/* Resolve IP address if needed */
1089
struct addrinfo hints, *ares;
1090
memset(&hints, 0, sizeof(hints));
1091
hints.ai_family = AF_UNSPEC;
1092
hints.ai_socktype = SOCK_STREAM;
1093
hints.ai_flags = AI_NUMERICHOST;
1094
if (0 == getaddrinfo(log_rec.hostname, "0", &hints, &ares))
1097
resolve_dns(&log_rec);
1101
/* lowercase hostname and validity check */
1102
cp1 = log_rec.hostname; i=0;
1104
if ( (!isalnum((unsigned char)*cp1)) && (*cp1!=':') )
1105
strncpy(log_rec.hostname, "Invalid", 8);
1108
while (*cp1 != '\0') /* loop through string */
1110
if ( (*cp1>='A') && (*cp1<='Z') )
1111
{ *cp1++ += 'a'-'A'; continue; }
1112
if ( *cp1=='.' ) i++;
1113
if ( (isalnum((unsigned char)*cp1)) ||
1114
(*cp1=='.')||(*cp1=='-') ||
1115
(*cp1==':')||((*cp1=='_')&&(i==0)) ) cp1++;
1118
/* Invalid hostname found! */
1119
if (strcmp(log_rec.hostname, host_buf))
1120
strcpy(log_rec.hostname, host_buf);
1121
else strncpy(log_rec.hostname,"Invalid",8);
1125
if (*cp1 == '\0') /* did we make it to the end? */
1127
if (!isalnum((unsigned char)*(cp1-1)))
1128
strncpy(log_rec.hostname,"Invalid",8);
1132
/* Catch blank hostnames here */
1133
if (log_rec.hostname[0]=='\0')
1134
strncpy(log_rec.hostname,"Unknown",8);
1136
/* Ignore/Include check */
1137
if ( (isinlist(include_sites,log_rec.hostname)==NULL) &&
1138
(isinlist(include_urls,log_rec.url)==NULL) &&
1139
(isinlist(include_refs,log_rec.refer)==NULL) &&
1140
(isinlist(include_agents,log_rec.agent)==NULL) &&
1141
(isinlist(include_users,log_rec.ident)==NULL) )
1143
if (isinlist(ignored_sites,log_rec.hostname)!=NULL)
1144
{ total_ignore++; continue; }
1145
if (isinlist(ignored_urls,log_rec.url)!=NULL)
1146
{ total_ignore++; continue; }
1147
if (isinlist(ignored_agents,log_rec.agent)!=NULL)
1148
{ total_ignore++; continue; }
1149
if (isinlist(ignored_refs,log_rec.refer)!=NULL)
1150
{ total_ignore++; continue; }
1151
if (isinlist(ignored_users,log_rec.ident)!=NULL)
1152
{ total_ignore++; continue; }
1155
/* Bump response code totals */
1156
switch (log_rec.resp_code) {
1157
case RC_CONTINUE: i=IDX_CONTINUE; break;
1158
case RC_SWITCHPROTO: i=IDX_SWITCHPROTO; break;
1159
case RC_OK: i=IDX_OK; break;
1160
case RC_CREATED: i=IDX_CREATED; break;
1161
case RC_ACCEPTED: i=IDX_ACCEPTED; break;
1162
case RC_NONAUTHINFO: i=IDX_NONAUTHINFO; break;
1163
case RC_NOCONTENT: i=IDX_NOCONTENT; break;
1164
case RC_RESETCONTENT: i=IDX_RESETCONTENT; break;
1165
case RC_PARTIALCONTENT: i=IDX_PARTIALCONTENT; break;
1166
case RC_MULTIPLECHOICES: i=IDX_MULTIPLECHOICES; break;
1167
case RC_MOVEDPERM: i=IDX_MOVEDPERM; break;
1168
case RC_MOVEDTEMP: i=IDX_MOVEDTEMP; break;
1169
case RC_SEEOTHER: i=IDX_SEEOTHER; break;
1170
case RC_NOMOD: i=IDX_NOMOD; break;
1171
case RC_USEPROXY: i=IDX_USEPROXY; break;
1172
case RC_MOVEDTEMPORARILY: i=IDX_MOVEDTEMPORARILY; break;
1173
case RC_BAD: i=IDX_BAD; break;
1174
case RC_UNAUTH: i=IDX_UNAUTH; break;
1175
case RC_PAYMENTREQ: i=IDX_PAYMENTREQ; break;
1176
case RC_FORBIDDEN: i=IDX_FORBIDDEN; break;
1177
case RC_NOTFOUND: i=IDX_NOTFOUND; break;
1178
case RC_METHODNOTALLOWED: i=IDX_METHODNOTALLOWED; break;
1179
case RC_NOTACCEPTABLE: i=IDX_NOTACCEPTABLE; break;
1180
case RC_PROXYAUTHREQ: i=IDX_PROXYAUTHREQ; break;
1181
case RC_TIMEOUT: i=IDX_TIMEOUT; break;
1182
case RC_CONFLICT: i=IDX_CONFLICT; break;
1183
case RC_GONE: i=IDX_GONE; break;
1184
case RC_LENGTHREQ: i=IDX_LENGTHREQ; break;
1185
case RC_PREFAILED: i=IDX_PREFAILED; break;
1186
case RC_REQENTTOOLARGE: i=IDX_REQENTTOOLARGE; break;
1187
case RC_REQURITOOLARGE: i=IDX_REQURITOOLARGE; break;
1188
case RC_UNSUPMEDIATYPE: i=IDX_UNSUPMEDIATYPE; break;
1189
case RC_RNGNOTSATISFIABLE:i=IDX_RNGNOTSATISFIABLE;break;
1190
case RC_EXPECTATIONFAILED:i=IDX_EXPECTATIONFAILED;break;
1191
case RC_SERVERERR: i=IDX_SERVERERR; break;
1192
case RC_NOTIMPLEMENTED: i=IDX_NOTIMPLEMENTED; break;
1193
case RC_BADGATEWAY: i=IDX_BADGATEWAY; break;
1194
case RC_UNAVAIL: i=IDX_UNAVAIL; break;
1195
case RC_GATEWAYTIMEOUT: i=IDX_GATEWAYTIMEOUT; break;
1196
case RC_BADHTTPVER: i=IDX_BADHTTPVER; break;
1197
default: i=IDX_UNDEFINED; break;
1199
response[i].count++;
1201
/* now save in the various hash tables... */
1202
if (log_rec.resp_code==RC_OK || log_rec.resp_code==RC_PARTIALCONTENT)
1205
/* URL/ident hash table (only if valid response code) */
1206
if ((log_rec.resp_code==RC_OK)||(log_rec.resp_code==RC_NOMOD)||
1207
(log_rec.resp_code==RC_PARTIALCONTENT))
1209
/* URL hash table */
1210
if (put_unode(log_rec.url,OBJ_REG,(u_int64_t)1,
1211
log_rec.xfer_size,log_rec.ixfer_size,log_rec.oxfer_size,
1212
&t_url,(u_int64_t)0,(u_int64_t)0,um_htab))
1215
/* Error adding URL node, skipping ... */
1216
fprintf(stderr,"%s %s\n", msg_nomem_u, log_rec.url);
1219
/* ident (username) hash table */
1220
if (put_inode(log_rec.ident,OBJ_REG,
1221
1,(u_int64_t)i,log_rec.xfer_size,
1222
log_rec.ixfer_size,log_rec.oxfer_size,&t_user,
1223
0,rec_tstamp,im_htab))
1226
/* Error adding ident node, skipping .... */
1227
fprintf(stderr,"%s %s\n", msg_nomem_i, log_rec.ident);
1231
/* referrer hash table */
1234
if (log_rec.refer[0]!='\0')
1235
if (put_rnode(log_rec.refer,OBJ_REG,(u_int64_t)1,&t_ref,rm_htab))
1238
fprintf(stderr,"%s %s\n", msg_nomem_r, log_rec.refer);
1242
/* hostname (site) hash table - daily */
1243
if (put_hnode(log_rec.hostname,OBJ_REG,
1244
1,(u_int64_t)i,log_rec.xfer_size,
1245
log_rec.ixfer_size,log_rec.oxfer_size,&dt_site,
1246
0,rec_tstamp,"",sd_htab))
1249
/* Error adding host node (daily), skipping .... */
1250
fprintf(stderr,"%s %s\n",msg_nomem_dh, log_rec.hostname);
1253
/* hostname (site) hash table - monthly */
1254
if (put_hnode(log_rec.hostname,OBJ_REG,
1255
1,(u_int64_t)i,log_rec.xfer_size,
1256
log_rec.ixfer_size,log_rec.oxfer_size,&t_site,
1257
0,rec_tstamp,"",sm_htab))
1260
/* Error adding host node (monthly), skipping .... */
1261
fprintf(stderr,"%s %s\n", msg_nomem_mh, log_rec.hostname);
1264
/* user agent hash table */
1267
if (log_rec.agent[0]!='\0')
1268
if (put_anode(log_rec.agent,OBJ_REG,(u_int64_t)1,&t_agent,am_htab))
1271
fprintf(stderr,"%s %s\n", msg_nomem_a, log_rec.agent);
1275
/* bump monthly/daily/hourly totals */
1276
t_hit++; ht_hit++; /* daily/hourly hits */
1277
t_xfer += log_rec.xfer_size; /* total xfer size */
1278
t_ixfer += log_rec.ixfer_size; /* total in xfer size */
1279
t_oxfer += log_rec.oxfer_size; /* total out xfer size */
1280
tm_xfer[rec_day-1] += log_rec.xfer_size; /* daily xfer total */
1281
tm_ixfer[rec_day-1] += log_rec.ixfer_size; /* daily in xfer total */
1282
tm_oxfer[rec_day-1] += log_rec.oxfer_size; /* daily out xfer total */
1283
tm_hit[rec_day-1]++; /* daily hits total */
1284
th_xfer[rec_hour] += log_rec.xfer_size; /* hourly xfer total */
1285
th_ixfer[rec_hour] += log_rec.ixfer_size; /* hourly in xfer total */
1286
th_oxfer[rec_hour] += log_rec.oxfer_size; /* hourly out xfer total*/
1287
th_hit[rec_hour]++; /* hourly hits total */
1289
/* if RC_OK, increase file counters */
1290
if (log_rec.resp_code == RC_OK)
1293
tm_file[rec_day-1]++;
1294
th_file[rec_hour]++;
1297
/* Pages (pageview) calculation */
1298
if (ispage(log_rec.url))
1301
tm_page[rec_day-1]++;
1302
th_page[rec_hour]++;
1304
/* do search string stuff if needed */
1305
if (ntop_search) srch_string(log_rec.srchstr);
1308
/*********************************************/
1309
/* RECORD PROCESSED - DO GROUPS HERE */
1310
/*********************************************/
1313
if ( (cp1=isinglist(group_urls,log_rec.url))!=NULL)
1315
if (put_unode(cp1,OBJ_GRP,(u_int64_t)1,log_rec.xfer_size,
1316
log_rec.ixfer_size,log_rec.oxfer_size,
1317
&ul_bogus,(u_int64_t)0,(u_int64_t)0,um_htab))
1320
/* Error adding URL node, skipping ... */
1321
fprintf(stderr,"%s %s\n", msg_nomem_u, cp1);
1326
if ( (cp1=isinglist(group_sites,log_rec.hostname))!=NULL)
1328
if (put_hnode(cp1,OBJ_GRP,1,
1329
(u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1331
log_rec.ixfer_size,log_rec.oxfer_size,&ul_bogus,
1332
0,rec_tstamp,"",sm_htab))
1335
/* Error adding Site node, skipping ... */
1336
fprintf(stderr,"%s %s\n", msg_nomem_mh, cp1);
1341
/* Domain Grouping */
1344
cp1 = get_domain(log_rec.hostname);
1347
if (put_hnode(cp1,OBJ_GRP,1,
1348
(u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1349
log_rec.xfer_size,log_rec.ixfer_size,log_rec.oxfer_size,
1350
&ul_bogus,0,rec_tstamp,"",sm_htab))
1353
/* Error adding Site node, skipping ... */
1354
fprintf(stderr,"%s %s\n", msg_nomem_mh, cp1);
1360
/* Referrer Grouping */
1361
if ( (cp1=isinglist(group_refs,log_rec.refer))!=NULL)
1363
if (put_rnode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,rm_htab))
1366
/* Error adding Referrer node, skipping ... */
1367
fprintf(stderr,"%s %s\n", msg_nomem_r, cp1);
1371
/* User Agent Grouping */
1372
if ( (cp1=isinglist(group_agents,log_rec.agent))!=NULL)
1374
if (put_anode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,am_htab))
1377
/* Error adding User Agent node, skipping ... */
1378
fprintf(stderr,"%s %s\n", msg_nomem_a, cp1);
1382
/* Ident (username) Grouping */
1383
if ( (cp1=isinglist(group_users,log_rec.ident))!=NULL)
1385
if (put_inode(cp1,OBJ_GRP,1,
1386
(u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1388
log_rec.ixfer_size,log_rec.oxfer_size,&ul_bogus,
1389
0,rec_tstamp,im_htab))
1392
/* Error adding Username node, skipping ... */
1393
fprintf(stderr,"%s %s\n", msg_nomem_i, cp1);
1398
/*********************************************/
1400
/*********************************************/
1404
/* If first record, check if stupid Netscape header stuff */
1405
if ( (total_rec==1) && (strncmp(buffer,"format=",7)==0) )
1407
/* Skipping Netscape header record */
1408
if (verbose>1) printf("%s\n",msg_ign_nscp);
1409
/* count it as ignored... */
1414
/* Check if it's a W3C header or IIS Null-Character line */
1415
if ((buffer[0]=='\0') || (buffer[0]=='#'))
1421
/* really bad record... */
1425
fprintf(stderr,"%s (%llu)",msg_bad_rec,total_rec);
1426
if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf);
1427
else fprintf(stderr,"\n");
1434
/*********************************************/
1435
/* DONE READING LOG FILE - final processing */
1436
/*********************************************/
1438
/* close log file if needed */
1440
if (gz_log) (gz_log==COMP_BZIP)?BZ2_bzclose(zlog_fp):gzclose(zlog_fp);
1442
if (gz_log) gzclose(zlog_fp);
1444
else if (log_fname) fclose(log_fp);
1446
if (good_rec) /* were any good records? */
1448
tm_site[cur_day-1]=dt_site; /* If yes, clean up a bit */
1449
tm_visit[cur_day-1]=tot_visit(sd_htab);
1450
t_visit=tot_visit(sm_htab);
1451
if (ht_hit > mh_hit) mh_hit = ht_hit;
1453
if (total_rec > (total_ignore+total_bad)) /* did we process any? */
1457
if (save_state()) /* incremental stuff */
1459
/* Error: Unable to save current run data */
1460
if (verbose) fprintf(stderr,"%s\n",msg_data_err);
1461
unlink(state_fname);
1464
month_update_exit(rec_tstamp); /* calculate exit pages */
1466
write_month_html(); /* write monthly HTML file */
1467
put_history(); /* write history */
1469
if (hist[0].month!=0) write_main_index(); /* write main HTML file */
1471
/* get processing end time */
1472
end_time = time(NULL);
1474
/* display end of processing statistics */
1475
if (time_me || (verbose>1))
1477
printf("%llu %s ",total_rec, msg_records);
1480
printf("(%llu %s",total_ignore,msg_ignored);
1481
if (total_bad) printf(", %llu %s) ",total_bad,msg_bad);
1484
else if (total_bad) printf("(%llu %s) ",total_bad,msg_bad);
1486
/* total processing time in seconds */
1487
temp_time = difftime(end_time, start_time);
1488
if (temp_time==0) temp_time=1;
1489
printf("%s %.0f %s", msg_in, temp_time, msg_seconds);
1491
/* calculate records per second */
1493
i=( (int)( (float)total_rec/temp_time ) );
1496
if ( (i>0) && (i<=total_rec) ) printf(", %d/sec\n", i);
1501
/* Close DNS cache file */
1502
if (dns_db) close_cache();
1503
/* Close GeoDB database */
1504
if (geo_db) geodb_close(geo_db);
1508
/* Close GeoIP database */
1509
if (geo_fp) GeoIP_delete(geo_fp);
1512
/* Whew, all done! Exit with completion status (0) */
1517
/* No valid records found... exit with error (1) */
1518
if (verbose) printf("%s\n",msg_no_vrec);
1519
if (hist[0].month!=0) write_main_index(); /* write main HTML file */
1524
/*********************************************/
1525
/* GET_CONFIG - get configuration file info */
1526
/*********************************************/
1528
/*
 * get_config() - read one configuration file and apply its settings.
 *
 * fname - path of the configuration file; it is opened for reading and a
 *         failure to open it is reported on stderr (msg_bad_conf).
 *
 * The file is read one line at a time.  Lines that begin with '#' or with
 * whitespace are skipped.  On every other line the leading run of
 * alphanumeric characters is the keyword and the remainder of the line,
 * after any separating whitespace, is the value.  Lines with an empty
 * keyword or an empty value are ignored.
 *
 * The keyword is looked up in kwords[] with ouricmp() (presumably a
 * case-insensitive compare -- TODO confirm) and the index of the match
 * becomes the key tested by the "case N:" labels further down.  The number
 * at the end of each kwords[] comment is that index, so the table and the
 * case labels have to be kept in step.  A key of 0 is reported with
 * msg_bad_key.
 */
void get_config(char *fname)
1530
char *kwords[]= { "Undefined", /* 0 = undefined keyword 0 */
1531
"OutputDir", /* Output directory 1 */
1532
"LogFile", /* Log file to use for input 2 */
1533
"ReportTitle", /* Title for reports 3 */
1534
"HostName", /* Hostname to use 4 */
1535
"IgnoreHist", /* Ignore history file 5 */
1536
"Quiet", /* Run in quiet mode 6 */
1537
"TimeMe", /* Produce timing results 7 */
1538
"Debug", /* Produce debug information 8 */
1539
"HourlyGraph", /* Hourly stats graph 9 */
1540
"HourlyStats", /* Hourly stats table 10 */
1541
"TopSites", /* Top sites 11 */
1542
"TopURLs", /* Top URLs 12 */
1543
"TopReferrers", /* Top Referrers 13 */
1544
"TopAgents", /* Top User Agents 14 */
1545
"TopCountries", /* Top Countries 15 */
1546
"HideSite", /* Sites to hide 16 */
1547
"HideURL", /* URLs to hide 17 */
1548
"HideReferrer", /* Referrers to hide 18 */
1549
"HideAgent", /* User Agents to hide 19 */
1550
"IndexAlias", /* Aliases for index.html 20 */
1551
"HTMLHead", /* HTML Top1 code 21 */
1552
"HTMLPost", /* HTML Top2 code 22 */
1553
"HTMLTail", /* HTML Tail code 23 */
1554
"MangleAgents", /* Mangle User Agents 24 */
1555
"IgnoreSite", /* Sites to ignore 25 */
1556
"IgnoreURL", /* Url's to ignore 26 */
1557
"IgnoreReferrer", /* Referrers to ignore 27 */
1558
"IgnoreAgent", /* User Agents to ignore 28 */
1559
"ReallyQuiet", /* Dont display ANY messages 29 */
1560
"GMTTime", /* Local or UTC time? 30 */
1561
"GroupURL", /* Group URLs 31 */
1562
"GroupSite", /* Group Sites 32 */
1563
"GroupReferrer", /* Group Referrers 33 */
1564
"GroupAgent", /* Group Agents 34 */
1565
"GroupShading", /* Shade Grouped entries 35 */
1566
"GroupHighlight", /* BOLD Grouped entries 36 */
1567
"Incremental", /* Incremental runs 37 */
1568
"IncrementalName", /* Filename for state data 38 */
1569
"HistoryName", /* Filename for history data 39 */
1570
"HTMLExtension", /* HTML filename extension 40 */
1571
"HTMLPre", /* HTML code at beginning 41 */
1572
"HTMLBody", /* HTML body code 42 */
1573
"HTMLEnd", /* HTML code at end 43 */
1574
"UseHTTPS", /* Use https:// on URLs 44 */
1575
"IncludeSite", /* Sites to always include 45 */
1576
"IncludeURL", /* URLs to always include 46 */
1577
"IncludeReferrer", /* Referrers to include 47 */
1578
"IncludeAgent", /* User Agents to include 48 */
1579
"PageType", /* Page Type (pageview) 49 */
1580
"VisitTimeout", /* Visit timeout (seconds) 50 */
1581
"GraphLegend", /* Graph Legends (yes/no) 51 */
1582
"GraphLines", /* Graph Lines (0=none) 52 */
1583
"FoldSeqErr", /* Fold sequence errors 53 */
1584
"CountryGraph", /* Display ctry graph (0=no) 54 */
1585
"TopKSites", /* Top sites (by KBytes) 55 */
1586
"TopKURLs", /* Top URLs (by KBytes) 56 */
1587
"TopEntry", /* Top Entry Pages 57 */
1588
"TopExit", /* Top Exit Pages 58 */
1589
"TopSearch", /* Top Search Strings 59 */
1590
"LogType", /* Log Type (clf/ftp/squid/w3c) 60 */
1591
"SearchEngine", /* SearchEngine strings 61 */
1592
"GroupDomains", /* Group domains (n=level) 62 */
1593
"HideAllSites", /* Hide ind. sites (0=no) 63 */
1594
"AllSites", /* List all sites? 64 */
1595
"AllURLs", /* List all URLs? 65 */
1596
"AllReferrers", /* List all Referrers? 66 */
1597
"AllAgents", /* List all User Agents? 67 */
1598
"AllSearchStr", /* List all Search Strings? 68 */
1599
"AllUsers", /* List all Users? 69 */
1600
"TopUsers", /* Top Usernames to show 70 */
1601
"HideUser", /* Usernames to hide 71 */
1602
"IgnoreUser", /* Usernames to ignore 72 */
1603
"IncludeUser", /* Usernames to include 73 */
1604
"GroupUser", /* Usernames to group 74 */
1605
"DumpPath", /* Path for dump files 75 */
1606
"DumpExtension", /* Dump filename extension 76 */
1607
"DumpHeader", /* Dump header as first rec? 77 */
1608
"DumpSites", /* Dump sites tab file 78 */
1609
"DumpURLs", /* Dump urls tab file 79 */
1610
"DumpReferrers", /* Dump referrers tab file 80 */
1611
"DumpAgents", /* Dump user agents tab file 81 */
1612
"DumpUsers", /* Dump usernames tab file 82 */
1613
"DumpSearchStr", /* Dump search str tab file 83 */
1614
"DNSCache", /* DNS Cache file name 84 */
1615
"DNSChildren", /* DNS Children (0=no DNS) 85 */
1616
"DailyGraph", /* Daily Graph (0=no) 86 */
1617
"DailyStats", /* Daily Stats (0=no) 87 */
1618
"LinkReferrer", /* Link referrer (0=no) 88 */
1619
"PagePrefix", /* PagePrefix - treat as page 89 */
1620
"ColorHit", /* Hit Color (def=00805c) 90 */
1621
"ColorFile", /* File Color (def=0040ff) 91 */
1622
"ColorSite", /* Site Color (def=ff8000) 92 */
1623
"ColorKbyte", /* Kbyte Color (def=ff0000) 93 */
1624
"ColorPage", /* Page Color (def=00e0ff) 94 */
1625
"ColorVisit", /* Visit Color (def=ffff00) 95 */
1626
"ColorMisc", /* Misc Color (def=00e0ff) 96 */
1627
"PieColor1", /* Pie Color 1 (def=800080) 97 */
1628
"PieColor2", /* Pie Color 2 (def=80ffc0) 98 */
1629
"PieColor3", /* Pie Color 3 (def=ff00ff) 99 */
1630
"PieColor4", /* Pie Color 4 (def=ffc080) 100 */
1631
"CacheIPs", /* Cache IPs in DNS DB (0=no) 101 */
1632
"CacheTTL", /* DNS Cache entry TTL (days) 102 */
1633
"GeoDB", /* GeoDB lookups (0=no) 103 */
1634
"GeoDBDatabase", /* GeoDB database filename 104 */
1635
"StripCGI", /* Strip CGI in URLS (0=no) 105 */
1636
"TrimSquidURL", /* Trim squid URLs (0=none) 106 */
1637
"OmitPage", /* URLs not counted as pages 107 */
1638
"HTAccess", /* Write .htaccess files? 108 */
1639
"IgnoreState", /* Ignore state file (0=no) 109 */
1640
"DefaultIndex", /* Default index.* (1=yes) 110 */
1641
"GeoIP", /* Use GeoIP? (1=yes) 111 */
1642
"GeoIPDatabase", /* Database to use for GeoIP 112 */
1643
"NormalizeURL", /* Normalize CLF URLs (1=yes) 113 */
1644
"IndexMonths", /* # months for main page 114 */
1645
"GraphMonths", /* # months for yearly graph 115 */
1646
"YearHeaders", /* use year headers? (1=yes) 116 */
1647
"YearTotals", /* show year subtotals (0=no) 117 */
1648
"CountryFlags", /* show country flags? (0-no) 118 */
1649
"FlagDir", /* directory w/flag images 119 */
1650
"SearchCaseI" /* srch str case insensitive 120 */
1655
char buffer[BUFSIZE];
1656
char keyword[MAXKWORD];
1657
char value[MAXKVAL];
1660
int num_kwords=sizeof(kwords)/sizeof(char *);
1662
/* open the configuration file; a failure is reported on stderr */
if ( (fp=fopen(fname,"r")) == NULL)
1665
fprintf(stderr,"%s %s\n",msg_bad_conf,fname);
1669
while ( (fgets(buffer,BUFSIZE,fp)) != NULL)
1671
/* skip '#' comment lines and lines starting with whitespace (incl. blank) */
1672
if ( (buffer[0]=='#') || isspace((unsigned char)buffer[0]) ) continue;
1675
/* keyword: leading run of alphanumerics, at most MAXKWORD-1 characters */
cp1=buffer;cp2=keyword;count=MAXKWORD-1;
1676
while ( (isalnum((unsigned char)*cp1)) && count )
1677
{ *cp2++ = *cp1++; count--; }
1681
/* value: skip separating blanks, then copy up to MAXKVAL-1 characters */
cp2=value; count=MAXKVAL-1;
1682
while ((*cp1!='\n')&&(*cp1!='\0')&&(isspace((unsigned char)*cp1))) cp1++;
1683
while ((*cp1!='\n')&&(*cp1!='\0')&&count ) { *cp2++ = *cp1++; count--; }
1685
/* strip trailing whitespace from the value */
while ((isspace((unsigned char)*cp2)) && (cp2 != value) ) *cp2--='\0';
1687
/* check if blank keyword/value */
1688
if ( (keyword[0]=='\0') || (value[0]=='\0') ) continue;
1691
/* find the keyword in the table; its index becomes the key */
for (i=0;i<num_kwords;i++)
1692
if (!ouricmp(keyword,kwords[i])) { key=i; break; }
1694
if (key==0) { printf("%s '%s' (%s)\n", /* Invalid keyword */
1695
msg_bad_key,keyword,fname);
1701
/* yes/no style options below look only at the first letter of the value, */
/* lower-cased; numeric options are converted with atoi()                 */
case 1: out_dir=save_opt(value); break; /* OutputDir */
1702
case 2: log_fname=save_opt(value); break; /* LogFile */
1703
case 3: msg_title=save_opt(value); break; /* ReportTitle */
1704
case 4: hname=save_opt(value); break; /* HostName */
1705
case 5: ignore_hist=
1706
(tolower(value[0])=='y')?1:0; break; /* IgnoreHist */
1708
(tolower(value[0])=='y')?1:2; break; /* Quiet */
1710
(tolower(value[0])=='n')?0:1; break; /* TimeMe */
1712
(tolower(value[0])=='y')?1:0; break; /* Debug */
1713
case 9: hourly_graph=
1714
(tolower(value[0])=='n')?0:1; break; /* HourlyGraph */
1715
case 10: hourly_stats=
1716
(tolower(value[0])=='n')?0:1; break; /* HourlyStats */
1717
case 11: ntop_sites = atoi(value); break; /* TopSites */
1718
case 12: ntop_urls = atoi(value); break; /* TopURLs */
1719
case 13: ntop_refs = atoi(value); break; /* TopRefs */
1720
case 14: ntop_agents = atoi(value); break; /* TopAgents */
1721
case 15: ntop_ctrys = atoi(value); break; /* TopCountries */
1722
case 16: add_nlist(value,&hidden_sites); break; /* HideSite */
1723
case 17: add_nlist(value,&hidden_urls); break; /* HideURL */
1724
case 18: add_nlist(value,&hidden_refs); break; /* HideReferrer */
1725
case 19: add_nlist(value,&hidden_agents); break; /* HideAgent */
1726
case 20: add_nlist(value,&index_alias); break; /* IndexAlias */
1727
case 21: add_nlist(value,&html_head); break; /* HTMLHead */
1728
case 22: add_nlist(value,&html_post); break; /* HTMLPost */
1729
case 23: add_nlist(value,&html_tail); break; /* HTMLTail */
1730
case 24: mangle_agent=atoi(value); break; /* MangleAgents */
1731
case 25: add_nlist(value,&ignored_sites); break; /* IgnoreSite */
1732
case 26: add_nlist(value,&ignored_urls); break; /* IgnoreURL */
1733
case 27: add_nlist(value,&ignored_refs); break; /* IgnoreReferrer */
1734
case 28: add_nlist(value,&ignored_agents); break; /* IgnoreAgent */
1735
/* ReallyQuiet only acts on a 'y' value; anything else leaves verbose alone */
case 29: if (tolower(value[0])=='y')
1736
verbose=0; break; /* ReallyQuiet */
1737
/* GMTTime is stored inverted: 'y' clears local_time, anything else sets it */
case 30: local_time=
1738
(tolower(value[0])=='y')?0:1; break; /* GMTTime */
1739
case 31: add_glist(value,&group_urls); break; /* GroupURL */
1740
case 32: add_glist(value,&group_sites); break; /* GroupSite */
1741
case 33: add_glist(value,&group_refs); break; /* GroupReferrer */
1742
case 34: add_glist(value,&group_agents); break; /* GroupAgent */
1743
case 35: shade_groups=
1744
(tolower(value[0])=='n')?0:1; break; /* GroupShading */
1745
case 36: hlite_groups=
1746
(tolower(value[0])=='n')?0:1; break; /* GroupHighlight */
1747
case 37: incremental=
1748
(tolower(value[0])=='y')?1:0; break; /* Incremental */
1749
case 38: state_fname=save_opt(value); break; /* State FName */
1750
case 39: hist_fname=save_opt(value); break; /* History FName */
1751
case 40: html_ext=save_opt(value); break; /* HTML extension */
1752
case 41: add_nlist(value,&html_pre); break; /* HTML Pre code */
1753
case 42: add_nlist(value,&html_body); break; /* HTML Body code */
1754
case 43: add_nlist(value,&html_end); break; /* HTML End code */
1756
(tolower(value[0])=='y')?1:0; break; /* Use https:// */
1757
case 45: add_nlist(value,&include_sites); break; /* IncludeSite */
1758
case 46: add_nlist(value,&include_urls); break; /* IncludeURL */
1759
case 47: add_nlist(value,&include_refs); break; /* IncludeReferrer*/
1760
case 48: add_nlist(value,&include_agents); break; /* IncludeAgent */
1761
case 49: add_nlist(value,&page_type); break; /* PageType */
1762
case 50: visit_timeout=atoi(value); break; /* VisitTimeout */
1763
case 51: graph_legend=
1764
(tolower(value[0])=='n')?0:1; break; /* GraphLegend */
1765
case 52: graph_lines = atoi(value); break; /* GraphLines */
1766
case 53: fold_seq_err=
1767
(tolower(value[0])=='y')?1:0; break; /* FoldSeqErr */
1768
case 54: ctry_graph=
1769
(tolower(value[0])=='n')?0:1; break; /* CountryGraph */
1770
case 55: ntop_sitesK = atoi(value); break; /* TopKSites (KB) */
1771
case 56: ntop_urlsK = atoi(value); break; /* TopKUrls (KB) */
1772
case 57: ntop_entry = atoi(value); break; /* Top Entry pgs */
1773
case 58: ntop_exit = atoi(value); break; /* Top Exit pages */
1774
case 59: ntop_search = atoi(value); break; /* Top Search pgs */
1775
/* LogType: first letter f=FTP, s=Squid, w=W3C; anything else means CLF */
case 60: log_type=(tolower(value[0])=='f')?
1776
LOG_FTP:((tolower(value[0])=='s')?
1777
LOG_SQUID:((tolower(value[0])=='w')?
1778
LOG_W3C:LOG_CLF)); break; /* LogType */
1779
case 61: add_glist(value,&search_list); break; /* SearchEngine */
1780
case 62: group_domains=atoi(value); break; /* GroupDomains */
1781
case 63: hide_sites=
1782
(tolower(value[0])=='y')?1:0; break; /* HideAllSites */
1784
(tolower(value[0])=='y')?1:0; break; /* All Sites? */
1786
(tolower(value[0])=='y')?1:0; break; /* All URLs? */
1788
(tolower(value[0])=='y')?1:0; break; /* All Refs */
1789
case 67: all_agents=
1790
(tolower(value[0])=='y')?1:0; break; /* All Agents? */
1791
case 68: all_search=
1792
(tolower(value[0])=='y')?1:0; break; /* All Srch str */
1794
(tolower(value[0])=='y')?1:0; break; /* All Users? */
1795
case 70: ntop_users=atoi(value); break; /* TopUsers */
1796
case 71: add_nlist(value,&hidden_users); break; /* HideUser */
1797
case 72: add_nlist(value,&ignored_users); break; /* IgnoreUser */
1798
case 73: add_nlist(value,&include_users); break; /* IncludeUser */
1799
case 74: add_glist(value,&group_users); break; /* GroupUser */
1800
case 75: dump_path=save_opt(value); break; /* DumpPath */
1801
case 76: dump_ext=save_opt(value); break; /* Dumpfile ext */
1802
case 77: dump_header=
1803
(tolower(value[0])=='y')?1:0; break; /* DumpHeader? */
1804
case 78: dump_sites=
1805
(tolower(value[0])=='y')?1:0; break; /* DumpSites? */
1807
(tolower(value[0])=='y')?1:0; break; /* DumpURLs? */
1809
(tolower(value[0])=='y')?1:0; break; /* DumpReferrers? */
1810
case 81: dump_agents=
1811
(tolower(value[0])=='y')?1:0; break; /* DumpAgents? */
1812
case 82: dump_users=
1813
(tolower(value[0])=='y')?1:0; break; /* DumpUsers? */
1814
case 83: dump_search=
1815
(tolower(value[0])=='y')?1:0; break; /* DumpSrchStrs? */
1817
case 84: dns_cache=save_opt(value); break; /* DNSCache fname */
1818
case 85: dns_children=atoi(value); break; /* DNSChildren */
1820
case 84: /* Disable DNSCache and DNSChildren if DNS is not enabled */
1821
case 85: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1822
#endif /* USE_DNS */
1823
case 86: daily_graph=
1824
(tolower(value[0])=='n')?0:1; break; /* DailyGraph */
1825
case 87: daily_stats=
1826
(tolower(value[0])=='n')?0:1; break; /* DailyStats */
1827
case 88: link_referrer=
1828
(tolower(value[0])=='y')?1:0; break; /* LinkReferrer */
1829
case 89: add_nlist(value,&page_prefix); break; /* PagePrefix */
1830
/* colors: at most 6 characters are copied in starting at offset 1 of each */
/* buffer (presumably just after a leading '#' -- TODO confirm)           */
case 90: strncpy(hit_color+1, value, 6); break; /* ColorHit */
1831
case 91: strncpy(file_color+1, value, 6); break; /* ColorFile */
1832
case 92: strncpy(site_color+1, value, 6); break; /* ColorSite */
1833
case 93: strncpy(kbyte_color+1,value, 6); break; /* ColorKbyte */
1834
case 94: strncpy(page_color+1, value, 6); break; /* ColorPage */
1835
case 95: strncpy(visit_color+1,value, 6); break; /* ColorVisit */
1836
case 96: strncpy(misc_color+1, value, 6); break; /* ColorMisc */
1837
case 97: strncpy(pie_color1+1, value, 6); break; /* PieColor1 */
1838
case 98: strncpy(pie_color2+1, value, 6); break; /* PieColor2 */
1839
case 99: strncpy(pie_color3+1, value, 6); break; /* PieColor3 */
1840
case 100:strncpy(pie_color4+1, value, 6); break; /* PieColor4 */
1842
case 101: cache_ips=
1843
(tolower(value[0])=='y')?1:0; break; /* CacheIPs */
1844
case 102: cache_ttl=atoi(value); break; /* CacheTTL days */
1846
(tolower(value[0])=='y')?1:0; break; /* GeoDB */
1847
case 104: geodb_fname=save_opt(value); break; /* GeoDBDatabase */
1849
case 101: /* Disable CacheIPs/CacheTTL/GeoDB/GeoDBDatabase if none */
1852
case 104: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1853
#endif /* USE_DNS */
1855
(tolower(value[0])=='n')?0:1; break; /* StripCGI */
1856
case 106: trimsquid=atoi(value); break; /* TrimSquidURL */
1857
case 107: add_nlist(value,&omit_page); break; /* OmitPage */
1859
(tolower(value[0])=='y')?1:0; break; /* HTAccess */
1860
case 109: ignore_state=
1861
(tolower(value[0])=='y')?1:0; break; /* IgnoreState */
1862
case 110: default_index=
1863
(tolower(value[0])=='n')?0:1; break; /* DefaultIndex */
1866
(tolower(value[0])=='y')?1:0; break; /* GeoIP */
1867
case 112: geoip_db=save_opt(value); break; /* GeoIPDatabase */
1869
case 111: /* Disable GeoIP and GeoIPDatabase if not enabled */
1870
case 112: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1872
case 113: normalize=
1873
(tolower(value[0])=='n')?0:1; break; /* NormalizeURL */
1874
case 114: index_mths=atoi(value); break; /* IndexMonths */
1875
case 115: graph_mths=atoi(value); break; /* GraphMonths */
1876
case 116: year_hdrs=
1877
(tolower(value[0])=='n')?0:1; break; /* YearHeaders */
1878
case 117: year_totals=
1879
(tolower(value[0])=='n')?0:1; break; /* YearTotals */
1880
case 118: use_flags=
1881
(tolower(value[0])=='y')?1:0; break; /* CountryFlags */
1882
/* supplying a FlagDir also switches country flags on */
case 119: use_flags=1; flag_dir=save_opt(value); break; /* FlagDir */
1883
case 120: searchcasei=
1884
(tolower(value[0])=='n')?0:1; break; /* SearchCaseI */
1890
/*********************************************/
1891
/* SAVE_OPT - save option from config file */
1892
/*********************************************/
1894
/*
 * save_opt() - keep a private heap copy of a configuration value.
 *
 * str - NUL-terminated option text (the caller's buffer is reused for
 *       the next config line, so the text has to be duplicated).
 *
 * Returns a newly malloc()ed copy of str, or NULL if str is NULL or the
 * allocation fails.  The copy is owned by the caller.
 */
static char *save_opt(char *str)
{
   char   *cp1;
   size_t len;

   if (str==NULL) return NULL;           /* nothing to save         */

   len=strlen(str)+1;                    /* include the terminator  */
   if ( (cp1=malloc(len))==NULL) return NULL;

   memcpy(cp1,str,len);
   return cp1;
}
1904
/*********************************************/
1905
/* CLEAR_MONTH - initialize monthly stuff */
1906
/*********************************************/
1912
/* Monthly reset: zero the counters, empty the hash tables, then clear */
/* the first ntop_ctrys entries of top_ctrys[].                        */
init_counters(); /* reset monthly counters */
1913
del_htabs(); /* clear hash tables */
1914
/* the ntop_ctrys!=0 test is already implied by the loop bound */
if (ntop_ctrys!=0 ) for (i=0;i<ntop_ctrys;i++) top_ctrys[i]=NULL;
1917
/*********************************************/
1918
/* INIT_COUNTERS - prep counters for use */
1919
/*********************************************/
1921
/*
 * init_counters() - set the accumulated statistics back to zero.
 *
 * Clears the per-response-code counts, the 31 per-day slots (tm_*), the
 * 24 per-hour slots (th_*), the transfer totals of every ctry[] entry,
 * and the running totals (t_*, mh_hit, dt_site).  Takes no arguments and
 * returns nothing; it only writes to file-level variables.
 */
void init_counters()
1924
/* one counter per response-code slot */
for (i=0;i<TOTAL_RC;i++) response[i].count = 0;
1925
for (i=0;i<31;i++) /* per-day slots (index = day of month - 1) */
1927
tm_xfer[i]=tm_ixfer[i]=tm_oxfer[i]=0.0;
1928
tm_hit[i]=tm_file[i]=tm_site[i]=tm_page[i]=tm_visit[i]=0;
1930
for (i=0;i<24;i++) /* per-hour slots (index = hour) */
1932
th_hit[i]=th_file[i]=th_page[i]=0;
1933
th_xfer[i]=th_ixfer[i]=th_oxfer[i]=0.0;
1935
for (i=0;ctry[i].desc;i++) /* country totals; stops at the entry with no desc */
1939
ctry[i].xfer=ctry[i].ixfer=ctry[i].oxfer=0;
1941
/* running totals */
t_hit=t_file=t_site=t_url=t_ref=t_agent=t_page=t_visit=t_user=0;
1942
t_xfer=t_ixfer=t_oxfer=0.0;
1943
mh_hit = dt_site = 0;
1947
/*********************************************/
1948
/* PRINT_OPTS - print command line options */
1949
/*********************************************/
1951
/*
 * print_opts() - print the command line usage text on stdout.
 *
 * pname - program name, shown between the h_usage1 and h_usage2 strings.
 *
 * After the usage line every entry of h_msg[] is printed on a line of
 * its own; the list ends at the first NULL entry.
 */
void print_opts(char *pname)
1955
printf("%s: %s %s\n",h_usage1,pname,h_usage2);
1956
for (i=0;h_msg[i];i++) printf("%s\n",h_msg[i]);
1960
/*********************************************/
1962
/*********************************************/
1964
/*
 * print_version() - print version and build information on stdout.
 *
 * Queries the running system with uname() and prints a banner that
 * includes its name, release and machine type together with the language
 * and copyright strings.  The lines after the banner print the
 * modification date, a list of option names collected in buf ("none" if
 * buf is empty) and the default GeoDB and config directories.
 * NOTE(review): the conditions guarding the option names and the
 * extra lines are not visible here -- presumably compile-time feature
 * switches and a verbosity check; confirm.
 */
void print_version()
1967
uname(&system_info);
1969
printf("Webalizer V%s-%s (%s %s %s) %s\n%s\n",
1971
system_info.sysname,system_info.release,system_info.machine,
1972
language,copyright);
1975
/* each append copies the literal plus its terminator (length + 1) */
/* onto the current end of buf                                     */
strncpy(&buf[strlen(buf)],"DNS/GeoDB ",11);
1978
strncpy(&buf[strlen(buf)],"BZip2 ",7);
1981
strncpy(&buf[strlen(buf)],"GeoIP ",7);
1986
printf("Mod date: %s Options: ",moddate);
1987
if (buf[0]!=0) printf("%s",buf);
1988
else printf("none");
1991
printf("Default GeoDB dir : %s\n",GEODB_LOC);
1993
printf("Default config dir: %s\n",ETCDIR);
2000
/*********************************************/
2001
/* CUR_TIME - return date/time as a string */
2002
/*********************************************/
2007
/* Result buffer.  It is static, so its contents outlive the call and are */
/* overwritten by the next one -- presumably a pointer to it is returned  */
/* (the return statement is not visible here; confirm).                   */
static char timestamp[48];
2009
/* get system time */
2011
/* convert to timestamp string */
2013
/* first form: the zone name comes from the C library through %Z */
strftime(timestamp,sizeof(timestamp),"%d-%b-%Y %H:%M %Z",
2016
/* second form: the zone is the literal text "GMT" */
/* NOTE(review): what selects between the two forms is not visible here */
/* (presumably the local/GMT time setting) -- confirm                    */
strftime(timestamp,sizeof(timestamp),"%d-%b-%Y %H:%M GMT",
2022
/*********************************************/
2023
/* ISPAGE - determine if an HTML page or not */
2024
/*********************************************/
2026
/*
 * ispage() - decide whether a URL counts as a page.
 *
 * str - the URL to classify.
 *
 * Returns 0 if str matches the omit_page list.  Otherwise returns
 * non-zero when cp2 still equals str after the scan for '.', when the
 * URL ends in '/', when a page prefix matches the start of the URL, or
 * when the text after the last '.' is found in the page_type list.
 * NOTE(review): the initial values of cp1/cp2 and the loop that walks t
 * are not visible here -- presumably both pointers start at str and t
 * iterates over the page_prefix list; confirm.
 */
int ispage(char *str)
2031
/* explicitly omitted URLs are never pages */
if (isinlist(omit_page,str)!=NULL) return 0;
2034
/* walk to the end of the string, remembering the last '.' seen */
while (*cp1!='\0') { if (*cp1=='.') cp2=cp1; cp1++; }
2035
/* cp2 is stepped past the '.' as a side effect; cp1 backs up to the last char */
if ((cp2++==str)||(*(--cp1)=='/')) return 1;
2039
/* Check if a PagePrefix matches */
2040
if(strncmp(str,t->string,strlen(t->string))==0) return 1;
2043
/* fall back to the extension (text after the last '.') */
return (isinlist(page_type,cp2)!=NULL);
2046
/*********************************************/
2047
/* ISURLCHAR - checks for valid URL chars */
2048
/*********************************************/
2050
/*
 * Return 1 when 'ch' is acceptable inside a URL, 0 otherwise.
 *
 *   ch   - character to test
 *   flag - non-zero: CGI variables are being stripped, so the query
 *          delimiters ';', '?', '&' and '=' are NOT accepted;
 *          zero: those delimiters are accepted as well.
 *
 * Letters, digits and every value above 127 are always accepted.
 * A NUL is always rejected: strchr() treats the terminator of the
 * character set as part of the set, so without the explicit test
 * a NUL would be reported as a valid URL character.
 */
int isurlchar(unsigned char ch, int flag)
{
   if (isalnum(ch)) return 1;               /* allow letters, numbers...   */
   if (ch > 127)    return 1;               /* allow extended chars...     */
   if (ch == '\0')  return 0;               /* never match the terminator  */
   if (flag)                                /* and filter some others      */
      return (strchr(":/\\.,' *!-+_@~()[]!",ch)!=NULL);     /* strip cgi vars */
   else
      return (strchr(":/\\.,' *!-+_@~()[]!;?&=",ch)!=NULL); /* keep cgi vars  */
}
2060
/*********************************************/
2061
/* CTRY_IDX - create unique # from TLD */
2062
/*********************************************/
2064
/*
 * Build a numeric index from a TLD / country-code string.
 *
 * The string is read from its last character to its first.  Each
 * character contributes (ch - 'a' + 1) shifted left by a running bit
 * offset: the last character sits at offset 0 in a 7 bit field and
 * every earlier character gets a further 5 bit field (7, 12, 17, ...).
 *
 *   str - NUL terminated string (must not be NULL)
 *
 * Returns the accumulated index; an empty string gives 0.
 *
 * The shift is done in 64 bit unsigned arithmetic.  Shifting the plain
 * int value overflowed for strings of six or more characters (for
 * example 't' << 27).  Characters whose offset would reach 64 bits are
 * ignored, because shifting by the full width is undefined.
 */
u_int64_t ctry_idx(char *str)
{
   int       i=strlen(str),j=0;
   u_int64_t idx=0;
   char      *cp=str+i;                     /* start at the terminator     */

   for (;i>0 && j<64;i--)
   {
      /* digits give a negative term; unsigned wrap-around keeps the sum */
      idx+=((u_int64_t)(*--cp-'a'+1))<<j;
      j+=(j==0)?7:5;                        /* 7 bits first, 5 afterwards  */
   }
   return idx;
}
2074
/*********************************************/
2075
/* UN_IDX - get TLD from index # */
2076
/*********************************************/
2078
/*
 * Turn an index built from 5 bit letter fields above a 7 bit low
 * field back into its string form.
 *
 *   idx - index value; 0 gives an empty string
 *
 * Returns a pointer to a static buffer that is overwritten by the
 * next call.
 *
 * A low 7 bit field above 32 marks the two-character letter+digit
 * codes (a1, a2, o1): the letter is taken from the bits above the low
 * field and the digit is the low field minus 32.  Otherwise up to six
 * letters are decoded, empty fields become blanks and leading blanks
 * are shifted out.
 */
char *un_idx(u_int64_t idx)
{
   int    i,j;
   char   *cp;
   static char buf[8];

   memset(buf, 0, sizeof(buf));
   if (idx==0) return buf;                  /* idx is unsigned: only 0     */
   if ((j=(idx&0x7f))>32)                   /* only for a1, a2 and o1      */
      { buf[0]=(idx>>7)+'a'; buf[1]=j-32; return buf; }

   /* buf[5] comes from the 7 bit low field, buf[0..4] from the 5 bit */
   /* fields above it (bit offsets 27, 22, 17, 12 and 7)              */
   for (i=5;i>=0;i--)
      buf[i]=(i==5)?(idx&0x7f)+'a'-1:(j=(idx>>(((5-i)*5)+2))&0x1f)?j+'a'-1:' ';

   /* left-justify: buf[6] is always 0, so every shift pulls in a NUL */
   cp=buf;
   while (*cp==' ') { for (i=0;i<6;i++) buf[i]=buf[i+1]; }
   return buf;
}
2094
/*********************************************/
2095
/* FROM_HEX - convert hex char to decimal */
2096
/*********************************************/
2098
/*
 * Convert one hexadecimal digit character to its value.
 *
 *   c - '0'-'9', 'A'-'F' or 'a'-'f'
 *
 * Returns 0..15 for a valid digit and 0 for any other character.
 * The value is computed in an int, so the result does not depend on
 * whether plain char is signed on the target.
 */
char from_hex(char c)                       /* convert hex to dec      */
{
   int val;

   if      (c>='0'&&c<='9') val=c-'0';      /* 0-9?                    */
   else if (c>='A'&&c<='F') val=c-'A'+10;   /* A-F?                    */
   else if (c>='a'&&c<='f') val=c-'a'+10;   /* lowercase...            */
   else                     val=0;          /* return 0 if bad...      */
   return (char)val;
}
2106
/*********************************************/
2107
/* UNESCAPE - convert escape seqs to chars */
2108
/*********************************************/
2110
/*
 * Decode %XX escape sequences in place.
 *
 *   str - NUL terminated string to decode; may be NULL
 *
 * Returns str (NULL when str is NULL).  The decoded text is never
 * longer than the input, so it is written over the same buffer.
 *
 * A '%' is decoded only when BOTH following characters are hex digits.
 * Anything else, including a truncated escape at the end of the
 * string, is copied through unchanged, so a stray '%' can neither
 * swallow the next character nor produce a byte from a non-hex digit.
 * Decoded control characters (below 32, or 127) are replaced by '_'.
 */
char *unescape(char *str)
{
   unsigned char *cp1=(unsigned char *)str; /* force unsigned so we    */
   unsigned char *cp2=cp1;                  /* can do > 127            */

   if (!str) return NULL;                   /* make sure strings valid */

   while (*cp1)
   {
      /* isxdigit() is false for the NUL, so cp1[2] is only read */
      /* when cp1[1] is a real character                         */
      if (*cp1=='%' && isxdigit(cp1[1]) && isxdigit(cp1[2]))
      {
         *cp2=(unsigned char)(from_hex(cp1[1])*16+from_hex(cp1[2]));
         if ((*cp2<32)||(*cp2==127)) *cp2='_';   /* make '_' if its bad */
         cp2++; cp1+=3;
      }
      else *cp2++ = *cp1++;                 /* if not, just continue   */
   }
   *cp2='\0';                               /* don't forget terminator */
   return str;                              /* return the string       */
}
2137
/*********************************************/
2138
/* OURICMP - Case insensitive string compare */
2139
/*********************************************/
2141
/*
 * Case insensitive string comparison.
 *
 *   str1, str2 - NUL terminated strings (must not be NULL)
 *
 * Returns 0 when the strings are equal ignoring case, 1 otherwise.
 * Both strings must end at the same position: a str1 that is only a
 * prefix of str2 is reported as different.
 */
int ouricmp(char *str1, char *str2)
{
   while ((*str1!=0) &&
          (tolower((unsigned char)*str1)==tolower((unsigned char)*str2)))
      { str1++; str2++; }

   /* equal only if the scan stopped on the terminator of both strings */
   return (*str1==0 && *str2==0) ? 0 : 1;
}
2149
/*********************************************/
2150
/* SRCH_STRING - get search strings from ref */
2151
/*********************************************/
2153
/* Extract a search phrase from the query string in ptr and record it */
/* in the sr_htab table via put_snode().  The phrase is lower-cased,  */
/* has runs of blanks compressed and is trimmed before it is stored.  */
/* Note that ptr is modified in place ('+' is rewritten to ' ').      */
void srch_string(char *ptr)
2155
/* ptr should point to unescaped query string */
2156
char tmpbuf[BUFSIZE];
2158
unsigned char *cp1, *cp2, *cps;
2161
/* Check if search engine referrer or return */
2162
if ( (cps=(unsigned char *)isinglist(search_list,log_rec.refer))==NULL)
2165
/* Try to find query variable */
2166
/* srch is built as a one character prefix followed by the text cps */
/* points at; the last byte is forced to NUL because strncpy may not */
/* terminate the copy                                                */
srch[0]='?'; srch[sizeof(srch)-1] = '\0';
2167
strncpy(&srch[1],(char *)cps,sizeof(srch)-2); /* First, try "?..." */
2168
if ((cp1=(unsigned char *)strstr(ptr,srch))==NULL)
2170
srch[0]='&'; /* Next, try "&..." */
2171
if ((cp1=(unsigned char *)strstr(ptr,srch))==NULL) return;
2173
cp2=(unsigned char *)tmpbuf;
2174
/* skip ahead to just past the '=' that follows the variable name */
while (*cp1!='=' && *cp1!=0) cp1++; if (*cp1!=0) cp1++;
2175
/* copy the value up to the next '&' or the end of the string       */
/* NOTE(review): no bound check against sizeof(tmpbuf) is visible in */
/* this loop - confirm ptr can never be longer than BUFSIZE          */
while (*cp1!='&' && *cp1!=0)
2177
if (*cp1=='"' || *cp1==',' || *cp1=='?')
2178
{ cp1++; continue; } /* skip bad ones.. */
2181
if (*cp1=='+') *cp1=' '; /* change + to space */
2182
if (sp_flg && *cp1==' ') { cp1++; continue; } /* compress spaces */
2183
if (*cp1==' ') sp_flg=1; else sp_flg=0; /* (flag spaces here) */
2185
*cp2++=tolower(*cp1++); /* normal character */
2189
/* terminate the copy and rewind cp2 to the start of tmpbuf */
*cp2=0; cp2=(unsigned char *)tmpbuf;
2190
if (tmpbuf[0]=='?') tmpbuf[0]=' '; /* format fix ? */
2191
while( *cp2!=0 && isspace((unsigned char)*cp2) ) cp2++; /* skip sps. */
2192
/* nothing but white space: no search string to record */
if (*cp2==0) return;
2194
/* any trailing spaces? */
2195
cp1=cp2+strlen((char *)cp2)-1;
2196
while (cp1!=cp2) if (isspace((unsigned char)*cp1)) *cp1--='\0'; else break;
2198
/* strip invalid chars */
2200
/* control characters (below 32, or 127) are replaced by '_' */
while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; }
2202
/* a non-zero result from put_snode() is treated as a failure */
if (put_snode((char *)cp2,(u_int64_t)1,sr_htab))
2205
/* Error adding search string node, skipping .... */
2206
fprintf(stderr,"%s %s\n", msg_nomem_sc, tmpbuf);
2211
/*********************************************/
2212
/* GET_DOMAIN - Get domain portion of host */
2213
/*********************************************/
2215
/* Return a pointer into str at the start of its domain portion, or   */
/* NULL when isipaddr() returns non-zero for str.  The number of      */
/* dots to count is derived from the global group_domains.            */
char *get_domain(char *str)
2218
/* dots still to be found before the domain start is reached */
int i=group_domains+1;
2220
/* any non-zero isipaddr() result means there is no domain to extract */
if (isipaddr(str)) return NULL;
2221
/* start on the last character of the string */
cp = str+strlen(str)-1;
2226
/* once the counter hits zero, the domain starts just after this '.' */
/* NOTE(review): the enclosing loop and '.' test are not visible     */
/* here - presumably a backwards scan over str; confirm.             */
if (!(--i)) return ++cp;
2232
/*********************************************/
2233
/* AGENT_MANGLE - Re-format user agent */
2234
/*********************************************/
2236
/* Rewrite the user agent string held in log_rec.agent into a shorter */
/* browser / version / OS form.  The result is written over the same  */
/* buffer through cp2 while cp1 (and cp3) read ahead in it.           */
/* The str argument is overwritten with log_rec.agent right away, so  */
/* the value passed by the caller is not used in the visible code.    */
/* NOTE(review): the mangle-level tests and the final terminator      */
/* write are not visible here; statement order is significant because */
/* source and destination share one buffer.                           */
void agent_mangle(char *str)
2238
char *cp1, *cp2, *cp3;
2240
str=cp2=log_rec.agent;
2241
/* "ompatible" matches the marker with either a capital or small 'c' */
cp1=strstr(str,"ompatible"); /* check known fakers */
2244
/* move to the ';' that ends the "compatible" token (or end of string) */
while (*cp1!=';'&&*cp1!='\0') cp1++;
2245
/* kludge for Mozilla/3.01 (compatible;) */
2246
/* strcmp() is non-zero when something other than )" follows the ';' */
if (*cp1++==';' && strcmp(cp1,")\"")) /* success! */
2248
/* Opera can hide as MSIE */
2249
cp3=strstr(str,"Opera");
2252
/* scan the Opera token up to its first '.' */
while (*cp3!='.'&&*cp3!='\0')
2254
/* a '/' makes a blank get written to the output */
if(*cp3=='/') *cp2++=' ';
2262
while (*cp1 == ' ') cp1++; /* eat spaces */
2263
/* copy the browser name and major version up to '.', ';' or the end */
while (*cp1!='.'&&*cp1!='\0'&&*cp1!=';') *cp2++=*cp1++;
2267
while (*cp1!='.'&&*cp1!=';'&&*cp1!='\0') *cp2++=*cp1++;
2268
/* copy the '.' and the character that follows it */
if (*cp1!=';'&&*cp1!='\0') { *cp2++=*cp1++; *cp2++=*cp1++; }
2271
/* one further digit is copied when present */
if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2273
/* copy the rest of the version token */
while (*cp1!=';'&&*cp1!='\0'&&*cp1!='('&&*cp1!=' ') *cp2++=*cp1++;
2276
/* Level 1 - try to get OS */
2277
cp1=strstr(cp1,")");
2282
/* back up from the ')' to the preceding ';' or '(' (or to str) */
while (*cp1!=';'&&*cp1!='('&&cp1!=str) cp1--;
2283
if (cp1!=str&&*cp1!='\0') cp1++;
2284
while (*cp1==' '&&*cp1!='\0') cp1++;
2285
/* copy the last field inside the parentheses */
while (*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2293
/* nothing after "compatible", should we mangle? */
2299
cp1=strstr(str,"Opera"); /* Opera flavor */
2302
/* copy the name up to '/' or blank */
while (*cp1!='/'&&*cp1!=' '&&*cp1!='\0') *cp2++=*cp1++;
2303
while (*cp1!='.'&&*cp1!='\0')
2305
if(*cp1=='/') *cp2++=' ';
2311
while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2316
if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2318
while (*cp1!=' '&&*cp1!='\0'&&*cp1!='(') *cp2++=*cp1++;
2321
/* the OS field is taken from the text after the next '(' */
cp1=strstr(cp1,"(");
2327
while (*cp1!=';'&&*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2335
cp1=strstr(str,"Mozilla"); /* Netscape flavor */
2338
while (*cp1!='/'&&*cp1!=' '&&*cp1!='\0') *cp2++=*cp1++;
2339
/* a blank after the name is turned into '/' */
if (*cp1==' ') *cp1='/';
2340
while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2343
while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2348
if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2350
while (*cp1!=' '&&*cp1!='\0'&&*cp1!='(') *cp2++=*cp1++;
2353
/* Level 1 - Try to get OS */
2354
cp1=strstr(cp1,"(");
2360
while (*cp1!=';'&&*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2370
/*********************************************/
2371
/* OUR_GZGETS - enhanced gzgets for log only */
2372
/*********************************************/
2374
/* Read one line of the log from fp into buf (at most size bytes).    */
/* Input is taken from the shared f_buf buffer, which is refilled     */
/* GZ_BUFSIZE bytes at a time through gzread() or BZ2_bzread().       */
/* Returns buf once a line has been stored, or Z_NULL when a refill   */
/* yields no data (f_end <= 0).                                       */
char *our_gzgets(void *fp, char *buf, int size)
2376
char *out_cp=buf; /* point to output */
2379
/* f_cp has run past the last valid byte: the buffer needs a refill */
if (f_cp>(f_buf+f_end-1)) /* load? */
2382
/* the reader is chosen by the log compression type in gz_log */
f_end=(gz_log==COMP_BZIP)?
2383
BZ2_bzread(fp, f_buf, GZ_BUFSIZE):
2384
gzread(fp, f_buf, GZ_BUFSIZE);
2386
/* NOTE(review): this gzread-only variant is presumably the build   */
/* without bzip2 support; the #ifdef lines are not visible - confirm */
f_end=gzread(fp, f_buf, GZ_BUFSIZE);
2388
/* nothing read (end of file or error) */
if (f_end<=0) return Z_NULL;
2392
/* size counts down the room that is left in buf */
if (--size) /* more? */
2395
/* a newline ends the line: terminate the output and return it */
if (*f_cp++ == '\n') { *out_cp='\0'; return buf; }
2397
/* no room left: terminate and return what has been stored so far */
else { *out_cp='\0'; return buf; }
2402
/*********************************************/
2403
/* bz2_rewind - our 'rewind' for bz2 files */
2404
/*********************************************/
2406
/* Re-open the bzip2 file fname, store the new handle in *fp and      */
/* reset the shared read buffer.  Returns -1 when the open fails.     */
/* The mode argument is not used by the visible code; the file is     */
/* always opened with "rb".                                           */
/* NOTE(review): closing the previous handle and the success return   */
/* are not visible here - confirm.                                    */
int bz2_rewind( void **fp, char *fname, char *mode )
2409
*fp = BZ2_bzopen( fname, "rb");
2410
/* f_cp is parked at the end of f_buf with f_end at 0, so the next */
/* read has to reload the buffer                                    */
f_cp=f_buf+GZ_BUFSIZE; f_end=0; /* reset buffer counters */
2411
memset(f_buf, 0, sizeof(f_buf));
2412
if (*fp == Z_NULL) return -1;
2415
#endif /* USE_BZIP */
2417
/*********************************************/
2418
/* ISIPADDR - Determine if str is IP address */
2419
/*********************************************/
2421
/*
 * Classify a host string by its character content.
 *
 *   str - NUL terminated host name or address (must not be NULL)
 *
 * Returns:
 *    1  IPv4 address (digits and exactly three dots)
 *    2  IPv6 address with an embedded IPv4 part (colons and dots)
 *    3  IPv6 address
 *    0  ordinary host name
 *   -1  bad host name (contains ':' but is not a valid IPv6 form, or
 *       is made of digits and dots without exactly three dots)
 *
 * Only the characters are inspected, not the numeric ranges.  Hex
 * digits are accepted in either case, so an upper-case IPv6 literal
 * such as "FE80::1" is no longer rejected as a bad host name.
 */
int isipaddr(char *str)
{
   int  i=1,j=0;                            /* separators+1, dot count */
   char *cp;                                /* generic ptr             */

   if (strchr(str,':')!=NULL)
   {
      /* Possible IPv6 Address */
      cp=str;
      while (*cp!='\0' && strchr(":.abcdefABCDEF0123456789",*cp)!=NULL)
      {
         if (*cp=='.')   j++;
         if (*cp++==':') i++;
      }

      if (*cp!='\0') return -1;             /* bad hostname (has ':')  */
      if (i>1 && j) return 2;               /* IPv4/IPv6               */
      return 3;                             /* IPv6                    */
   }
   else
   {
      /* Not an IPv6 address, check for IPv4 */
      cp=str;
      while (*cp!='\0' && strchr(".0123456789",*cp)!=NULL)
      {
         if (*cp++=='.') i++;
      }
      if (*cp!='\0') return 0;              /* hostname                */
      if (i!=4) return -1;                  /* bad hostname            */
      return 1;                             /* IPv4                    */
   }
}
2454
/*****************************************************************/
2456
/* JDATE - Julian date calculator */
2458
/* Calculates the number of days since Jan 1, 0000. */
2460
/* Originally written by Bradford L. Barrett (03/17/1988) */
2461
/* Returns an unsigned long value representing the number of */
2462
/* days since January 1, 0000. */
2464
/* Note: Due to the changes made by Pope Gregory XIII in the */
2465
/* 16th Century (Feb 24, 1582), dates before 1583 will */
2466
/* not return a truly accurate number (will be at least */
2467
/* 10 days off). Somehow, I don't think this will */
2468
/* present much of a problem for most situations :) */
2470
/* Usage: days = jdate(day, month, year) */
2472
/* The number returned is adjusted by 5 to facilitate day of */
2473
/* week calculations. The mod of the returned value gives the */
2474
/* day of the week the date is. (ie: dow = days % 7 ) where */
2475
/* dow will return 0=Sunday, 1=Monday, 2=Tuesday, etc... */
2477
/*****************************************************************/
2479
u_int64_t jdate( int day, int month, int year )
2481
u_int64_t days; /* value returned */
2482
int mtable[] = {0,31,59,90,120,151,181,212,243,273,304,334};
2484
/* First, calculate base number including leap and Centennial year stuff */
2486
days=(((u_int64_t)year*365)+day+mtable[month-1]+
2487
((year+4)/4) - ((year/100)-(year/400)));
2489
/* now adjust for leap year before March 1st */
2491
if ((year % 4 == 0) && !((year % 100 == 0) &&
2492
(year % 400 != 0)) && (month < 3))
2495
/* done, return with calculated value */