2
webalizer - a web server log analysis program
4
Copyright (C) 1997-2011 Bradford L. Barrett
6
This program is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2 of the License, or
9
(at your option) any later version, and provided that the above
10
copyright and permission notice is included with all distributed
11
copies of this or derived software.
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
18
You should have received a copy of the GNU General Public License
19
along with this program; if not, write to the Free Software
20
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
24
/*********************************************/
25
/* STANDARD INCLUDES */
26
/*********************************************/
28
/* Fix broken Zlib 64 bitness */
29
#if _FILE_OFFSET_BITS == 64
30
#ifndef _LARGEFILE64_SOURCE
31
#define _LARGEFILE64_SOURCE 1
40
#include <unistd.h> /* normal stuff */
43
#include <sys/utsname.h>
52
/* ensure sys/types */
54
#include <sys/types.h>
57
/* Need socket header? */
58
#ifdef HAVE_SYS_SOCKET_H
59
#include <sys/socket.h>
62
/* some systems need this */
69
#include <netinet/in.h>
70
#include <arpa/inet.h>
80
/* Forward declaration: main calls this as bz2_rewind(&zlog_fp, log_fname, "rb") */
/* to restart a bzip2 log after the DNS resolver pass has read through it.       */
int bz2_rewind(void **, char *, char *);
83
#include "webalizer.h" /* main header */
89
#include "webalizer_lang.h" /* lang. support */
91
#include "dns_resolv.h"
94
/* internal function prototypes (forward declarations used by main below) */
96
void clear_month(); /* clear monthly stuff */
97
char *unescape(char *); /* unescape URLs; main also applies it to the referrer and username */
98
void print_opts(char *); /* print options (help); main passes argv[0] for -h */
99
void print_version(); /* print version information; invoked for -V */
100
int isurlchar(unsigned char, int); /* valid URL char test; main passes stripcgi (or 1) as 2nd arg */
101
void get_config(char *); /* Read a config file, given its file name */
102
static char *save_opt(char *); /* save conf option */
103
void srch_string(char *); /* search string analysis */
104
char *get_domain(char *); /* return domain name */
105
void agent_mangle(char *); /* reformat user agent; main calls it when mangle_agent is set */
106
char *our_gzgets(void *, char *, int); /* our gzgets: main calls it as (zlog_fp, buffer, BUFSIZE) */
107
int ouricmp(char *, char *); /* case insensitive compare */
108
int isipaddr(char *); /* is IP address test */
110
/*********************************************/
111
/* GLOBAL VARIABLES */
112
/*********************************************/
114
/* Program identification strings; version and editlvl are printed in the */
/* startup banner ("Webalizer V%s-%s ...") by main.                        */
char *version = "2.23"; /* program version */
115
char *editlvl = "05"; /* edit level */
116
char *moddate = "14-Apr-2011"; /* modification date */
117
char *copyright = "Copyright 1997-2011 by Bradford L. Barrett";
119
/* Run-time option globals. The values here are the defaults; main may   */
/* override them through get_config() and the getopt() command line loop. */
int verbose = 2; /* 2=verbose,1=err, 0=none (-q sets 1, -Q sets 0) */
120
int debug_mode = 0; /* debug mode flag (-d, also set by -v) */
121
int time_me = 0; /* timing display flag (-T) */
122
int local_time = 1; /* 1=localtime 0=GMT (UTC) */
123
int hist_gap = 0; /* 1=error w/hist, save bkp */
124
int ignore_hist = 0; /* history flag (1=skip) (-i) */
125
int ignore_state = 0; /* state flag (1=skip) (-b) */
126
int default_index= 1; /* default index. (1=yes): main adds "index." to index_alias */
127
int hourly_graph = 1; /* hourly graph display (-G clears) */
128
int hourly_stats = 1; /* hourly stats table (-H clears) */
129
int daily_graph = 1; /* daily graph display */
130
int daily_stats = 1; /* daily stats table */
131
int ctry_graph = 1; /* country graph display (-Y clears) */
132
int shade_groups = 1; /* Group shading 0=no 1=yes */
133
int hlite_groups = 1; /* Group hlite 0=no 1=yes */
134
int mangle_agent = 0; /* mangle user agents (-M level) */
135
int incremental = 0; /* incremental mode 1=yes (-p) */
136
int use_https = 0; /* use 'https://' on URLs */
137
int htaccess = 0; /* create .htaccess? (0=no) */
138
int stripcgi = 1; /* strip url cgi (0=no) */
139
int normalize = 1; /* normalize CLF URL (0=no) (-Z clears) */
140
int trimsquid = 0; /* trim squid urls (0=no) */
141
int searchcasei = 1; /* case insensitive search */
142
int visit_timeout= 1800; /* visit timeout (seconds) (-m) */
143
int graph_legend = 1; /* graph legend (1=yes) (-L clears) */
144
int graph_lines = 2; /* graph lines (0=none); main caps this at 20 */
145
int fold_seq_err = 0; /* fold seq err (0=no) (-f) */
146
int log_type = LOG_CLF; /* log type (default=CLF); -F selects FTP, SQUID or W3C */
147
int group_domains= 0; /* Group domains 0=none (-g) */
148
int hide_sites = 0; /* Hide ind. sites (0=no) (-X) */
149
int link_referrer= 0; /* Link referrers (0=no) */
150
char *hname = NULL; /* hostname for reports (-n) */
151
char *state_fname = "webalizer.current"; /* run state file name */
152
char *hist_fname = "webalizer.hist"; /* name of history file */
153
char *html_ext = "html"; /* HTML file suffix (-x) */
154
char *dump_ext = "tab"; /* Dump file suffix */
155
char *conf_fname = NULL; /* name of config file */
156
char *log_fname = NULL; /* log file name; NULL means read STDIN */
157
char *out_dir = NULL; /* output directory (-o); NULL means current directory */
158
char *blank_str = ""; /* blank string */
159
char *geodb_fname = NULL; /* GeoDB database filename (-J) */
160
char *dns_cache = NULL; /* DNS cache file name (-D) */
161
int dns_children = 0; /* DNS children (0=don't do) (-N); main caps at MAXCHILD */
162
int cache_ips = 0; /* CacheIPs in DB (0=no) */
163
int cache_ttl = 7; /* DNS Cache TTL (days); main forces this into 1..100 */
164
int geodb = 0; /* Use GeoDB (0=no) (-j) */
165
int graph_mths = 12; /* # months in index graph; main clamps to 12..GRAPHMAX */
166
int index_mths = 12; /* # months in index table; main clamps to 12..HISTSIZE */
167
int year_hdrs = 1; /* index year separators */
168
int year_totals = 1; /* index year subtotals */
169
int use_flags = 0; /* Show flags in ctry table (-z sets this and flag_dir) */
170
char *flag_dir = "flags"; /* location of flag icons */
173
/* GeoIP lookup support globals.                                            */
/* NOTE(review): a later "#endif USE_GEOIP" in main suggests these sit      */
/* inside a USE_GEOIP guard whose lines are not visible here - confirm.     */
int geoip = 0; /* Use GeoIP (0=no) (-w); main clears it when GeoDB is in use */
174
char *geoip_db = NULL; /* GeoIP database filename (-W) */
175
GeoIP *geo_fp = NULL; /* GeoIP database handle, set by GeoIP_open()/GeoIP_new() in main */
178
/* "Top N" table sizes. Several are settable from the command line in main. */
int ntop_sites = 30; /* top n sites to display (-S) */
179
int ntop_sitesK = 10; /* top n sites (by kbytes) */
180
int ntop_urls = 30; /* top n url's to display (-U) */
181
int ntop_urlsK = 10; /* top n url's (by kbytes) */
182
int ntop_entry = 10; /* top n entry url's (-e); capped at max(ntop_urls,ntop_urlsK) */
183
int ntop_exit = 10; /* top n exit url's (-E); capped the same way as ntop_entry */
184
int ntop_refs = 30; /* top n referrers "" (-R) */
185
int ntop_agents = 15; /* top n user agents "" (-A) */
186
int ntop_ctrys = 30; /* top n countries "" (-C); capped at # of defined countries */
187
int ntop_search = 20; /* top n search strings */
188
int ntop_users = 20; /* top n users to display */
190
/* "List all" page switches (0=no) */
int all_sites = 0; /* List All sites (0=no) */
191
int all_urls = 0; /* List All URLs (0=no) */
192
int all_refs = 0; /* List All Referrers */
193
int all_agents = 0; /* List All User Agents */
194
int all_search = 0; /* List All Search Strings */
195
int all_users = 0; /* List All Usernames */
197
/* Tab delimited dump file switches */
int dump_sites = 0; /* Dump tab delimited sites */
198
int dump_urls = 0; /* URLs */
199
int dump_refs = 0; /* Referrers */
200
int dump_agents = 0; /* User Agents */
201
int dump_users = 0; /* Usernames */
202
int dump_search = 0; /* Search strings */
203
int dump_header = 0; /* Dump header as first rec */
204
char *dump_path = NULL; /* Path for dump files */
206
/* Run state: date tracking, timestamps, running totals and I/O handles   */
/* shared between main and the rest of the program.                        */
int cur_year=0, cur_month=0, /* year/month/day/hour */
207
cur_day=0, cur_hour=0, /* tracking variables */
208
cur_min=0, cur_sec=0;
210
u_int64_t cur_tstamp=0; /* timestamp of the last record accepted */
211
u_int64_t rec_tstamp=0; /* timestamp of the record being processed (seconds since epoch) */
212
u_int64_t req_tstamp=0; /* copy of cur_tstamp taken before it is updated; passed to month_update_exit() */
213
u_int64_t epoch; /* used for timestamp adj.; main sets it to jdate(1,1,1970) */
215
int check_dup=0; /* check for dup flag */
216
int gz_log=COMP_NONE; /* compressed log type: COMP_NONE, COMP_GZIP or COMP_BZIP */
218
double t_xfer=0.0; /* monthly total xfer value */
219
u_int64_t t_hit=0,t_file=0,t_site=0, /* monthly total vars */
220
t_url=0,t_ref=0,t_agent=0,
221
t_page=0, t_visit=0, t_user=0;
223
double tm_xfer[31]; /* daily transfer totals */
225
/* NOTE(review): the declaration below ends on a comma and its final line  */
/* is not visible here (main also uses tm_visit[]); restore before building. */
u_int64_t tm_hit[31], tm_file[31], /* daily total arrays */
226
tm_site[31], tm_page[31],
229
u_int64_t dt_site; /* daily 'sites' total */
231
u_int64_t ht_hit=0, mh_hit=0; /* hourly hits totals; main keeps the larger ht_hit in mh_hit on hour change */
233
/* NOTE(review): this declaration also ends on a comma with its remaining  */
/* lines not visible here; restore before building.                         */
u_int64_t th_hit[24], th_file[24], /* hourly total arrays */
238
int f_day,l_day; /* first/last day vars */
240
struct utsname system_info; /* system info structure (filled by uname(), shown in the banner) */
242
u_int64_t ul_bogus =0; /* Dummy counter for groups */
244
struct log_struct log_rec; /* expanded log storage */
246
void *zlog_fp; /* compressed logfile ptr (from gzopen or BZ2_bzopen) */
247
FILE *log_fp; /* regular logfile pointer */
249
char buffer[BUFSIZE]; /* log file record buffer */
250
char tmp_buf[BUFSIZE]; /* used to temp save above; main also builds the default config path in it */
252
CLISTPTR *top_ctrys = NULL; /* Top countries table; allocated in main with calloc(ntop_ctrys, ...) */
254
/* Private read buffer. NOTE(review): the original comments name "our_getfs"; */
/* presumably this backs our_gzgets() - confirm against its definition.       */
#define GZ_BUFSIZE 16384 /* our_getfs buffer size */
255
char f_buf[GZ_BUFSIZE]; /* our_getfs buffer */
256
char *f_cp=f_buf+GZ_BUFSIZE; /* pointer into the buffer; starts one past the end of f_buf */
257
int f_end=0; /* count to end of buffer */
259
/* Default graph colors, stored as "#rrggbb" strings in writable arrays */
char hit_color[] = "#00805c"; /* graph hit color */
260
char file_color[] = "#0040ff"; /* graph file color */
261
char site_color[] = "#ff8000"; /* graph site color */
262
char kbyte_color[] = "#ff0000"; /* graph kbyte color */
263
char page_color[] = "#00e0ff"; /* graph page color */
264
char visit_color[] = "#ffff00"; /* graph visit color */
265
char misc_color[] = "#00e0ff"; /* graph misc color */
266
char pie_color1[] = "#800080"; /* pie additional color 1 */
267
char pie_color2[] = "#80ffc0"; /* pie additional color 2 */
268
char pie_color3[] = "#ff00ff"; /* pie additional color 3 */
269
char pie_color4[] = "#ffc080"; /* pie additional color 4 */
271
/*********************************************/
272
/* MAIN - start here */
273
/*********************************************/
275
int main(int argc, char *argv[])
277
int i; /* generic counter */
278
char *cp1, *cp2, *cp3; /* generic char pointers */
279
char host_buf[MAXHOST+1]; /* used to save hostname */
281
NLISTPTR lptr; /* generic list pointer */
283
extern char *optarg; /* used for command line */
284
extern int optind; /* parsing routine 'getopt' */
287
time_t start_time, end_time; /* program timers */
288
float temp_time; /* temporary time storage */
290
int rec_year,rec_month=1,rec_day,rec_hour,rec_min,rec_sec;
292
int good_rec =0; /* 1 if we had a good record */
293
u_int64_t total_rec =0; /* Total Records Processed */
294
u_int64_t total_ignore=0; /* Total Records Ignored */
295
u_int64_t total_bad =0; /* Total Bad Records */
297
int max_ctry; /* max countries defined */
299
/* month names used for parsing logfile (shouldn't be lang specific) */
300
char *log_month[12]={ "jan", "feb", "mar",
303
"oct", "nov", "dec"};
305
/* stat struct for files */
306
struct stat log_stat;
308
/* Assume that LC_CTYPE is what the user wants for non-ASCII chars */
309
setlocale(LC_CTYPE,"");
311
/* initialize epoch */
312
epoch=jdate(1,1,1970); /* used for timestamp adj. */
314
sprintf(tmp_buf,"%s/webalizer.conf",ETCDIR);
315
/* check for default config file */
316
if (!access("webalizer.conf",F_OK))
317
get_config("webalizer.conf");
318
else if (!access(tmp_buf,F_OK))
321
/* get command line options */
322
opterr = 0; /* disable parser errors */
323
while ((i=getopt(argc,argv,"a:A:bc:C:dD:e:E:fF:g:GhHiI:jJ:k:K:l:Lm:M:n:N:o:O:pP:qQr:R:s:S:t:Tu:U:vVwW:x:XYz:Z"))!=EOF)
327
case 'a': add_nlist(optarg,&hidden_agents); break; /* Hide agents */
328
case 'A': ntop_agents=atoi(optarg); break; /* Top agents */
329
case 'b': ignore_state=1; break; /* Ignore state file */
330
case 'c': get_config(optarg); break; /* Config file */
331
case 'C': ntop_ctrys=atoi(optarg); break; /* Top countries */
332
case 'd': debug_mode=1; break; /* Debug */
333
case 'D': dns_cache=optarg; break; /* DNS Cache filename */
334
case 'e': ntop_entry=atoi(optarg); break; /* Top entry pages */
335
case 'E': ntop_exit=atoi(optarg); break; /* Top exit pages */
336
case 'f': fold_seq_err=1; break; /* Fold sequence errs */
337
case 'F': log_type=(tolower(optarg[0])=='f')?
338
LOG_FTP:(tolower(optarg[0])=='s')?
339
LOG_SQUID:(tolower(optarg[0])=='w')?
340
LOG_W3C:LOG_CLF; break; /* define log type */
341
case 'g': group_domains=atoi(optarg); break; /* GroupDomains (0=no) */
342
case 'G': hourly_graph=0; break; /* no hourly graph */
343
case 'h': print_opts(argv[0]); break; /* help */
344
case 'H': hourly_stats=0; break; /* no hourly stats */
345
case 'i': ignore_hist=1; break; /* Ignore history */
346
case 'I': add_nlist(optarg,&index_alias); break; /* Index alias */
347
case 'j': geodb=1; break; /* Enable GeoDB */
348
case 'J': geodb_fname=optarg; break; /* GeoDB db filename */
349
case 'k': graph_mths=atoi(optarg); break; /* # months idx graph */
350
case 'K': index_mths=atoi(optarg); break; /* # months idx table */
351
case 'l': graph_lines=atoi(optarg); break; /* Graph Lines */
352
case 'L': graph_legend=0; break; /* Graph Legends */
353
case 'm': visit_timeout=atoi(optarg); break; /* Visit Timeout */
354
case 'M': mangle_agent=atoi(optarg); break; /* mangle user agents */
355
case 'n': hname=optarg; break; /* Hostname */
356
case 'N': dns_children=atoi(optarg); break; /* # of DNS children */
357
case 'o': out_dir=optarg; break; /* Output directory */
358
case 'O': add_nlist(optarg,&omit_page); break; /* pages not counted */
359
case 'p': incremental=1; break; /* Incremental run */
360
case 'P': add_nlist(optarg,&page_type); break; /* page view types */
361
case 'q': verbose=1; break; /* Quiet (verbose=1) */
362
case 'Q': verbose=0; break; /* Really Quiet */
363
case 'r': add_nlist(optarg,&hidden_refs); break; /* Hide referrer */
364
case 'R': ntop_refs=atoi(optarg); break; /* Top referrers */
365
case 's': add_nlist(optarg,&hidden_sites); break; /* Hide site */
366
case 'S': ntop_sites=atoi(optarg); break; /* Top sites */
367
case 't': msg_title=optarg; break; /* Report title */
368
case 'T': time_me=1; break; /* TimeMe */
369
case 'u': add_nlist(optarg,&hidden_urls); break; /* hide URL */
370
case 'U': ntop_urls=atoi(optarg); break; /* Top urls */
371
case 'v': verbose=2; debug_mode=1; break; /* Verbose */
372
case 'V': print_version(); break; /* Version */
374
case 'w': geoip=1; break; /* Enable GeoIP */
375
case 'W': geoip_db=optarg; break; /* GeoIP database name */
377
case 'x': html_ext=optarg; break; /* HTML file extension */
378
case 'X': hide_sites=1; break; /* Hide ind. sites */
379
case 'Y': ctry_graph=0; break; /* Supress ctry graph */
380
case 'Z': normalize=0; break; /* Dont normalize URLs */
381
case 'z': use_flags=1; flag_dir=optarg; break; /* Ctry flag dir */
385
if (argc - optind != 0) log_fname = argv[optind];
386
if ( log_fname && (log_fname[0]=='-')) log_fname=NULL; /* force STDIN? */
388
/* check for gzipped file - .gz */
389
if (log_fname) if (!strcmp((log_fname+strlen(log_fname)-3),".gz"))
393
/* check for bzip file - .bz2 */
394
if (log_fname) if (!strcmp((log_fname+strlen(log_fname)-4),".bz2"))
398
/* setup our internal variables */
399
init_counters(); /* initalize (zero) main counters */
400
memset(hist, 0, sizeof(hist)); /* initalize (zero) history array */
402
/* add default index. alias if needed */
403
if (default_index) add_nlist("index.",&index_alias);
405
if (page_type==NULL) /* check if page types present */
407
if ((log_type==LOG_CLF)||(log_type==LOG_SQUID)||(log_type==LOG_W3C))
409
add_nlist("htm*" ,&page_type); /* if no page types specified, we */
410
add_nlist("cgi" ,&page_type); /* use the default ones here... */
411
if (!isinlist(page_type,html_ext)) add_nlist(html_ext,&page_type);
413
else add_nlist("txt" ,&page_type); /* FTP logs default to .txt */
416
for (max_ctry=0;ctry[max_ctry].desc;max_ctry++);
417
if (ntop_ctrys > max_ctry) ntop_ctrys = max_ctry; /* force upper limit */
418
if (graph_lines> 20) graph_lines= 20; /* keep graphs sane! */
419
if (graph_mths<12) graph_mths=12;
420
if (graph_mths>GRAPHMAX) graph_mths=GRAPHMAX;
421
if (index_mths<12) index_mths=12;
422
if (index_mths>HISTSIZE) index_mths=HISTSIZE;
424
if (log_type == LOG_FTP)
426
/* disable stuff for ftp logs */
427
ntop_entry=ntop_exit=0;
432
if (search_list==NULL)
434
/* If no search engines defined, define some :) */
435
add_glist(".google. q=" ,&search_list);
436
add_glist("yahoo.com p=" ,&search_list);
437
add_glist("altavista.com q=" ,&search_list);
438
add_glist("aolsearch. query=" ,&search_list);
439
add_glist("ask.co q=" ,&search_list);
440
add_glist("eureka.com q=" ,&search_list);
441
add_glist("lycos.com query=" ,&search_list);
442
add_glist("hotbot.com MT=" ,&search_list);
443
add_glist("msn.com q=" ,&search_list);
444
add_glist("infoseek.com qt=" ,&search_list);
445
add_glist("webcrawler searchText=" ,&search_list);
446
add_glist("excite search=" ,&search_list);
447
add_glist("netscape.com query=" ,&search_list);
448
add_glist("mamma.com query=" ,&search_list);
449
add_glist("alltheweb.com q=" ,&search_list);
450
add_glist("northernlight.com qr=" ,&search_list);
454
/* ensure entry/exits don't exceed urls */
455
i=(ntop_urls>ntop_urlsK)?ntop_urls:ntop_urlsK;
456
if (ntop_entry>i) ntop_entry=i;
457
if (ntop_exit>i) ntop_exit=i;
459
for (i=0;i<MAXHASH;i++)
461
sm_htab[i]=sd_htab[i]=NULL; /* initalize hash tables */
468
/* Be polite and announce yourself... */
472
printf("Webalizer V%s-%s (%s %s %s) %s\n", version,editlvl,
473
system_info.sysname, system_info.release,
474
system_info.machine,language);
478
if (strstr(argv[0],"webazolver")!=0)
479
/* DNS support not present, aborting... */
480
{ printf("%s\n",msg_dns_abrt); exit(1); }
482
/* Force sane values for cache TTL */
483
if (cache_ttl<1) cache_ttl=1;
484
if (cache_ttl>100) cache_ttl=100;
491
if ( !(lstat(log_fname, &log_stat)) )
493
/* check if the file is a symlink */
494
if ( S_ISLNK(log_stat.st_mode) )
497
fprintf(stderr,"%s %s (symlink)\n",msg_log_err,log_fname);
504
/* open compressed file */
506
if (gz_log==COMP_BZIP)
507
zlog_fp = BZ2_bzopen(log_fname,"rb");
510
zlog_fp = gzopen(log_fname, "rb");
513
/* Error: Can't open log file ... */
514
fprintf(stderr, "%s %s (%d)\n",msg_log_err,log_fname,ENOENT);
520
/* open regular file */
521
log_fp = fopen(log_fname,"r");
524
/* Error: Can't open log file ... */
525
fprintf(stderr, "%s %s\n",msg_log_err,log_fname);
531
/* Using logfile ... */
534
printf("%s %s (",msg_log_use,log_fname?log_fname:"STDIN");
535
if (gz_log==COMP_GZIP) printf("gzip-");
537
if (gz_log==COMP_BZIP) printf("bzip-");
541
/* display log file type hint */
542
case LOG_CLF: printf("clf)\n"); break;
543
case LOG_FTP: printf("ftp)\n"); break;
544
case LOG_SQUID: printf("squid)\n"); break;
545
case LOG_W3C: printf("w3c)\n"); break;
549
/* switch directories if needed */
552
if (chdir(out_dir) != 0)
554
/* Error: Can't change directory to ... */
555
fprintf(stderr, "%s %s\n",msg_dir_err,out_dir);
561
if (strstr(argv[0],"webazolver")!=0)
563
if (!dns_children) dns_children=5; /* default dns children if needed */
566
/* No cache file specified, aborting... */
567
fprintf(stderr,"%s\n",msg_dns_nocf); /* Must have a cache file */
572
if (dns_cache && dns_children) /* run-time resolution */
574
if (dns_children > MAXCHILD) dns_children=MAXCHILD;
575
/* DNS Lookup (#children): */
576
if (verbose>1) printf("%s (%d): ",msg_dns_rslv,dns_children);
578
(gz_log)?dns_resolver(zlog_fp):dns_resolver(log_fp);
580
(gz_log==COMP_BZIP)?bz2_rewind(&zlog_fp, log_fname, "rb"):
582
(gz_log==COMP_GZIP)?gzrewind(zlog_fp):
583
(log_fname)?rewind(log_fp):exit(0);
586
if (strstr(argv[0],"webazolver")!=0) exit(0); /* webazolver exits here */
590
if (!open_cache()) { dns_cache=NULL; dns_db=NULL; }
593
/* Using DNS cache file <filename> */
594
if (verbose>1) printf("%s %s\n",msg_dns_usec,dns_cache);
601
geo_db=geodb_open(geodb_fname);
604
if (verbose) printf("%s: %s\n",msg_geo_open,
605
(geodb_fname)?geodb_fname:msg_geo_dflt);
606
if (verbose) printf("GeoDB %s\n",msg_geo_nolu);
609
else if (verbose>1) printf("%s %s\n",
610
msg_geo_use,geodb_ver(geo_db,buffer));
612
if (geoip) geoip=0; /* Disable GeoIP if using GeoDB */
618
/* open GeoIP database */
622
geo_fp=GeoIP_open(geoip_db, GEOIP_MEMORY_CACHE);
624
geo_fp=GeoIP_new(GEOIP_MEMORY_CACHE);
626
/* Did we open one? */
629
/* couldn't open.. warn user */
630
if (verbose) printf("GeoIP %s\n",msg_geo_nolu);
633
else if (verbose>1) printf("%s %s (%s)\n",msg_geo_use,
634
GeoIPDBDescription[(int)geo_fp->databaseType],
635
(geoip_db==NULL)?msg_geo_dflt:geo_fp->file_path);
637
#endif /* USE_GEOIP */
639
/* Creating output in ... */
641
printf("%s %s\n",msg_dir_use,out_dir?out_dir:msg_cur_dir);
646
if (uname(&system_info)) hname="localhost";
647
else hname=system_info.nodename;
650
/* Hostname for reports is ... */
651
if (strlen(hname)) if (verbose>1) printf("%s '%s'\n",msg_hostname,hname);
653
/* get past history */
654
if (ignore_hist) { if (verbose>1) printf("%s\n",msg_ign_hist); }
657
if (incremental) /* incremental processing? */
659
if ((i=restore_state())) /* restore internal data structs */
661
/* Error: Unable to restore run data (error num) */
662
/* if (verbose) fprintf(stderr,"%s (%d)\n",msg_bad_data,i); */
663
fprintf(stderr,"%s (%d)\n",msg_bad_data,i);
668
/* Allocate memory for our TOP countries array */
670
{ if ( (top_ctrys=calloc(ntop_ctrys,sizeof(CLISTPTR))) == NULL)
671
/* Can't get memory, Top Countries disabled! */
672
{if (verbose) fprintf(stderr,"%s\n",msg_nomem_tc); ntop_ctrys=0;}}
674
/* get processing start time */
675
start_time = time(NULL);
677
/*********************************************/
678
/* MAIN PROCESS LOOP - read through log file */
679
/*********************************************/
681
while ( (gz_log)?(our_gzgets(zlog_fp,buffer,BUFSIZE) != Z_NULL):
682
(fgets(buffer,BUFSIZE,log_fname?log_fp:stdin) != NULL))
685
if (strlen(buffer) == (BUFSIZE-1))
689
fprintf(stderr,"%s",msg_big_rec);
690
if (debug_mode) fprintf(stderr,":\n%s",buffer);
691
else fprintf(stderr,"\n");
694
total_bad++; /* bump bad record counter */
696
/* get the rest of the record */
697
while ( (gz_log)?(our_gzgets(zlog_fp,buffer,BUFSIZE)!=Z_NULL):
698
(fgets(buffer,BUFSIZE,log_fname?log_fp:stdin)!=NULL))
700
if (strlen(buffer) < BUFSIZE-1)
702
if (debug_mode && verbose) fprintf(stderr,"%s\n",buffer);
705
if (debug_mode && verbose) fprintf(stderr,"%s",buffer);
707
continue; /* go get next record if any */
710
/* got a record... */
711
strcpy(tmp_buf, buffer); /* save buffer in case of error */
712
if (parse_record(buffer)) /* parse the record */
714
/*********************************************/
715
/* PASSED MINIMAL CHECKS, DO A LITTLE MORE */
716
/*********************************************/
718
/* convert month name to lowercase */
720
log_rec.datetime[i]=tolower(log_rec.datetime[i]);
722
/* lowercase sitename/IPv6 addresses */
723
cp1=log_rec.hostname;
724
while (*cp1++!='\0') *cp1=tolower(*cp1);
726
/* get year/month/day/hour/min/sec values */
729
if (strncmp(log_month[i],&log_rec.datetime[4],3)==0)
730
{ rec_month = i+1; break; }
733
rec_year=atoi(&log_rec.datetime[8]); /* get year number (int) */
734
rec_day =atoi(&log_rec.datetime[1]); /* get day number */
735
rec_hour=atoi(&log_rec.datetime[13]); /* get hour number */
736
rec_min =atoi(&log_rec.datetime[16]); /* get minute number */
737
rec_sec =atoi(&log_rec.datetime[19]); /* get second number */
739
/* Kludge for Netscape server time (0-24?) error */
740
if (rec_hour>23) rec_hour=0;
742
/* minimal sanity check on date */
743
if ((i>=12)||(rec_min>59)||(rec_sec>60)||(rec_year<1990))
745
total_bad++; /* if a bad date, bump counter */
748
fprintf(stderr,"%s: %s [%llu]",
749
msg_bad_date,log_rec.datetime,total_rec);
750
if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf);
751
else fprintf(stderr,"\n");
753
continue; /* and ignore this record */
756
/*********************************************/
757
/* GOOD RECORD, CHECK INCREMENTAL/TIMESTAMPS */
758
/*********************************************/
760
/* Flag as a good one */
763
/* get current records timestamp (seconds since epoch) */
764
req_tstamp=cur_tstamp;
765
rec_tstamp=((jdate(rec_day,rec_month,rec_year)-epoch)*86400)+
766
(rec_hour*3600)+(rec_min*60)+rec_sec;
768
/* Do we need to check for duplicate records? (incremental mode) */
771
/* check if less than/equal to last record processed */
772
if ( rec_tstamp <= cur_tstamp )
774
/* if it is, assume we have already processed and ignore it */
780
/* if it isn't.. disable any more checks this run */
782
/* now check if it's a new month */
783
if ( (cur_month != rec_month) || (cur_year != rec_year) )
786
cur_sec = rec_sec; /* set current counters */
790
cur_month = rec_month;
792
cur_tstamp= rec_tstamp;
793
f_day=l_day=rec_day; /* reset first and last day */
798
/* check for out of sequence records */
799
if (rec_tstamp/3600 < cur_tstamp/3600)
801
if (!fold_seq_err && ((rec_tstamp+SLOP_VAL)/3600<cur_tstamp/3600) )
802
{ total_ignore++; continue; }
805
rec_sec = cur_sec; /* if folding sequence */
806
rec_min = cur_min; /* errors, just make it */
807
rec_hour = cur_hour; /* look like the last */
808
rec_day = cur_day; /* good records timestamp */
809
rec_month = cur_month;
811
rec_tstamp= cur_tstamp;
814
cur_tstamp=rec_tstamp; /* update current timestamp */
816
/*********************************************/
817
/* DO SOME PRE-PROCESS FORMATTING */
818
/*********************************************/
821
unescape(log_rec.url);
824
cp1 = cp2 = log_rec.url;
825
/* handle null '-' case here... */
826
if (*++cp1 == '-') strcpy(log_rec.url,"/INVALID-URL");
829
/* strip actual URL out of request */
830
while ( (*cp1 != ' ') && (*cp1 != '\0') ) cp1++;
833
/* scan to begin of actual URL field */
834
while ((*cp1 == ' ') && (*cp1 != '\0')) cp1++;
835
/* remove duplicate / if needed */
836
while (( *cp1=='/') && (*(cp1+1)=='/')) cp1++;
837
while (( *cp1!='\0')&&(*cp1!='"')) *cp2++=*cp1++;
842
/* strip query portion of cgi scripts */
845
if (!isurlchar(*cp1, stripcgi)) { *cp1 = '\0'; break; }
847
if (log_rec.url[0]=='\0')
848
{ log_rec.url[0]='/'; log_rec.url[1]='\0'; }
851
if (log_type==LOG_CLF && log_rec.resp_code!=RC_NOTFOUND && normalize)
853
if ( ((cp2=strstr(log_rec.url,"://"))!=NULL)&&(cp2<log_rec.url+6) )
856
/* see if a '/' is present after it */
857
if ( (cp2=strchr(cp1,(int)'/'))==NULL) cp1--;
859
/* Ok, now shift url string */
860
cp2=log_rec.url; while (*cp1!='\0') *cp2++=*cp1++; *cp2='\0';
862
/* extra sanity checks on URL string */
863
while ((cp2=strstr(log_rec.url,"/./")))
864
{ cp1=cp2+2; while (*cp1!='\0') *cp2++=*cp1++; *cp2='\0'; }
865
if (log_rec.url[0]!='/')
867
if ( log_rec.resp_code==RC_OK ||
868
log_rec.resp_code==RC_PARTIALCONTENT ||
869
log_rec.resp_code==RC_NOMOD)
872
fprintf(stderr,"Converted URL '%s' to '/'\n",log_rec.url);
879
fprintf(stderr,"Invalid URL: '%s'\n",log_rec.url);
880
strcpy(log_rec.url,"/INVALID-URL");
883
while ( log_rec.url[ (i=strlen(log_rec.url)-1) ] == '?' )
884
log_rec.url[i]='\0'; /* drop trailing ?s if any */
888
/* check for service (ie: http://) and lowercase if found */
889
if (((cp2=strstr(log_rec.url,"://"))!= NULL)&&(cp2<log_rec.url+6))
894
if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
900
/* strip off index.html (or any aliases) */
904
if ((cp1=strstr(log_rec.url,lptr->string))!=NULL)
908
if ( !stripcgi && (cp2=strchr(cp1,'?'))!=NULL )
909
{ while(*cp2) *cp1++=*cp2++; *cp1='\0'; }
917
/* unescape referrer */
918
unescape(log_rec.refer);
920
/* fix referrer field */
923
if ( (*cp2 != '\0') && (*cp2 == '"') )
925
while ( *cp1 != '\0' )
928
if (((unsigned char)*cp1<32&&(unsigned char)*cp1>0) ||
929
*cp1==127 || (unsigned char)*cp1=='<') *cp1=0;
935
/* get query portion of cgi referrals */
941
if (!isurlchar(*cp1, 1))
943
/* Save query portion in log.rec.srchstr */
944
strncpy(log_rec.srchstr,(char *)cp1,MAXSRCH);
950
/* handle null referrer */
951
if (log_rec.refer[0]=='\0')
952
{ log_rec.refer[0]='-'; log_rec.refer[1]='\0'; }
955
/* if HTTP request, lowercase http://sitename/ portion */
957
if ( (*cp1=='h') || (*cp1=='H'))
959
while ( (*cp1!='/') && (*cp1!='\0'))
961
if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
964
/* now do hostname */
965
if ( (*cp1=='/') && ( *(cp1+1)=='/')) {cp1++; cp1++;}
966
while ( (*cp1!='/') && (*cp1!='\0'))
968
if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
973
/* Do we need to mangle? */
974
if (mangle_agent) agent_mangle(log_rec.agent);
976
/* if necessary, shrink referrer to fit storage */
977
if (strlen(log_rec.refer)>=MAXREFH)
979
if (verbose) fprintf(stderr,"%s [%llu]\n",
980
msg_big_ref,total_rec);
981
log_rec.refer[MAXREFH-1]='\0';
984
/* if necessary, shrink URL to fit storage */
985
if (strlen(log_rec.url)>=MAXURLH)
987
if (verbose) fprintf(stderr,"%s [%llu]\n",
988
msg_big_req,total_rec);
989
log_rec.url[MAXURLH-1]='\0';
992
/* fix user agent field */
995
if ( (*cp2 != '\0') && ((*cp2 == '"')||(*cp2 == '(')) )
997
while (*cp1 != '\0') { cp3 = cp2; *cp2++ = *cp1++; }
1000
cp1 = log_rec.agent; /* CHANGE !!! */
1001
while (*cp1 != 0) /* get rid of more common _bad_ chars ;) */
1003
if ( ((unsigned char)*cp1 < 32) ||
1004
((unsigned char)*cp1==127) ||
1005
(*cp1=='<') || (*cp1=='>') )
1006
{ *cp1='\0'; break; }
1010
/* fix username if needed */
1011
if (log_rec.ident[0]==0)
1012
{ log_rec.ident[0]='-'; log_rec.ident[1]='\0'; }
1016
while ((unsigned char)*cp3>=32 && *cp3!='"') cp3++;
1019
/* unescape user name */
1020
unescape(log_rec.ident);
1022
/********************************************/
1023
/* PROCESS RECORD */
1024
/********************************************/
1026
/* first time through? */
1029
/* if yes, init our date vars */
1030
cur_month=rec_month; cur_year=rec_year;
1031
cur_day=rec_day; cur_hour=rec_hour;
1032
cur_min=rec_min; cur_sec=rec_sec;
1036
/* adjust last day processed if different */
1037
if (rec_day > l_day) l_day = rec_day;
1039
/* update min/sec stuff */
1040
if (cur_sec != rec_sec) cur_sec = rec_sec;
1041
if (cur_min != rec_min) cur_min = rec_min;
1043
/* check for hour change */
1044
if (cur_hour != rec_hour)
1046
/* if yes, init hourly stuff */
1047
if (ht_hit > mh_hit) mh_hit = ht_hit;
1049
cur_hour = rec_hour;
1052
/* check for day change */
1053
if (cur_day != rec_day)
1055
/* if yes, init daily stuff */
1056
tm_site[cur_day-1]=dt_site; dt_site=0;
1057
tm_visit[cur_day-1]=tot_visit(sd_htab);
1062
/* check for month change */
1063
if ( (cur_month != rec_month) || (cur_year != rec_year) )
1065
/* if yes, do monthly stuff */
1066
t_visit=tot_visit(sm_htab);
1067
month_update_exit(req_tstamp); /* process exit pages */
1069
write_month_html(); /* generate HTML for month */
1071
cur_month = rec_month; /* update our flags */
1072
cur_year = rec_year;
1073
f_day=l_day=rec_day;
1076
/* save hostname for later */
1077
strncpy(host_buf, log_rec.hostname, sizeof(log_rec.hostname));
1080
/* Resolve IP address if needed */
1083
struct addrinfo hints, *ares;
1084
memset(&hints, 0, sizeof(hints));
1085
hints.ai_family = AF_UNSPEC;
1086
hints.ai_socktype = SOCK_STREAM;
1087
hints.ai_flags = AI_NUMERICHOST;
1088
if (0 == getaddrinfo(log_rec.hostname, "0", &hints, &ares))
1091
resolve_dns(&log_rec);
1095
/* lowercase hostname and validity check */
1096
cp1 = log_rec.hostname; i=0;
1098
if ( (!isalnum((unsigned char)*cp1)) && (*cp1!=':') )
1099
strncpy(log_rec.hostname, "Invalid", 8);
1102
while (*cp1 != '\0') /* loop through string */
1104
if ( (*cp1>='A') && (*cp1<='Z') )
1105
{ *cp1++ += 'a'-'A'; continue; }
1106
if ( *cp1=='.' ) i++;
1107
if ( (isalnum((unsigned char)*cp1)) ||
1108
(*cp1=='.')||(*cp1=='-') ||
1109
(*cp1==':')||((*cp1=='_')&&(i==0)) ) cp1++;
1112
/* Invalid hostname found! */
1113
if (strcmp(log_rec.hostname, host_buf))
1114
strcpy(log_rec.hostname, host_buf);
1115
else strncpy(log_rec.hostname,"Invalid",8);
1119
if (*cp1 == '\0') /* did we make it to the end? */
1121
if (!isalnum((unsigned char)*(cp1-1)))
1122
strncpy(log_rec.hostname,"Invalid",8);
1126
/* Catch blank hostnames here */
1127
if (log_rec.hostname[0]=='\0')
1128
strncpy(log_rec.hostname,"Unknown",8);
1130
/* Ignore/Include check */
1131
if ( (isinlist(include_sites,log_rec.hostname)==NULL) &&
1132
(isinlist(include_urls,log_rec.url)==NULL) &&
1133
(isinlist(include_refs,log_rec.refer)==NULL) &&
1134
(isinlist(include_agents,log_rec.agent)==NULL) &&
1135
(isinlist(include_users,log_rec.ident)==NULL) )
1137
if (isinlist(ignored_sites,log_rec.hostname)!=NULL)
1138
{ total_ignore++; continue; }
1139
if (isinlist(ignored_urls,log_rec.url)!=NULL)
1140
{ total_ignore++; continue; }
1141
if (isinlist(ignored_agents,log_rec.agent)!=NULL)
1142
{ total_ignore++; continue; }
1143
if (isinlist(ignored_refs,log_rec.refer)!=NULL)
1144
{ total_ignore++; continue; }
1145
if (isinlist(ignored_users,log_rec.ident)!=NULL)
1146
{ total_ignore++; continue; }
1149
/* Bump response code totals */
1150
switch (log_rec.resp_code) {
1151
case RC_CONTINUE: i=IDX_CONTINUE; break;
1152
case RC_SWITCHPROTO: i=IDX_SWITCHPROTO; break;
1153
case RC_OK: i=IDX_OK; break;
1154
case RC_CREATED: i=IDX_CREATED; break;
1155
case RC_ACCEPTED: i=IDX_ACCEPTED; break;
1156
case RC_NONAUTHINFO: i=IDX_NONAUTHINFO; break;
1157
case RC_NOCONTENT: i=IDX_NOCONTENT; break;
1158
case RC_RESETCONTENT: i=IDX_RESETCONTENT; break;
1159
case RC_PARTIALCONTENT: i=IDX_PARTIALCONTENT; break;
1160
case RC_MULTIPLECHOICES: i=IDX_MULTIPLECHOICES; break;
1161
case RC_MOVEDPERM: i=IDX_MOVEDPERM; break;
1162
case RC_MOVEDTEMP: i=IDX_MOVEDTEMP; break;
1163
case RC_SEEOTHER: i=IDX_SEEOTHER; break;
1164
case RC_NOMOD: i=IDX_NOMOD; break;
1165
case RC_USEPROXY: i=IDX_USEPROXY; break;
1166
case RC_MOVEDTEMPORARILY: i=IDX_MOVEDTEMPORARILY; break;
1167
case RC_BAD: i=IDX_BAD; break;
1168
case RC_UNAUTH: i=IDX_UNAUTH; break;
1169
case RC_PAYMENTREQ: i=IDX_PAYMENTREQ; break;
1170
case RC_FORBIDDEN: i=IDX_FORBIDDEN; break;
1171
case RC_NOTFOUND: i=IDX_NOTFOUND; break;
1172
case RC_METHODNOTALLOWED: i=IDX_METHODNOTALLOWED; break;
1173
case RC_NOTACCEPTABLE: i=IDX_NOTACCEPTABLE; break;
1174
case RC_PROXYAUTHREQ: i=IDX_PROXYAUTHREQ; break;
1175
case RC_TIMEOUT: i=IDX_TIMEOUT; break;
1176
case RC_CONFLICT: i=IDX_CONFLICT; break;
1177
case RC_GONE: i=IDX_GONE; break;
1178
case RC_LENGTHREQ: i=IDX_LENGTHREQ; break;
1179
case RC_PREFAILED: i=IDX_PREFAILED; break;
1180
case RC_REQENTTOOLARGE: i=IDX_REQENTTOOLARGE; break;
1181
case RC_REQURITOOLARGE: i=IDX_REQURITOOLARGE; break;
1182
case RC_UNSUPMEDIATYPE: i=IDX_UNSUPMEDIATYPE; break;
1183
case RC_RNGNOTSATISFIABLE:i=IDX_RNGNOTSATISFIABLE;break;
1184
case RC_EXPECTATIONFAILED:i=IDX_EXPECTATIONFAILED;break;
1185
case RC_SERVERERR: i=IDX_SERVERERR; break;
1186
case RC_NOTIMPLEMENTED: i=IDX_NOTIMPLEMENTED; break;
1187
case RC_BADGATEWAY: i=IDX_BADGATEWAY; break;
1188
case RC_UNAVAIL: i=IDX_UNAVAIL; break;
1189
case RC_GATEWAYTIMEOUT: i=IDX_GATEWAYTIMEOUT; break;
1190
case RC_BADHTTPVER: i=IDX_BADHTTPVER; break;
1191
default: i=IDX_UNDEFINED; break;
1193
response[i].count++;
1195
/* now save in the various hash tables... */
1196
if (log_rec.resp_code==RC_OK || log_rec.resp_code==RC_PARTIALCONTENT)
1199
/* URL/ident hash table (only if valid response code) */
1200
if ((log_rec.resp_code==RC_OK)||(log_rec.resp_code==RC_NOMOD)||
1201
(log_rec.resp_code==RC_PARTIALCONTENT))
1203
/* URL hash table */
1204
if (put_unode(log_rec.url,OBJ_REG,(u_int64_t)1,
1205
log_rec.xfer_size,&t_url,(u_int64_t)0,(u_int64_t)0,um_htab))
1208
/* Error adding URL node, skipping ... */
1209
fprintf(stderr,"%s %s\n", msg_nomem_u, log_rec.url);
1212
/* ident (username) hash table */
1213
if (put_inode(log_rec.ident,OBJ_REG,
1214
1,(u_int64_t)i,log_rec.xfer_size,&t_user,
1215
0,rec_tstamp,im_htab))
1218
/* Error adding ident node, skipping .... */
1219
fprintf(stderr,"%s %s\n", msg_nomem_i, log_rec.ident);
1223
/* referrer hash table */
1226
if (log_rec.refer[0]!='\0')
1227
if (put_rnode(log_rec.refer,OBJ_REG,(u_int64_t)1,&t_ref,rm_htab))
1230
fprintf(stderr,"%s %s\n", msg_nomem_r, log_rec.refer);
1234
/* hostname (site) hash table - daily */
1235
if (put_hnode(log_rec.hostname,OBJ_REG,
1236
1,(u_int64_t)i,log_rec.xfer_size,&dt_site,
1237
0,rec_tstamp,"",sd_htab))
1240
/* Error adding host node (daily), skipping .... */
1241
fprintf(stderr,"%s %s\n",msg_nomem_dh, log_rec.hostname);
1244
/* hostname (site) hash table - monthly */
1245
if (put_hnode(log_rec.hostname,OBJ_REG,
1246
1,(u_int64_t)i,log_rec.xfer_size,&t_site,
1247
0,rec_tstamp,"",sm_htab))
1250
/* Error adding host node (monthly), skipping .... */
1251
fprintf(stderr,"%s %s\n", msg_nomem_mh, log_rec.hostname);
1254
/* user agent hash table */
1257
if (log_rec.agent[0]!='\0')
1258
if (put_anode(log_rec.agent,OBJ_REG,(u_int64_t)1,&t_agent,am_htab))
1261
fprintf(stderr,"%s %s\n", msg_nomem_a, log_rec.agent);
1265
/* bump monthly/daily/hourly totals */
1266
t_hit++; ht_hit++; /* daily/hourly hits */
1267
t_xfer += log_rec.xfer_size; /* total xfer size */
1268
tm_xfer[rec_day-1] += log_rec.xfer_size; /* daily xfer total */
1269
tm_hit[rec_day-1]++; /* daily hits total */
1270
th_xfer[rec_hour] += log_rec.xfer_size; /* hourly xfer total */
1271
th_hit[rec_hour]++; /* hourly hits total */
1273
/* if RC_OK, increase file counters */
1274
if (log_rec.resp_code == RC_OK)
1277
tm_file[rec_day-1]++;
1278
th_file[rec_hour]++;
1281
/* Pages (pageview) calculation */
1282
if (ispage(log_rec.url))
1285
tm_page[rec_day-1]++;
1286
th_page[rec_hour]++;
1288
/* do search string stuff if needed */
1289
if (ntop_search) srch_string(log_rec.srchstr);
1292
/*********************************************/
1293
/* RECORD PROCESSED - DO GROUPS HERE */
1294
/*********************************************/
1297
if ( (cp1=isinglist(group_urls,log_rec.url))!=NULL)
1299
if (put_unode(cp1,OBJ_GRP,(u_int64_t)1,log_rec.xfer_size,
1300
&ul_bogus,(u_int64_t)0,(u_int64_t)0,um_htab))
1303
/* Error adding URL node, skipping ... */
1304
fprintf(stderr,"%s %s\n", msg_nomem_u, cp1);
1309
if ( (cp1=isinglist(group_sites,log_rec.hostname))!=NULL)
1311
if (put_hnode(cp1,OBJ_GRP,1,
1312
(u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1313
log_rec.xfer_size,&ul_bogus,
1314
0,rec_tstamp,"",sm_htab))
1317
/* Error adding Site node, skipping ... */
1318
fprintf(stderr,"%s %s\n", msg_nomem_mh, cp1);
1323
/* Domain Grouping */
1326
cp1 = get_domain(log_rec.hostname);
1329
if (put_hnode(cp1,OBJ_GRP,1,
1330
(u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1331
log_rec.xfer_size,&ul_bogus,
1332
0,rec_tstamp,"",sm_htab))
1335
/* Error adding Site node, skipping ... */
1336
fprintf(stderr,"%s %s\n", msg_nomem_mh, cp1);
1342
/* Referrer Grouping */
1343
if ( (cp1=isinglist(group_refs,log_rec.refer))!=NULL)
1345
if (put_rnode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,rm_htab))
1348
/* Error adding Referrer node, skipping ... */
1349
fprintf(stderr,"%s %s\n", msg_nomem_r, cp1);
1353
/* User Agent Grouping */
1354
if ( (cp1=isinglist(group_agents,log_rec.agent))!=NULL)
1356
if (put_anode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,am_htab))
1359
/* Error adding User Agent node, skipping ... */
1360
fprintf(stderr,"%s %s\n", msg_nomem_a, cp1);
1364
/* Ident (username) Grouping */
1365
if ( (cp1=isinglist(group_users,log_rec.ident))!=NULL)
1367
if (put_inode(cp1,OBJ_GRP,1,
1368
(u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1369
log_rec.xfer_size,&ul_bogus,
1370
0,rec_tstamp,im_htab))
1373
/* Error adding Username node, skipping ... */
1374
fprintf(stderr,"%s %s\n", msg_nomem_i, cp1);
1379
/*********************************************/
1381
/*********************************************/
1385
/* If first record, check if stupid Netscape header stuff */
1386
if ( (total_rec==1) && (strncmp(buffer,"format=",7)==0) )
1388
/* Skipping Netscape header record */
1389
if (verbose>1) printf("%s\n",msg_ign_nscp);
1390
/* count it as ignored... */
1395
/* Check if it's a W3C header or IIS Null-Character line */
1396
if ((buffer[0]=='\0') || (buffer[0]=='#'))
1402
/* really bad record... */
1406
fprintf(stderr,"%s (%llu)",msg_bad_rec,total_rec);
1407
if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf);
1408
else fprintf(stderr,"\n");
1415
/*********************************************/
1416
/* DONE READING LOG FILE - final processing */
1417
/*********************************************/
1419
/* close log file if needed */
1421
if (gz_log) (gz_log==COMP_BZIP)?BZ2_bzclose(zlog_fp):gzclose(zlog_fp);
1423
if (gz_log) gzclose(zlog_fp);
1425
else if (log_fname) fclose(log_fp);
1427
if (good_rec) /* were any good records? */
1429
tm_site[cur_day-1]=dt_site; /* If yes, clean up a bit */
1430
tm_visit[cur_day-1]=tot_visit(sd_htab);
1431
t_visit=tot_visit(sm_htab);
1432
if (ht_hit > mh_hit) mh_hit = ht_hit;
1434
if (total_rec > (total_ignore+total_bad)) /* did we process any? */
1438
if (save_state()) /* incremental stuff */
1440
/* Error: Unable to save current run data */
1441
if (verbose) fprintf(stderr,"%s\n",msg_data_err);
1442
unlink(state_fname);
1445
month_update_exit(rec_tstamp); /* calculate exit pages */
1447
write_month_html(); /* write monthly HTML file */
1448
put_history(); /* write history */
1450
if (hist[0].month!=0) write_main_index(); /* write main HTML file */
1452
/* get processing end time */
1453
end_time = time(NULL);
1455
/* display end of processing statistics */
1456
if (time_me || (verbose>1))
1458
printf("%llu %s ",total_rec, msg_records);
1461
printf("(%llu %s",total_ignore,msg_ignored);
1462
if (total_bad) printf(", %llu %s) ",total_bad,msg_bad);
1465
else if (total_bad) printf("(%llu %s) ",total_bad,msg_bad);
1467
/* total processing time in seconds */
1468
temp_time = difftime(end_time, start_time);
1469
if (temp_time==0) temp_time=1;
1470
printf("%s %.0f %s", msg_in, temp_time, msg_seconds);
1472
/* calculate records per second */
1474
i=( (int)( (float)total_rec/temp_time ) );
1477
if ( (i>0) && (i<=total_rec) ) printf(", %d/sec\n", i);
1482
/* Close DNS cache file */
1483
if (dns_db) close_cache();
1484
/* Close GeoDB database */
1485
if (geo_db) geodb_close(geo_db);
1489
/* Close GeoIP database */
1490
if (geo_fp) GeoIP_delete(geo_fp);
1493
/* Whew, all done! Exit with completion status (0) */
1498
/* No valid records found... exit with error (1) */
1499
if (verbose) printf("%s\n",msg_no_vrec);
1500
if (hist[0].month!=0) write_main_index(); /* write main HTML file */
1505
/*********************************************/
1506
/* GET_CONFIG - get configuration file info */
1507
/*********************************************/
1509
/*
 * get_config - read a configuration file and apply its settings.
 *
 * fname: path of the file, opened for reading with fopen().
 *
 * Each line is read with fgets() into a BUFSIZE buffer.  Lines whose first
 * character is '#' or whitespace are skipped.  The leading alphanumeric run
 * is the keyword, the remainder of the line is the value.  The keyword is
 * looked up in kwords[] with ouricmp(); the index found becomes `key`, and
 * the switch stores the value into the matching global setting.
 *
 * NOTE(review): this copy of the file is damaged.  Bare original line
 * numbers sit between the statements, and several lines are absent (braces,
 * the local declarations, a number of `case N: var=` labels and the #ifdef
 * lines that select between the enabled/disabled keyword handlers).  The
 * code below is left exactly as found; restore it from a clean copy before
 * building.
 */
void get_config(char *fname)
1511
/* Keyword table.  A keyword's position in this array is the `key` value */
/* dispatched on below, so entries and case labels must stay in step.    */
char *kwords[]= { "Undefined", /* 0 = undefined keyword 0 */
1512
"OutputDir", /* Output directory 1 */
1513
"LogFile", /* Log file to use for input 2 */
1514
"ReportTitle", /* Title for reports 3 */
1515
"HostName", /* Hostname to use 4 */
1516
"IgnoreHist", /* Ignore history file 5 */
1517
"Quiet", /* Run in quiet mode 6 */
1518
"TimeMe", /* Produce timing results 7 */
1519
"Debug", /* Produce debug information 8 */
1520
"HourlyGraph", /* Hourly stats graph 9 */
1521
"HourlyStats", /* Hourly stats table 10 */
1522
"TopSites", /* Top sites 11 */
1523
"TopURLs", /* Top URLs 12 */
1524
"TopReferrers", /* Top Referrers 13 */
1525
"TopAgents", /* Top User Agents 14 */
1526
"TopCountries", /* Top Countries 15 */
1527
"HideSite", /* Sites to hide 16 */
1528
"HideURL", /* URLs to hide 17 */
1529
"HideReferrer", /* Referrers to hide 18 */
1530
"HideAgent", /* User Agents to hide 19 */
1531
"IndexAlias", /* Aliases for index.html 20 */
1532
"HTMLHead", /* HTML Top1 code 21 */
1533
"HTMLPost", /* HTML Top2 code 22 */
1534
"HTMLTail", /* HTML Tail code 23 */
1535
"MangleAgents", /* Mangle User Agents 24 */
1536
"IgnoreSite", /* Sites to ignore 25 */
1537
"IgnoreURL", /* Url's to ignore 26 */
1538
"IgnoreReferrer", /* Referrers to ignore 27 */
1539
"IgnoreAgent", /* User Agents to ignore 28 */
1540
"ReallyQuiet", /* Dont display ANY messages 29 */
1541
"GMTTime", /* Local or UTC time? 30 */
1542
"GroupURL", /* Group URLs 31 */
1543
"GroupSite", /* Group Sites 32 */
1544
"GroupReferrer", /* Group Referrers 33 */
1545
"GroupAgent", /* Group Agents 34 */
1546
"GroupShading", /* Shade Grouped entries 35 */
1547
"GroupHighlight", /* BOLD Grouped entries 36 */
1548
"Incremental", /* Incremental runs 37 */
1549
"IncrementalName", /* Filename for state data 38 */
1550
"HistoryName", /* Filename for history data 39 */
1551
"HTMLExtension", /* HTML filename extension 40 */
1552
"HTMLPre", /* HTML code at beginning 41 */
1553
"HTMLBody", /* HTML body code 42 */
1554
"HTMLEnd", /* HTML code at end 43 */
1555
"UseHTTPS", /* Use https:// on URLs 44 */
1556
"IncludeSite", /* Sites to always include 45 */
1557
"IncludeURL", /* URLs to always include 46 */
1558
"IncludeReferrer", /* Referrers to include 47 */
1559
"IncludeAgent", /* User Agents to include 48 */
1560
"PageType", /* Page Type (pageview) 49 */
1561
"VisitTimeout", /* Visit timeout (seconds) 50 */
1562
"GraphLegend", /* Graph Legends (yes/no) 51 */
1563
"GraphLines", /* Graph Lines (0=none) 52 */
1564
"FoldSeqErr", /* Fold sequence errors 53 */
1565
"CountryGraph", /* Display ctry graph (0=no) 54 */
1566
"TopKSites", /* Top sites (by KBytes) 55 */
1567
"TopKURLs", /* Top URLs (by KBytes) 56 */
1568
"TopEntry", /* Top Entry Pages 57 */
1569
"TopExit", /* Top Exit Pages 58 */
1570
"TopSearch", /* Top Search Strings 59 */
1571
"LogType", /* Log Type (clf/ftp/squid) 60 */
1572
"SearchEngine", /* SearchEngine strings 61 */
1573
"GroupDomains", /* Group domains (n=level) 62 */
1574
"HideAllSites", /* Hide ind. sites (0=no) 63 */
1575
"AllSites", /* List all sites? 64 */
1576
"AllURLs", /* List all URLs? 65 */
1577
"AllReferrers", /* List all Referrers? 66 */
1578
"AllAgents", /* List all User Agents? 67 */
1579
"AllSearchStr", /* List all Search Strings? 68 */
1580
"AllUsers", /* List all Users? 69 */
1581
"TopUsers", /* Top Usernames to show 70 */
1582
"HideUser", /* Usernames to hide 71 */
1583
"IgnoreUser", /* Usernames to ignore 72 */
1584
"IncludeUser", /* Usernames to include 73 */
1585
"GroupUser", /* Usernames to group 74 */
1586
"DumpPath", /* Path for dump files 75 */
1587
"DumpExtension", /* Dump filename extension 76 */
1588
"DumpHeader", /* Dump header as first rec? 77 */
1589
"DumpSites", /* Dump sites tab file 78 */
1590
"DumpURLs", /* Dump urls tab file 79 */
1591
"DumpReferrers", /* Dump referrers tab file 80 */
1592
"DumpAgents", /* Dump user agents tab file 81 */
1593
"DumpUsers", /* Dump usernames tab file 82 */
1594
"DumpSearchStr", /* Dump search str tab file 83 */
1595
"DNSCache", /* DNS Cache file name 84 */
1596
"DNSChildren", /* DNS Children (0=no DNS) 85 */
1597
"DailyGraph", /* Daily Graph (0=no) 86 */
1598
"DailyStats", /* Daily Stats (0=no) 87 */
1599
"LinkReferrer", /* Link referrer (0=no) 88 */
1600
"PagePrefix", /* PagePrefix - treat as page 89 */
1601
"ColorHit", /* Hit Color (def=00805c) 90 */
1602
"ColorFile", /* File Color (def=0040ff) 91 */
1603
"ColorSite", /* Site Color (def=ff8000) 92 */
1604
"ColorKbyte", /* Kbyte Color (def=ff0000) 93 */
1605
"ColorPage", /* Page Color (def=00e0ff) 94 */
1606
"ColorVisit", /* Visit Color (def=ffff00) 95 */
1607
"ColorMisc", /* Misc Color (def=00e0ff) 96 */
1608
"PieColor1", /* Pie Color 1 (def=800080) 97 */
1609
"PieColor2", /* Pie Color 2 (def=80ffc0) 98 */
1610
"PieColor3", /* Pie Color 3 (def=ff00ff) 99 */
1611
"PieColor4", /* Pie Color 4 (def=ffc080) 100 */
1612
"CacheIPs", /* Cache IPs in DNS DB (0=no) 101 */
1613
"CacheTTL", /* DNS Cache entry TTL (days) 102 */
1614
"GeoDB", /* GeoDB lookups (0=no) 103 */
1615
"GeoDBDatabase", /* GeoDB database filename 104 */
1616
"StripCGI", /* Strip CGI in URLS (0=no) 105 */
1617
"TrimSquidURL", /* Trim squid URLs (0=none) 106 */
1618
"OmitPage", /* URLs not counted as pages 107 */
1619
"HTAccess", /* Write .htaccess files? 108 */
1620
"IgnoreState", /* Ignore state file (0=no) 109 */
1621
"DefaultIndex", /* Default index.* (1=yes) 110 */
1622
"GeoIP", /* Use GeoIP? (1=yes) 111 */
1623
"GeoIPDatabase", /* Database to use for GeoIP 112 */
1624
"NormalizeURL", /* Normalize CLF URLs (1=yes) 113 */
1625
"IndexMonths", /* # months for main page 114 */
1626
"GraphMonths", /* # months for yearly graph 115 */
1627
"YearHeaders", /* use year headers? (1=yes) 116 */
1628
"YearTotals", /* show year subtotals (0=no) 117 */
1629
"CountryFlags", /* show country flags? (0-no) 118 */
1630
"FlagDir", /* directory w/flag images 119 */
1631
"SearchCaseI" /* srch str case insensitive 120 */
1636
char buffer[BUFSIZE];
1637
char keyword[MAXKWORD];
1638
char value[MAXKVAL];
1641
/* number of entries in kwords[] */
int num_kwords=sizeof(kwords)/sizeof(char *);
1643
/* open the file; on failure report msg_bad_conf with the file name */
if ( (fp=fopen(fname,"r")) == NULL)
1646
fprintf(stderr,"%s %s\n",msg_bad_conf,fname);
1650
/* process the file one line at a time */
while ( (fgets(buffer,BUFSIZE,fp)) != NULL)
1652
/* skip comments and blank lines */
1653
if ( (buffer[0]=='#') || isspace((unsigned char)buffer[0]) ) continue;
1656
/* keyword = leading alphanumeric run, at most MAXKWORD-1 characters */
cp1=buffer;cp2=keyword;count=MAXKWORD-1;
1657
while ( (isalnum((unsigned char)*cp1)) && count )
1658
{ *cp2++ = *cp1++; count--; }
1662
/* value = text after the blanks up to end of line, at most MAXKVAL-1 characters */
cp2=value; count=MAXKVAL-1;
1663
while ((*cp1!='\n')&&(*cp1!='\0')&&(isspace((unsigned char)*cp1))) cp1++;
1664
while ((*cp1!='\n')&&(*cp1!='\0')&&count ) { *cp2++ = *cp1++; count--; }
1666
/* strip trailing whitespace from the value, walking backwards */
/* NOTE(review): the statements that terminate keyword/value are not visible here */
while ((isspace((unsigned char)*cp2)) && (cp2 != value) ) *cp2--='\0';
1668
/* check if blank keyword/value */
1669
if ( (keyword[0]=='\0') || (value[0]=='\0') ) continue;
1672
/* look the keyword up; its index in kwords[] becomes key */
for (i=0;i<num_kwords;i++)
1673
if (!ouricmp(keyword,kwords[i])) { key=i; break; }
1675
/* key 0 is the "Undefined" slot: report the keyword as invalid */
if (key==0) { printf("%s '%s' (%s)\n", /* Invalid keyword */
1676
msg_bad_key,keyword,fname);
1682
/* Dispatch on key.  Yes/no options look only at the first character of */
/* the value; numeric options use atoi(); list options append to a list. */
/* NOTE(review): some continuation lines below have lost their          */
/* `case N: variable=` line (e.g. Quiet, TimeMe, Debug).                 */
case 1: out_dir=save_opt(value); break; /* OutputDir */
1683
case 2: log_fname=save_opt(value); break; /* LogFile */
1684
case 3: msg_title=save_opt(value); break; /* ReportTitle */
1685
case 4: hname=save_opt(value); break; /* HostName */
1686
case 5: ignore_hist=
1687
(tolower(value[0])=='y')?1:0; break; /* IgnoreHist */
1689
(tolower(value[0])=='y')?1:2; break; /* Quiet */
1691
(tolower(value[0])=='n')?0:1; break; /* TimeMe */
1693
(tolower(value[0])=='y')?1:0; break; /* Debug */
1694
case 9: hourly_graph=
1695
(tolower(value[0])=='n')?0:1; break; /* HourlyGraph */
1696
case 10: hourly_stats=
1697
(tolower(value[0])=='n')?0:1; break; /* HourlyStats */
1698
case 11: ntop_sites = atoi(value); break; /* TopSites */
1699
case 12: ntop_urls = atoi(value); break; /* TopURLs */
1700
case 13: ntop_refs = atoi(value); break; /* TopRefs */
1701
case 14: ntop_agents = atoi(value); break; /* TopAgents */
1702
case 15: ntop_ctrys = atoi(value); break; /* TopCountries */
1703
case 16: add_nlist(value,&hidden_sites); break; /* HideSite */
1704
case 17: add_nlist(value,&hidden_urls); break; /* HideURL */
1705
case 18: add_nlist(value,&hidden_refs); break; /* HideReferrer */
1706
case 19: add_nlist(value,&hidden_agents); break; /* HideAgent */
1707
case 20: add_nlist(value,&index_alias); break; /* IndexAlias */
1708
case 21: add_nlist(value,&html_head); break; /* HTMLHead */
1709
case 22: add_nlist(value,&html_post); break; /* HTMLPost */
1710
case 23: add_nlist(value,&html_tail); break; /* HTMLTail */
1711
case 24: mangle_agent=atoi(value); break; /* MangleAgents */
1712
case 25: add_nlist(value,&ignored_sites); break; /* IgnoreSite */
1713
case 26: add_nlist(value,&ignored_urls); break; /* IgnoreURL */
1714
case 27: add_nlist(value,&ignored_refs); break; /* IgnoreReferrer */
1715
case 28: add_nlist(value,&ignored_agents); break; /* IgnoreAgent */
1716
case 29: if (tolower(value[0])=='y')
1717
verbose=0; break; /* ReallyQuiet */
1718
case 30: local_time=
1719
(tolower(value[0])=='y')?0:1; break; /* GMTTime */
1720
case 31: add_glist(value,&group_urls); break; /* GroupURL */
1721
case 32: add_glist(value,&group_sites); break; /* GroupSite */
1722
case 33: add_glist(value,&group_refs); break; /* GroupReferrer */
1723
case 34: add_glist(value,&group_agents); break; /* GroupAgent */
1724
case 35: shade_groups=
1725
(tolower(value[0])=='n')?0:1; break; /* GroupShading */
1726
case 36: hlite_groups=
1727
(tolower(value[0])=='n')?0:1; break; /* GroupHighlight */
1728
case 37: incremental=
1729
(tolower(value[0])=='y')?1:0; break; /* Incremental */
1730
case 38: state_fname=save_opt(value); break; /* State FName */
1731
case 39: hist_fname=save_opt(value); break; /* History FName */
1732
case 40: html_ext=save_opt(value); break; /* HTML extension */
1733
case 41: add_nlist(value,&html_pre); break; /* HTML Pre code */
1734
case 42: add_nlist(value,&html_body); break; /* HTML Body code */
1735
case 43: add_nlist(value,&html_end); break; /* HTML End code */
1737
(tolower(value[0])=='y')?1:0; break; /* Use https:// */
1738
case 45: add_nlist(value,&include_sites); break; /* IncludeSite */
1739
case 46: add_nlist(value,&include_urls); break; /* IncludeURL */
1740
case 47: add_nlist(value,&include_refs); break; /* IncludeReferrer*/
1741
case 48: add_nlist(value,&include_agents); break; /* IncludeAgent */
1742
case 49: add_nlist(value,&page_type); break; /* PageType */
1743
case 50: visit_timeout=atoi(value); break; /* VisitTimeout */
1744
case 51: graph_legend=
1745
(tolower(value[0])=='n')?0:1; break; /* GraphLegend */
1746
case 52: graph_lines = atoi(value); break; /* GraphLines */
1747
case 53: fold_seq_err=
1748
(tolower(value[0])=='y')?1:0; break; /* FoldSeqErr */
1749
case 54: ctry_graph=
1750
(tolower(value[0])=='n')?0:1; break; /* CountryGraph */
1751
case 55: ntop_sitesK = atoi(value); break; /* TopKSites (KB) */
1752
case 56: ntop_urlsK = atoi(value); break; /* TopKUrls (KB) */
1753
case 57: ntop_entry = atoi(value); break; /* Top Entry pgs */
1754
case 58: ntop_exit = atoi(value); break; /* Top Exit pages */
1755
case 59: ntop_search = atoi(value); break; /* Top Search pgs */
1756
/* LogType: first letter selects f=FTP, s=Squid, w=W3C, anything else CLF */
case 60: log_type=(tolower(value[0])=='f')?
1757
LOG_FTP:((tolower(value[0])=='s')?
1758
LOG_SQUID:((tolower(value[0])=='w')?
1759
LOG_W3C:LOG_CLF)); break; /* LogType */
1760
case 61: add_glist(value,&search_list); break; /* SearchEngine */
1761
case 62: group_domains=atoi(value); break; /* GroupDomains */
1762
case 63: hide_sites=
1763
(tolower(value[0])=='y')?1:0; break; /* HideAllSites */
1765
(tolower(value[0])=='y')?1:0; break; /* All Sites? */
1767
(tolower(value[0])=='y')?1:0; break; /* All URLs? */
1769
(tolower(value[0])=='y')?1:0; break; /* All Refs */
1770
case 67: all_agents=
1771
(tolower(value[0])=='y')?1:0; break; /* All Agents? */
1772
case 68: all_search=
1773
(tolower(value[0])=='y')?1:0; break; /* All Srch str */
1775
(tolower(value[0])=='y')?1:0; break; /* All Users? */
1776
case 70: ntop_users=atoi(value); break; /* TopUsers */
1777
case 71: add_nlist(value,&hidden_users); break; /* HideUser */
1778
case 72: add_nlist(value,&ignored_users); break; /* IgnoreUser */
1779
case 73: add_nlist(value,&include_users); break; /* IncludeUser */
1780
case 74: add_glist(value,&group_users); break; /* GroupUser */
1781
case 75: dump_path=save_opt(value); break; /* DumpPath */
1782
case 76: dump_ext=save_opt(value); break; /* Dumpfile ext */
1783
case 77: dump_header=
1784
(tolower(value[0])=='y')?1:0; break; /* DumpHeader? */
1785
case 78: dump_sites=
1786
(tolower(value[0])=='y')?1:0; break; /* DumpSites? */
1788
(tolower(value[0])=='y')?1:0; break; /* DumpURLs? */
1790
(tolower(value[0])=='y')?1:0; break; /* DumpReferrers? */
1791
case 81: dump_agents=
1792
(tolower(value[0])=='y')?1:0; break; /* DumpAgents? */
1793
case 82: dump_users=
1794
(tolower(value[0])=='y')?1:0; break; /* DumpUsers? */
1795
case 83: dump_search=
1796
(tolower(value[0])=='y')?1:0; break; /* DumpSrchStrs? */
1798
/* NOTE(review): cases 84/85 appear twice - an enabled and a disabled   */
/* variant; the preprocessor lines choosing between them are missing.  */
case 84: dns_cache=save_opt(value); break; /* DNSCache fname */
1799
case 85: dns_children=atoi(value); break; /* DNSChildren */
1801
case 84: /* Disable DNSCache and DNSChildren if DNS is not enabled */
1802
case 85: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1803
#endif /* USE_DNS */
1804
case 86: daily_graph=
1805
(tolower(value[0])=='n')?0:1; break; /* DailyGraph */
1806
case 87: daily_stats=
1807
(tolower(value[0])=='n')?0:1; break; /* DailyStats */
1808
case 88: link_referrer=
1809
(tolower(value[0])=='y')?1:0; break; /* LinkReferrer */
1810
case 89: add_nlist(value,&page_prefix); break; /* PagePrefix */
1811
/* Colour options: copy at most 6 characters of the value into the      */
/* colour string starting at offset 1 (presumably keeping a leading '#' */
/* already stored at offset 0 - confirm against the defaults).          */
case 90: strncpy(hit_color+1, value, 6); break; /* ColorHit */
1812
case 91: strncpy(file_color+1, value, 6); break; /* ColorFile */
1813
case 92: strncpy(site_color+1, value, 6); break; /* ColorSite */
1814
case 93: strncpy(kbyte_color+1,value, 6); break; /* ColorKbyte */
1815
case 94: strncpy(page_color+1, value, 6); break; /* ColorPage */
1816
case 95: strncpy(visit_color+1,value, 6); break; /* ColorVisit */
1817
case 96: strncpy(misc_color+1, value, 6); break; /* ColorMisc */
1818
case 97: strncpy(pie_color1+1, value, 6); break; /* PieColor1 */
1819
case 98: strncpy(pie_color2+1, value, 6); break; /* PieColor2 */
1820
case 99: strncpy(pie_color3+1, value, 6); break; /* PieColor3 */
1821
case 100:strncpy(pie_color4+1, value, 6); break; /* PieColor4 */
1823
case 101: cache_ips=
1824
(tolower(value[0])=='y')?1:0; break; /* CacheIPs */
1825
case 102: cache_ttl=atoi(value); break; /* CacheTTL days */
1827
(tolower(value[0])=='y')?1:0; break; /* GeoDB */
1828
case 104: geodb_fname=save_opt(value); break; /* GeoDBDatabase */
1830
case 101: /* Disable CacheIPs/CacheTTL/GeoDB/GeoDBDatabase if none */
1833
case 104: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1834
#endif /* USE_DNS */
1836
(tolower(value[0])=='n')?0:1; break; /* StripCGI */
1837
case 106: trimsquid=atoi(value); break; /* TrimSquidURL */
1838
case 107: add_nlist(value,&omit_page); break; /* OmitPage */
1840
(tolower(value[0])=='y')?1:0; break; /* HTAccess */
1841
case 109: ignore_state=
1842
(tolower(value[0])=='y')?1:0; break; /* IgnoreState */
1843
case 110: default_index=
1844
(tolower(value[0])=='n')?0:1; break; /* DefaultIndex */
1847
(tolower(value[0])=='y')?1:0; break; /* GeoIP */
1848
case 112: geoip_db=save_opt(value); break; /* GeoIPDatabase */
1850
case 111: /* Disable GeoIP and GeoIPDatabase if not enabled */
1851
case 112: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1853
case 113: normalize=
1854
(tolower(value[0])=='n')?0:1; break; /* NormalizeURL */
1855
case 114: index_mths=atoi(value); break; /* IndexMonths */
1856
case 115: graph_mths=atoi(value); break; /* GraphMonths */
1857
case 116: year_hdrs=
1858
(tolower(value[0])=='n')?0:1; break; /* YearHeaders */
1859
case 117: year_totals=
1860
(tolower(value[0])=='n')?0:1; break; /* YearTotals */
1861
case 118: use_flags=
1862
(tolower(value[0])=='y')?1:0; break; /* CountryFlags */
1863
/* giving a flag directory also switches flags on */
case 119: use_flags=1; flag_dir=save_opt(value); break; /* FlagDir */
1864
case 120: searchcasei=
1865
(tolower(value[0])=='n')?0:1; break; /* SearchCaseI */
1871
/*********************************************/
1872
/* SAVE_OPT - save option from config file */
1873
/*********************************************/
1875
/*
 * save_opt - keep a private copy of a configuration option value.
 *
 * str: NUL-terminated option text; must not be NULL.
 *
 * Returns a malloc()ed duplicate of str that the caller owns, or NULL if
 * the allocation fails.
 *
 * NOTE(review): only the header and the allocation check survived in the
 * damaged copy of this file; the copy and return statements are restored
 * here on the assumption that the function simply duplicates str.
 */
static char *save_opt(char *str)
{
   char *cp1;

   /* room for the text plus its terminator */
   if ( (cp1=malloc(strlen(str)+1))==NULL) return NULL;

   strcpy(cp1,str);
   return cp1;
}
1885
/*********************************************/
1886
/* CLEAR_MONTH - initialize monthly stuff */
1887
/*********************************************/
1893
/* Statements of the routine announced by the CLEAR_MONTH banner.          */
/* NOTE(review): the function header, its braces and the declaration of   */
/* `i` are not present in this copy of the file; the bare numbers between */
/* the statements are line-number residue.  Restore before building.      */
init_counters(); /* reset monthly counters */
1894
del_htabs(); /* clear hash tables */
1895
/* when a top-countries list is configured, NULL out its first ntop_ctrys slots */
if (ntop_ctrys!=0 ) for (i=0;i<ntop_ctrys;i++) top_ctrys[i]=NULL;
1898
/*********************************************/
1899
/* INIT_COUNTERS - prep counters for use */
1900
/*********************************************/
1902
/*
 * init_counters - zero the statistics counters.
 *
 * Clears the per-response-code counts, the 31 per-day slots, the 24
 * per-hour slots and the running totals.  Takes no arguments and returns
 * nothing; it works only on file/global variables.
 *
 * NOTE(review): this copy of the file has lost lines here (braces, the
 * declaration of `i`, the body of the country loop and possibly further
 * resets); the bare numbers are line-number residue.
 */
void init_counters()
1905
/* one counter per tracked response code */
for (i=0;i<TOTAL_RC;i++) response[i].count = 0;
1906
for (i=0;i<31;i++) /* monthly totals */
1909
tm_hit[i]=tm_file[i]=tm_site[i]=tm_page[i]=tm_visit[i]=0;
1911
for (i=0;i<24;i++) /* hourly totals */
1913
th_hit[i]=th_file[i]=th_page[i]=0;
1916
/* walks ctry[] until an entry with a NULL/zero desc; loop body not visible here */
for (i=0;ctry[i].desc;i++) /* country totals */
1922
/* running totals */
t_hit=t_file=t_site=t_url=t_ref=t_agent=t_page=t_visit=t_user=0;
1924
mh_hit = dt_site = 0;
1928
/*********************************************/
1929
/* PRINT_OPTS - print command line options */
1930
/*********************************************/
1932
/*
 * print_opts - print the command line usage text to stdout.
 *
 * pname: program name to show in the usage line.
 *
 * NOTE(review): braces, the declaration of `i` and whatever follows the
 * loop are missing from this copy of the file; the bare numbers are
 * line-number residue.
 */
void print_opts(char *pname)
1936
/* usage line: "<h_usage1>: <pname> <h_usage2>" */
printf("%s: %s %s\n",h_usage1,pname,h_usage2);
1937
/* then every help line, up to the NULL entry that ends h_msg[] */
for (i=0;h_msg[i];i++) printf("%s\n",h_msg[i]);
1941
/*********************************************/
1943
/*********************************************/
1945
/*
 * print_version - print the program version banner to stdout.
 *
 * Queries the host with uname() and prints a version line containing the
 * system name, release and machine, followed by language and copyright.
 * Further lines list the optional features and the default directories.
 *
 * NOTE(review): this copy of the file has lost lines here (braces, local
 * declarations, the first printf arguments, and presumably the #ifdef
 * lines and verbosity test that guard the feature strings and the extra
 * output); the bare numbers are line-number residue.
 */
void print_version()
1948
uname(&system_info);
1950
printf("Webalizer V%s-%s (%s %s %s) %s\n%s\n",
1952
system_info.sysname,system_info.release,system_info.machine,
1953
language,copyright);
1956
/* each feature name is appended at the current end of buf, terminator included */
strncpy(&buf[strlen(buf)],"DNS/GeoDB ",11);
1959
strncpy(&buf[strlen(buf)],"BZip2 ",7);
1962
strncpy(&buf[strlen(buf)],"GeoIP ",7);
1967
printf("Mod date: %s Options: ",moddate);
1968
/* show the collected feature names, or "none" when buf is empty */
if (buf[0]!=0) printf("%s",buf);
1969
else printf("none");
1972
printf("Default GeoDB dir : %s\n",GEODB_LOC);
1974
printf("Default config dir: %s\n",ETCDIR);
1981
/*********************************************/
1982
/* CUR_TIME - return date/time as a string */
1983
/*********************************************/
1988
/* Statements of the routine announced by the CUR_TIME banner.             */
/* NOTE(review): the function header, the call that fetches the time, the */
/* test choosing between the two formats and the tail of both strftime()  */
/* calls are missing from this copy; the bare numbers are line-number     */
/* residue.  Restore from a clean copy before building.                   */
/* static buffer that receives the formatted date/time text */
static char timestamp[48];
1990
/* get system time */
1992
/* convert to timestamp string */
1994
/* variant ending in the zone name (%Z) */
strftime(timestamp,sizeof(timestamp),"%d-%b-%Y %H:%M %Z",
1997
/* variant ending in the literal text "GMT" */
strftime(timestamp,sizeof(timestamp),"%d-%b-%Y %H:%M GMT",
2003
/*********************************************/
2004
/* ISPAGE - determine if an HTML page or not */
2005
/*********************************************/
2007
/*
 * ispage - decide whether a URL counts as a page.
 *
 * str: the URL text.
 *
 * Returns 0 when the URL is in the omit_page list, 1 when it has no
 * extension or ends in '/', 1 when it starts with a configured page
 * prefix, and otherwise whether the text after its last '.' is in the
 * page_type list.
 *
 * NOTE(review): this copy of the file has lost lines here (braces, the
 * declarations and initial values of cp1/cp2/t, and the loop that walks
 * the prefix list); the bare numbers are line-number residue.
 */
int ispage(char *str)
2012
/* explicitly excluded URLs are never pages */
if (isinlist(omit_page,str)!=NULL) return 0;
2015
/* scan to the end of the string, remembering the last '.' in cp2 */
while (*cp1!='\0') { if (*cp1=='.') cp2=cp1; cp1++; }
2016
/* cp2 still at str (presumably its start value - confirm) means no '.' was */
/* found past the first char; also a page if the last character is '/'.     */
/* Side effect: cp2 is advanced past the '.' for the extension test below.  */
if ((cp2++==str)||(*(--cp1)=='/')) return 1;
2020
/* Check if a PagePrefix matches */
2021
if(strncmp(str,t->string,strlen(t->string))==0) return 1;
2024
/* finally, match the extension against the PageType list */
return (isinlist(page_type,cp2)!=NULL);
2027
/*********************************************/
2028
/* ISURLCHAR - checks for valid URL chars */
2029
/*********************************************/
2031
/*
 * isurlchar - test whether a character is acceptable inside a URL.
 *
 * ch:   character to test.
 * flag: non-zero when CGI variables are being stripped, in which case the
 *       query delimiters ";?&=" are rejected; zero accepts them as well.
 *
 * Returns 1 for letters, digits, any value above 127 and the listed
 * punctuation; 0 for everything else, including the NUL character.
 */
int isurlchar(unsigned char ch, int flag)
{
   /* strchr() also matches a string's terminator, so NUL must be */
   /* rejected explicitly or it would be reported as valid.       */
   if (ch == '\0') return 0;

   if (isalnum(ch)) return 1;                           /* allow letters, numbers... */
   if (ch > 127) return 1;                              /* allow extended chars...   */
   if (flag)                                            /* and filter some others    */
      return (strchr(":/\\.,' *!-+_@~()[]!",ch)!=NULL);     /* strip cgi vars */
   else
      return (strchr(":/\\.,' *!-+_@~()[]!;?&=",ch)!=NULL); /* keep cgi vars  */
}
2041
/*********************************************/
2042
/* CTRY_IDX - create unique # from TLD */
2043
/*********************************************/
2045
/*
 * ctry_idx - pack a TLD string into a single number.
 *
 * str: NUL-terminated domain text (e.g. "com"); must not be NULL.
 *
 * The string is consumed from its last character backwards.  Each
 * character contributes (c - 'a' + 1); the last character sits in the low
 * 7 bits and every earlier one is placed 5 bits higher than the previous.
 * Returns 0 for an empty string.
 *
 * The shift is done in 64-bit unsigned arithmetic: the earlier form
 * shifted a signed int, which is undefined for negative operands (digits,
 * as in "a1") and for results above INT_MAX (six letter names).  Characters
 * whose position would lie beyond bit 63 cannot contribute and are skipped.
 *
 * NOTE(review): the declarations of idx/cp and the return statement were
 * missing from the damaged copy of this file and are restored here.
 */
u_int64_t ctry_idx(char *str)
{
   int       i=strlen(str),j=0;
   u_int64_t idx=0;
   char      *cp=str+i;

   for (;i>0;i--)
   {
      --cp;
      if (j<64) idx+=((u_int64_t)(*cp-'a'+1))<<j;
      j+=(j==0)?7:5;
   }
   return idx;
}
2055
/*********************************************/
2056
/* UN_IDX - get TLD from index # */
2057
/*********************************************/
2059
/*
 * un_idx - turn an index number back into its TLD text.
 *
 * idx: value in the layout used by the TLD index (last character in the
 *      low 7 bits, up to five earlier characters in 5-bit fields above).
 *
 * Returns a pointer to a static buffer holding the text (empty for 0).
 * The buffer is overwritten by the next call, so the result must be
 * copied if it has to survive, and the function is not reentrant.
 *
 * NOTE(review): the local declarations and the `for` header were missing
 * from the damaged copy of this file; they are restored from what the
 * surviving statements require (indexes 0..5, a static buffer of at
 * least 7 bytes).
 */
char *un_idx(u_int64_t idx)
{
   int    i,j;
   char   *cp;
   static char buf[8];

   memset(buf, 0, sizeof(buf));
   if (idx==0) return buf;                    /* unsigned: only 0 is "none" */
   if ((j=(idx&0x7f))>32) /* only for a1, a2 and o1 */
      { buf[0]=(idx>>7)+'a'; buf[1]=j-32; return buf; }

   /* slot 5 holds the last character; slots 0..4 hold the 5-bit fields, */
   /* with a blank for every field that is empty                          */
   for (i=5;i>=0;i--)
      buf[i]=(i==5)?(idx&0x7f)+'a'-1:(j=(idx>>(((5-i)*5)+2))&0x1f)?j+'a'-1:' ';

   /* squeeze out the leading blanks by shifting the text left */
   cp=buf; while (*cp==' ') { for (i=0;i<6;i++) buf[i]=buf[i+1]; } return buf;
}
2075
/*********************************************/
2076
/* FROM_HEX - convert hex char to decimal */
2077
/*********************************************/
2079
/*
 * from_hex - convert one hexadecimal digit character to its value.
 *
 *   c - character to convert ('0'-'9', 'A'-'F' or 'a'-'f')
 *
 * Returns 0..15 for a valid hex digit and 0 for any other character.
 * Each range is tested explicitly so that characters lying just below
 * 'a' (such as 'X' or '_') are rejected instead of being mapped onto a
 * small positive value.
 */
char from_hex(char c)                           /* convert hex to dec      */
{
   if (c>='0' && c<='9') return c-'0';          /* 0-9?                    */
   if (c>='A' && c<='F') return c-'A'+10;       /* A-F?                    */
   if (c>='a' && c<='f') return c-'a'+10;       /* a-f?                    */
   return 0;                                    /* return 0 if bad...      */
}
2087
/*********************************************/
2088
/* UNESCAPE - convert escape seqs to chars */
2089
/*********************************************/
2091
/*
 * unescape - decode %XX escape sequences in a string, in place.
 *
 *   str - string to decode; it is overwritten with the decoded text
 *
 * Returns NULL when str is NULL, otherwise str itself.  cp1 is the read
 * position and cp2 the write position; both point into str.  A decoded
 * byte that is below 32 or equal to 127 is replaced by '_'.
 *
 * NOTE(review): the enclosing loop, the step that moves cp1 past the
 * '%', and the branch taken when no hex digit follows are not visible
 * in this listing.
 */
char *unescape(char *str)
2093
unsigned char *cp1=(unsigned char *)str; /* force unsigned so we */
2094
unsigned char *cp2=cp1; /* can do > 127 */
2096
if (!str) return NULL; /* make sure strings valid */
2100
if (*cp1=='%') /* Found an escape? */
2103
/* only this first digit is validated with isxdigit() */
if (isxdigit(*cp1)) /* ensure a hex digit */
2105
/* high nibble; the *cp1 guards stop the reader at the terminator */
if (*cp1) *cp2=from_hex(*cp1++)*16; /* convert hex to an ASCII */
2106
/* low nibble, passed to from_hex() without an isxdigit() check */
if (*cp1) *cp2+=from_hex(*cp1); /* (hopefully) character */
2107
if ((*cp2<32)||(*cp2==127)) *cp2='_'; /* make '_' if its bad */
2108
/* advance both positions unless the string ended inside the escape */
if (*cp1) { cp2++; cp1++; }
2112
else *cp2++ = *cp1++; /* if not, just continue */
2114
*cp2=*cp1; /* don't forget terminator */
2115
return str; /* return the string */
2118
/*********************************************/
2119
/* OURICMP - Case insensitive string compare */
2120
/*********************************************/
2122
/*
 * ouricmp - case-insensitive string comparison.
 *
 *   str1, str2 - strings to compare
 *
 * Returns 0 when str1 is at its terminator at the point of the final
 * test shown below, and 1 otherwise.  The result is only equal/not
 * equal; no ordering is reported.
 *
 * NOTE(review): the loop header and the statement advancing the two
 * pointers are not visible in this listing; presumably the pointers
 * walk forward while the lower-cased characters match - confirm.
 */
int ouricmp(char *str1, char *str2)
2125
/* cast to unsigned char keeps tolower() defined for bytes above 127 */
(tolower((unsigned char)*str1)==tolower((unsigned char)*str2)))
2127
if (*str1==0) return 0; else return 1;
2130
/*********************************************/
2131
/* SRCH_STRING - get search strings from ref */
2132
/*********************************************/
2134
/*
 * srch_string - pull the search phrase out of a referrer query string
 * and record it.
 *
 *   ptr - query string to scan; '+' characters inside the matched value
 *         are overwritten with spaces in this buffer
 *
 * Steps visible here: look log_rec.refer up in search_list to get the
 * query variable name, locate "?name" or "&name" in ptr, copy the value
 * (up to the next '&') into tmpbuf in lower case while dropping some
 * characters and collapsing runs of spaces, trim surrounding white
 * space, replace control characters with '_', and hand the result to
 * put_snode() with a count of 1.  A failure from put_snode() is
 * reported on stderr.
 *
 * NOTE(review): the declarations of srch and sp_flg and several braces
 * and return statements are not visible in this listing.
 * NOTE(review): no bound check against BUFSIZE is visible in the copy
 * loop - confirm that ptr can never exceed tmpbuf.
 */
void srch_string(char *ptr)
2136
/* ptr should point to unescaped query string */
2137
char tmpbuf[BUFSIZE];
2139
unsigned char *cp1, *cp2, *cps;
2142
/* Check if search engine referrer or return */
2143
if ( (cps=(unsigned char *)isinglist(search_list,log_rec.refer))==NULL)
2146
/* Try to find query variable */
2147
/* terminator is placed first; the strncpy below is limited so that   */
/* it can never overwrite it                                          */
srch[0]='?'; srch[sizeof(srch)-1] = '\0';
2148
strncpy(&srch[1],(char *)cps,sizeof(srch)-2); /* First, try "?..." */
2149
if ((cp1=(unsigned char *)strstr(ptr,srch))==NULL)
2151
srch[0]='&'; /* Next, try "&..." */
2152
if ((cp1=(unsigned char *)strstr(ptr,srch))==NULL) return;
2154
cp2=(unsigned char *)tmpbuf;
2155
/* move to the '=' and step over it, unless the string ends first */
while (*cp1!='=' && *cp1!=0) cp1++; if (*cp1!=0) cp1++;
2156
/* the value runs up to the next '&' or the end of the string */
while (*cp1!='&' && *cp1!=0)
2158
if (*cp1=='"' || *cp1==',' || *cp1=='?')
2159
{ cp1++; continue; } /* skip bad ones.. */
2162
/* note: this writes into the caller's buffer, not into tmpbuf */
if (*cp1=='+') *cp1=' '; /* change + to space */
2163
if (sp_flg && *cp1==' ') { cp1++; continue; } /* compress spaces */
2164
if (*cp1==' ') sp_flg=1; else sp_flg=0; /* (flag spaces here) */
2166
*cp2++=tolower(*cp1++); /* normal character */
2170
/* terminate the copy and restart cp2 at the beginning of tmpbuf */
*cp2=0; cp2=(unsigned char *)tmpbuf;
2171
if (tmpbuf[0]=='?') tmpbuf[0]=' '; /* format fix ? */
2172
while( *cp2!=0 && isspace((unsigned char)*cp2) ) cp2++; /* skip sps. */
2173
/* nothing left after trimming: no search string to record */
if (*cp2==0) return;
2175
/* any trailing spaces? */
2176
cp1=cp2+strlen((char *)cp2)-1;
2177
while (cp1!=cp2) if (isspace((unsigned char)*cp1)) *cp1--='\0'; else break;
2179
/* strip invalid chars */
2181
/* control characters (below 32, or 127) become '_' */
while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; }
2183
/* a non-zero return from put_snode() is treated as failure */
if (put_snode((char *)cp2,(u_int64_t)1,sr_htab))
2186
/* Error adding search string node, skipping .... */
2187
fprintf(stderr,"%s %s\n", msg_nomem_sc, tmpbuf);
2192
/*********************************************/
2193
/* GET_DOMAIN - Get domain portion of host */
2194
/*********************************************/
2196
/*
 * get_domain - return the domain portion of a host name.
 *
 *   str - host name
 *
 * Returns NULL when isipaddr(str) is non-zero, which covers IP
 * addresses as well as its negative "bad hostname" result.  Otherwise
 * the visible code returns a pointer into str, one past the position
 * where the counter i runs out.
 *
 * NOTE(review): the declaration of cp, the scanning loop and the final
 * return are not visible in this listing; presumably cp walks from the
 * end of str towards its start, decrementing i at each '.' - confirm.
 */
char *get_domain(char *str)
2199
/* countdown that starts one above the configured group_domains value */
int i=group_domains+1;
2201
if (isipaddr(str)) return NULL;
2202
/* start on the last character of the name */
cp = str+strlen(str)-1;
2207
/* countdown exhausted: the result starts just after cp */
if (!(--i)) return ++cp;
2213
/*********************************************/
2214
/* AGENT_MANGLE - Re-format user agent */
2215
/*********************************************/
2217
/*
 * agent_mangle - shorten the user agent string held in log_rec.agent.
 *
 *   str - overwritten with log_rec.agent on the first statement, so the
 *         value passed by the caller is not used by the visible code
 *
 * The string is rewritten in place: cp1 (and cp3) read from the buffer,
 * cp2 writes the shortened form back into the same buffer.  Three
 * flavours are recognised by substring: "ompatible", "Opera" and
 * "Mozilla".  For each, the visible code copies the browser name, then
 * parts of the version number, and then text taken from the
 * parenthesised section of the agent string.
 *
 * NOTE(review): the if/else conditions that select a flavour, the
 * checks on the strstr() results, the tests that decide how much detail
 * to keep, and the final termination of the output are not visible in
 * this listing; the comments below describe only the statements shown.
 */
void agent_mangle(char *str)
2219
char *cp1, *cp2, *cp3;
2221
/* read and write positions both start at the stored agent string */
str=cp2=log_rec.agent;
2222
/* leading letter omitted, so either capitalisation is matched */
cp1=strstr(str,"ompatible"); /* check known fakers */
2225
/* advance to the ';' that follows the matched word */
while (*cp1!=';'&&*cp1!='\0') cp1++;
2226
/* kludge for Mozilla/3.01 (compatible;) */
2227
/* requires a ';' followed by something other than the closing )" */
if (*cp1++==';' && strcmp(cp1,")\"")) /* success! */
2229
/* Opera can hide as MSIE */
2230
cp3=strstr(str,"Opera");
2233
/* scan the Opera token up to the first '.' */
while (*cp3!='.'&&*cp3!='\0')
2235
/* a '/' separator is written out as a space */
if(*cp3=='/') *cp2++=' ';
2243
while (*cp1 == ' ') cp1++; /* eat spaces */
2244
/* copy up to the first '.' or ';' */
while (*cp1!='.'&&*cp1!='\0'&&*cp1!=';') *cp2++=*cp1++;
2248
while (*cp1!='.'&&*cp1!=';'&&*cp1!='\0') *cp2++=*cp1++;
2249
/* copy the '.' together with the character that follows it */
if (*cp1!=';'&&*cp1!='\0') { *cp2++=*cp1++; *cp2++=*cp1++; }
2252
/* one further digit, when present */
if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2254
/* copy the rest of the token */
while (*cp1!=';'&&*cp1!='\0'&&*cp1!='('&&*cp1!=' ') *cp2++=*cp1++;
2257
/* Level 1 - try to get OS */
2258
cp1=strstr(cp1,")");
2263
/* walk back from the ')' to the preceding ';' or '(' */
while (*cp1!=';'&&*cp1!='('&&cp1!=str) cp1--;
2264
if (cp1!=str&&*cp1!='\0') cp1++;
2265
while (*cp1==' '&&*cp1!='\0') cp1++;
2266
/* copy that last field, up to the ')' */
while (*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2274
/* nothing after "compatible", should we mangle? */
2280
cp1=strstr(str,"Opera"); /* Opera flavor */
2283
/* copy the name up to the '/' or space */
while (*cp1!='/'&&*cp1!=' '&&*cp1!='\0') *cp2++=*cp1++;
2284
while (*cp1!='.'&&*cp1!='\0')
2286
if(*cp1=='/') *cp2++=' ';
2292
while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2297
if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2299
while (*cp1!=' '&&*cp1!='\0'&&*cp1!='(') *cp2++=*cp1++;
2302
cp1=strstr(cp1,"(");
2308
/* copy the first field inside the parentheses */
while (*cp1!=';'&&*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2316
cp1=strstr(str,"Mozilla"); /* Netscape flavor */
2319
/* copy the name up to the '/' or space */
while (*cp1!='/'&&*cp1!=' '&&*cp1!='\0') *cp2++=*cp1++;
2320
/* a space after the name is turned into '/' in the source text */
if (*cp1==' ') *cp1='/';
2321
while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2324
while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2329
if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2331
while (*cp1!=' '&&*cp1!='\0'&&*cp1!='(') *cp2++=*cp1++;
2334
/* Level 1 - Try to get OS */
2335
cp1=strstr(cp1,"(");
2341
/* copy the first field inside the parentheses */
while (*cp1!=';'&&*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2351
/*********************************************/
2352
/* OUR_GZGETS - enhanced gzgets for log only */
2353
/*********************************************/
2355
/*
 * our_gzgets - read one line of a compressed log through the shared
 * read buffer f_buf.
 *
 *   fp   - handle passed to gzread() or BZ2_bzread()
 *   buf  - destination for the line
 *   size - capacity of buf; decremented as characters are consumed
 *
 * Returns buf, NUL-terminated, once a '\n' has been consumed or the
 * size budget is used up; returns Z_NULL when a refill reads nothing
 * (result <= 0).
 *
 * State shared across calls: f_buf (data), f_cp (read position) and
 * f_end (number of valid bytes from the last read).
 *
 * NOTE(review): the surrounding loop, the statement that stores each
 * character through out_cp, and the preprocessor conditionals are not
 * visible in this listing.  The two refill statements below are
 * presumably alternatives selected by USE_BZIP - confirm.
 */
char *our_gzgets(void *fp, char *buf, int size)
2357
char *out_cp=buf; /* point to output */
2360
/* read position has moved past the last valid byte */
if (f_cp>(f_buf+f_end-1)) /* load? */
2363
/* the reader is chosen at run time from gz_log */
f_end=(gz_log==COMP_BZIP)?
2364
BZ2_bzread(fp, f_buf, GZ_BUFSIZE):
2365
gzread(fp, f_buf, GZ_BUFSIZE);
2367
f_end=gzread(fp, f_buf, GZ_BUFSIZE);
2369
/* end of file and read errors are both reported as Z_NULL */
if (f_end<=0) return Z_NULL;
2373
if (--size) /* more? */
2376
/* a newline ends the line; the output is terminated and returned */
if (*f_cp++ == '\n') { *out_cp='\0'; return buf; }
2378
/* no room left in buf: terminate and return what has been collected */
else { *out_cp='\0'; return buf; }
2383
/*********************************************/
2384
/* bz2_rewind - our 'rewind' for bz2 files */
2385
/*********************************************/
2387
/*
 * bz2_rewind - restart reading a bzip2 file from its beginning.
 *
 *   fp    - in/out: receives the handle returned by BZ2_bzopen()
 *   fname - path of the file to open
 *   mode  - not used by the visible statements; the file is always
 *           opened with "rb"
 *
 * Returns -1 when the open fails (*fp == Z_NULL).  The shared read
 * buffer state (f_cp, f_end, f_buf) is reset before that test, so it is
 * reset even when the open fails.
 *
 * NOTE(review): the statements before the open (presumably closing the
 * previous handle) and the success return are not visible in this
 * listing.
 */
int bz2_rewind( void **fp, char *fname, char *mode )
2390
*fp = BZ2_bzopen( fname, "rb");
2391
/* f_cp is placed at the end of the buffer and the valid count is zeroed */
f_cp=f_buf+GZ_BUFSIZE; f_end=0; /* reset buffer counters */
2392
memset(f_buf, 0, sizeof(f_buf));
2393
if (*fp == Z_NULL) return -1;
2396
#endif /* USE_BZIP */
2398
/*********************************************/
2399
/* ISIPADDR - Determine if str is IP address */
2400
/*********************************************/
2402
/*
 * isipaddr - classify a host string.
 *
 *   str - host name or address text
 *
 * Return values, as given by the statements below:
 *   -1  bad hostname
 *    0  ordinary hostname
 *    1  IPv4 address
 *    2  IPv4/IPv6 address
 *    3  IPv6 address
 *
 * A string containing ':' is examined as a possible IPv6 address; any
 * other string is examined as a possible IPv4 address.
 *
 * NOTE(review): the declarations and initial values of the counters i
 * and j, the assignment of cp, and the braces are not visible in this
 * listing.  The IPv4 test "i!=4" suggests i starts at 1 - confirm.
 */
int isipaddr(char *str)
2405
char *cp; /* generic ptr */
2407
if (strchr(str,':')!=NULL)
2409
/* Possible IPv6 Address */
2411
/* strchr() also matches the terminator, so the explicit '\0' test is */
/* what ends the scan; only lower-case hex letters are accepted       */
while (strchr(":.abcdef0123456789",*cp)!=NULL && *cp!='\0')
2414
/* i counts the ':' separators */
if (*cp++==':') i++;
2417
/* the scan stopped early on a character outside the allowed set */
if (*cp!='\0') return -1; /* bad hostname (has ':') */
2418
if (i>1 && j) return 2; /* IPv4/IPv6 */
2419
return 3; /* IPv6 */
2423
/* Not an IPv6 address, check for IPv4 */
2425
/* same scanning pattern with the dotted-decimal character set */
while (strchr(".0123456789",*cp)!=NULL && *cp!='\0')
2427
/* i counts the '.' separators */
if (*cp++=='.') i++;
2429
/* any character other than a digit or '.' makes it a hostname */
if (*cp!='\0') return 0; /* hostname */
2430
if (i!=4) return -1; /* bad hostname */
2431
return 1; /* IPv4 */
2435
/*****************************************************************/
2437
/* JDATE - Julian date calculator */
2439
/* Calculates the number of days since Jan 1, 0000. */
2441
/* Originally written by Bradford L. Barrett (03/17/1988) */
2442
/* Returns an unsigned long value representing the number of */
2443
/* days since January 1, 0000. */
2445
/* Note: Due to the changes made by Pope Gregory XIII in the */
2446
/* 16th Century (Feb 24, 1582), dates before 1583 will */
2447
/* not return a truly accurate number (will be at least */
2448
/* 10 days off). Somehow, I don't think this will */
2449
/* present much of a problem for most situations :) */
2451
/* Usage: days = jdate(day, month, year) */
2453
/* The number returned is adjusted by 5 to facilitate day of */
2454
/* week calculations. The mod of the returned value gives the */
2455
/* day of the week the date is. (ie: dow = days % 7 ) where */
2456
/* dow will return 0=Sunday, 1=Monday, 2=Tuesday, etc... */
2458
/*****************************************************************/
2460
u_int64_t jdate( int day, int month, int year )
2462
u_int64_t days; /* value returned */
2463
int mtable[] = {0,31,59,90,120,151,181,212,243,273,304,334};
2465
/* First, calculate base number including leap and Centenial year stuff */
2467
days=(((u_int64_t)year*365)+day+mtable[month-1]+
2468
((year+4)/4) - ((year/100)-(year/400)));
2470
/* now adjust for leap year before March 1st */
2472
if ((year % 4 == 0) && !((year % 100 == 0) &&
2473
(year % 400 != 0)) && (month < 3))
2476
/* done, return with calculated value */