2
webalizer - a web server log analysis program
4
Copyright (C) 1997-2011 Bradford L. Barrett
6
This program is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2 of the License, or
9
(at your option) any later version, and provided that the above
10
copyright and permission notice is included with all distributed
11
copies of this or derived software.
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
18
You should have received a copy of the GNU General Public License
19
along with this program; if not, write to the Free Software
20
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
24
/*********************************************/
25
/* STANDARD INCLUDES */
26
/*********************************************/
32
#include <unistd.h> /* normal stuff */
34
#include <sys/utsname.h>
36
/* ensure sys/types */
38
#include <sys/types.h>
41
/* need socket header? */
42
#ifdef HAVE_SYS_SOCKET_H
43
#include <sys/socket.h>
46
/* some systems need this */
51
#include "webalizer.h" /* main header */
55
/* internal function prototypes (the definitions follow in this file) */
56
void fmt_logrec(char *);          /* split a record into '\0' terminated fields */
57
int parse_record_clf(char *);     /* CLF web log handler   */
58
int parse_record_ftp(char *);     /* ftp log handler       */
59
int parse_record_squid(char *);   /* squid log handler     */
60
int parse_record_w3c(char *);     /* w3c log handler       */
62
/*********************************************/
63
/* FMT_LOGREC - terminate log fields w/zeros */
64
/*********************************************/
66
/* Splits the record held in buffer into fields, in place: each tab */
/* or space that lies outside a "quoted", [bracketed] or            */
/* (parenthesized) region is overwritten with '\0'.                 */
/* Nothing is returned; the caller walks the resulting strings.     */
void fmt_logrec(char *buffer)
73
/* break record up, terminate fields with '\0' */
/* State used by the cases below:                                   */
/*   q - toggled by each unescaped double quote                     */
/*   b - count of currently open '[' (never driven below zero)      */
/*   p - count of currently open '(' (never driven below zero)      */
/* NOTE(review): the declarations of cp, b, q, p and the loop that  */
/* drives these cases are not visible here - confirm against the    */
/* complete source.                                                 */
76
/* a separator only counts when no quote/bracket/paren is open */
case '\t': if (b || q || p) break; *cp='\0'; break;
77
case ' ': if (b || q || p) break; *cp='\0'; break;
78
/* a quote preceded by a backslash is escaped and leaves q alone.   */
/* NOTE(review): *(cp-1) is read unconditionally; if cp can be the  */
/* first byte of buffer this reads one byte before it - confirm.    */
case '"': if (*(cp-1)=='\\') break; else q^=1; break;
79
/* brackets and parentheses are ignored while inside quotes */
case '[': if (q) break; b++; break;
80
case ']': if (q) break; if (b>0) b--; break;
81
case '(': if (q) break; p++; break;
82
case ')': if (q) break; if (p>0) p--; break;
88
/*********************************************/
89
/* PARSE_RECORD - dispatch to format parser  */
90
/*********************************************/
92
/* Zeroes the shared log_rec structure and hands buffer to the      */
/* parser matching the log type.  The return value is whatever the  */
/* selected handler returns.                                        */
/* NOTE(review): the switch expression selecting the log type is    */
/* not visible here.                                                */
int parse_record(char *buffer)
94
/* clear out structure */
95
memset(&log_rec,0,sizeof(struct log_struct));
97
/* call appropriate handler */
101
case LOG_CLF: return parse_record_clf(buffer); break; /* clf */
102
case LOG_FTP: return parse_record_ftp(buffer); break; /* ftp */
103
case LOG_SQUID: return parse_record_squid(buffer); break; /* squid */
104
case LOG_W3C: return parse_record_w3c(buffer); break; /* w3c */
108
/*********************************************/
109
/* PARSE_RECORD_FTP - ftp log handler */
110
/*********************************************/
112
/* Parses one ftp transfer log line from buffer (modified in place  */
/* by fmt_logrec) into log_rec: datetime, hostname, xfer_size, a    */
/* fabricated "POST"/"GET" request in url, ident and resp_code.     */
/* Returns 0 as soon as a date/time sanity check fails.             */
/* Field walking idiom used throughout: the first loop of a pair    */
/* runs to the end of the current field, the second skips the '\0'  */
/* separators up to the start of the next field; eob bounds both.   */
/* NOTE(review): declarations of size, i, j and count and the       */
/* initial assignment of cp1 are not visible here.                  */
int parse_record_ftp(char *buffer)
116
char *cp1, *cp2, *cpx, *cpy, *eob;
118
size = strlen(buffer); /* get length of buffer */
119
eob = buffer+size; /* calculate end of buffer */
120
fmt_logrec(buffer); /* separate fields with \0's */
122
/* Start out with date/time */
/* the first field is skipped (presumably the weekday name) */
124
while (*cp1!=0 && cp1<eob) cp1++;
125
while (*cp1==0 && cp1<eob) cp1++;
126
cpx=cp1; /* save month name */
127
while (*cp1!=0 && cp1<eob) cp1++;
128
while (*cp1==0 && cp1<eob) cp1++;
129
i=atoi(cp1); /* get day number */
130
while (*cp1!=0 && cp1<eob) cp1++;
131
while (*cp1==0 && cp1<eob) cp1++;
132
cpy=cp1; /* get timestamp */
133
while (*cp1!=0 && cp1<eob) cp1++;
134
while (*cp1==0 && cp1<eob) cp1++;
135
j=atoi(cp1); /* get year */
137
/* minimal sanity check */
/* timestamp must look like hh:mm:ss; year and day must be in range */
138
if (*(cpy+2)!=':' || *(cpy+5)!=':') return 0;
139
if (j<1990 || j>2100) return 0;
140
if (i<1 || i>31) return 0;
142
/* format date/time field */
/* rebuilt in the bracketed CLF-like layout with a fixed -0000 zone */
143
snprintf(log_rec.datetime,sizeof(log_rec.datetime),
144
"[%02d/%s/%4d:%s -0000]",i,cpx,j,cpy);
146
/* skip seconds... */
147
while (*cp1!=0 && cp1<eob) cp1++;
148
while (*cp1==0 && cp1<eob) cp1++;
149
while (*cp1!=0 && cp1<eob) cp1++;
154
/* Blank? That's weird.. */
/* NOTE(review): the test choosing between the "NONE" fallback and */
/* the copy below is not visible here.                              */
155
strcpy(log_rec.hostname,"NONE");
156
if (debug_mode) fprintf(stderr, "Warning: Blank hostname found!\n");
161
/* strncpy may leave the copy unterminated, hence the explicit NUL */
strncpy(log_rec.hostname, ++cp1, MAXHOST);
162
log_rec.hostname[MAXHOST-1]=0;
163
while (*cp1!=0 && cp1<eob) cp1++;
165
while (*cp1==0 && cp1<eob) cp1++;
168
/* transfer size: anything not starting with a digit counts as 0 */
if (*cp1<'0'||*cp1>'9') log_rec.xfer_size=0;
169
else log_rec.xfer_size = strtoul(cp1,NULL,10);
172
while (*cp1!=0 && cp1<eob) cp1++;
173
while (*cp1==0 && cp1<eob) cp1++;
175
/* get next field for later */
176
while (*cp1!=0 && cp1<eob) cp1++;
177
while (*cp1==0 && cp1<eob) cp1++;
180
while (*cp1!=0 && cp1<eob) cp1++;
/* bounded by eob like every other skip: without the bound a      */
/* truncated record walks past the buffer's terminating NUL       */
181
while (*cp1==0 && cp1<eob) cp1++;
182
while (*cp1!=0 && cp1<eob) cp1++;
183
while (*cp1==0 && cp1<eob) cp1++;
185
/* fabricate an appropriate request string based on direction */
/* NOTE(review): the direction test and the point where cpx is    */
/* re-aimed at the file name are not visible here.                */
187
snprintf(log_rec.url,sizeof(log_rec.url),"\"POST %s\"",cpx);
189
snprintf(log_rec.url,sizeof(log_rec.url),"\"GET %s\"",cpx);
193
while (*cp1!=0 && cp1<eob) cp1++;
195
/* copy at most MAXIDENT-1 characters of the current field */
cp2=log_rec.ident;count=MAXIDENT-1;
196
while (*cp1!=0 && cp1<eob && count) { *cp2++ = *cp1++; count--; }
199
/* return appropriate response code */
/* an 'i' two bytes before the end of the line yields 206, else 200 */
200
log_rec.resp_code=(*(eob-2)=='i')?206:200;
202
/* don't worry about I/O bytes in FTP */
203
log_rec.ixfer_size=log_rec.oxfer_size=0;
208
/*********************************************/
209
/* PARSE_RECORD_CLF - CLF web log handler */
210
/*********************************************/
212
/* Parses one CLF style web log line from buffer (modified in place */
/* by fmt_logrec) into log_rec: hostname, ident, datetime, url,     */
/* resp_code, xfer_size and, when present, the referrer, user agent */
/* and input/output byte counts.                                    */
/* Returns 0 when a sanity check fails, 1 otherwise.                */
/* For every copied field eos marks where copying must stop: the    */
/* field's size limit or the last byte of the record, whichever     */
/* comes first.                                                     */
/* NOTE(review): the declaration of size, several cp2/cpx/eos       */
/* assignments and the tests guarding the "oversized" warnings are  */
/* not visible here.                                                */
int parse_record_clf(char *buffer)
215
char *cp1, *cp2, *cpx, *eob, *eos;
217
size = strlen(buffer); /* get length of buffer */
218
eob = buffer+size; /* calculate end of buffer */
219
fmt_logrec(buffer); /* separate fields with \0's */
222
/* HOSTNAME is the first field of the record */
cp1 = cpx = buffer; cp2=log_rec.hostname;
223
eos = (cp1+MAXHOST)-1;
224
if (eos >= eob) eos=eob-1;
226
while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++;
232
/* oversized hostname warning; the record text is shown in debug mode */
fprintf(stderr,"%s",msg_big_host);
233
if (debug_mode) fprintf(stderr,": %s\n",cpx);
234
else fprintf(stderr,"\n");
236
/* discard the rest of the field */
while (*cp1 != '\0') cp1++;
238
if (cp1 < eob) cp1++;
240
/* skip next field (ident) */
241
while ( (*cp1 != '\0') && (cp1 < eob) ) cp1++;
242
if (cp1 < eob) cp1++;
244
/* IDENT (authuser) field */
247
eos = (cp1+MAXIDENT-1);
248
if (eos >= eob) eos=eob-1;
/* everything up to the '[' that opens the timestamp belongs to   */
/* the user name, so separators written by fmt_logrec inside it   */
/* are turned back into spaces                                    */
250
while ( (*cp1 != '[') && (cp1 < eos) ) /* remove embedded spaces */
252
if (*cp1=='\0') *cp1=' ';
257
if (cp1 >= eob) return 0;
259
/* check if oversized username */
264
fprintf(stderr,"%s",msg_big_user);
265
if (debug_mode) fprintf(stderr,": %s\n",cpx);
266
else fprintf(stderr,"\n");
268
while ( (*cp1 != '[') && (cp1 < eob) ) cp1++;
271
/* strip trailing space(s) */
272
while (*cp2==' ') *cp2--='\0';
274
/* date/time string */
276
cp2 = log_rec.datetime;
278
if (eos >= eob) eos=eob-1;
280
while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++;
286
fprintf(stderr,"%s",msg_big_date);
287
if (debug_mode) fprintf(stderr,": %s\n",cpx);
288
else fprintf(stderr,"\n");
290
while (*cp1 != '\0') cp1++;
292
if (cp1 < eob) cp1++;
294
/* minimal sanity check on timestamp */
/* must start with '[', have '/' after the day, and not end the record */
295
if ( (log_rec.datetime[0] != '[') ||
296
(log_rec.datetime[3] != '/') ||
297
(cp1 >= eob)) return 0;
302
/* HTTP request, copied with the same eos limit scheme */
eos = (cp1+MAXURL-1);
303
if (eos >= eob) eos = eob-1;
305
while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++;
311
fprintf(stderr,"%s",msg_big_req);
312
if (debug_mode) fprintf(stderr,": %s\n",cpx);
313
else fprintf(stderr,"\n");
315
while (*cp1 != '\0') cp1++;
317
if (cp1 < eob) cp1++;
/* the request must be quoted and must not be the last field */
319
if ( (log_rec.url[0] != '"') ||
320
(cp1 >= eob) ) return 0;
322
/* Strip off HTTP version from URL */
/* cut the string at "HTTP" and put the closing quote back one    */
/* character earlier                                              */
323
if ( (cp2=strstr(log_rec.url,"HTTP"))!=NULL )
325
*cp2='\0'; /* Terminate string */
326
*(--cp2)='"'; /* change <sp> to " */
330
/* response code */
log_rec.resp_code = atoi(cp1);
333
/* transfer size: anything not starting with a digit counts as 0 */
while ( (*cp1 != '\0') && (cp1 < eob) ) cp1++;
334
if (cp1 < eob) cp1++;
335
if (*cp1<'0'||*cp1>'9') log_rec.xfer_size=0;
336
else log_rec.xfer_size = strtoul(cp1,NULL,10);
338
/* done with CLF record */
/* nothing after the size field: the record is accepted as is */
339
if (cp1>=eob) return 1;
341
while ( (*cp1 != '\0') && (*cp1 != '\n') && (cp1 < eob) ) cp1++;
342
if (cp1 < eob) cp1++;
343
/* get referrer if present */
346
eos = (cp1+MAXREF-1);
347
if (eos >= eob) eos = eob-1;
349
while ( (*cp1 != '\0') && (*cp1 != '\n') && (cp1 != eos) ) *cp2++ = *cp1++;
355
fprintf(stderr,"%s",msg_big_ref);
356
if (debug_mode) fprintf(stderr,": %s\n",cpx);
357
else fprintf(stderr,"\n");
359
while (*cp1 != '\0') cp1++;
361
if (cp1 < eob) cp1++;
365
/* user agent, limited to MAXAGENT-1 characters */
eos = cp1+(MAXAGENT-1);
366
if (eos >= eob) eos = eob-1;
368
while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++;
372
/* input byte count, 0 unless the field starts with a digit */
while ( (*cp1 != '\0') && (cp1 < eob) ) cp1++;
373
if (cp1 < eob) cp1++;
374
if (*cp1<'0'||*cp1>'9') log_rec.ixfer_size=0;
375
else log_rec.ixfer_size = strtoul(cp1,NULL,10);
378
/* output byte count, same rule */
while ( (*cp1 != '\0') && (cp1 < eob) ) cp1++;
379
if (cp1 < eob) cp1++;
380
if (*cp1<'0'||*cp1>'9') log_rec.oxfer_size=0;
381
else log_rec.oxfer_size = strtoul(cp1,NULL,10);
383
return 1; /* maybe a valid record, return with TRUE */
386
/*********************************************/
387
/* PARSE_RECORD_SQUID - squid log handler */
388
/*********************************************/
390
/* Parses one squid log line from buffer (modified in place by      */
/* fmt_logrec) into log_rec: datetime (formatted from the leading   */
/* numeric timestamp), hostname, resp_code, xfer_size, url and      */
/* ident.  Returns 0 when the record ends before the ident field.   */
/* Field walking idiom: one loop runs to the end of the current     */
/* field, the next skips the '\0' separators; both are bounded by   */
/* eob so a truncated record cannot walk past the buffer.           */
/* NOTE(review): the declaration of i, the initial assignment of    */
/* cp1, several cp2/cpx assignments and the tests guarding the      */
/* "oversized" warnings are not visible here.                       */
int parse_record_squid(char *buffer)
392
int size, slash_count=0;
394
char *cp1, *cp2, *cpx, *eob, *eos;
396
size = strlen(buffer); /* get length of buffer */
397
eob = buffer+size; /* calculate end of buffer */
398
fmt_logrec(buffer); /* separate fields with \0's */
402
i=atoi(cp1); /* get timestamp */
404
/* format date/time field */
/* converted with localtime(); the zone text is a fixed -0000 */
405
strftime(log_rec.datetime,sizeof(log_rec.datetime),
406
"[%d/%b/%Y:%H:%M:%S -0000]",localtime(&i));
408
while (*cp1!=0 && cp1<eob) cp1++;
/* separator skips carry the eob bound: the byte at eob is the    */
/* string terminator, so an unbounded skip would leave the buffer */
409
while (*cp1==0 && cp1<eob) cp1++;
411
/* skip request size */
412
while (*cp1!=0 && cp1<eob) cp1++;
413
while (*cp1==0 && cp1<eob) cp1++;
416
/* HOSTNAME, limited to MAXHOST-1 characters or the record end */
cpx = cp1; cp2=log_rec.hostname;
417
eos = (cp1+MAXHOST)-1;
418
if (eos >= eob) eos=eob-1;
420
while ((*cp1 != '\0') && (cp1 != eos)) *cp2++ = *cp1++;
426
/* oversized hostname warning; the field text is shown in debug mode */
fprintf(stderr,"%s",msg_big_host);
427
if (debug_mode) fprintf(stderr,": %s\n",cpx);
428
else fprintf(stderr,"\n");
430
while (*cp1 != '\0') cp1++;
432
if (cp1 < eob) cp1++;
434
/* skip cache status */
/* stops at the '/' inside the field; the response code follows it */
435
while (*cp1!=0 && cp1<eob && *cp1!='/') cp1++;
439
log_rec.resp_code = atoi(cp1);
440
while (*cp1!=0 && cp1<eob) cp1++;
441
while (*cp1==0 && cp1<eob) cp1++;
444
/* transfer size: anything not starting with a digit counts as 0 */
if (*cp1<'0'||*cp1>'9') log_rec.xfer_size=0;
445
else log_rec.xfer_size = strtoul(cp1,NULL,10);
447
while (*cp1!=0 && cp1<eob) cp1++;
448
while (*cp1==0 && cp1<eob) cp1++;
450
/* HTTP request type */
454
eos = (cp1+MAXURL-1);
455
if (eos >= eob) eos = eob-1;
457
while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++;
463
fprintf(stderr,"%s",msg_big_req);
464
if (debug_mode) fprintf(stderr,": %s\n",cpx);
465
else fprintf(stderr,"\n");
467
while (*cp1 != '\0') cp1++;
469
if (cp1 < eob) cp1++;
473
/* HTTP URL requested */
/* slash_count is the number of '/' characters to consume; it is   */
/* derived from trimsquid.  NOTE(review): the branch that chooses  */
/* between the counted copy and the plain copy is not visible.     */
478
slash_count=trimsquid+2;
479
while ( (*cp1 != '\0') && (cp1 != eos) && slash_count)
482
if (*cp1 == '/') slash_count--;
485
else while ( (*cp1 != '\0') && (cp1 != eos) ) *cp2++ = *cp1++;
/* input left over (no trimming) or slashes still owed (trimming): */
/* the request did not fit                                         */
488
if ((*cp1 != '\0' && trimsquid==0) || (trimsquid && slash_count) )
492
fprintf(stderr,"%s",msg_big_req);
493
if (debug_mode) fprintf(stderr,": %s\n",cpx);
494
else fprintf(stderr,"\n");
496
while (*cp1 != '\0') cp1++;
498
if (cp1 < eob) cp1++;
502
/* IDENT (authuser) field */
505
eos = (cp1+MAXIDENT-1);
506
if (eos >= eob) eos=eob-1;
508
while (*cp1 == ' ') cp1++; /* skip white space */
510
while ( (*cp1 != ' ' && *cp1!='\0') && (cp1 < eos) ) *cp2++=*cp1++;
514
if (cp1 >= eob) return 0;
516
/* strip trailing space(s) */
517
while (*cp2==' ') *cp2--='\0';
519
/* don't do this for squid */
/* input/output byte counts are simply zeroed */
520
log_rec.ixfer_size=log_rec.oxfer_size=0;
522
/* we have no interest in the remaining fields */
526
/*********************************************/
527
/* PARSE_RECORD_W3C - w3c log handler */
528
/*********************************************/
530
/* field index structure */
/* One slot per recognised W3C column.  parse_record_w3c zeroes the */
/* whole structure before reading a "Fields:" header line, then     */
/* stores the position of each column it finds there.               */
531
struct field_index_struct
533
int date; /* Date field index */
534
int time; /* Time field index */
535
int ip; /* IP field index */
536
int username; /* Username field index */
537
int method; /* Method field index */
538
int url; /* URL field index */
539
int query; /* Querystring field index */
540
int status; /* Status code field index */
541
int size; /* Size field index */
542
int referer; /* Referrer field index */
543
int agent; /* User agent field index */
544
int fields; /* Number of fields in this format */
547
/* field structure */
/* Pointers to the individual fields of one data line; they are     */
/* zeroed for each data line and then aimed at the matching field   */
/* by parse_record_w3c, so an unset pointer means the column was    */
/* not found in that line.                                          */
550
char *date; /* Date field */
551
char *time; /* Time field */
552
char *ip; /* IP field */
553
char *username; /* Username field */
554
char *method; /* Method field */
555
char *url; /* URL field */
556
char *query; /* Querystring */
557
char *status; /* Status code */
558
char *size; /* Size field */
559
char *referer; /* Referrer field */
560
char *agent; /* User agent field */
563
/* Parses one line of a W3C extended format log from buffer         */
/* (modified in place by fmt_logrec).                               */
/* A "Fields:" header line updates the column layout, which is kept */
/* in a static structure and therefore persists across calls.  A    */
/* data line is matched against that layout and copied into         */
/* log_rec: url, hostname, resp_code, refer, xfer_size, agent,      */
/* ident and datetime.                                              */
/* Returns 0 for empty lines, for data lines whose field count      */
/* differs from the header, and for malformed date/time fields.     */
/* NOTE(review): declarations of size, eob, cp, index and           */
/* timestamp, the loops walking the fields and the return paths     */
/* for header and accepted data lines are not visible here.         */
int parse_record_w3c(char *buffer)
569
static struct field_index_struct field_index;
570
struct fields_struct fields;
571
struct tm gm_time, *local_time;
574
memset(&gm_time, 0, sizeof(struct tm));
575
size = strlen(buffer); /* get length of buffer */
576
eob = buffer + size; /* calculate end of buffer */
578
/* remove line end markers, reduce eob accordingly */
583
if (*cp == '\r' || *cp=='\n')
592
fmt_logrec(buffer); /* separate fields with \0's */
596
/* Check if the line is empty or a line suffers from the IIS
597
Null-Character bug and abort parsing if found. */
598
if (*cp == '\0') return 0;
600
/* If it's a header line ignore it or parse the Fields header if found */
604
if (!strcmp(cp, "Fields:"))
606
/* Reset the field indices */
607
memset(&field_index, 0, sizeof(struct field_index_struct));
613
/* Set the field index */
/* a zero index left by the memset above means "column not present" */
614
if (!strcmp(cp, "date")) field_index.date = index;
615
if (!strcmp(cp, "time")) field_index.time = index;
616
if (!strcmp(cp, "c-ip")) field_index.ip = index;
617
if (!strcmp(cp, "cs-method")) field_index.method = index;
618
if (!strcmp(cp, "cs-uri-stem")) field_index.url = index;
619
if (!strcmp(cp, "cs-uri-query")) field_index.query = index;
620
if (!strcmp(cp, "sc-status")) field_index.status = index;
621
if (!strcmp(cp, "cs(Referer)")) field_index.referer = index;
622
if (!strcmp(cp, "sc-bytes")) field_index.size = index;
623
if (!strcmp(cp, "cs(User-Agent)")) field_index.agent = index;
624
if (!strcmp(cp, "cs-username")) field_index.username = index;
626
/* Continue with the next field */
631
field_index.fields = index -1;
634
/* Return because this header line is completely parsed */
638
/* A data line has been found */
640
/* Check if the number of entries in this line are conform to the
641
format specified in the header */
649
if (index-1 != field_index.fields) return 0;
654
/* Reset the field pointers and begin parsing the data line */
655
memset(&fields, 0, sizeof(struct fields_struct));
659
/* Set the field pointers */
660
if (index == field_index.date) fields.date = cp;
661
if (index == field_index.time) fields.time = cp;
662
if (index == field_index.ip) fields.ip = cp;
663
if (index == field_index.method) fields.method = cp;
664
if (index == field_index.url) fields.url = cp;
665
if (index == field_index.query) fields.query = cp;
666
if (index == field_index.status) fields.status = cp;
667
if (index == field_index.referer) fields.referer = cp;
668
if (index == field_index.size) fields.size = cp;
669
if (index == field_index.agent) fields.agent = cp;
670
if (index == field_index.username) fields.username = cp;
672
/* Continue with the next data field */
682
/* every '+' in the current string is turned into a space */
while (*cp) { if (*cp=='+') *cp=' '; cp++; }
684
/* If no HTTP Method, force to "NONE" */
685
if (fields.method && (fields.method[0]=='-'))
686
fields.method="NONE";
688
/* build the quoted request; the query is appended after '?' only */
/* when it is present and is not the "-" placeholder               */
if (fields.query && (fields.query[0]!='-'))
689
snprintf(log_rec.url, MAXURL, "\"%s %s?%s\"",
690
fields.method, fields.url, fields.query);
691
else snprintf(log_rec.url, MAXURL, "\"%s %s\"",
692
fields.method, fields.url);
697
/* Save hostname; at most MAXHOST-1 bytes are copied.              */
/* NOTE(review): strncpy adds no terminator on truncation - this   */
/* presumably relies on log_rec having been zeroed by the caller.  */
if (fields.ip) strncpy(log_rec.hostname, fields.ip, MAXHOST - 1);
699
/* Save response code */
700
if (fields.status) log_rec.resp_code = atoi(fields.status);
703
/* Save referrer */
if (fields.referer) strncpy(log_rec.refer, fields.referer, MAXREF - 1);
705
/* Save transfer size */
706
if (fields.size) log_rec.xfer_size = strtoul(fields.size, NULL, 10);
708
/* Save user agent */
/* NOTE(review): the guard checking fields.agent is not visible here */
712
while (*cp) { if (*cp=='+') *cp=' '; cp++; }
713
strncpy(log_rec.agent, fields.agent, MAXAGENT - 1);
716
/* Save auth username */
717
if (fields.username) strncpy(log_rec.ident, fields.username, MAXIDENT - 1);
719
/* Parse date and time and save it */
/* date is read as year-month-day, time as hour:minute:second; a   */
/* missing separator rejects the record                            */
722
gm_time.tm_year = atoi(fields.date);
723
if (gm_time.tm_year > 1900) gm_time.tm_year-=1900;
724
while ((fields.date[0] != '\0') && (fields.date[0] != '-')) fields.date++;
725
if (fields.date[0] == '\0') return 0;
727
gm_time.tm_mon = atoi(fields.date) - 1;
728
while ((fields.date[0] != '\0') && (fields.date[0] != '-')) fields.date++;
729
if (fields.date[0] == '\0') return 0;
731
gm_time.tm_mday = atoi(fields.date);
735
gm_time.tm_hour = atoi(fields.time);
736
while ((fields.time[0] != '\0') && (fields.time[0] != ':')) fields.time++;
737
if (fields.time[0] == '\0') return 0;
739
gm_time.tm_min = atoi(fields.time);
740
while ((fields.time[0] != '\0') && (fields.time[0] != ':')) fields.time++;
741
if (fields.time[0] == '\0') return 0;
743
gm_time.tm_sec = atoi(fields.time);
746
/* Convert GMT to localtime */
/* NOTE(review): the two timestamp adjustments below look like     */
/* platform alternatives chosen by preprocessor conditionals that  */
/* are not visible here.                                           */
747
gm_time.tm_isdst = -1; /* force dst check */
748
timestamp = mktime(&gm_time); /* get time in sec */
750
timestamp-=(gm_time.tm_isdst)?altzone:timezone; /* solaris & friends */
752
timestamp = mktime(&gm_time)+gm_time.tm_gmtoff; /* glibc systems */
754
/* localtime() takes the address of the time value */
local_time = localtime(&timestamp); /* update tm struct */
755
strftime(log_rec.datetime, sizeof(log_rec.datetime),/* and format string */
756
"[%d/%b/%Y:%H:%M:%S -0000]", local_time); /* for log_rec field */