5
/* Placeholder stored in the host/IP maps while a DNS query for the
 * entry is still outstanding. */
#define RESOLVING_STRING "..."
6
/* NOTE(review): not referenced in the visible code; presumably the text
 * shown when nothing could be resolved -- confirm against its users. */
#define NO_RESOLVED_INFO "?"
8
extern map *um, /* urlmap */
11
*rm, /* referrermap */
19
/* NOTE(review): these share names with the want_host / want_ip flags set on
 * struct logbits; how the queues are used is not visible here. */
extern Queue want_host, want_ip;
22
/* adns resolver state handed to adns_submit(), adns_submit_reverse() and
 * adns_check(). */
extern adns_state adns;
25
/* CommonLogParser handles common and combined, despite its name.
 *
 * parse() fills *b from one log line: host/IP map positions and hashes
 * (optionally starting an adns lookup), the time, the URL position and
 * hash, retcode, bytes and, when one is present, the referrer.
 * NOTE(review): only fragments of the body are visible here, so the return
 * values and error paths are not documented. */
26
int CommonLogParser::parse(char *logline, struct logbits *b)
28
char *bufsp, *bufcp, *ptr;
31
struct sockaddr_in addr;
36
/* the first space in the line; NOTE(review): presumably the end of the
 * client address field -- the lines that use it are not visible here */
bufcp = strchr(logline, ' ');
43
/* quickly figure out if this is an IP or a host. We do this by
44
* checking each character of it; if every character is either a
45
* digit or a dot, then it's an IP (no host can just be digits)
47
for(workptr = bufsp ; *workptr ; workptr++)
49
// NOTE(review): isdigit() expects a value in the unsigned char range;
// confirm workptr cannot yield a negative char here.
if (isdigit(*workptr)) continue;
50
if (*workptr == '.') continue;
52
/* it's neither a digit nor a dot */
59
/* it is a hostname */
61
/* insert will return existing position if it exists */
62
b->host_pos = hm->insert(ptr);
63
b->host_hash = TTHash(ptr);
64
b->want_host = false; /* because we already have it */
71
dprintf("lookup %s\n", ptr);
72
/* fire off a forward (adns_r_a) query with adns.
 * NOTE(review): the value returned by adns_submit() is not checked. */
73
b->dns_query = new adns_query;
74
adns_submit(adns, ptr, adns_r_a,
75
(adns_queryflags) NULL, NULL, b->dns_query);
77
/* placeholder in the IP map until the answer arrives */
b->ip_pos = im->insert(RESOLVING_STRING);
78
b->ip_hash = TTHash(RESOLVING_STRING);
81
#endif /* HAVE_ADNS_H */
83
/* don't resolve the IP, and use -1 which means
84
* "there is nothing of interest here" */
93
/* the logged address is already an IP, so it goes straight into the IP map */
b->ip_pos = im->insert(ptr);
94
b->ip_hash = TTHash(ptr);
95
b->want_ip = false; /* we have the IP already */
101
/* this is so we'll get a display like
102
..resolving.. [212.13.201.101]
104
clueful.shagged.org [212.13.201.101]
106
b->host_pos = hm->insert(RESOLVING_STRING);
107
b->host_hash = TTHash(RESOLVING_STRING);
109
b->want_host = true; /* we're going to get this */
111
/* construct network byte order num
112
** for adns_submit_reverse
114
addr.sin_family = AF_INET;
115
// NOTE(review): inet_addr() signals a malformed address with INADDR_NONE;
// no check for that is visible here.
addr.sin_addr.s_addr = inet_addr(ptr);
117
// NOTE(review): the value returned by adns_submit_reverse() is not checked.
b->dns_query = new adns_query;
118
adns_submit_reverse(adns, (struct sockaddr *)&addr,
119
adns_r_ptr, (adns_queryflags)adns_qf_owner,
123
#endif /* HAVE_ADNS_H */
125
/* don't resolve the host, use the IP */
126
b->host_pos = hm->insert(ptr);
127
b->host_hash = TTHash(ptr);
128
b->want_host = false; /* we are not resolving */
132
/* now skip to date */
133
if (!(bufcp = strchr(bufcp, '[')))
138
b->time = now; /* be lazy: use now rather than parsing the logged date */
140
/* NOTE(review): fixed-width skip; no check that 29 more characters exist
 * is visible here */
bufcp += 29; /* from dayofmonth to first char of method */
142
/* URL. processURL() will update bufcp to point at the end so we can
143
* continue processing from there */
144
if ((ptr = this->processURL(&bufcp)) == NULL)
147
/* get url_pos for this url; for circle_struct (c) later */
148
b->url_pos = um->insert(ptr);
149
b->url_hash = TTHash(ptr);
152
b->retcode = atoi(bufcp);
156
b->bytes = atoi(bufcp);
159
/* this may be the end of the line if it's a common log; if
160
* it's combined then we have referrer and user agent left */
161
if (!(bufsp = strchr(bufcp, '"')))
163
/* nothing left, it's common */
165
/* fill in a dummy value for referrer map */
166
b->ref_pos = rm->insert("Unknown");
170
bufsp += 1; /* skip to first character of referrer */
172
/* find the end of referrer and null it */
173
if (!(bufcp = strchr(bufsp, '"')))
175
*bufcp = (char) NULL;
177
/* unless they want to keep it, skip over the protocol, ie http:// */
178
if ((cf.preserve_ref_protocol == 0) && (bufcp = strstr(bufsp, "://")))
182
/* we could munge the referrer now; cut down the path elements,
183
* remove querystring, but we'll leave that for a later date */
185
// b->referrer = bufsp;
187
/* get ref_pos for this url; for circle_struct (c) later */
188
b->ref_pos = rm->insert(bufsp);
189
b->ref_hash = TTHash(bufsp);
191
/* user-agent is as yet unused */
197
int AtopLogParser::parse(char *logline, struct logbits *b)
203
/* generic parser helper functions */
205
/* Extract the request URL from the text at *buf.
 *
 * Skips the request method, finds the protocol marker that ends the URL,
 * NUL-terminates the URL in place, passes it through mungeURL() and then
 * looks for the closing quote of the request.
 * NOTE(review): CommonLogParser::parse() tests the result against NULL and
 * expects *buf to be moved past the URL; the return statements themselves
 * are not visible here. */
char *LogParser::processURL(char **buf) /* {{{ */
207
char *bufcp, *realstart, *endptr;
212
/* this skips past the method */
213
if (!(bufcp = strchr(bufcp, ' ')) )
215
++bufcp; // skip space
219
/* find the end of url; locate a protocol, out of the following list */
221
!(endptr = strstr(bufcp, " HTTP/"))
222
#if WITH_REAL_PROTOCOLS
223
/* v0.12: RealServer logs are very similar to Apache's,
224
* so we can support those too! Cool! */
225
&& !(endptr = strstr(bufcp, " RTSP/")) /* RealStreaming UDP */
226
&& !(endptr = strstr(bufcp, " RTSPT/")) /* RealStreaming TCP */
227
&& !(endptr = strstr(bufcp, " RTSPH/")) /* RealStreaming HTTP */
232
/* null the space in front of it */
233
*endptr = (char) NULL;
235
/* TODO maybe we can use the protocol someday.. */
238
/* this is all mungeURL is interested in */
239
length = endptr - realstart;
241
/* now find the finishing ", so parse* can deal with rest of line;
 * the search starts at endptr+1 because *endptr is now a NUL */
242
if (!(endptr = strstr(endptr+1, "\" ")))
245
/* the int returned by mungeURL() is ignored */
mungeURL(&realstart, &length);
247
/* feed back where the end of the URL is */
253
/* munge the url passed in *url inplace;
254
* *length is the original length, and we update it once we're done.
*
* Depending on cf this drops the query string (keep_querystring unset),
* keeps only the leading keep_segments path segments, and lowercases the
* URL (lowercase_urls).
* NOTE(review): the meaning of the int return value is not visible here. */
255
int LogParser::mungeURL(char **url, int *length) /* {{{ */
258
char *bufcp, *endptr, *workptr;
260
endptr = *url + *length;
261
/* terminate the URL at its stated length */
*endptr = (char) NULL;
263
/* do we want to keep the query string? */
264
if (!cf.keep_querystring)
266
/* null the first ? or & - anything after
267
* it is unrequired; it's the querystring */
268
/* '&' is only searched for when the URL contains no '?' */
if ((workptr = strchr(*url, '?')) ||
269
(workptr = strchr(*url, '&')) )
271
/* we might have overrun the end of the real URL and
272
* gone into referrer or something. Check that. */
273
if (workptr < endptr)
276
*workptr = (char) NULL;
282
/* how many path segments of the url are we keeping? */
283
if (cf.keep_segments > 0)
285
/* given a path of /foo/bar/moo/ and a keep_segments of 2,
286
* we want the / after the second element */
288
bufcp = workptr = *url + 1; /* skip leading / */
290
//dprintf("workptr is %s\n", workptr);
292
/* now skip the next keep_segments slashes */
293
while (skipped < cf.keep_segments && workptr < endptr)
299
/* discovered a slash */
302
/* bufcp becomes the char after / */
306
/* if we hit the end before finding the right number
307
* of slashes, we just keep it all */
308
if (workptr == endptr)
311
*bufcp = (char) NULL;
315
/* do we want to lowercase it all? */
316
if (cf.lowercase_urls)
319
while(workptr < endptr)
321
/* NOTE(review): tolower() expects a value in the unsigned char range (or
 * EOF) but *workptr is a plain char */
*workptr = tolower(*workptr);
332
/* adns; check to see if any queries have returned, and populate the circle
333
* as required. Be careful of any circle entries that have expired since
334
* the query was started.
*
* Walks the circle with c->walk(); for each entry that still has want_host
* or want_ip set, adns_check() is asked for the answer to its dns_query and
* the host/IP map positions and hashes of the entry are updated. */
335
void collect_dns_responses()
340
int got_host = false, got_ip = false;
342
/* check every circle entry that has want_host or want_ip */
344
while(c->walk(&lb) != -1)
346
/* nothing outstanding for this entry */
if (lb->want_host == false && lb->want_ip == false)
349
// dprintf("adns_check for %p\n", lb);
350
/* this circle slot has an outstanding query */
351
err = adns_check(adns, lb->dns_query, &answer, NULL);
359
/* some form of reply. Be it success or error, this query is
362
// remember which lookup this entry was waiting for before want_host is cleared
got_host = lb->want_host;
363
got_ip = lb->want_ip;
365
lb->want_host = false;
367
// NOTE(review): lb->dns_query is not reset after this delete in the visible lines
delete lb->dns_query;
369
// NOTE(review): adns hands back a malloc()ed answer that the caller must
// free; no free() is visible in these fragments -- confirm.
if (answer->status == adns_s_ok)
371
/* we have a reply */
372
// dprintf("got a reply\n");
375
/* store the resolved host name in the hostmap */
376
lb->host_pos = hm->insert(*answer->rrs.str);
377
lb->host_hash = TTHash(*answer->rrs.str);
381
/* put the IP into the ipmap */
383
im->insert(inet_ntoa(*answer->rrs.inaddr));
385
TTHash(inet_ntoa(*answer->rrs.inaddr));
392
/* assume this IP has no reverse info; so we'll put the IP
393
* into Host as well; this is so that the Host list will be
394
* maintained properly (if we just put ? into Host, then
395
* they bunch up together)
398
// NOTE(review): im->reverse() presumably maps a map position back to its
// stored string -- confirm against the map class.
lb->host_pos = hm->insert(im->reverse(lb->ip_pos));
399
lb->host_hash = TTHash(im->reverse(lb->ip_pos));
404
#endif /* HAVE_ADNS_H */