1
/* Licensed to the Apache Software Foundation (ASF) under one or more
2
* contributor license agreements. See the NOTICE file distributed with
3
* this work for additional information regarding copyright ownership.
4
* The ASF licenses this file to You under the Apache License, Version 2.0
5
* (the "License"); you may not use this file except in compliance with
6
* the License. You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
20
* Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
21
* UUNET Canada, April 16, 1995
23
* Rewritten by David Robinson. (drtr ast.cam.ac.uk)
25
* Usage: logresolve [-s filename] [-c] < access_log > new_log
28
* -s filename name of a file to record statistics
29
* -c check the DNS for a matching A record for the host.
33
* To generate meaningful statistics from an HTTPD log file, it's good
34
* to have the domain name of each machine that accessed your site, but
35
* doing this on the fly can slow HTTPD down.
37
* Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
38
* resolution off. Before running your stats program, just run your log
39
* file through this program (logresolve) and all of your IP numbers will
40
* be resolved into hostnames (where possible).
42
* logresolve takes an HTTPD access log (in the COMMON log file format,
43
* or any other format that has the IP number/domain name as the first
44
* field for that matter), and outputs the same file with all of the
45
* domain names looked up. Where no domain name can be found, the IP
* number is left in place.
48
* To minimize impact on your nameserver, logresolve has its very own
49
* internal hash-table cache. This means that each IP number will only
50
* be looked up the first time it is found in the log file.
52
* The -c option causes logresolve to apply the same check as httpd
53
* compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
54
* address, it looks up the IP addresses for the hostname and checks
55
* that one of these matches the original address.
71
#if APR_HAVE_NETINET_IN_H
72
#include <netinet/in.h>
77
#if APR_HAVE_SYS_SOCKET_H
78
#include <sys/socket.h>
80
#if APR_HAVE_ARPA_INET_H
81
#include <arpa/inet.h>
84
/* Forward declarations for the file-local helpers defined further down. */
static void cgethost(struct in_addr ipnum, char *string, int check);
85
static int get_line(char *s, int n);
86
static void stats(FILE *output);
89
#define NO_ADDRESS NO_DATA
93
/* maximum line length */
98
/* maximum length of a domain name */
103
/* number of buckets in cache hash apr_table_t */
107
* struct nsrec - record of nameservice for cache linked list
109
* ipnum - IP number; hostname - hostname; noname - nonzero if IP number has no
110
* hostname, i.e. hostname=IP number
114
struct in_addr ipnum;
121
* statistics - obvious
126
extern __declspec(dllimport) int h_errno;
128
extern int h_errno; /* some machines don't have this in their headers */
132
/* largest value for h_errno */
134
/*
 * MAX_ERR is the highest resolver error code handled by name; UNKNOWN_ERR
 * and NO_REVERSE are two extra codes placed directly above it so that they
 * can share the errors[] array with the h_errno values.
 */
#define MAX_ERR (NO_ADDRESS)
135
#define UNKNOWN_ERR (MAX_ERR+1)
136
#define NO_REVERSE (MAX_ERR+2)
138
/* Counters reported by stats(). */
static int cachehits = 0;
139
static int cachesize = 0;
140
static int entries = 0;
141
static int resolves = 0;
142
static int withname = 0;
143
/* One slot per code 0..MAX_ERR plus UNKNOWN_ERR and NO_REVERSE. */
static int errors[MAX_ERR + 3];
146
* cgethost - gets hostname by IP address, caching, and adding unresolvable
147
* IP numbers with their IP number as hostname, setting noname flag
150
/*
 * cgethost - look ipnum up in the nscache hash table, resolving and caching
 * it when it is not there yet, and copy the resulting name into string.
 *
 * ipnum  - IPv4 address to resolve
 * string - output buffer; receives at most MAXDNAME characters followed by
 *          a terminating NUL, so it must hold MAXDNAME + 1 bytes
 * check  - NOTE(review): presumably enables the forward-lookup verification
 *          described for the -c option; the test on it is not visible in
 *          this excerpt - confirm against the full file
 *
 * NOTE(review): this excerpt is missing lines (braces, declarations and
 * several statements); the comments below describe only what is shown.
 */
static void cgethost (struct in_addr ipnum, char *string, int check)
152
struct nsrec **current, *new;
153
struct hostent *hostdata;
156
/* Bucket index: the address plus its 8-, 16- and 24-bit right shifts, modulo BUCKETS. */
current = &nscache[((ipnum.s_addr + (ipnum.s_addr >> 8) +
157
(ipnum.s_addr >> 16) + (ipnum.s_addr >> 24)) % BUCKETS)];
159
/* Walk the bucket's chain until the address matches or the chain ends. */
while (*current != NULL && ipnum.s_addr != (*current)->ipnum.s_addr)
160
current = &(*current)->next;
162
/* Not cached yet: a new record is allocated for this address. */
if (*current == NULL) {
164
new = (struct nsrec *) malloc(sizeof(struct nsrec));
167
fprintf(stderr, "Insufficient memory\n");
175
/* Reverse lookup of the address. */
hostdata = gethostbyaddr((const char *) &ipnum, sizeof(struct in_addr),
177
/*
 * Reverse lookup failed: h_errno values above MAX_ERR are counted under
 * UNKNOWN_ERR, the h_errno value is stored in noname, and the dotted-quad
 * text of the address is used as the name.
 */
if (hostdata == NULL) {
178
if (h_errno > MAX_ERR)
179
errors[UNKNOWN_ERR]++;
182
new->noname = h_errno;
183
name = strdup(inet_ntoa(ipnum));
187
/* Reverse lookup succeeded: keep a heap copy of the returned h_name. */
name = strdup(hostdata->h_name);
191
fprintf(stderr, "Insufficient memory\n");
194
/* Forward lookup of the name just obtained. */
hostdata = gethostbyname(name);
195
if (hostdata != NULL) {
198
/* Search the name's address list for the original address. */
for (hptr = hostdata->h_addr_list; *hptr != NULL; hptr++)
199
if (((struct in_addr *) (*hptr))->s_addr == ipnum.s_addr)
204
/*
 * hostdata is NULL here: report the mismatch, mark the record NO_REVERSE
 * and fall back to the dotted-quad text of the address.
 */
if (hostdata == NULL) {
205
fprintf(stderr, "Bad host: %s != %s\n", name,
207
new->noname = NO_REVERSE;
209
name = strdup(inet_ntoa(ipnum));
210
errors[NO_REVERSE]++;
214
/* The record takes the name pointer chosen above; NULL means strdup failed. */
new->hostname = name;
215
if (new->hostname == NULL) {
217
fprintf(stderr, "Insufficient memory\n");
224
/* size of string == MAXDNAME +1 */
225
/* strncpy may leave the buffer unterminated, hence the explicit NUL below. */
strncpy(string, (*current)->hostname, MAXDNAME);
226
string[MAXDNAME] = '\0';
230
* prints various statistics to output
233
/*
 * stats - write the run's counters, the per-error counts and a dump of
 * every cached record to output.
 *
 * output - stream that receives the report
 *
 * NOTE(review): this excerpt is missing lines (braces, the declarations of
 * i and ipstring, and some guards); the comments below describe only what
 * is shown.
 */
static void stats (FILE *output)
237
struct nsrec *current;
238
char *errstring[MAX_ERR + 3];
240
/* Default every slot to "Unknown error", then name the known codes. */
for (i = 0; i < MAX_ERR + 3; i++)
241
errstring[i] = "Unknown error";
242
errstring[HOST_NOT_FOUND] = "Host not found";
243
errstring[TRY_AGAIN] = "Try again";
244
errstring[NO_RECOVERY] = "Non recoverable error";
245
errstring[NO_DATA] = "No data record";
246
/*
 * Where NO_ADDRESS is the alias for NO_DATA defined earlier in this file,
 * this assignment overwrites the entry written on the previous line.
 */
errstring[NO_ADDRESS] = "No address";
247
errstring[NO_REVERSE] = "No reverse entry";
249
fprintf(output, "logresolve Statistics:\n");
251
fprintf(output, "Entries: %d\n", entries);
252
fprintf(output, " With name : %d\n", withname);
253
fprintf(output, " Resolves : %d\n", resolves);
254
/* The visible guards print an error count only when it is non-zero. */
if (errors[HOST_NOT_FOUND])
255
fprintf(output, " - Not found : %d\n", errors[HOST_NOT_FOUND]);
256
if (errors[TRY_AGAIN])
257
fprintf(output, " - Try again : %d\n", errors[TRY_AGAIN]);
259
fprintf(output, " - No data : %d\n", errors[NO_DATA]);
260
if (errors[NO_ADDRESS])
261
fprintf(output, " - No address: %d\n", errors[NO_ADDRESS]);
262
if (errors[NO_REVERSE])
263
fprintf(output, " - No reverse: %d\n", errors[NO_REVERSE]);
264
fprintf(output, "Cache hits : %d\n", cachehits);
265
fprintf(output, "Cache size : %d\n", cachesize);
266
fprintf(output, "Cache buckets : IP number * hostname\n");
268
/* Dump every record of every bucket, following each chain to its end. */
for (i = 0; i < BUCKETS; i++)
269
for (current = nscache[i]; current != NULL; current = current->next) {
270
ipstring = inet_ntoa(current->ipnum);
271
/* noname == 0: printed with the " - " separator. */
if (current->noname == 0)
272
fprintf(output, " %3d %15s - %s\n", i, ipstring,
275
/* Codes beyond the errstring table get a fixed "Unknown error" text. */
if (current->noname > MAX_ERR + 2)
276
fprintf(output, " %3d %15s : Unknown error\n", i,
279
/* Remaining case visible here: the text comes from errstring[noname]. */
fprintf(output, " %3d %15s : %s\n", i, ipstring,
280
errstring[current->noname]);
287
* gets a line from stdin
290
/*
 * get_line - read one line from stdin into s using fgets.
 *
 * s - destination buffer
 * n - size passed to fgets, i.e. the capacity of s in bytes
 *
 * NOTE(review): the return statements and whatever is done with the
 * newline position are missing from this excerpt - confirm against the
 * full file before relying on the return value or on newline stripping.
 */
static int get_line (char *s, int n)
294
/* fgets returning NULL means no line could be read. */
if (!fgets(s, n, stdin))
296
/* Locate the newline that fgets keeps in the buffer, if there is one. */
cp = strchr(s, '\n');
302
int main (int argc, char *argv[])
304
struct in_addr ipnum;
305
char *bar, hoststring[MAXDNAME + 1], line[MAXLINE], *statfile;
308
#if defined(WIN32) || (defined(NETWARE) && defined(USE_WINSOCK))
309
/* If we apr'ify this code, apr_pool_create/apr_pool_destroy
310
* should perform the WSAStartup/WSACleanup for us.
313
WSAStartup(MAKEWORD(2, 0), &wsaData);
318
for (i = 1; i < argc; i++) {
319
if (strcmp(argv[i], "-c") == 0)
321
else if (strcmp(argv[i], "-s") == 0) {
323
fprintf(stderr, "logresolve: missing filename to -s\n");
330
fprintf(stderr, "Usage: logresolve [-s statfile] [-c] < input > output\n");
335
for (i = 0; i < BUCKETS; i++)
337
for (i = 0; i < MAX_ERR + 2; i++)
340
while (get_line(line, MAXLINE)) {
344
if (!apr_isdigit(line[0])) { /* short cut */
349
bar = strchr(line, ' ');
352
ipnum.s_addr = inet_addr(line);
353
if (ipnum.s_addr == 0xffffffffu) {
363
cgethost(ipnum, hoststring, check);
365
printf("%s %s\n", hoststring, bar + 1);
370
#if defined(WIN32) || (defined(NETWARE) && defined(USE_WINSOCK))
374
if (statfile != NULL) {
376
fp = fopen(statfile, "w");
378
fprintf(stderr, "logresolve: could not open statistics file '%s'\n"