1
/* Host name resolution and matching.
2
Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
4
This file is part of GNU Wget.
6
GNU Wget is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2 of the License, or
9
(at your option) any later version.
11
GNU Wget is distributed in the hope that it will be useful,
12
but WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
GNU General Public License for more details.
16
You should have received a copy of the GNU General Public License
17
along with Wget; if not, write to the Free Software
18
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
In addition, as a special exception, the Free Software Foundation
21
gives permission to link the code of its release of Wget with the
22
OpenSSL project's "OpenSSL" library (or with modified versions of it
23
that use the same license as the "OpenSSL" library), and distribute
24
the linked executables. You must obey the GNU General Public License
25
in all respects for all of the code used other than "OpenSSL". If you
26
modify this file, you may extend this exception to your version of the
27
file, but you are not obligated to do so. If you do not wish to do
28
so, delete this exception statement from your version. */
44
#include <sys/types.h>
48
# define SET_H_ERRNO(err) WSASetLastError(err)
50
# include <sys/socket.h>
51
# include <netinet/in.h>
53
# include <arpa/inet.h>
56
# define SET_H_ERRNO(err) ((void)(h_errno = (err)))
60
#define NO_ADDRESS NO_DATA
63
#ifdef HAVE_SYS_UTSNAME_H
64
# include <sys/utsname.h>
85
int ip_default_family = AF_INET6;
87
int ip_default_family = AF_INET;
90
/* Mapping from known host names to lists of their addresses. */
92
static struct hash_table *host_name_addresses_map;
94
/* Lists of addresses. This should eventually be extended to handle
98
int count; /* number of addresses */
99
ip_address *addresses; /* pointer to the string of addresses */
101
int faulty; /* number of addresses known not to work. */
102
int refcount; /* so we know whether to free it or not. */
105
/* Get the bounds of the address list. */
108
address_list_get_bounds (struct address_list *al, int *start, int *end)
114
/* Copy address number INDEX to IP_STORE. */
117
address_list_copy_one (struct address_list *al, int index, ip_address *ip_store)
119
assert (index >= al->faulty && index < al->count);
120
memcpy (ip_store, al->addresses + index, sizeof (ip_address));
123
/* Check whether two address lists have all their IPs in common. */
126
address_list_match_all (struct address_list *al1, struct address_list *al2)
130
if (al1->count != al2->count)
132
return 0 == memcmp (al1->addresses, al2->addresses,
133
al1->count * sizeof (ip_address));
136
/* Mark the INDEXth element of AL as faulty, so that the next time
137
this address list is used, the faulty element will be skipped. */
140
address_list_set_faulty (struct address_list *al, int index)
142
/* We assume that the address list is traversed in order, so that a
143
"faulty" attempt is always preceded with all-faulty addresses,
144
and this is how Wget uses it. */
145
assert (index == al->faulty);
148
if (al->faulty >= al->count)
149
/* All addresses have been proven faulty. Since there's not much
150
sense in returning the user an empty address list the next
151
time, we'll rather make them all clean, so that they can be
156
#ifdef HAVE_GETADDRINFO
158
* address_list_from_addrinfo
160
* This function transforms an addrinfo linked list into an address_list.
163
* addrinfo* Linked list of addrinfo
166
* address_list* Newly allocated address_list
168
static struct address_list *
169
address_list_from_addrinfo (struct addrinfo *ai)
171
struct address_list *al;
172
struct addrinfo *ai_head = ai;
176
for (ai = ai_head; ai; ai = ai->ai_next)
177
if (ai->ai_family == AF_INET || ai->ai_family == AF_INET6)
182
al = xmalloc (sizeof (struct address_list));
183
al->addresses = xmalloc (cnt * sizeof (ip_address));
188
for (i = 0, ai = ai_head; ai; ai = ai->ai_next)
189
if (ai->ai_family == AF_INET6)
191
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ai->ai_addr;
192
memcpy (al->addresses + i, &sin6->sin6_addr, 16);
195
else if (ai->ai_family == AF_INET)
197
struct sockaddr_in *sin = (struct sockaddr_in *)ai->ai_addr;
198
map_ipv4_to_ip ((ip4_address *)&sin->sin_addr, al->addresses + i);
205
/* Create an address_list out of a NULL-terminated vector of
206
addresses, as returned by gethostbyname. */
207
static struct address_list *
208
address_list_from_vector (char **h_addr_list)
212
struct address_list *al = xmalloc (sizeof (struct address_list));
214
while (h_addr_list[count])
219
al->addresses = xmalloc (count * sizeof (ip_address));
222
for (i = 0; i < count; i++)
223
map_ipv4_to_ip ((ip4_address *)h_addr_list[i], al->addresses + i);
229
/* Like address_list_from_vector, but initialized with a single
232
static struct address_list *
233
address_list_from_single (ip_address *addr)
235
struct address_list *al = xmalloc (sizeof (struct address_list));
238
al->addresses = xmalloc (sizeof (ip_address));
240
memcpy (al->addresses, addr, sizeof (ip_address));
246
address_list_delete (struct address_list *al)
248
xfree (al->addresses);
253
address_list_release (struct address_list *al)
256
DEBUGP (("Releasing %p (new refcount %d).\n", al, al->refcount));
257
if (al->refcount <= 0)
259
DEBUGP (("Deleting unused %p.\n", al));
260
address_list_delete (al);
265
* wget_sockaddr_set_address
267
* This function takes a wget_sockaddr and fills in the protocol type,
268
* the port number and the address; a NULL address means wildcard.
269
* An unsupported address family will abort the whole program.
272
* wget_sockaddr* The space to be filled
273
* int The wished protocol
274
* unsigned short The port
275
* const ip_address The binary IP address
278
* - Only modify 1. param
281
wget_sockaddr_set_address (wget_sockaddr *sa,
282
int ip_family, unsigned short port, ip_address *addr)
284
if (ip_family == AF_INET)
286
sa->sin.sin_family = ip_family;
287
sa->sin.sin_port = htons (port);
289
memset (&sa->sin.sin_addr, 0, sizeof(ip4_address));
293
if (!map_ip_to_ipv4 (addr, &addr4))
294
/* should the callers have prevented this? */
296
memcpy (&sa->sin.sin_addr, &addr4, sizeof(ip4_address));
301
if (ip_family == AF_INET6)
303
sa->sin6.sin6_family = ip_family;
304
sa->sin6.sin6_port = htons (port);
306
memset (&sa->sin6.sin6_addr, 0 , 16);
308
memcpy (&sa->sin6.sin6_addr, addr, 16);
316
* wget_sockaddr_set_port
318
* This function only fills in the port of the socket information.
319
* If the protocol is not supported nothing is done.
320
* An unsupported address family will abort the whole program.
323
* that the IP-Protocol already is set.
326
* wget_sockaddr* The space where the port should be entered
327
* unsigned short The port that should be entered, in host order
330
* - Only modify 1. param
333
wget_sockaddr_set_port (wget_sockaddr *sa, unsigned short port)
335
if (sa->sa.sa_family == AF_INET)
337
sa->sin.sin_port = htons (port);
341
if (sa->sa.sa_family == AF_INET6)
343
sa->sin6.sin6_port = htons (port);
351
* wget_sockaddr_get_addr
353
* This function returns the address from a sockaddr as a byte string.
354
* An unsupported address family will abort the whole program.
357
* that the IP-Protocol already is set.
360
* wget_sockaddr* Socket Information
363
* unsigned char * IP address as byte string.
366
wget_sockaddr_get_addr (wget_sockaddr *sa)
368
if (sa->sa.sa_family == AF_INET)
369
return &sa->sin.sin_addr;
371
if (sa->sa.sa_family == AF_INET6)
372
return &sa->sin6.sin6_addr;
380
* wget_sockaddr_get_port
382
* This function only returns the port from the input structure.
383
* An unsupported address family will abort the whole program.
386
* that the IP-Protocol already is set.
389
* wget_sockaddr* Information where to get the port
392
* unsigned short Port Number in host order.
395
wget_sockaddr_get_port (const wget_sockaddr *sa)
397
if (sa->sa.sa_family == AF_INET)
398
return htons (sa->sin.sin_port);
400
if (sa->sa.sa_family == AF_INET6)
401
return htons (sa->sin6.sin6_port);
404
/* do not complain about return nothing */
411
* This function returns the length of the sockaddr corresponding to
412
* the actual preferred protocol (for bind, connect, etc.).
413
* An unsupported address family will abort the whole program.
416
* that the IP-Protocol already is set.
419
* - Public IP-Family Information
422
* socklen_t structure length for socket options
427
if (ip_default_family == AF_INET)
428
return sizeof (struct sockaddr_in);
430
if (ip_default_family == AF_INET6)
431
return sizeof (struct sockaddr_in6);
434
/* do not complain about return nothing */
439
* Map an IPv4 address to the internal address format.
442
map_ipv4_to_ip (ip4_address *ipv4, ip_address *ip)
445
static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
446
memcpy ((char *)ip + 12, ipv4 , 4);
447
memcpy ((char *)ip + 0, ipv64, 12);
449
if ((char *)ip != (char *)ipv4)
450
memcpy (ip, ipv4, 4);
454
/* Detect whether an IP address represents an IPv4 address and, if so,
455
copy it to IPV4. 0 is returned on failure.
456
This operation always succeeds when Wget is compiled without IPv6.
457
If IPV4 is NULL, don't copy, just detect. */
460
map_ip_to_ipv4 (ip_address *ip, ip4_address *ipv4)
463
static unsigned char ipv64[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
464
if (0 != memcmp (ip, ipv64, 12))
467
memcpy (ipv4, (char *)ip + 12, 4);
470
memcpy (ipv4, (char *)ip, 4);
475
/* Versions of gethostbyname and getaddrinfo that support timeout. */
479
struct ghbnwt_context {
480
const char *host_name;
481
struct hostent *hptr;
485
gethostbyname_with_timeout_callback (void *arg)
487
struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
488
ctx->hptr = gethostbyname (ctx->host_name);
491
/* Just like gethostbyname, except it times out after TIMEOUT seconds.
492
In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
493
The function makes sure that when NULL is returned for reasons
494
other than timeout, errno is reset. */
496
static struct hostent *
497
gethostbyname_with_timeout (const char *host_name, double timeout)
499
struct ghbnwt_context ctx;
500
ctx.host_name = host_name;
501
if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
503
SET_H_ERRNO (HOST_NOT_FOUND);
512
#else /* ENABLE_IPV6 */
514
struct gaiwt_context {
517
const struct addrinfo *hints;
518
struct addrinfo **res;
523
getaddrinfo_with_timeout_callback (void *arg)
525
struct gaiwt_context *ctx = (struct gaiwt_context *)arg;
526
ctx->exit_code = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
529
/* Just like getaddrinfo, except it times out after TIMEOUT seconds.
530
In case of timeout, the EAI_SYSTEM error code is returned and errno
531
is set to ETIMEDOUT. */
534
getaddrinfo_with_timeout (const char *node, const char *service,
535
const struct addrinfo *hints, struct addrinfo **res,
538
struct gaiwt_context ctx;
540
ctx.service = service;
544
if (run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
549
return ctx.exit_code;
552
#endif /* ENABLE_IPV6 */
554
/* Pretty-print ADDR. When compiled without IPv6, this is the same as
555
inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
559
pretty_print_address (ip_address *addr)
563
static char buf[128];
565
if (map_ip_to_ipv4 (addr, &addr4))
566
return inet_ntoa (*(struct in_addr *)&addr4);
568
if (!inet_ntop (AF_INET6, addr, buf, sizeof (buf)))
572
return inet_ntoa (*(struct in_addr *)addr);
575
/* Add host name HOST with the address ADDR_TEXT to the cache.
576
ADDR_LIST is a NULL-terminated list of addresses, as in struct
580
cache_host_lookup (const char *host, struct address_list *al)
582
if (!host_name_addresses_map)
583
host_name_addresses_map = make_nocase_string_hash_table (0);
586
hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
592
debug_logprintf ("Caching %s =>", host);
593
for (i = 0; i < al->count; i++)
594
debug_logprintf (" %s", pretty_print_address (al->addresses + i));
595
debug_logprintf ("\n");
600
struct address_list *
601
lookup_host (const char *host, int silent)
603
struct address_list *al = NULL;
607
/* First, try to check whether the address is already a numeric
611
if (inet_pton (AF_INET6, host, &addr) > 0)
612
return address_list_from_single (&addr);
615
addr_ipv4 = (uint32_t)inet_addr (host);
616
if (addr_ipv4 != (uint32_t)-1)
618
/* ADDR is defined to be in network byte order, which is what
619
this returns, so we can just copy it to STORE_IP. */
620
map_ipv4_to_ip ((ip4_address *)&addr_ipv4, &addr);
621
return address_list_from_single (&addr);
624
if (host_name_addresses_map)
626
al = hash_table_get (host_name_addresses_map, host);
629
DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
636
logprintf (LOG_VERBOSE, _("Resolving %s... "), host);
638
/* Host name lookup goes on below. */
640
#ifdef HAVE_GETADDRINFO
642
struct addrinfo hints, *ai;
645
memset (&hints, 0, sizeof (hints));
646
if (ip_default_family == AF_INET)
647
hints.ai_family = AF_INET;
649
hints.ai_family = PF_UNSPEC;
650
hints.ai_socktype = SOCK_STREAM;
651
err = getaddrinfo_with_timeout (host, NULL, &hints, &ai, opt.dns_timeout);
653
if (err != 0 || ai == NULL)
656
logprintf (LOG_VERBOSE, _("failed: %s.\n"),
657
err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
660
al = address_list_from_addrinfo (ai);
665
struct hostent *hptr;
666
hptr = gethostbyname_with_timeout (host, opt.dns_timeout);
671
if (errno != ETIMEDOUT)
672
logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
674
logputs (LOG_VERBOSE, _("failed: timed out.\n"));
678
/* Do all systems have h_addr_list, or is it a newer thing? If
679
the latter, use address_list_from_single. */
680
al = address_list_from_vector (hptr->h_addr_list);
684
/* Print the addresses determined by DNS lookup, but no more than
689
int printmax = al->count <= 3 ? al->count : 3;
690
for (i = 0; i < printmax; i++)
692
logprintf (LOG_VERBOSE, "%s",
693
pretty_print_address (al->addresses + i));
694
if (i < printmax - 1)
695
logputs (LOG_VERBOSE, ", ");
697
if (printmax != al->count)
698
logputs (LOG_VERBOSE, ", ...");
699
logputs (LOG_VERBOSE, "\n");
702
/* Cache the lookup information. */
704
cache_host_lookup (host, al);
709
/* Determine whether a URL is acceptable to be followed, according to
710
a list of domains to accept. */
712
accept_domain (struct url *u)
714
assert (u->host != NULL);
717
if (!sufmatch ((const char **)opt.domains, u->host))
720
if (opt.exclude_domains)
722
if (sufmatch ((const char **)opt.exclude_domains, u->host))
728
/* Check whether WHAT is matched in LIST, each element of LIST being a
729
pattern to match WHAT against, using backward matching (see
730
match_backwards() in utils.c).
732
If an element of LIST matched, 1 is returned, 0 otherwise. */
734
sufmatch (const char **list, const char *what)
739
for (i = 0; list[i]; i++)
741
for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
742
if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
744
/* The domain must be first to reach to beginning. */
751
/* Print error messages for host errors. */
755
/* Can't use switch since some constants are equal (at least on my
756
system), and the compiler signals "duplicate case value". */
757
if (error == HOST_NOT_FOUND
758
|| error == NO_RECOVERY
760
|| error == NO_ADDRESS
761
|| error == TRY_AGAIN)
762
return _("Host not found");
764
return _("Unknown error");
768
host_cleanup_mapper (void *key, void *value, void *arg_ignored)
770
struct address_list *al;
772
xfree (key); /* host */
774
al = (struct address_list *)value;
775
assert (al->refcount == 1);
776
address_list_delete (al);
784
if (host_name_addresses_map)
786
hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
787
hash_table_destroy (host_name_addresses_map);
788
host_name_addresses_map = NULL;