2
Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2003
3
Free Software Foundation, Inc.
2
Copyright (C) 2005 Free Software Foundation, Inc.
5
4
This file is part of GNU Wget.
62
63
/* Supported schemes: */
63
64
static struct scheme_data supported_schemes[] =
65
{ "http://", DEFAULT_HTTP_PORT, 1 },
66
{ "http", "http://", DEFAULT_HTTP_PORT, 1 },
67
{ "https://", DEFAULT_HTTPS_PORT, 1 },
68
{ "https", "https://", DEFAULT_HTTPS_PORT, 1 },
69
{ "ftp://", DEFAULT_FTP_PORT, 1 },
70
{ "ftp", "ftp://", DEFAULT_FTP_PORT, 1 },
71
72
/* SCHEME_INVALID */
75
76
/* Forward declarations: */
77
78
static int path_simplify PARAMS ((char *));
79
/* Support for encoding and decoding of URL strings. We determine
80
whether a character is unsafe through static table lookup. This
81
code assumes ASCII character set and 8-bit chars. */
80
/* Support for escaping and unescaping of URL strings. */
82
/* Table of "reserved" and "unsafe" characters. Those terms are
83
rfc1738-speak, as such largely obsoleted by rfc2396 and later
84
specs, but the general idea remains.
86
A reserved character is the one that you can't decode without
87
changing the meaning of the URL. For example, you can't decode
88
"/foo/%2f/bar" into "/foo///bar" because the number and contents of
89
path components is different. Non-reserved characters can be
90
changed, so "/foo/%78/bar" is safe to change to "/foo/x/bar". The
91
unsafe characters are loosely based on rfc1738, plus "$" and ",",
92
as recommended by rfc2396, and minus "~", which is very frequently
93
used (and sometimes unrecognized as %7E by broken servers).
95
An unsafe character is the one that should be encoded when URLs are
96
placed in foreign environments. E.g. space and newline are unsafe
97
in HTTP contexts because HTTP uses them as separator and line
98
terminator, so they must be encoded to %20 and %0A respectively.
99
"*" is unsafe in shell context, etc.
101
We determine whether a character is unsafe through static table
102
lookup. This code assumes ASCII character set and 8-bit chars. */
84
/* rfc1738 reserved chars, preserved from encoding. */
105
/* rfc1738 reserved chars + "$" and ",". */
85
106
urlchr_reserved = 1,
87
/* rfc1738 unsafe chars, plus some more. */
108
/* rfc1738 unsafe chars, plus non-printables. */
97
118
#define U urlchr_unsafe
100
const static unsigned char urlchr_table[256] =
121
static const unsigned char urlchr_table[256] =
102
123
U, U, U, U, U, U, U, U, /* NUL SOH STX ETX EOT ENQ ACK BEL */
103
124
U, U, U, U, U, U, U, U, /* BS HT LF VT FF CR SO SI */
104
125
U, U, U, U, U, U, U, U, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
105
126
U, U, U, U, U, U, U, U, /* CAN EM SUB ESC FS GS RS US */
106
U, 0, U, RU, 0, U, R, 0, /* SP ! " # $ % & ' */
107
0, 0, 0, R, 0, 0, 0, R, /* ( ) * + , - . / */
127
U, 0, U, RU, R, U, R, 0, /* SP ! " # $ % & ' */
128
0, 0, 0, R, R, 0, 0, R, /* ( ) * + , - . / */
108
129
0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */
109
130
0, 0, RU, R, U, R, U, R, /* 8 9 : ; < = > ? */
110
131
RU, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */
114
135
U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */
115
136
0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */
116
137
0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
117
0, 0, 0, U, U, U, U, U, /* x y z { | } ~ DEL */
138
0, 0, 0, U, U, U, 0, U, /* x y z { | } ~ DEL */
119
140
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
120
141
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
228
255
return url_escape_1 (s, urlchr_unsafe, 1);
231
/* The possible treatments of a single character while copying a URL
   string: decode it, encode it, or pass it through unchanged.  */
enum copy_method
{
  CM_DECODE,
  CM_ENCODE,
  CM_PASSTHROUGH
};
233
/* Decide whether to encode, decode, or pass through the char at P.
234
This used to be a macro, but it got a little too convoluted. */
235
static inline enum copy_method
236
decide_copy_method (const char *p)
258
/* Decide whether the char at position P needs to be encoded. (It is
259
not enough to pass a single char *P because the function may need
260
to inspect the surrounding context.)
262
Return 1 if the char should be escaped as %XX, 0 otherwise. */
265
char_needs_escaping (const char *p)
240
269
if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2)))
242
/* %xx sequence: decode it, unless it would decode to an
243
unsafe or a reserved char; in that case, leave it as
245
char preempt = X2DIGITS_TO_NUM (*(p + 1), *(p + 2));
246
if (URL_UNSAFE_CHAR (preempt) || URL_RESERVED_CHAR (preempt))
247
return CM_PASSTHROUGH;
252
272
/* Garbled %.. sequence: encode `%'. */
255
275
else if (URL_UNSAFE_CHAR (*p) && !URL_RESERVED_CHAR (*p))
258
return CM_PASSTHROUGH;
261
281
/* Translate a %-escaped (but possibly non-conformant) input string S
266
286
After a URL has been run through this function, the protocols that
267
287
use `%' as the quote character can use the resulting string as-is,
268
while those that don't call url_unescape() to get to the intended
269
data. This function is also stable: after an input string is
270
transformed the first time, all further transformations of the
271
result yield the same result string.
288
while those that don't can use url_unescape to get to the intended
289
data. This function is stable: once the input is transformed,
290
further transformations of the result yield the same output.
273
292
Let's discuss why this function is needed.
275
Imagine Wget is to retrieve `http://abc.xyz/abc def'. Since a raw
276
space character would mess up the HTTP request, it needs to be
294
Imagine Wget is asked to retrieve `http://abc.xyz/abc def'. Since
295
a raw space character would mess up the HTTP request, it needs to
296
be quoted, like this:
279
298
GET /abc%20def HTTP/1.0
281
It appears that the unsafe chars need to be quoted, for example
282
with url_escape. But what if we're requested to download
300
It would appear that the unsafe chars need to be quoted, for
301
example with url_escape. But what if we're requested to download
283
302
`abc%20def'? url_escape transforms "%" to "%25", which would leave
284
303
us with `abc%2520def'. This is incorrect -- since %-escapes are
285
304
part of URL syntax, "%20" is the correct way to denote a literal
286
space on the Wget command line. This leads us to the conclusion
287
that in that case Wget should not call url_escape, but leave the
305
space on the Wget command line. This leads to the conclusion that
306
in that case Wget should not call url_escape, but leave the `%20'
307
as is. This is clearly contradictory, but it only gets worse.
290
And what if the requested URI is `abc%20 def'? If we call
291
url_escape, we end up with `/abc%2520%20def', which is almost
292
certainly not intended. If we don't call url_escape, we are left
293
with the embedded space and cannot complete the request. What the
294
user meant was for Wget to request `/abc%20%20def', and this is
295
where reencode_escapes kicks in.
309
What if the requested URI is `abc%20 def'? If we call url_escape,
310
we end up with `/abc%2520%20def', which is almost certainly not
311
intended. If we don't call url_escape, we are left with the
312
embedded space and cannot complete the request. What the user
313
meant was for Wget to request `/abc%20%20def', and this is where
314
reencode_escapes kicks in.
297
316
Wget used to solve this by first decoding %-quotes, and then
298
317
encoding all the "unsafe" characters found in the resulting string.
306
325
literal plus. reencode_escapes correctly translates the above to
307
326
"a%2B+b", i.e. returns the original string.
309
This function uses an algorithm proposed by Anon Sricharoenchai:
311
1. Encode all URL_UNSAFE and the "%" that are not followed by 2
314
2. Decode all "%XX" except URL_UNSAFE, URL_RESERVED (";/?:@=&") and
317
...except that this code conflates the two steps, and decides
318
whether to encode, decode, or pass through each character in turn.
319
The function still uses two passes, but their logic is the same --
320
the first pass exists merely for the sake of allocation. Another
321
small difference is that we include `+' to URL_RESERVED.
328
This function uses a modified version of the algorithm originally
329
proposed by Anon Sricharoenchai:
331
* Encode all "unsafe" characters, except those that are also
332
"reserved", to %XX. See urlchr_table for which characters are
335
* Encode the "%" characters not followed by two hex digits to
338
* Pass through all other characters and %XX escapes as-is. (Up to
339
Wget 1.10 this decoded %XX escapes corresponding to "safe"
340
characters, but that was obtrusive and broke some servers.)
323
342
Anon's test case:
325
344
"http://abc.xyz/%20%3F%%36%31%25aa% a?a=%61+a%2Ba&b=b%26c%3Dc"
327
"http://abc.xyz/%20%3F%2561%25aa%25%20a?a=a+a%2Ba&b=b%26c%3Dc"
346
"http://abc.xyz/%20%3F%25%36%31%25aa%25%20a?a=%61+a%2Ba&b=b%26c%3Dc"
329
348
Simpler test cases:
345
364
int oldlen, newlen;
347
366
int encode_count = 0;
348
int decode_count = 0;
350
/* First, pass through the string to see if there's anything to do,
368
/* First pass: inspect the string to see if there's anything to do,
351
369
and to calculate the new length. */
352
370
for (p1 = s; *p1; p1++)
354
switch (decide_copy_method (p1))
371
if (char_needs_escaping (p1))
367
if (!encode_count && !decode_count)
368
375
/* The string is good as it is. */
369
return (char *)s; /* C const model sucks. */
376
return (char *) s; /* C const model sucks. */
372
/* Each encoding adds two characters (hex digits), while each
373
decoding removes two characters. */
374
newlen = oldlen + 2 * (encode_count - decode_count);
379
/* Each encoding adds two characters (hex digits). */
380
newlen = oldlen + 2 * encode_count;
375
381
newstr = xmalloc (newlen + 1);
383
/* Second pass: copy the string to the destination address, encoding
384
chars when needed. */
382
switch (decide_copy_method (p1))
386
unsigned char c = *p1++;
388
*p2++ = XNUM_TO_DIGIT (c >> 4);
389
*p2++ = XNUM_TO_DIGIT (c & 0xf);
393
*p2++ = X2DIGITS_TO_NUM (p1[1], p1[2]);
394
p1 += 3; /* skip %xx */
389
if (char_needs_escaping (p1))
391
unsigned char c = *p1++;
393
*p2++ = XNUM_TO_DIGIT (c >> 4);
394
*p2++ = XNUM_TO_DIGIT (c & 0xf);
401
400
assert (p2 - newstr == newlen);
457
456
supported_schemes[scheme].enabled = 0;
460
/* Skip the username and password, if present here. The function
461
should *not* be called with the complete URL, but with the part
462
right after the scheme.
464
If no username and password are found, return 0. */
459
/* Skip the username and password, if present in the URL. The
460
function should *not* be called with the complete URL, but with the
461
portion after the scheme.
463
If no username and password are found, return URL. */
467
466
url_skip_credentials (const char *url)
469
468
/* Look for '@' that comes before terminators, such as '/', '?',
471
470
const char *p = (const char *)strpbrk (url, "@/?#;");
472
471
if (!p || *p != '@')
477
476
/* Parse credentials contained in [BEG, END). The region is expected
579
584
help because the check for literal accept is in the
580
585
preprocessor.) */
587
#if defined(__GNUC__) && __GNUC__ >= 3
584
589
#define strpbrk_or_eos(s, accept) ({ \
585
590
char *SOE_p = strpbrk (s, accept); \
587
SOE_p = (char *)s + strlen (s); \
592
SOE_p = strchr (s, '\0'); \
591
#else /* not __GNUC__ */
596
#else /* not __GNUC__ or old gcc */
594
599
strpbrk_or_eos (const char *s, const char *accept)
596
601
char *p = strpbrk (s, accept);
598
p = (char *)s + strlen (s);
603
p = strchr (s, '\0');
606
#endif /* not __GNUC__ or old gcc */
603
608
/* Turn STR into lowercase; return non-zero if a character was
604
609
actually changed. */
635
640
N_("Invalid IPv6 numeric address")
639
/* The following two functions were adapted from glibc. */
642
is_valid_ipv4_address (const char *str, const char *end)
644
int saw_digit, octets;
654
if (ch >= '0' && ch <= '9') {
655
val = val * 10 + (ch - '0');
659
if (saw_digit == 0) {
664
} else if (ch == '.' && saw_digit == 1) {
678
static const int NS_INADDRSZ = 4;
679
static const int NS_IN6ADDRSZ = 16;
680
static const int NS_INT16SZ = 2;
683
is_valid_ipv6_address (const char *str, const char *end)
685
static const char xdigits[] = "0123456789abcdef";
698
/* Leading :: requires some special handling. */
702
if (str == end || *str != ':')
714
/* if ch is a number, add it to val. */
715
pch = strchr(xdigits, ch);
718
val |= (pch - xdigits);
725
/* if ch is a colon ... */
728
if (saw_xdigit == 0) {
733
} else if (str == end) {
736
if (tp > NS_IN6ADDRSZ - NS_INT16SZ)
744
/* if ch is a dot ... */
745
if (ch == '.' && (tp <= NS_IN6ADDRSZ - NS_INADDRSZ) &&
746
is_valid_ipv4_address(curtok, end) == 1) {
755
if (saw_xdigit == 1) {
756
if (tp > NS_IN6ADDRSZ - NS_INT16SZ)
761
if (colonp != NULL) {
762
if (tp == NS_IN6ADDRSZ)
767
if (tp != NS_IN6ADDRSZ)
776
645
Return a new struct url if successful, NULL on error. In case of
883
752
p = strpbrk_or_eos (p, "/;?#");
886
if (port_b == port_e)
888
/* http://host:/whatever */
890
error_code = PE_BAD_PORT_NUMBER;
894
for (port = 0, pp = port_b; pp < port_e; pp++)
755
/* Allow empty port, as per rfc2396. */
756
if (port_b != port_e)
758
for (port = 0, pp = port_b; pp < port_e; pp++)
898
/* http://host:12randomgarbage/blah */
900
error_code = PE_BAD_PORT_NUMBER;
762
/* http://host:12randomgarbage/blah */
764
error_code = PE_BAD_PORT_NUMBER;
767
port = 10 * port + (*pp - '0');
768
/* Check for too large port numbers here, before we have
769
a chance to overflow on bogus port values. */
772
error_code = PE_BAD_PORT_NUMBER;
904
port = 10 * port + (*pp - '0');
958
829
if (!parse_credentials (uname_b, uname_e - 1, &user, &passwd))
960
831
error_code = PE_INVALID_USER_NAME;
965
u = (struct url *)xmalloc (sizeof (struct url));
966
memset (u, 0, sizeof (*u));
836
u = xnew0 (struct url);
968
837
u->scheme = scheme;
969
838
u->host = strdupdelim (host_b, host_e);
978
847
host_modified = lowercase_str (u->host);
849
/* Decode %HH sequences in host name. This is important not so much
850
to support %HH sequences in host names (which other browsers
851
don't), but to support binary characters (which will have been
852
converted to %HH by reencode_escapes). */
853
if (strchr (u->host, '%'))
855
url_unescape (u->host);
981
860
u->params = strdupdelim (params_b, params_e);
1123
1001
return full_path;
1004
/* Unescape CHR in an otherwise escaped STR. Used to selectively
1005
unescape certain characters, such as "/" and ":". Returns a
1006
count of unescaped chars. */
1009
unescape_single_char (char *str, char chr)
1011
const char c1 = XNUM_TO_DIGIT (chr >> 4);
1012
const char c2 = XNUM_TO_DIGIT (chr & 0xf);
1013
char *h = str; /* hare */
1014
char *t = str; /* tortoise */
1015
for (; *h; h++, t++)
1017
if (h[0] == '%' && h[1] == c1 && h[2] == c2)
1126
1028
/* Escape unsafe and reserved characters, except for the slash
1130
1032
url_escape_dir (const char *dir)
1132
1034
char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1);
1134
1035
if (newdir == dir)
1135
1036
return (char *)dir;
1137
/* Unescape slashes in NEWDIR. */
1139
h = newdir; /* hare */
1140
t = newdir; /* tortoise */
1142
for (; *h; h++, t++)
1144
/* url_escape_1 having converted '/' to "%2F" exactly. */
1145
if (*h == '%' && h[1] == '2' && h[2] == 'F')
1038
unescape_single_char (newdir, '/');
1370
1254
translate file name back to URL, this would become
1371
1255
crucial. Right now, it's better to be minimal in escaping. */
1373
const static unsigned char filechr_table[256] =
1257
static const unsigned char filechr_table[256] =
1375
1259
UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */
1376
1260
C, C, C, C, C, C, C, C, /* BS HT LF VT FF CR SO SI */
1448
1332
e = unescaped + strlen (unescaped);
1335
/* Defang ".." when found as component of path. Remember that path
1336
comes from the URL and might contain malicious input. */
1337
if (e - b == 2 && b[0] == '.' && b[1] == '.')
1451
1343
/* Walk the PATHEL string and check how many characters we'll need
1452
to add for file quoting. */
1454
1346
for (p = b; p < e; p++)
1455
1347
if (FILE_CHAR_TEST (*p, mask))
1458
/* e-b is the string length. Each quoted char means two additional
1350
/* Calculate the length of the output string. e-b is the input
1351
string length. Each quoted char introduces two additional
1459
1352
characters in the string, hence 2*quoted. */
1460
1353
outlen = (e - b) + (2 * quoted);
1461
1354
GROW (dest, outlen);
1465
/* If there's nothing to quote, we don't need to go through the
1466
string the second time. */
1358
/* If there's nothing to quote, we can simply append the string
1359
without processing it again. */
1467
1360
memcpy (TAIL (dest), b, outlen);
1548
1441
directory structure. */
1549
1442
if (opt.dirstruct)
1444
if (opt.protocol_directories)
1447
append_char ('/', &fnres);
1448
append_string (supported_schemes[u->scheme].name, &fnres);
1551
1450
if (opt.add_hostdir)
1553
1452
if (fnres.tail)
1554
1453
append_char ('/', &fnres);
1555
append_string (u->host, &fnres);
1454
if (0 != strcmp (u->host, ".."))
1455
append_string (u->host, &fnres);
1457
/* Host name can come from the network; malicious DNS may
1458
allow ".." to be resolved, causing us to write to
1459
"../<file>". Defang such host names. */
1460
append_string ("%2E%2E", &fnres);
1556
1461
if (u->port != scheme_default_port (u->scheme))
1558
1463
char portstr[24];
1606
/* Return the length of URL's path. Path is considered to be
1607
terminated by one of '?', ';', '#', or by the end of the
1610
path_length (const char *url)
1612
const char *q = strpbrk_or_eos (url, "?;#");
1616
/* Find the last occurrence of character C in the range [b, e), or
1617
NULL, if none are present. This is equivalent to strrchr(b, c),
1618
except that it accepts an END argument instead of requiring the
1619
string to be zero-terminated. Why is there no memrchr()? */
1621
find_last_char (const char *b, const char *e, char c)
1629
1511
/* Resolve "." and ".." elements of PATH by destructively modifying
1630
1512
PATH and return non-zero if PATH has been modified, zero otherwise.
1635
1517
"back up one element". Single leading and trailing slashes are
1638
This function does not handle URL escapes explicitly. If you're
1639
passing paths from URLs, make sure to unquote "%2e" and "%2E" to
1640
".", so that this function can find the dots. (Wget's URL parser
1641
calls reencode_escapes, which see.)
1643
1520
For example, "a/b/c/./../d/.." will yield "a/b/". More exhaustive
1644
1521
test examples are provided below. If you change anything in this
1645
1522
function, run test_path_simplify to make sure you haven't broken a
1670
1542
else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0'))
1672
1544
/* Handle "../" by retreating the tortoise by one path
1673
element -- but not past beginning of PATH. */
1545
element -- but not past beginning. */
1676
1548
/* Move backwards until T hits the beginning of the
1677
1549
previous path element or the beginning of path. */
1678
for (--t; t > path && t[-1] != '/'; t--)
1550
for (--t; t > beg && t[-1] != '/'; t--)
1555
/* If we're at the beginning, copy the "../" literally
1556
and move the beginning so a later ".." doesn't remove
1685
/* Ignore empty path elements. Supporting them well is hard
1686
(where do you save "http://x.com///y.html"?), and they
1687
don't bring any practical gain. Plus, they break our
1688
filesystem-influenced assumptions: allowing them would
1689
make "x/y//../z" simplify to "x/y/z", whereas most people
1690
would expect "x/z". */
1695
1566
/* A regular path element. If H hasn't advanced past T,
1696
1567
simply skip to the next path element. Otherwise, copy
1697
1568
the path element until the next slash. */
1594
/* Return the length of URL's path. Path is considered to be
1595
terminated by one of '?', ';', '#', or by the end of the
1599
path_length (const char *url)
1601
const char *q = strpbrk_or_eos (url, "?;#");
1605
/* Find the last occurrence of character C in the range [b, e), or
1606
NULL, if none are present. We might want to use memrchr (a GNU
1607
extension) under GNU libc. */
1610
find_last_char (const char *b, const char *e, char c)
1723
1618
/* Merge BASE with LINK and return the resulting URI.
1725
1620
Either of the URIs may be absolute or relative, complete with the
1727
1622
foreseeable cases. It only employs minimal URL parsing, without
1728
1623
knowledge of the specifics of schemes.
1730
Perhaps this function should call path_simplify so that the callers
1731
don't have to call url_parse unconditionally. */
1625
I briefly considered making this function call path_simplify after
1626
the merging process, as rfc1738 seems to suggest. This is a bad
1627
idea for several reasons: 1) it complexifies the code, and 2)
1628
url_parse has to simplify path anyway, so it's wasteful to boot. */
1734
1631
uri_merge (const char *base, const char *link)
1878
1775
const char *last_slash = find_last_char (base, end, '/');
1879
1776
if (!last_slash)
1881
/* No slash found at all. Append LINK to what we have,
1882
but we'll need a slash as a separator.
1884
Example: if base == "foo" and link == "qux/xyzzy", then
1885
we cannot just append link to base, because we'd get
1886
"fooqux/xyzzy", whereas what we want is
1889
To make sure the / gets inserted, we set
1890
need_explicit_slash to 1. We also set start_insert
1891
to end + 1, so that the length calculations work out
1892
correctly for one more (slash) character. Accessing
1893
that character is fine, since it will be the
1894
delimiter, '\0' or '?'. */
1895
/* example: "foo?..." */
1896
/* ^ ('?' gets changed to '/') */
1897
start_insert = end + 1;
1898
need_explicit_slash = 1;
1778
/* No slash found at all. Replace what we have with LINK. */
1779
start_insert = base;
1900
1781
else if (last_slash && last_slash >= base + 2
1901
1782
&& last_slash[-2] == ':' && last_slash[-1] == '/')
1951
1832
char *result, *p;
1952
char *quoted_user = NULL, *quoted_passwd = NULL;
1833
char *quoted_host, *quoted_user = NULL, *quoted_passwd = NULL;
1954
1835
int scheme_port = supported_schemes[url->scheme].default_port;
1955
char *scheme_str = supported_schemes[url->scheme].leading_string;
1836
const char *scheme_str = supported_schemes[url->scheme].leading_string;
1956
1837
int fplen = full_path_length (url);
1958
int brackets_around_host = 0;
1839
int brackets_around_host;
1960
1841
assert (scheme_str != NULL);
1975
if (strchr (url->host, ':'))
1976
brackets_around_host = 1;
1856
/* In the unlikely event that the host name contains non-printable
1857
characters, quote it for displaying to the user. */
1858
quoted_host = url_escape_allow_passthrough (url->host);
1860
/* Undo the quoting of colons that URL escaping performs. IPv6
1861
addresses may legally contain colons, and in that case must be
1862
placed in square brackets. */
1863
if (quoted_host != url->host)
1864
unescape_single_char (quoted_host, ':');
1865
brackets_around_host = strchr (quoted_host, ':') != NULL;
1978
1867
size = (strlen (scheme_str)
1979
+ strlen (url->host)
1868
+ strlen (quoted_host)
1980
1869
+ (brackets_around_host ? 2 : 0)
2090
1980
char *test, *result;
2091
1981
int should_modify;
2096
{ "foo", "foo", 0 },
2097
{ "foo/bar", "foo/bar", 0 },
2098
{ "foo///bar", "foo/bar", 1 },
2099
{ "foo/.", "foo/", 1 },
2100
{ "foo/./", "foo/", 1 },
2101
{ "foo./", "foo./", 0 },
2102
{ "foo/../bar", "bar", 1 },
2103
{ "foo/../bar/", "bar/", 1 },
2104
{ "foo/bar/..", "foo/", 1 },
2105
{ "foo/bar/../x", "foo/x", 1 },
2106
{ "foo/bar/../x/", "foo/x/", 1 },
2107
{ "foo/..", "", 1 },
2108
{ "foo/../..", "", 1 },
2109
{ "a/b/../../c", "c", 1 },
2110
{ "./a/../b", "b", 1 }
1987
{ "../", "../", 0 },
1988
{ "foo", "foo", 0 },
1989
{ "foo/bar", "foo/bar", 0 },
1990
{ "foo///bar", "foo///bar", 0 },
1991
{ "foo/.", "foo/", 1 },
1992
{ "foo/./", "foo/", 1 },
1993
{ "foo./", "foo./", 0 },
1994
{ "foo/../bar", "bar", 1 },
1995
{ "foo/../bar/", "bar/", 1 },
1996
{ "foo/bar/..", "foo/", 1 },
1997
{ "foo/bar/../x", "foo/x", 1 },
1998
{ "foo/bar/../x/", "foo/x/", 1 },
1999
{ "foo/..", "", 1 },
2000
{ "foo/../..", "..", 1 },
2001
{ "foo/../../..", "../..", 1 },
2002
{ "foo/../../bar/../../baz", "../../baz", 1 },
2003
{ "a/b/../../c", "c", 1 },
2004
{ "./a/../b", "b", 1 }
2118
2012
int expected_change = tests[i].should_modify;
2119
2013
run_test (test, expected_result, expected_change);
2122
/* Now run all the tests with a leading slash before the test case,
2123
to prove that the slash is being preserved. */
2124
for (i = 0; i < countof (tests); i++)
2126
char *test, *expected_result;
2127
int expected_change = tests[i].should_modify;
2129
test = xmalloc (1 + strlen (tests[i].test) + 1);
2130
sprintf (test, "/%s", tests[i].test);
2132
expected_result = xmalloc (1 + strlen (tests[i].result) + 1);
2133
sprintf (expected_result, "/%s", tests[i].result);
2135
run_test (test, expected_result, expected_change);
2138
xfree (expected_result);