2
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3
2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
2
Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3
2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
5
6
This file is part of GNU Wget.
407
405
if (opt.auth_without_challenge)
409
DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n"));
407
DEBUGP (("Auth-without-challenge set, sending Basic credentials.\n"));
410
408
do_challenge = true;
412
410
else if (basic_authed_hosts
413
411
&& hash_table_contains(basic_authed_hosts, hostname))
415
DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname)));
413
DEBUGP (("Found %s in basic_authed_hosts.\n", quote (hostname)));
416
414
do_challenge = true;
420
DEBUGP(("Host %s has not issued a general basic challenge.\n",
418
DEBUGP (("Host %s has not issued a general basic challenge.\n",
421
419
quote (hostname)));
423
421
if (do_challenge)
901
899
mode, the body is displayed for debugging purposes. */
904
skip_short_body (int fd, wgint contlen)
902
skip_short_body (int fd, wgint contlen, bool chunked)
907
905
SKIP_SIZE = 512, /* size of the download buffer */
908
906
SKIP_THRESHOLD = 4096 /* the largest size we read */
908
wgint remaining_chunk_size = 0;
910
909
char dlbuf[SKIP_SIZE + 1];
911
910
dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */
913
/* We shouldn't get here with unknown contlen. (This will change
914
with HTTP/1.1, which supports "chunked" transfer.) */
915
assert (contlen != -1);
912
assert (contlen != -1 || contlen);
917
914
/* If the body is too large, it makes more sense to simply close the
918
915
connection than to try to read the body. */
919
916
if (contlen > SKIP_THRESHOLD)
922
DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
919
while (contlen > 0 || chunked)
926
int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
924
if (remaining_chunk_size == 0)
926
char *line = fd_read_line (fd);
931
remaining_chunk_size = strtol (line, &endl, 16);
932
if (remaining_chunk_size == 0)
939
contlen = MIN (remaining_chunk_size, SKIP_SIZE);
942
DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
944
ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
929
947
/* Don't normally report the error since this is an
972
#define NOT_RFC2231 0
973
#define RFC2231_NOENCODING 1
974
#define RFC2231_ENCODING 2
976
/* extract_param extracts the parameter name into NAME.
977
However, if the parameter name is in RFC2231 format then
978
this function adjusts NAME by stripping off the trailing
979
characters that are not part of the name but are present to
980
indicate the presence of encoding information in the value
981
or a fragment of a long parameter value
984
modify_param_name(param_token *name)
986
const char *delim1 = memchr (name->b, '*', name->e - name->b);
987
const char *delim2 = memrchr (name->b, '*', name->e - name->b);
993
result = NOT_RFC2231;
995
else if(delim1 == delim2)
997
if ((name->e - 1) == delim1)
999
result = RFC2231_ENCODING;
1003
result = RFC2231_NOENCODING;
1010
result = RFC2231_ENCODING;
1015
/* extract_param extracts the parameter value into VALUE.
1016
Like modify_param_name this function modifies VALUE by
1017
stripping off the encoding information from the actual value
1020
modify_param_value (param_token *value, int encoding_type )
1022
if (RFC2231_ENCODING == encoding_type)
1024
const char *delim = memrchr (value->b, '\'', value->e - value->b);
1025
if ( delim != NULL )
1027
value->b = (delim+1);
945
1032
/* Extract a parameter from the string (typically an HTTP header) at
946
1033
**SOURCE and advance SOURCE to the next parameter. Return false
947
1034
when there are no more parameters to extract. The name of the
1013
1100
if (*p == separator) ++p;
1104
int param_type = modify_param_name(name);
1105
if (NOT_RFC2231 != param_type)
1107
modify_param_value(value, param_type);
1113
#undef RFC2231_NOENCODING
1114
#undef RFC2231_ENCODING
1116
/* Appends the string represented by VALUE to FILENAME */
1119
append_value_to_filename (char **filename, param_token const * const value)
1121
int original_length = strlen(*filename);
1122
int new_length = strlen(*filename) + (value->e - value->b);
1123
*filename = xrealloc (*filename, new_length+1);
1124
memcpy (*filename + original_length, value->b, (value->e - value->b));
1125
(*filename)[new_length] = '\0';
1020
1129
/* Yield the larger of P and Q, compared with `>'.  Each argument
   appears twice in the expansion, so do not pass expressions that
   have side effects.  */
#define MAX(p, q) ((p) > (q) ? (p) : (q))
1036
1145
The file name is stripped of directory components and must not be
1148
Historically, this function returned filename prefixed with opt.dir_prefix,
1149
now that logic is handled by the caller, new code should pay attention,
1150
changed by crq, Sep 2010.
1040
1154
parse_content_disposition (const char *hdr, char **filename)
1042
1156
param_token name, value;
1043
1158
while (extract_param (&hdr, &name, &value, ';'))
1044
if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
1046
/* Make the file name begin at the last slash or backslash. */
1047
const char *last_slash = memrchr (value.b, '/', value.e - value.b);
1048
const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
1049
if (last_slash && last_bs)
1050
value.b = 1 + MAX (last_slash, last_bs);
1051
else if (last_slash || last_bs)
1052
value.b = 1 + (last_slash ? last_slash : last_bs);
1053
if (value.b == value.e)
1055
/* Start with the directory prefix, if specified. */
1058
int prefix_length = strlen (opt.dir_prefix);
1059
bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
1064
total_length = prefix_length + (value.e - value.b);
1065
*filename = xmalloc (total_length + 1);
1066
strcpy (*filename, opt.dir_prefix);
1068
(*filename)[prefix_length - 1] = '/';
1069
memcpy (*filename + prefix_length, value.b, (value.e - value.b));
1070
(*filename)[total_length] = '\0';
1073
*filename = strdupdelim (value.b, value.e);
1160
int isFilename = BOUNDED_EQUAL_NO_CASE ( name.b, name.e, "filename" );
1161
if ( isFilename && value.b != NULL)
1163
/* Make the file name begin at the last slash or backslash. */
1164
const char *last_slash = memrchr (value.b, '/', value.e - value.b);
1165
const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
1166
if (last_slash && last_bs)
1167
value.b = 1 + MAX (last_slash, last_bs);
1168
else if (last_slash || last_bs)
1169
value.b = 1 + (last_slash ? last_slash : last_bs);
1170
if (value.b == value.e)
1174
append_value_to_filename (filename, &value);
1176
*filename = strdupdelim (value.b, value.e);
1079
1187
/* Persistent connections. Currently, we cache the most recently used
1080
1188
connection as persistent, provided that the HTTP server agrees to
1340
1448
hs->error = NULL;
1452
get_file_flags (const char *filename, int *dt)
1454
logprintf (LOG_VERBOSE, _("\
1455
File %s already there; not retrieving.\n\n"), quote (filename));
1456
/* If the file is there, we suppose it's retrieved OK. */
1459
/* #### Bogusness alert. */
1460
/* If its suffix is "html" or "htm" or similar, assume text/html. */
1461
if (has_html_suffix_p (filename))
1343
1465
#define BEGINS_WITH(line, string_constant) \
1344
1466
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
1345
1467
&& (c_isspace (line[sizeof (string_constant) - 1]) \
1430
1552
bool keep_alive;
1432
/* Whether keep-alive should be inhibited.
1554
/* Is the server using the chunked transfer encoding? */
1555
bool chunked_transfer_encoding = false;
1434
RFC 2068 requests that 1.0 clients not send keep-alive requests
1435
to proxies. This is because many 1.0 proxies do not interpret
1436
the Connection header and transfer it to the remote server,
1437
causing it to not close the connection and leave both the proxy
1438
and the client hanging. */
1557
/* Whether keep-alive should be inhibited. */
1439
1558
bool inhibit_keep_alive =
1440
!opt.http_keep_alive || opt.ignore_length || proxy != NULL;
1559
!opt.http_keep_alive || opt.ignore_length;
1442
1561
/* Headers sent when using POST. */
1443
1562
wgint post_data_size = 0;
1501
1620
request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1502
1621
if (*dt & SEND_NOCACHE)
1503
request_set_header (req, "Pragma", "no-cache", rel_none);
1623
/* Cache-Control MUST be obeyed by all HTTP/1.1 caching mechanisms... */
1624
request_set_header (req, "Cache-Control", "no-cache, must-revalidate", rel_none);
1626
/* ... but some HTTP/1.0 caches don't implement Cache-Control. */
1627
request_set_header (req, "Pragma", "no-cache", rel_none);
1629
if (hs->restval && !opt.timestamping)
1505
1630
request_set_header (req, "Range",
1506
1631
aprintf ("bytes=%s-",
1507
1632
number_to_static_string (hs->restval)),
1550
if (!inhibit_keep_alive)
1551
request_set_header (req, "Connection", "Keep-Alive", rel_none);
1554
request_set_header (req, "Cookie",
1555
cookie_header (wget_cookie_jar,
1556
u->host, u->port, u->path,
1558
u->scheme == SCHEME_HTTPS
1675
if (inhibit_keep_alive)
1676
request_set_header (req, "Connection", "Close", rel_none);
1680
request_set_header (req, "Connection", "Keep-Alive", rel_none);
1683
request_set_header (req, "Connection", "Close", rel_none);
1684
request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none);
1565
1688
if (opt.post_data || opt.post_file_name)
1829
1977
/* Check for status line. */
1830
1978
message = NULL;
1831
1979
statcode = resp_status (resp, &message);
1982
char *tms = datetime_str (time (NULL));
1983
logprintf (LOG_VERBOSE, "%d\n", statcode);
1984
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), tms, statcode,
1985
quotearg_style (escape_quoting_style,
1986
_("Malformed status line")));
1987
CLOSE_INVALIDATE (sock);
1992
if (H_10X (statcode))
1994
DEBUGP (("Ignoring response\n"));
1832
1998
hs->message = xstrdup (message);
1833
1999
if (!opt.server_response)
1834
2000
logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
1867
2033
/* Check for keep-alive related responses. */
1868
2034
if (!inhibit_keep_alive && contlen != -1)
1870
if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
1872
else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
2036
if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
1874
if (0 == strcasecmp (hdrval, "Keep-Alive"))
2038
if (0 == strcasecmp (hdrval, "Close"))
2043
resp_header_copy (resp, "Transfer-Encoding", hdrval, sizeof (hdrval));
2044
if (0 == strcasecmp (hdrval, "chunked"))
2045
chunked_transfer_encoding = true;
1879
2047
/* Handle (possibly multiple instances of) the Set-Cookie header. */
1880
2048
if (opt.cookies)
1981
2150
* hstat.local_file is set by http_loop to the argument of -O. */
1982
2151
if (!hs->local_file)
2153
char *local_file = NULL;
1984
2155
/* Honor Content-Disposition whenever possible. */
1985
2156
if (!opt.content_disposition
1986
2157
|| !resp_header_copy (resp, "Content-Disposition",
1987
2158
hdrval, sizeof (hdrval))
1988
|| !parse_content_disposition (hdrval, &hs->local_file))
2159
|| !parse_content_disposition (hdrval, &local_file))
1990
2161
/* The Content-Disposition header is missing or broken.
1991
2162
* Choose unique file name according to given URL. */
1992
hs->local_file = url_file_name (u);
2163
hs->local_file = url_file_name (u, NULL);
2167
DEBUGP (("Parsed filename from Content-Disposition: %s\n",
2169
hs->local_file = url_file_name (u, local_file);
2001
2178
/* If opt.noclobber is turned on and file already exists, do not
2002
2179
retrieve the file. But if the output_document was given, then this
2003
2180
test was already done and the file didn't exist. Hence the !opt.output_document */
2004
logprintf (LOG_VERBOSE, _("\
2005
File %s already there; not retrieving.\n\n"), quote (hs->local_file));
2006
/* If the file is there, we suppose it's retrieved OK. */
2009
/* #### Bogusness alert. */
2010
/* If its suffix is "html" or "htm" or similar, assume text/html. */
2011
if (has_html_suffix_p (hs->local_file))
2181
get_file_flags (hs->local_file, dt);
2015
2183
xfree_null (message);
2016
2184
return RETRUNNEEDED;
2154
2322
_("Location: %s%s\n"),
2155
2323
hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
2156
2324
hs->newloc ? _(" [following]") : "");
2157
if (keep_alive && !head_only && skip_short_body (sock, contlen))
2325
if (keep_alive && !head_only
2326
&& skip_short_body (sock, contlen, chunked_transfer_encoding))
2158
2327
CLOSE_FINISH (sock);
2160
2329
CLOSE_INVALIDATE (sock);
2161
2330
xfree_null (type);
2332
/* From RFC2616: The status codes 303 and 307 have
2333
been added for servers that wish to make unambiguously
2334
clear which kind of reaction is expected of the client.
2336
A 307 should be redirected using the same method,
2337
in other words, a POST should be preserved and not
2338
converted to a GET in that case. */
2339
if (statcode == HTTP_STATUS_TEMPORARY_REDIRECT)
2340
return NEWLOCATION_KEEP_POST;
2163
2341
return NEWLOCATION;
2459
2655
else if (!opt.content_disposition)
2461
2657
hstat.local_file =
2462
url_file_name (opt.trustservernames ? u : original_url);
2658
url_file_name (opt.trustservernames ? u : original_url, NULL);
2463
2659
got_name = true;
2466
/* TODO: Ick! This code is now in both gethttp and http_loop, and is
2467
* screaming for some refactoring. */
2468
2662
if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document)
2470
2664
/* If opt.noclobber is turned on and file already exists, do not
2471
2665
retrieve the file. But if the output_document was given, then this
2472
2666
test was already done and the file didn't exist. Hence the !opt.output_document */
2473
logprintf (LOG_VERBOSE, _("\
2474
File %s already there; not retrieving.\n\n"),
2475
quote (hstat.local_file));
2476
/* If the file is there, we suppose it's retrieved OK. */
2479
/* #### Bogusness alert. */
2480
/* If its suffix is "html" or "htm" or similar, assume text/html. */
2481
if (has_html_suffix_p (hstat.local_file))
2667
get_file_flags (hstat.local_file, dt);
2491
2675
/* Reset the document type. */
2494
/* Skip preliminary HEAD request if we're not in spider mode AND
2495
* if -O was given or HTTP Content-Disposition support is disabled. */
2497
&& (got_name || !opt.content_disposition))
2678
/* Skip preliminary HEAD request if we're not in spider mode. */
2498
2680
send_head_first = false;
2500
2682
/* Send preliminary HEAD request if -N is given and we have an existing
2501
2683
* destination file. */
2502
file_name = url_file_name (opt.trustservernames ? u : original_url);
2503
if (opt.timestamping
2504
&& !opt.content_disposition
2505
&& file_exists_p (file_name))
2684
file_name = url_file_name (opt.trustservernames ? u : original_url, NULL);
2685
if (opt.timestamping && (file_exists_p (file_name)
2686
|| opt.content_disposition))
2506
2687
send_head_first = true;
2507
2688
xfree (file_name);
3164
3359
/* Calculate the digest value. */
3166
ALLOCA_MD5_CONTEXT (ctx);
3167
unsigned char hash[MD5_HASHLEN];
3168
char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
3169
char response_digest[MD5_HASHLEN * 2 + 1];
3362
unsigned char hash[MD5_DIGEST_SIZE];
3363
char a1buf[MD5_DIGEST_SIZE * 2 + 1], a2buf[MD5_DIGEST_SIZE * 2 + 1];
3364
char response_digest[MD5_DIGEST_SIZE * 2 + 1];
3171
3366
/* A1BUF = H(user ":" realm ":" password) */
3173
gen_md5_update ((unsigned char *)user, strlen (user), ctx);
3174
gen_md5_update ((unsigned char *)":", 1, ctx);
3175
gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
3176
gen_md5_update ((unsigned char *)":", 1, ctx);
3177
gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
3178
gen_md5_finish (ctx, hash);
3367
md5_init_ctx (&ctx);
3368
md5_process_bytes ((unsigned char *)user, strlen (user), &ctx);
3369
md5_process_bytes ((unsigned char *)":", 1, &ctx);
3370
md5_process_bytes ((unsigned char *)realm, strlen (realm), &ctx);
3371
md5_process_bytes ((unsigned char *)":", 1, &ctx);
3372
md5_process_bytes ((unsigned char *)passwd, strlen (passwd), &ctx);
3373
md5_finish_ctx (&ctx, hash);
3179
3374
dump_hash (a1buf, hash);
3181
3376
/* A2BUF = H(method ":" path) */
3183
gen_md5_update ((unsigned char *)method, strlen (method), ctx);
3184
gen_md5_update ((unsigned char *)":", 1, ctx);
3185
gen_md5_update ((unsigned char *)path, strlen (path), ctx);
3186
gen_md5_finish (ctx, hash);
3377
md5_init_ctx (&ctx);
3378
md5_process_bytes ((unsigned char *)method, strlen (method), &ctx);
3379
md5_process_bytes ((unsigned char *)":", 1, &ctx);
3380
md5_process_bytes ((unsigned char *)path, strlen (path), &ctx);
3381
md5_finish_ctx (&ctx, hash);
3187
3382
dump_hash (a2buf, hash);
3189
3384
/* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
3191
gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx);
3192
gen_md5_update ((unsigned char *)":", 1, ctx);
3193
gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
3194
gen_md5_update ((unsigned char *)":", 1, ctx);
3195
gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx);
3196
gen_md5_finish (ctx, hash);
3385
md5_init_ctx (&ctx);
3386
md5_process_bytes ((unsigned char *)a1buf, MD5_DIGEST_SIZE * 2, &ctx);
3387
md5_process_bytes ((unsigned char *)":", 1, &ctx);
3388
md5_process_bytes ((unsigned char *)nonce, strlen (nonce), &ctx);
3389
md5_process_bytes ((unsigned char *)":", 1, &ctx);
3390
md5_process_bytes ((unsigned char *)a2buf, MD5_DIGEST_SIZE * 2, &ctx);
3391
md5_finish_ctx (&ctx, hash);
3197
3392
dump_hash (response_digest, hash);
3199
3394
res = xmalloc (strlen (user)
3361
char *opt_dir_prefix;
3362
3556
char *filename;
3364
3558
} test_array[] = {
3365
{ "filename=\"file.ext\"", NULL, "file.ext", true },
3366
{ "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
3367
{ "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
3368
{ "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
3369
{ "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
3370
{ "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
3371
{ "attachment", NULL, NULL, false },
3372
{ "attachment", "somedir", NULL, false },
3559
{ "filename=\"file.ext\"", "file.ext", true },
3560
{ "attachment; filename=\"file.ext\"", "file.ext", true },
3561
{ "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
3562
{ "attachment", NULL, false },
3563
{ "attachement; filename*=UTF-8'en-US'hello.txt", "hello.txt", true },
3564
{ "attachement; filename*0=\"hello\"; filename*1=\"world.txt\"", "helloworld.txt", true },
3375
3567
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)