21
21
* it under the terms of the GNU General Public License as published by
22
22
* the Free Software Foundation; either version 2 of the License, or
23
23
* (at your option) any later version.
25
25
* This program is distributed in the hope that it will be useful,
26
26
* but WITHOUT ANY WARRANTY; without even the implied warranty of
27
27
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28
28
* GNU General Public License for more details.
30
30
* You should have received a copy of the GNU General Public License
31
31
* along with this program; if not, write to the Free Software
32
32
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
37
37
#include "HttpRequest.h"
38
38
#include "URLScheme.h"
40
static HttpRequest *urnParse(method_t method, char *urn);
40
static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn);
41
41
static const char valid_hostname_chars_u[] =
42
42
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
43
43
"abcdefghijklmnopqrstuvwxyz"
46
49
static const char valid_hostname_chars[] =
47
50
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
48
51
"abcdefghijklmnopqrstuvwxyz"
174
180
* If the 'request' arg is non-NULL, put parsed values there instead
175
181
* of allocating a new HttpRequest.
177
* This abuses HttpRequest as a way of representing the parsed url
183
* This abuses HttpRequest as a way of representing the parsed url
178
184
* and its components.
179
185
* method is used to switch parsers and to init the HttpRequest.
180
186
* If method is METHOD_CONNECT, then rather than a URL a hostname:port is
182
188
* The url is non-const so that if it's too long we can NUL-terminate it in place.
192
* This routine parses a URL. It's assumed that the URL is complete -
193
* ie, the end of the string is the end of the URL. Don't pass a partial
194
* URL here as this routine doesn't have any way of knowing whether
195
* it's partial or not (i.e., it handles the case of no trailing slash as
196
* being "end of host with implied path of /").
185
urlParse(method_t method, char *url, HttpRequest *request)
199
urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
187
201
LOCAL_ARRAY(char, proto, MAX_URL);
188
202
LOCAL_ARRAY(char, login, MAX_URL);
201
218
debugs(23, 1, "urlParse: URL too large (" << l << " bytes)");
205
221
if (method == METHOD_CONNECT) {
206
222
port = CONNECT_PORT;
208
if (sscanf(url, "%[^:]:%d", host, &port) < 1)
224
if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
225
if (sscanf(url, "%[^:]:%d", host, &port) < 1)
210
228
} else if (!strncmp(url, "urn:", 4)) {
211
229
return urnParse(method, url);
213
if (sscanf(url, "%[^:]://%[^/]%[^\r\n]", proto, host, urlpath) < 2)
234
/* Find first : - everything before is protocol */
235
for (i = 0, dst = proto; i < l && *src != ':'; i++, src++, dst++) {
243
/* (XXX yah, I'm not checking we've got enough data left before checking the array..) */
244
if (*src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
249
/* Then everything until first /; that's host (and port; which we'll look for here later) */
250
/* bug 1881: If we don't get a "/" then we imply it was there */
251
for (dst = host; i < l && *src != '/' && *src != '\0'; i++, src++, dst++) {
256
* We can't check for "i >= l" here because we could be at the end of the line
257
* and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
258
* been -given- a valid URL and the path is just '/'.
264
/* Then everything from / (inclusive) until \r\n or \0 - that's urlpath */
265
for (dst = urlpath; i < l && *src != '\r' && *src != '\n' && *src != '\0'; i++, src++, dst++) {
269
/* We -could- be at the end of the buffer here */
272
/* If the URL path is empty we set it to be "/" */
273
if (dst == urlpath) {
216
278
protocol = urlParseProtocol(proto);
218
279
port = urlDefaultPort(protocol);
220
/* Is there any login informaiton? */
281
/* Is there any login information? (we should eventually parse it above) */
221
282
if ((t = strrchr(host, '@'))) {
222
283
strcpy((char *) login, (char *) host);
223
284
t = strrchr(login, '@');
225
286
strcpy((char *) host, t + 1);
228
if ((t = strrchr(host, ':'))) {
289
/* Is there any host information? (we should eventually parse it above) */
291
/* strip any IPA brackets. valid under IPv6. */
294
/* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
299
for (; i < l && *src != ']' && *src != '\0'; i++, src++, dst++) {
303
/* we moved in-place, so truncate the actual hostname found */
306
/* IPv4-pure needs to skip the whole hostname to ']' inclusive for now */
307
while (*dst != '\0' && *dst != ']') dst++;
310
/* skip ahead to either start of port, or original EOS */
311
while (*dst != '\0' && *dst != ':') dst++;
314
t = strrchr(host, ':');
316
if (t != strchr(host,':') ) {
317
/* RFC 2732 states IPv6 "SHOULD" be bracketed. Allowing for times when it's not. */
318
/* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
319
/* therefore we MUST accept the case where they are not bracketed at all. */
324
if (t && *t == ':') {
239
334
if (stringHasWhitespace(host)) {
240
335
if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
244
338
if (!xisspace(*t))
346
debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
254
348
if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
255
349
debugs(23, 1, "urlParse: Illegal character in hostname '" << host << "'");
259
#if DONT_DO_THIS_IT_BREAKS_SEMANTIC_TRANSPARENCY
353
if (Config.appendDomain && !strchr(host, '.'))
354
strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
260
356
/* remove trailing dots from hostnames */
261
357
while ((l = strlen(host)) > 0 && host[--l] == '.')
264
/* remove duplicate dots */
265
while ((t = strstr(host, "..")))
266
xmemmove(t, t + 1, strlen(t));
270
if (Config.appendDomain && !strchr(host, '.'))
271
strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
360
/* reject duplicate or leading dots */
361
if (strstr(host, "..") || *host == '.') {
362
debugs(23, 1, "urlParse: Illegal hostname '" << host << "'");
273
366
if (port < 1 || port > 65535) {
274
367
debugs(23, 3, "urlParse: Invalid port '" << port << "'");
326
415
request->initHTTP(method, protocol, urlpath);
329
xstrncpy(request->host, host, SQUIDHOSTNAMELEN);
418
request->SetHost(host);
330
419
xstrncpy(request->login, login, MAX_LOGIN_SZ);
331
420
request->port = (u_short) port;
335
424
static HttpRequest *
336
urnParse(method_t method, char *urn)
425
urnParse(const HttpRequestMethod& method, char *urn)
338
427
debugs(50, 5, "urnParse: " << urn);
339
428
return new HttpRequest(method, PROTO_URN, urn + 4);
343
432
urlCanonical(HttpRequest * request)
345
434
LOCAL_ARRAY(char, portbuf, 32);
435
/// \todo AYJ: Performance: making this a ptr and allocating when needed will be better than a write and future xstrdup().
346
436
LOCAL_ARRAY(char, urlbuf, MAX_URL);
348
438
if (request->canonical)
349
439
return request->canonical;
351
441
if (request->protocol == PROTO_URN) {
352
snprintf(urlbuf, MAX_URL, "urn:%s", request->urlpath.buf());
442
snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
443
SQUIDSTRINGPRINT(request->urlpath));
354
switch (request->method) {
445
/// \todo AYJ: this could use "if..else and method == METHOD_CONNECT" easier.
446
switch (request->method.id()) {
356
448
case METHOD_CONNECT:
357
snprintf(urlbuf, MAX_URL, "%s:%d", request->host, request->port);
449
snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
363
455
if (request->port != urlDefaultPort(request->protocol))
364
456
snprintf(portbuf, 32, ":%d", request->port);
366
snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s%s",
458
snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s" SQUIDSTRINGPH,
367
459
ProtocolStr[request->protocol],
369
461
*request->login ? "@" : null_string,
372
request->urlpath.buf());
464
SQUIDSTRINGPRINT(request->urlpath));
389
485
if (request->protocol == PROTO_URN) {
390
snprintf(buf, MAX_URL, "urn:%s", request->urlpath.buf());
486
snprintf(buf, MAX_URL, "urn:" SQUIDSTRINGPH,
487
SQUIDSTRINGPRINT(request->urlpath));
392
switch (request->method) {
489
/// \todo AYJ: this could use "if..else and method == METHOD_CONNECT" easier.
490
switch (request->method.id()) {
394
492
case METHOD_CONNECT:
395
snprintf(buf, MAX_URL, "%s:%d", request->host, request->port);
493
snprintf(buf, MAX_URL, "%s:%d",
412
512
strcat(loginbuf, "@");
415
snprintf(buf, MAX_URL, "%s://%s%s%s%s",
515
snprintf(buf, MAX_URL, "%s://%s%s%s" SQUIDSTRINGPH,
416
516
ProtocolStr[request->protocol],
420
request->urlpath.buf());
520
SQUIDSTRINGPRINT(request->urlpath));
422
522
* strip arguments AFTER a question-mark
448
548
LOCAL_ARRAY(char, buf, MAX_URL);
450
550
// method CONNECT and port HTTPS
451
if(request->method == METHOD_CONNECT && request->port == 443) {
452
snprintf(buf, MAX_URL, "https://%s/*", request->host);
551
if (request->method == METHOD_CONNECT && request->port == 443) {
552
snprintf(buf, MAX_URL, "https://%s/*", request->GetHost());
562
* Test if a URL is relative.
564
* RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
565
* appear before a ':'.
568
urlIsRelative(const char *url)
579
for (p = url; *p != '\0' && *p != ':' && *p != '/'; p++);
588
* Convert a relative URL to an absolute URL using the context of a given
591
* It is assumed that you have already ensured that the URL is relative.
593
* If NULL is returned it is an indication that the method in use in the
594
* request does not distinguish between relative and absolute and you should
595
* use the url unchanged.
597
* If non-NULL is returned, it is up to the caller to free the resulting
598
* memory using safe_free().
601
urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
604
if (req->method.id() == METHOD_CONNECT) {
608
char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
610
if (req->protocol == PROTO_URN) {
611
snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
612
SQUIDSTRINGPRINT(req->urlpath));
618
if (req->port != urlDefaultPort(req->protocol)) {
619
urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s:%d",
620
ProtocolStr[req->protocol],
622
*req->login ? "@" : null_string,
627
urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s",
628
ProtocolStr[req->protocol],
630
*req->login ? "@" : null_string,
635
if (relUrl[0] == '/') {
636
strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
638
const char *path = req->urlpath.termedBuf();
639
const char *last_slash = strrchr(path, '/');
641
if (last_slash == NULL) {
642
urlbuf[urllen++] = '/';
643
strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
646
size_t pathlen = last_slash - path;
647
if (pathlen > MAX_URL - urllen - 1) {
648
pathlen = MAX_URL - urllen - 1;
650
strncpy(&urlbuf[urllen], path, pathlen);
652
if (urllen + 1 < MAX_URL) {
653
strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
462
662
* matchDomainName() compares a hostname with a domainname according
463
663
* to the following rules:
465
665
* HOST DOMAIN MATCH?
466
666
* ------------- ------------- ------
467
667
* foo.com foo.com YES