21
21
* it under the terms of the GNU General Public License as published by
22
22
* the Free Software Foundation; either version 2 of the License, or
23
23
* (at your option) any later version.
25
25
* This program is distributed in the hope that it will be useful,
26
26
* but WITHOUT ANY WARRANTY; without even the implied warranty of
27
27
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28
28
* GNU General Public License for more details.
30
30
* You should have received a copy of the GNU General Public License
31
31
* along with this program; if not, write to the Free Software
32
32
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
37
37
#include "HttpRequest.h"
38
38
#include "URLScheme.h"
40
static HttpRequest *urnParse(method_t method, char *urn);
41
static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn);
41
42
static const char valid_hostname_chars_u[] =
42
43
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
43
44
"abcdefghijklmnopqrstuvwxyz"
46
50
static const char valid_hostname_chars[] =
47
51
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
48
52
"abcdefghijklmnopqrstuvwxyz"
174
181
* If the 'request' arg is non-NULL, put parsed values there instead
175
182
* of allocating a new HttpRequest.
177
* This abuses HttpRequest as a way of representing the parsed url
184
* This abuses HttpRequest as a way of representing the parsed url
178
185
* and its components.
179
186
* method is used to switch parsers and to init the HttpRequest.
180
187
* If method is METHOD_CONNECT, then rather than a URL a hostname:port is
182
189
* The url is non-const so that if it's too long we can NUL-terminate it in place.
193
* This routine parses a URL. It's assumed that the URL is complete -
194
* ie, the end of the string is the end of the URL. Don't pass a partial
195
* URL here as this routine doesn't have any way of knowing whether
196
* it's partial or not (i.e., it handles the case of no trailing slash as
197
* being "end of host with implied path of /").
185
urlParse(method_t method, char *url, HttpRequest *request)
200
urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
187
202
LOCAL_ARRAY(char, proto, MAX_URL);
188
203
LOCAL_ARRAY(char, login, MAX_URL);
201
219
debugs(23, 1, "urlParse: URL too large (" << l << " bytes)");
205
222
if (method == METHOD_CONNECT) {
206
223
port = CONNECT_PORT;
208
if (sscanf(url, "%[^:]:%d", host, &port) < 1)
225
if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
226
if (sscanf(url, "%[^:]:%d", host, &port) < 1)
210
229
} else if (!strncmp(url, "urn:", 4)) {
211
230
return urnParse(method, url);
213
if (sscanf(url, "%[^:]://%[^/]%[^\r\n]", proto, host, urlpath) < 2)
235
/* Find first : - everything before is protocol */
236
for (i = 0, dst = proto; i < l && *src != ':'; i++, src++, dst++) {
244
/* (XXX yah, I'm not checking we've got enough data left before checking the array..) */
245
if (*src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
250
/* Then everything until the first /; that's the host (and port, which we'll look for here later) */
251
/* bug 1881: If we don't get a "/" then we imply it was there */
252
for (dst = host; i < l && *src != '/' && *src != '\0'; i++, src++, dst++) {
257
* We can't check for "i >= l" here because we could be at the end of the line
258
* and have a perfectly valid URL w/ no trailing '/'. In this case we assume we've
259
* been -given- a valid URL and the path is just '/'.
265
/* Then everything from / (inclusive) until \r\n or \0 - that's urlpath */
266
for (dst = urlpath; i < l && *src != '\r' && *src != '\n' && *src != '\0'; i++, src++, dst++) {
270
/* We -could- be at the end of the buffer here */
273
/* If the URL path is empty we set it to be "/" */
274
if (dst == urlpath) {
216
279
protocol = urlParseProtocol(proto);
218
280
port = urlDefaultPort(protocol);
220
/* Is there any login information? */
282
/* Is there any login information? (we should eventually parse it above) */
221
283
if ((t = strrchr(host, '@'))) {
222
284
strcpy((char *) login, (char *) host);
223
285
t = strrchr(login, '@');
225
287
strcpy((char *) host, t + 1);
228
if ((t = strrchr(host, ':'))) {
290
/* Is there any host information? (we should eventually parse it above) */
292
/* strip any IPA brackets. valid under IPv6. */
295
/* only for IPv6 sadly, pre-IPv6/URL code can't handle the clean result properly anyway. */
300
for (; i < l && *src != ']' && *src != '\0'; i++, src++, dst++) {
304
/* we moved in-place, so truncate the actual hostname found */
307
/* IPv4-pure needs to skip the whole hostname to ']' inclusive for now */
308
while (*dst != '\0' && *dst != ']') dst++;
311
/* skip ahead to either start of port, or original EOS */
312
while (*dst != '\0' && *dst != ':') dst++;
315
t = strrchr(host, ':');
317
if (t != strchr(host,':') ) {
318
/* RFC 2732 states IPv6 "SHOULD" be bracketed. allowing for times when it's not. */
319
/* RFC 3986 'update' simply modifies this to an "is" with no emphasis at all! */
320
/* therefore we MUST accept the case where they are not bracketed at all. */
325
if (t && *t == ':') {
239
335
if (stringHasWhitespace(host)) {
240
336
if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
244
339
if (!xisspace(*t))
347
debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
254
349
if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
255
350
debugs(23, 1, "urlParse: Illegal character in hostname '" << host << "'");
259
#if DONT_DO_THIS_IT_BREAKS_SEMANTIC_TRANSPARENCY
354
if (Config.appendDomain && !strchr(host, '.'))
355
strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
260
357
/* remove trailing dots from hostnames */
261
358
while ((l = strlen(host)) > 0 && host[--l] == '.')
264
/* remove duplicate dots */
265
while ((t = strstr(host, "..")))
266
xmemmove(t, t + 1, strlen(t));
270
if (Config.appendDomain && !strchr(host, '.'))
271
strncat(host, Config.appendDomain, SQUIDHOSTNAMELEN - strlen(host) - 1);
361
/* reject duplicate or leading dots */
362
if (strstr(host, "..") || *host == '.') {
363
debugs(23, 1, "urlParse: Illegal hostname '" << host << "'");
273
367
if (port < 1 || port > 65535) {
274
368
debugs(23, 3, "urlParse: Invalid port '" << port << "'");
326
416
request->initHTTP(method, protocol, urlpath);
329
xstrncpy(request->host, host, SQUIDHOSTNAMELEN);
419
request->SetHost(host);
330
420
xstrncpy(request->login, login, MAX_LOGIN_SZ);
331
421
request->port = (u_short) port;
335
425
static HttpRequest *
336
urnParse(method_t method, char *urn)
426
urnParse(const HttpRequestMethod& method, char *urn)
338
428
debugs(50, 5, "urnParse: " << urn);
339
429
return new HttpRequest(method, PROTO_URN, urn + 4);
343
433
urlCanonical(HttpRequest * request)
345
435
LOCAL_ARRAY(char, portbuf, 32);
436
/// \todo AYJ: Performance: making this a ptr and allocating when needed will be better than a write and future xstrdup().
346
437
LOCAL_ARRAY(char, urlbuf, MAX_URL);
348
439
if (request->canonical)
349
440
return request->canonical;
351
442
if (request->protocol == PROTO_URN) {
352
snprintf(urlbuf, MAX_URL, "urn:%s", request->urlpath.buf());
443
snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
444
SQUIDSTRINGPRINT(request->urlpath));
354
switch (request->method) {
446
/// \todo AYJ: this could use "if..else and method == METHOD_CONNECT" easier.
447
switch (request->method.id()) {
356
449
case METHOD_CONNECT:
357
snprintf(urlbuf, MAX_URL, "%s:%d", request->host, request->port);
450
snprintf(urlbuf, MAX_URL, "%s:%d", request->GetHost(), request->port);
363
456
if (request->port != urlDefaultPort(request->protocol))
364
457
snprintf(portbuf, 32, ":%d", request->port);
366
snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s%s",
459
snprintf(urlbuf, MAX_URL, "%s://%s%s%s%s" SQUIDSTRINGPH,
367
460
ProtocolStr[request->protocol],
369
462
*request->login ? "@" : null_string,
372
request->urlpath.buf());
465
SQUIDSTRINGPRINT(request->urlpath));
389
486
if (request->protocol == PROTO_URN) {
390
snprintf(buf, MAX_URL, "urn:%s", request->urlpath.buf());
487
snprintf(buf, MAX_URL, "urn:" SQUIDSTRINGPH,
488
SQUIDSTRINGPRINT(request->urlpath));
392
switch (request->method) {
490
/// \todo AYJ: this could use "if..else and method == METHOD_CONNECT" easier.
491
switch (request->method.id()) {
394
493
case METHOD_CONNECT:
395
snprintf(buf, MAX_URL, "%s:%d", request->host, request->port);
494
snprintf(buf, MAX_URL, "%s:%d",
412
513
strcat(loginbuf, "@");
415
snprintf(buf, MAX_URL, "%s://%s%s%s%s",
516
snprintf(buf, MAX_URL, "%s://%s%s%s" SQUIDSTRINGPH,
416
517
ProtocolStr[request->protocol],
420
request->urlpath.buf());
521
SQUIDSTRINGPRINT(request->urlpath));
422
523
* strip arguments AFTER a question-mark
448
549
LOCAL_ARRAY(char, buf, MAX_URL);
450
551
// method CONNECT and port HTTPS
451
if(request->method == METHOD_CONNECT && request->port == 443) {
452
snprintf(buf, MAX_URL, "https://%s/*", request->host);
552
if (request->method == METHOD_CONNECT && request->port == 443) {
553
snprintf(buf, MAX_URL, "https://%s/*", request->GetHost());
563
* Test if a URL is relative.
565
* RFC 2396, Section 5 (Page 17) implies that in a relative URL, a '/' will
566
* appear before a ':'.
569
urlIsRelative(const char *url)
580
for (p = url; *p != '\0' && *p != ':' && *p != '/'; p++);
589
* Convert a relative URL to an absolute URL using the context of a given
592
* It is assumed that you have already ensured that the URL is relative.
594
* If NULL is returned it is an indication that the method in use in the
595
* request does not distinguish between relative and absolute and you should
596
* use the url unchanged.
598
* If non-NULL is returned, it is up to the caller to free the resulting
599
* memory using safe_free().
602
urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
605
if (req->method.id() == METHOD_CONNECT) {
609
char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
611
if (req->protocol == PROTO_URN) {
612
snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
613
SQUIDSTRINGPRINT(req->urlpath));
619
if (req->port != urlDefaultPort(req->protocol)) {
620
urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s:%d",
621
ProtocolStr[req->protocol],
623
*req->login ? "@" : null_string,
628
urllen = snprintf(urlbuf, MAX_URL, "%s://%s%s%s",
629
ProtocolStr[req->protocol],
631
*req->login ? "@" : null_string,
636
if (relUrl[0] == '/') {
637
strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
639
const char *path = req->urlpath.termedBuf();
640
const char *last_slash = strrchr(path, '/');
642
if (last_slash == NULL) {
643
urlbuf[urllen++] = '/';
644
strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
647
size_t pathlen = last_slash - path;
648
if (pathlen > MAX_URL - urllen - 1) {
649
pathlen = MAX_URL - urllen - 1;
651
strncpy(&urlbuf[urllen], path, pathlen);
653
if (urllen + 1 < MAX_URL) {
654
strncpy(&urlbuf[urllen], relUrl, MAX_URL - urllen - 1);
462
663
* matchDomainName() compares a hostname with a domainname according
463
664
* to the following rules:
465
666
* HOST DOMAIN MATCH?
466
667
* ------------- ------------- ------
467
668
* foo.com foo.com YES