26
26
exports.resolveObject = urlResolveObject;
27
27
exports.format = urlFormat;
29
46
// Reference: RFC 3986, RFC 1808, RFC 2396
31
48
// define these here so at least they only have to be
32
49
// compiled once on the first module load.
33
50
var protocolPattern = /^([a-z0-9.+-]+:)/i,
34
portPattern = /:[0-9]+$/,
51
portPattern = /:[0-9]*$/,
35
53
// RFC 2396: characters reserved for delimiting URLs.
54
// We actually just auto-escape these.
36
55
delims = ['<', '>', '"', '`', ' ', '\r', '\n', '\t'],
37
57
// RFC 2396: characters not allowed for various reasons.
38
unwise = ['{', '}', '|', '\\', '^', '~', '[', ']', '`'].concat(delims),
58
unwise = ['{', '}', '|', '\\', '^', '~', '`'].concat(delims),
39
60
// Allowed by RFCs, but cause of XSS attacks. Always escape these.
61
autoEscape = ['\''].concat(delims),
41
62
// Characters that are never ever allowed in a hostname.
42
63
// Note that any invalid chars are also handled, but these
43
64
// are the ones that are *expected* to be seen, so we fast-path
45
66
nonHostChars = ['%', '/', '?', ';', '#']
46
67
.concat(unwise).concat(autoEscape),
47
nonAuthChars = ['/', '@', '?', '#'].concat(delims),
68
hostEndingChars = ['/', '?', '#'],
48
69
hostnameMaxLen = 255,
49
hostnamePartPattern = /^[a-zA-Z0-9][a-z0-9A-Z_-]{0,62}$/,
50
hostnamePartStart = /^([a-zA-Z0-9][a-z0-9A-Z_-]{0,62})(.*)$/,
70
hostnamePartPattern = /^[a-z0-9A-Z_-]{0,63}$/,
71
hostnamePartStart = /^([a-z0-9A-Z_-]{0,63})(.*)$/,
51
72
// protocols that can allow "unsafe" and "unwise" chars.
53
74
'javascript': true,
86
95
querystring = require('querystring');
88
97
function urlParse(url, parseQueryString, slashesDenoteHost) {
89
if (url && typeof(url) === 'object' && url.href) return url;
98
if (url && typeof(url) === 'object' && url instanceof Url) return url;
101
u.parse(url, parseQueryString, slashesDenoteHost);
105
Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
91
106
if (typeof url !== 'string') {
92
107
throw new TypeError("Parameter 'url' must be a string, not " + typeof url);
98
// cut off any delimiters.
99
// This is to support parse stuff like "<http://foo.com>"
100
for (var i = 0, l = rest.length; i < l; i++) {
101
if (delims.indexOf(rest.charAt(i)) === -1) break;
103
if (i !== 0) rest = rest.substr(i);
112
// trim before proceeding.
113
// This is to support parse stuff like " http://foo.com \n"
106
116
var proto = protocolPattern.exec(rest);
108
118
proto = proto[0];
109
119
var lowerProto = proto.toLowerCase();
110
out.protocol = lowerProto;
120
this.protocol = lowerProto;
111
121
rest = rest.substr(proto.length);
119
129
var slashes = rest.substr(0, 2) === '//';
120
130
if (slashes && !(proto && hostlessProtocol[proto])) {
121
131
rest = rest.substr(2);
126
136
if (!hostlessProtocol[proto] &&
127
137
(slashes || (proto && !slashedProtocol[proto]))) {
128
139
// there's a hostname.
129
140
// the first instance of /, ?, ;, or # ends the host.
130
// don't enforce full RFC correctness, just be unstupid about it.
132
142
// If there is an @ in the hostname, then non-host chars *are* allowed
133
// to the left of the first @ sign, unless some non-auth character
143
// to the left of the last @ sign, unless some host-ending character
134
144
// comes *before* the @-sign.
135
145
// URLs are obnoxious.
136
var atSign = rest.indexOf('@');
148
// http://a@b@c/ => user:a@b host:c
149
// http://a@b?@c => user:a host:c path:/?@c
151
// v0.12 TODO(isaacs): This is not quite how Chrome does things.
152
// Review our test case against browsers more comprehensively.
154
// find the first instance of any hostEndingChars
156
for (var i = 0; i < hostEndingChars.length; i++) {
157
var hec = rest.indexOf(hostEndingChars[i]);
158
if (hec !== -1 && (hostEnd === -1 || hec < hostEnd))
162
// at this point, either we have an explicit point where the
163
// auth portion cannot go past, or the last @ char is the decider.
165
if (hostEnd === -1) {
166
// atSign can be anywhere.
167
atSign = rest.lastIndexOf('@');
169
// atSign must be in auth portion.
170
// http://a@b/c@d => host:b auth:a path:/c@d
171
atSign = rest.lastIndexOf('@', hostEnd);
174
// Now we have a portion which is definitely the auth.
137
176
if (atSign !== -1) {
138
// there *may be* an auth
140
for (var i = 0, l = nonAuthChars.length; i < l; i++) {
141
var index = rest.indexOf(nonAuthChars[i]);
142
if (index !== -1 && index < atSign) {
143
// not a valid auth. Something like http://foo.com/bar@baz/
149
// pluck off the auth portion.
150
out.auth = rest.substr(0, atSign);
151
rest = rest.substr(atSign + 1);
155
var firstNonHost = -1;
156
for (var i = 0, l = nonHostChars.length; i < l; i++) {
157
var index = rest.indexOf(nonHostChars[i]);
159
(firstNonHost < 0 || index < firstNonHost)) firstNonHost = index;
162
if (firstNonHost !== -1) {
163
out.host = rest.substr(0, firstNonHost);
164
rest = rest.substr(firstNonHost);
177
auth = rest.slice(0, atSign);
178
rest = rest.slice(atSign + 1);
179
this.auth = decodeURIComponent(auth);
182
// the host is the remaining to the left of the first non-host char
184
for (var i = 0; i < nonHostChars.length; i++) {
185
var hec = rest.indexOf(nonHostChars[i]);
186
if (hec !== -1 && (hostEnd === -1 || hec < hostEnd))
189
// if we still have not hit it, then the entire thing is a host.
191
hostEnd = rest.length;
193
this.host = rest.slice(0, hostEnd);
194
rest = rest.slice(hostEnd);
170
196
// pull out port.
171
var p = parseHost(out.host);
172
var keys = Object.keys(p);
173
for (var i = 0, l = keys.length; i < l; i++) {
178
199
// we've indicated that there is a hostname,
179
200
// so even if it's empty, it has to be present.
180
out.hostname = out.hostname || '';
201
this.hostname = this.hostname || '';
203
// if hostname begins with [ and ends with ]
204
// assume that it's an IPv6 address.
205
var ipv6Hostname = this.hostname[0] === '[' &&
206
this.hostname[this.hostname.length - 1] === ']';
182
208
// validate a little.
183
if (out.hostname.length > hostnameMaxLen) {
186
var hostparts = out.hostname.split(/\./);
210
var hostparts = this.hostname.split(/\./);
187
211
for (var i = 0, l = hostparts.length; i < l; i++) {
188
212
var part = hostparts[i];
189
213
if (!part) continue;
211
235
if (notHost.length) {
212
236
rest = '/' + notHost.join('.') + rest;
214
out.hostname = validParts.join('.');
238
this.hostname = validParts.join('.');
221
// hostnames are always lower case.
222
out.hostname = out.hostname.toLowerCase();
224
// IDNA Support: Returns a puny coded representation of "domain".
225
// It only converts the part of the domain name that
226
// has non-ASCII characters. I.e. it doesn't matter if
227
// you call it with a domain that already is in ASCII.
228
var domainArray = out.hostname.split('.');
230
for (var i = 0; i < domainArray.length; ++i) {
231
var s = domainArray[i];
232
newOut.push(s.match(/[^A-Za-z0-9_-]/) ?
233
'xn--' + punycode.encode(s) : s);
235
out.hostname = newOut.join('.');
237
out.host = (out.hostname || '') +
238
((out.port) ? ':' + out.port : '');
239
out.href += out.host;
245
if (this.hostname.length > hostnameMaxLen) {
248
// hostnames are always lower case.
249
this.hostname = this.hostname.toLowerCase();
253
// IDNA Support: Returns a puny coded representation of "domain".
254
// It only converts the part of the domain name that
255
// has non-ASCII characters. I.e. it doesn't matter if
256
// you call it with a domain that already is in ASCII.
257
var domainArray = this.hostname.split('.');
259
for (var i = 0; i < domainArray.length; ++i) {
260
var s = domainArray[i];
261
newOut.push(s.match(/[^A-Za-z0-9_-]/) ?
262
'xn--' + punycode.encode(s) : s);
264
this.hostname = newOut.join('.');
267
var p = this.port ? ':' + this.port : '';
268
var h = this.hostname || '';
270
this.href += this.host;
272
// strip [ and ] from the hostname
273
// the host field still retains them, though
275
this.hostname = this.hostname.substr(1, this.hostname.length - 2);
276
if (rest[0] !== '/') {
242
282
// now rest is set to the post-host stuff.
271
301
var hash = rest.indexOf('#');
272
302
if (hash !== -1) {
273
303
// got a fragment string.
274
out.hash = rest.substr(hash);
304
this.hash = rest.substr(hash);
275
305
rest = rest.slice(0, hash);
277
307
var qm = rest.indexOf('?');
279
out.search = rest.substr(qm);
280
out.query = rest.substr(qm + 1);
309
this.search = rest.substr(qm);
310
this.query = rest.substr(qm + 1);
281
311
if (parseQueryString) {
282
out.query = querystring.parse(out.query);
312
this.query = querystring.parse(this.query);
284
314
rest = rest.slice(0, qm);
285
315
} else if (parseQueryString) {
286
316
// no query string, but parseQueryString still requested
290
if (rest) out.pathname = rest;
291
if (slashedProtocol[proto] &&
292
out.hostname && !out.pathname) {
320
if (rest) this.pathname = rest;
321
if (slashedProtocol[lowerProto] &&
322
this.hostname && !this.pathname) {
296
326
//to support http.request
297
if (out.pathname || out.search) {
298
out.path = (out.pathname ? out.pathname : '') +
299
(out.search ? out.search : '');
327
if (this.pathname || this.search) {
328
var p = this.pathname || '';
329
var s = this.search || '';
302
333
// finally, reconstruct the href based on what has been validated.
303
out.href = urlFormat(out);
334
this.href = this.format();
307
338
// format a parsed object into a url string
308
339
function urlFormat(obj) {
311
342
// this way, you can call url_format() on strings
312
343
// to clean up potentially wonky urls.
313
344
if (typeof(obj) === 'string') obj = urlParse(obj);
345
if (!(obj instanceof Url)) return Url.prototype.format.call(obj);
315
var auth = obj.auth || '';
349
Url.prototype.format = function() {
350
var auth = this.auth || '';
317
auth = auth.split('@').join('%40');
318
for (var i = 0, l = nonAuthChars.length; i < l; i++) {
319
var nAC = nonAuthChars[i];
320
auth = auth.split(nAC).join(encodeURIComponent(nAC));
352
auth = encodeURIComponent(auth);
353
auth = auth.replace(/%3A/i, ':');
325
var protocol = obj.protocol || '',
326
host = (obj.host !== undefined) ? auth + obj.host :
327
obj.hostname !== undefined ? (
328
auth + obj.hostname +
329
(obj.port ? ':' + obj.port : '')
332
pathname = obj.pathname || '',
334
((typeof obj.query === 'object' &&
335
Object.keys(obj.query).length) ?
336
querystring.stringify(obj.query) :
338
search = obj.search || (query && ('?' + query)) || '',
339
hash = obj.hash || '';
357
var protocol = this.protocol || '',
358
pathname = this.pathname || '',
359
hash = this.hash || '',
364
host = auth + this.host;
365
} else if (this.hostname) {
366
host = auth + (this.hostname.indexOf(':') === -1 ?
368
'[' + this.hostname + ']');
370
host += ':' + this.port;
374
if (this.query && typeof this.query === 'object' &&
375
Object.keys(this.query).length) {
376
query = querystring.stringify(this.query);
379
var search = this.search || (query && ('?' + query)) || '';
341
381
if (protocol && protocol.substr(-1) !== ':') protocol += ':';
343
383
// only the slashedProtocols get the //. Not mailto:, xmpp:, etc.
344
384
// unless they had them to begin with.
346
386
(!protocol || slashedProtocol[protocol]) && host !== false) {
347
387
host = '//' + (host || '');
348
388
if (pathname && pathname.charAt(0) !== '/') pathname = '/' + pathname;
353
393
if (hash && hash.charAt(0) !== '#') hash = '#' + hash;
354
394
if (search && search.charAt(0) !== '?') search = '?' + search;
396
pathname = pathname.replace(/[?#]/g, function(match) {
397
return encodeURIComponent(match);
399
search = search.replace('#', '%23');
356
401
return protocol + host + pathname + search + hash;
359
404
function urlResolve(source, relative) {
360
return urlFormat(urlResolveObject(source, relative));
405
return urlParse(source, false, true).resolve(relative);
408
Url.prototype.resolve = function(relative) {
409
return this.resolveObject(urlParse(relative, false, true)).format();
363
412
function urlResolveObject(source, relative) {
364
413
if (!source) return relative;
366
source = urlParse(urlFormat(source), false, true);
367
relative = urlParse(urlFormat(relative), false, true);
414
return urlParse(source, false, true).resolveObject(relative);
417
Url.prototype.resolveObject = function(relative) {
418
if (typeof relative === 'string') {
420
rel.parse(relative, false, true);
424
var result = new Url();
425
Object.keys(this).forEach(function(k) {
369
429
// hash is always overridden, no matter what.
370
source.hash = relative.hash;
430
// even href="" will remove it.
431
result.hash = relative.hash;
433
// if the relative url is empty, then there's nothing left to do here.
372
434
if (relative.href === '') {
373
source.href = urlFormat(source);
435
result.href = result.format();
377
439
// hrefs like //foo/bar always cut to the protocol.
378
440
if (relative.slashes && !relative.protocol) {
379
relative.protocol = source.protocol;
441
// take everything except the protocol from relative
442
Object.keys(relative).forEach(function(k) {
443
if (k !== 'protocol')
444
result[k] = relative[k];
380
447
//urlParse appends trailing / to urls like http://www.example.com
381
if (slashedProtocol[relative.protocol] &&
382
relative.hostname && !relative.pathname) {
383
relative.path = relative.pathname = '/';
448
if (slashedProtocol[result.protocol] &&
449
result.hostname && !result.pathname) {
450
result.path = result.pathname = '/';
385
relative.href = urlFormat(relative);
453
result.href = result.format();
389
if (relative.protocol && relative.protocol !== source.protocol) {
457
if (relative.protocol && relative.protocol !== result.protocol) {
390
458
// if it's a known url protocol, then changing
391
459
// the protocol does weird things
392
460
// first, if it's not file:, then we MUST have a host,
407
479
if (!relative.hostname) relative.hostname = '';
408
480
if (relPath[0] !== '') relPath.unshift('');
409
481
if (relPath.length < 2) relPath.unshift('');
410
relative.pathname = relPath.join('/');
412
source.pathname = relative.pathname;
413
source.search = relative.search;
414
source.query = relative.query;
415
source.host = relative.host || '';
416
source.auth = relative.auth;
417
source.hostname = relative.hostname || relative.host;
418
source.port = relative.port;
419
//to support http.request
420
if (source.pathname !== undefined || source.search !== undefined) {
421
source.path = (source.pathname ? source.pathname : '') +
422
(source.search ? source.search : '');
424
source.slashes = source.slashes || relative.slashes;
425
source.href = urlFormat(source);
482
result.pathname = relPath.join('/');
484
result.pathname = relative.pathname;
486
result.search = relative.search;
487
result.query = relative.query;
488
result.host = relative.host || '';
489
result.auth = relative.auth;
490
result.hostname = relative.hostname || relative.host;
491
result.port = relative.port;
492
// to support http.request
493
if (result.pathname || result.search) {
494
var p = result.pathname || '';
495
var s = result.search || '';
498
result.slashes = result.slashes || relative.slashes;
499
result.href = result.format();
429
var isSourceAbs = (source.pathname && source.pathname.charAt(0) === '/'),
503
var isSourceAbs = (result.pathname && result.pathname.charAt(0) === '/'),
431
relative.host !== undefined ||
432
506
relative.pathname && relative.pathname.charAt(0) === '/'
434
508
mustEndAbs = (isRelAbs || isSourceAbs ||
435
(source.host && relative.pathname)),
509
(result.host && relative.pathname)),
436
510
removeAllDots = mustEndAbs,
437
srcPath = source.pathname && source.pathname.split('/') || [],
511
srcPath = result.pathname && result.pathname.split('/') || [],
438
512
relPath = relative.pathname && relative.pathname.split('/') || [],
439
psychotic = source.protocol &&
440
!slashedProtocol[source.protocol];
513
psychotic = result.protocol && !slashedProtocol[result.protocol];
442
515
// if the url is a non-slashed url, then relative
443
516
// links like ../.. should be able
444
517
// to crawl up to the hostname, as well. This is strange.
445
// source.protocol has already been set by now.
518
// result.protocol has already been set by now.
446
519
// Later on, put the first path part into the host field.
449
delete source.hostname;
452
if (srcPath[0] === '') srcPath[0] = source.host;
453
else srcPath.unshift(source.host);
521
result.hostname = '';
524
if (srcPath[0] === '') srcPath[0] = result.host;
525
else srcPath.unshift(result.host);
456
528
if (relative.protocol) {
457
delete relative.hostname;
458
delete relative.port;
529
relative.hostname = null;
530
relative.port = null;
459
531
if (relative.host) {
460
532
if (relPath[0] === '') relPath[0] = relative.host;
461
533
else relPath.unshift(relative.host);
463
delete relative.host;
535
relative.host = null;
465
537
mustEndAbs = mustEndAbs && (relPath[0] === '' || srcPath[0] === '');
469
541
// it's absolute.
470
source.host = (relative.host || relative.host === '') ?
471
relative.host : source.host;
472
source.hostname = (relative.hostname || relative.hostname === '') ?
473
relative.hostname : source.hostname;
474
source.search = relative.search;
475
source.query = relative.query;
542
result.host = (relative.host || relative.host === '') ?
543
relative.host : result.host;
544
result.hostname = (relative.hostname || relative.hostname === '') ?
545
relative.hostname : result.hostname;
546
result.search = relative.search;
547
result.query = relative.query;
476
548
srcPath = relPath;
477
549
// fall through to the dot-handling below.
478
550
} else if (relPath.length) {
481
553
if (!srcPath) srcPath = [];
483
555
srcPath = srcPath.concat(relPath);
484
source.search = relative.search;
485
source.query = relative.query;
486
} else if ('search' in relative) {
556
result.search = relative.search;
557
result.query = relative.query;
558
} else if (relative.search !== null && relative.search !== undefined) {
487
559
// just pull out the search.
488
560
// like href='?foo'.
489
561
// Put this after the other two cases because it simplifies the booleans
491
source.hostname = source.host = srcPath.shift();
563
result.hostname = result.host = srcPath.shift();
492
564
//occasionally the auth can get stuck only in host
493
565
//this especially happens in cases like
494
566
//url.resolveObject('mailto:local1@domain1', 'local2@domain2')
495
var authInHost = source.host && source.host.indexOf('@') > 0 ?
496
source.host.split('@') : false;
567
var authInHost = result.host && result.host.indexOf('@') > 0 ?
568
result.host.split('@') : false;
497
569
if (authInHost) {
498
source.auth = authInHost.shift();
499
source.host = source.hostname = authInHost.shift();
570
result.auth = authInHost.shift();
571
result.host = result.hostname = authInHost.shift();
502
source.search = relative.search;
503
source.query = relative.query;
574
result.search = relative.search;
575
result.query = relative.query;
504
576
//to support http.request
505
if (source.pathname !== undefined || source.search !== undefined) {
506
source.path = (source.pathname ? source.pathname : '') +
507
(source.search ? source.search : '');
577
if (result.pathname !== null || result.search !== null) {
578
result.path = (result.pathname ? result.pathname : '') +
579
(result.search ? result.search : '');
509
source.href = urlFormat(source);
581
result.href = result.format();
512
585
if (!srcPath.length) {
513
586
// no path at all. easy.
514
587
// we've already handled the other stuff above.
515
delete source.pathname;
588
result.pathname = null;
516
589
//to support http.request
517
if (!source.search) {
518
source.path = '/' + source.search;
591
result.path = '/' + result.search;
522
source.href = urlFormat(source);
595
result.href = result.format();
525
599
// if a url ENDs in . or .., then it must get a trailing slash.
526
600
// however, if it ends in anything else non-slashy,
527
601
// then it must NOT get a trailing slash.
528
602
var last = srcPath.slice(-1)[0];
529
603
var hasTrailingSlash = (
530
(source.host || relative.host) && (last === '.' || last === '..') ||
604
(result.host || relative.host) && (last === '.' || last === '..') ||
533
607
// strip single dots, resolve double dots to parent dir
568
642
// put the host back
570
source.hostname = source.host = isAbsolute ? '' :
644
result.hostname = result.host = isAbsolute ? '' :
571
645
srcPath.length ? srcPath.shift() : '';
572
646
//occasionally the auth can get stuck only in host
573
647
//this especially happens in cases like
574
648
//url.resolveObject('mailto:local1@domain1', 'local2@domain2')
575
var authInHost = source.host && source.host.indexOf('@') > 0 ?
576
source.host.split('@') : false;
649
var authInHost = result.host && result.host.indexOf('@') > 0 ?
650
result.host.split('@') : false;
577
651
if (authInHost) {
578
source.auth = authInHost.shift();
579
source.host = source.hostname = authInHost.shift();
652
result.auth = authInHost.shift();
653
result.host = result.hostname = authInHost.shift();
583
mustEndAbs = mustEndAbs || (source.host && srcPath.length);
657
mustEndAbs = mustEndAbs || (result.host && srcPath.length);
585
659
if (mustEndAbs && !isAbsolute) {
586
660
srcPath.unshift('');
589
source.pathname = srcPath.join('/');
663
if (!srcPath.length) {
664
result.pathname = null;
667
result.pathname = srcPath.join('/');
590
670
//to support http.request
591
if (source.pathname !== undefined || source.search !== undefined) {
592
source.path = (source.pathname ? source.pathname : '') +
593
(source.search ? source.search : '');
671
if (result.pathname !== null || result.search !== null) {
672
result.path = (result.pathname ? result.pathname : '') +
673
(result.search ? result.search : '');
595
source.auth = relative.auth || source.auth;
596
source.slashes = source.slashes || relative.slashes;
597
source.href = urlFormat(source);
675
result.auth = relative.auth || result.auth;
676
result.slashes = result.slashes || relative.slashes;
677
result.href = result.format();
601
function parseHost(host) {
681
Url.prototype.parseHost = function() {
682
var host = this.host;
603
683
var port = portPattern.exec(host);
606
out.port = port.substr(1);
687
this.port = port.substr(1);
607
689
host = host.substr(0, host.length - port.length);
609
if (host) out.hostname = host;
691
if (host) this.hostname = host;