1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/* ***** BEGIN LICENSE BLOCK *****
3
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
5
* The contents of this file are subject to the Netscape Public License
6
* Version 1.1 (the "License"); you may not use this file except in
7
* compliance with the License. You may obtain a copy of the License at
8
* http://www.mozilla.org/NPL/
10
* Software distributed under the License is distributed on an "AS IS" basis,
11
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
* for the specific language governing rights and limitations under the
15
* The Original Code is mozilla.org code.
17
* The Initial Developer of the Original Code is
18
* Netscape Communications Corporation.
19
* Portions created by the Initial Developer are Copyright (C) 1998
20
* the Initial Developer. All Rights Reserved.
23
* Darin Fisher (original author)
25
* Alternatively, the contents of this file may be used under the terms of
26
* either the GNU General Public License Version 2 or later (the "GPL"), or
27
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28
* in which case the provisions of the GPL or the LGPL are applicable instead
29
* of those above. If you wish to allow use of your version of this file only
30
* under the terms of either the GPL or the LGPL, and not to allow others to
31
* use your version of this file under the terms of the NPL, indicate your
32
* decision by deleting the provisions above and replace them with the notice
33
* and other provisions required by the GPL or the LGPL. If you do not delete
34
* the provisions above, a recipient may use your version of this file under
35
* the terms of any one of the NPL, the GPL or the LGPL.
37
* ***** END LICENSE BLOCK ***** */
40
#include "nsURLParsers.h"
41
#include "nsURLHelper.h"
48
//----------------------------------------------------------------------------
51
// Counts the '/' characters at the very start of |str|, reading at most
// |len| characters: the loop ends when |len| is used up or as soon as a
// character other than '/' is read.  Callers in this file use the result
// as the number of leading slashes of a spec.
CountConsecutiveSlashes(const char *str, PRInt32 len)
54
while (len-- && *str++ == '/') ++count;
58
//----------------------------------------------------------------------------
59
// nsBaseURLParser implementation
60
//----------------------------------------------------------------------------
62
// The URL parser service does not have any internal state; however, it can
63
// be called from multiple threads, so we must use a threadsafe AddRef and
64
// Release implementation.
// NOTE(review): presumably the macro below also supplies QueryInterface for
// nsIURLParser on nsBaseURLParser -- confirm against the XPCOM headers.
65
NS_IMPL_THREADSAFE_ISUPPORTS1(nsBaseURLParser, nsIURLParser)
67
// SET_RESULT(component, pos, len)
//
// Stores a parse result into the pair of optional out-parameters named
// <component>Pos and <component>Len (the names are built with the ##
// token-pasting operator).  Each pointer is tested before it is written, so
// a caller may pass null for any result it does not need.  |pos| is
// converted to PRUint32 and |len| to PRInt32.  Throughout this file a
// length of -1 is stored for a component that is not present, and 0 for one
// that is present but empty.
#define SET_RESULT(component, pos, len) \
69
if (component ## Pos) \
70
*component ## Pos = PRUint32(pos); \
71
if (component ## Len) \
72
*component ## Len = PRInt32(len); \
75
// OFFSET_RESULT(component, offset)
//
// Adds |offset| to *<component>Pos when that out-pointer is non-null.  It is
// used after parsing a substring to turn a position that is relative to the
// substring into one that is relative to the enclosing string.
#define OFFSET_RESULT(component, offset) \
77
if (component ## Pos) \
78
*component ## Pos += offset; \
82
// Splits |spec| into scheme, authority and path.  Every result is reported
// as an offset/length pair through the optional out-parameters (see
// SET_RESULT); a length of -1 means the component is absent.
//
// Outline of the visible logic:
//  - A forward scan runs until |colon| or |slash| has been set.  The case
//    labels group '/', '?', '#', ';' together and '@', '[' together;
//    presumably these feed |slash| and |stop| respectively.
//  - Trailing whitespace/control characters are trimmed from |specLen|.
//  - If a colon was found and it comes before any slash, the text before it
//    must satisfy net_IsValidScheme and must not be followed directly by a
//    second ':'; otherwise NS_ERROR_MALFORMED_URI is returned.  The text
//    after the colon is handed to ParseAfterScheme and the authority/path
//    positions it reports are rebased with OFFSET_RESULT.
//  - Otherwise the scheme is reported as absent and the whole spec is handed
//    to ParseAfterScheme.
// ParseAfterScheme is only invoked when the caller supplied authorityLen or
// pathLen.
nsBaseURLParser::ParseURL(const char *spec, PRInt32 specLen,
83
PRUint32 *schemePos, PRInt32 *schemeLen,
84
PRUint32 *authorityPos, PRInt32 *authorityLen,
85
PRUint32 *pathPos, PRInt32 *pathLen)
87
NS_PRECONDITION(spec, "null pointer");
90
// NOTE(review): presumably only done when the caller passed a negative
// specLen (as ParseServerInfo does for serverinfoLen) -- confirm.
specLen = strlen(spec);
92
const char *stop = nsnull;
93
const char *colon = nsnull;
94
const char *slash = nsnull;
96
PRInt32 len = specLen;
97
for (p = spec; len && *p && !colon && !slash; ++p, --len) {
98
// skip leading whitespace and control characters
99
// (the "> '\0'" half of the test keeps bytes that are negative when char
// is signed from being treated as whitespace)
if (*p > '\0' && *p <= ' ') {
109
case '/': // start of filepath
110
case '?': // start of query
111
case '#': // start of ref
112
case ';': // start of param
116
case '@': // username@hostname
117
case '[': // start of IPv6 address literal
123
// disregard the first colon if it follows an '@' or a '['
124
if (colon && stop && colon > stop)
127
// if the spec only contained whitespace or control characters...
129
SET_RESULT(scheme, 0, -1);
130
SET_RESULT(authority, 0, 0);
131
SET_RESULT(path, 0, 0);
135
// ignore trailing whitespace and control characters
136
// (the unsigned char cast keeps bytes >= 0x80 from comparing as <= ' ')
for (p = spec + specLen - 1; ((unsigned char) *p <= ' ') && (p != spec); --p)
139
specLen = p - spec + 1;
141
if (colon && (colon < slash || !slash)) {
143
// spec = <scheme>:/<the-rest>
147
// spec = <scheme>:<authority>
148
// spec = <scheme>:<path-no-slashes>
150
if (!net_IsValidScheme(spec, colon - spec) || (*(colon+1) == ':')) {
151
NS_WARNING("malformed uri");
152
return NS_ERROR_MALFORMED_URI;
154
SET_RESULT(scheme, 0, colon - spec);
155
if (authorityLen || pathLen) {
156
// number of characters consumed by "<scheme>:"
PRUint32 offset = colon + 1 - spec;
157
ParseAfterScheme(colon + 1, specLen - offset,
158
authorityPos, authorityLen,
160
OFFSET_RESULT(authority, offset);
161
OFFSET_RESULT(path, offset);
166
// spec = <authority-no-port-or-password>/<path>
171
// spec = <authority-no-port-or-password>/<path-with-colon>
172
// spec = <path-with-colon>
176
// spec = <authority-no-port-or-password>
177
// spec = <path-no-slashes-or-colon>
179
SET_RESULT(scheme, 0, -1);
180
if (authorityLen || pathLen)
181
ParseAfterScheme(spec, specLen,
182
authorityPos, authorityLen,
189
// Base-class authority parser: it does not look inside |auth| at all.  The
// username and password are reported as absent (-1) and the whole authority
// string (offset 0, length authLen) is reported as the hostname.
// NOTE(review): the strlen() below is presumably guarded by "authLen < 0"
// -- confirm.
nsBaseURLParser::ParseAuthority(const char *auth, PRInt32 authLen,
190
PRUint32 *usernamePos, PRInt32 *usernameLen,
191
PRUint32 *passwordPos, PRInt32 *passwordLen,
192
PRUint32 *hostnamePos, PRInt32 *hostnameLen,
195
NS_PRECONDITION(auth, "null pointer");
198
authLen = strlen(auth);
200
SET_RESULT(username, 0, -1);
201
SET_RESULT(password, 0, -1);
202
SET_RESULT(hostname, 0, authLen);
209
// Base-class user-info parser: the input is not examined; both the username
// and the password are reported as absent (-1).
nsBaseURLParser::ParseUserInfo(const char *userinfo, PRInt32 userinfoLen,
210
PRUint32 *usernamePos, PRInt32 *usernameLen,
211
PRUint32 *passwordPos, PRInt32 *passwordLen)
213
SET_RESULT(username, 0, -1);
214
SET_RESULT(password, 0, -1);
219
// Base-class server-info parser: the input is not examined; the hostname is
// reported as absent (-1).
nsBaseURLParser::ParseServerInfo(const char *serverinfo, PRInt32 serverinfoLen,
220
PRUint32 *hostnamePos, PRInt32 *hostnameLen,
223
SET_RESULT(hostname, 0, -1);
230
// Splits |path| into file path, param, query and ref, reporting each as an
// offset/length pair relative to |path| (-1 length = absent).
//
// Visible logic: a forward scan locates the first '?' (only if no '#' has
// been seen yet) and a '#'.  The ref, when present, runs to the end of the
// path.  The query runs either to |query_end| or to the end of the path.  A
// backward scan, which stops at the last '/', then looks for the param.
// What remains in front is the file path; an empty file path is reported
// as absent.
nsBaseURLParser::ParsePath(const char *path, PRInt32 pathLen,
231
PRUint32 *filepathPos, PRInt32 *filepathLen,
232
PRUint32 *paramPos, PRInt32 *paramLen,
233
PRUint32 *queryPos, PRInt32 *queryLen,
234
PRUint32 *refPos, PRInt32 *refLen)
236
NS_PRECONDITION(path, "null pointer");
239
// NOTE(review): presumably guarded by "pathLen < 0" -- confirm.
pathLen = strlen(path);
241
// path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>
243
// XXX PL_strnpbrk would be nice, but it's buggy
245
// search for first occurrence of either ? or #
246
const char *query_beg = 0, *query_end = 0;
247
const char *ref_beg = 0;
249
// NOTE(review): this scan stops at the NUL terminator, not at pathLen; if a
// caller passes a pathLen shorter than the string, a '?' or '#' beyond
// pathLen could still be picked up -- confirm callers always pass the full
// remaining length.
for (p = path; *p; ++p) {
250
// only match the query string if it precedes the reference fragment
251
if (!ref_beg && !query_beg && *p == '?')
253
else if (*p == '#') {
263
SET_RESULT(query, query_beg - path, query_end - query_beg);
265
SET_RESULT(query, query_beg - path, pathLen - (query_beg - path));
268
SET_RESULT(query, 0, -1);
271
SET_RESULT(ref, ref_beg - path, pathLen - (ref_beg - path));
273
SET_RESULT(ref, 0, -1);
275
// search backwards for param
276
const char *param_beg = 0;
283
end = path + pathLen;
284
for (p = end - 1; p >= path && *p != '/'; --p) {
292
// found <filepath>;<param>
293
SET_RESULT(param, param_beg - path, end - param_beg);
297
SET_RESULT(param, 0, -1);
299
// an empty file path is no file path
301
SET_RESULT(filepath, 0, end - path);
303
SET_RESULT(filepath, 0, -1);
308
// Splits |filepath| into directory, basename and extension, each reported
// as an offset/length pair relative to |filepath| (-1 length = absent).
//
// Visible logic:
//  - An empty file path yields no directory, a zero-length basename and no
//    extension.
//  - Otherwise the string is scanned backwards for the last '/'.  When one
//    is found, everything up to and including it is the directory, and the
//    remainder is passed to ParseFileName; the basename/extension positions
//    it reports are rebased onto |filepath| with OFFSET_RESULT.
//  - When no directory part applies, the whole string goes to ParseFileName.
nsBaseURLParser::ParseFilePath(const char *filepath, PRInt32 filepathLen,
309
PRUint32 *directoryPos, PRInt32 *directoryLen,
310
PRUint32 *basenamePos, PRInt32 *basenameLen,
311
PRUint32 *extensionPos, PRInt32 *extensionLen)
313
NS_PRECONDITION(filepath, "null pointer");
316
// NOTE(review): presumably guarded by "filepathLen < 0" -- confirm.
filepathLen = strlen(filepath);
318
if (filepathLen == 0) {
319
SET_RESULT(directory, 0, -1);
320
SET_RESULT(basename, 0, 0); // assume a zero length file basename
321
SET_RESULT(extension, 0, -1);
326
const char *end = filepath + filepathLen;
328
// search backwards for filename
329
for (p = end - 1; *p != '/' && p > filepath; --p)
333
// true when the text after |p| is exactly "." or ".." up to |end|
if ((p+1 < end && *(p+1) == '.') &&
334
(p+2 == end || (*(p+2) == '.' && p+3 == end)))
336
// filepath = <directory><filename>.<extension>
337
SET_RESULT(directory, 0, p - filepath + 1);
338
ParseFileName(p + 1, end - (p + 1),
339
basenamePos, basenameLen,
340
extensionPos, extensionLen);
341
OFFSET_RESULT(basename, p + 1 - filepath);
342
OFFSET_RESULT(extension, p + 1 - filepath);
345
// filepath = <filename>.<extension>
346
SET_RESULT(directory, 0, -1);
347
ParseFileName(filepath, filepathLen,
348
basenamePos, basenameLen,
349
extensionPos, extensionLen);
355
// Splits |filename| into basename and extension, reported as offset/length
// pairs relative to |filename|.
//
// Visible logic: unless the name ends with '.', the name is scanned
// backwards, never examining the first character (so a leading '.' does
// not start an extension).  When a split point |p| is chosen, the basename
// is the text before |p| and the extension is the text after it.  Otherwise
// the whole name is the basename and the extension is absent (-1).
nsBaseURLParser::ParseFileName(const char *filename, PRInt32 filenameLen,
356
PRUint32 *basenamePos, PRInt32 *basenameLen,
357
PRUint32 *extensionPos, PRInt32 *extensionLen)
359
NS_PRECONDITION(filename, "null pointer");
362
// NOTE(review): presumably guarded by "filenameLen < 0" -- confirm.
filenameLen = strlen(filename);
364
// no extension if filename ends with a '.'
365
// NOTE(review): this reads filename[filenameLen-1]; no guard for
// filenameLen == 0 is visible here, and ParseFilePath can pass a
// zero-length name (file path ending in '/') -- confirm this cannot read
// before the start of the buffer.
if (filename[filenameLen-1] != '.') {
366
// ignore '.' at the beginning
367
for (const char *p = filename + filenameLen - 1; p > filename; --p) {
369
// filename = <basename.extension>
370
SET_RESULT(basename, 0, p - filename);
371
SET_RESULT(extension, p + 1 - filename, filenameLen - (p - filename + 1));
376
// filename = <basename>
377
SET_RESULT(basename, 0, filenameLen);
378
SET_RESULT(extension, 0, -1);
382
//----------------------------------------------------------------------------
383
// nsNoAuthURLParser implementation
384
//----------------------------------------------------------------------------
387
// Splits the text that follows "<scheme>:" into authority and path for the
// no-authority parser.  Results are offsets/lengths relative to |spec|.
//
// The function dispatches on the number of leading slashes.  Visible
// outcomes (which slash count selects which one is not shown in these
// lines -- confirm before relying on it):
//  - everything is the path;
//  - there is an authority after a two-character prefix: it extends to the
//    next '/' found by memchr, the path starting at that '/'; or, when no
//    '/' follows, it covers the rest of the spec and the path is absent;
//  - an empty authority at |pos| with the path running from |pos| to the
//    end.
// When XP_WIN or XP_OS2 is defined, text shaped like "//X:" or "//X|"
// (ASCII letter, then ':' or '|', then end of spec, '/' or '\') is
// recognised as a drive specifier and kept as part of the path.
nsNoAuthURLParser::ParseAfterScheme(const char *spec, PRInt32 specLen,
388
PRUint32 *authPos, PRInt32 *authLen,
389
PRUint32 *pathPos, PRInt32 *pathLen)
391
NS_PRECONDITION(specLen >= 0, "unexpected");
393
// everything is the path
395
switch (CountConsecutiveSlashes(spec, specLen)) {
401
const char *p = nsnull;
403
// looks like there is an authority section
404
#if defined(XP_WIN) || defined(XP_OS2)
405
// if the authority looks like a drive number then we
406
// really want to treat it as part of the path
407
if ((specLen > 3) && (spec[3] == ':' || spec[3] == '|') &&
408
nsCRT::IsAsciiAlpha(spec[2]) &&
409
((specLen == 4) || (spec[4] == '/') || (spec[4] == '\\'))) {
414
// find the '/' that ends the authority, searching after the leading "//"
p = (const char *) memchr(spec + 2, '/', specLen - 2);
417
SET_RESULT(auth, 2, p - (spec + 2));
418
SET_RESULT(path, p - spec, specLen - (p - spec));
421
SET_RESULT(auth, 2, specLen - 2);
422
SET_RESULT(path, 0, -1);
430
SET_RESULT(auth, pos, 0);
431
SET_RESULT(path, pos, specLen - pos);
434
#if defined(XP_WIN) || defined(XP_OS2)
436
// File-path parser override for the no-authority parser; it sits under the
// "#if defined(XP_WIN) || defined(XP_OS2)" directive that precedes it.
//
// A file path of 2 or 3 characters is checked for being just a drive
// specifier: after any adjustment of |p| (not visible in these lines;
// presumably skipping a leading '/'), exactly two characters must remain,
// an ASCII letter followed by ':' or '|'.  In that case the whole file path
// is reported as the directory and basename/extension are absent (-1).
// Every other input is forwarded unchanged to
// nsBaseURLParser::ParseFilePath.
nsNoAuthURLParser::ParseFilePath(const char *filepath, PRInt32 filepathLen,
437
PRUint32 *directoryPos, PRInt32 *directoryLen,
438
PRUint32 *basenamePos, PRInt32 *basenameLen,
439
PRUint32 *extensionPos, PRInt32 *extensionLen)
441
NS_PRECONDITION(filepath, "null pointer");
444
// NOTE(review): presumably guarded by "filepathLen < 0" -- confirm.
filepathLen = strlen(filepath);
446
// look for a filepath consisting of only a drive number, which may or
447
// may not have a leading slash.
448
if (filepathLen > 1 && filepathLen < 4) {
449
const char *end = filepath + filepathLen;
450
const char *p = filepath;
453
if ((end-p == 2) && (p[1]==':' || p[1]=='|') && nsCRT::IsAsciiAlpha(*p)) {
454
// filepath = <drive-number>:
455
SET_RESULT(directory, 0, filepathLen);
456
SET_RESULT(basename, 0, -1);
457
SET_RESULT(extension, 0, -1);
462
// otherwise fallback on common implementation
463
return nsBaseURLParser::ParseFilePath(filepath, filepathLen,
464
directoryPos, directoryLen,
465
basenamePos, basenameLen,
466
extensionPos, extensionLen);
470
//----------------------------------------------------------------------------
471
// nsAuthURLParser implementation
472
//----------------------------------------------------------------------------
475
// Splits an authority string into user info and server info.
//
// Visible logic: the string is scanned backwards for '@', so the LAST '@'
// is the separator.  With a separator, the text before it goes to
// ParseUserInfo and the text after it to ParseServerInfo; the hostname
// position is then rebased onto |auth| with OFFSET_RESULT.  Without one,
// username and password are absent (-1) and the whole string goes to
// ParseServerInfo.  A failure code from either helper is returned to the
// caller as-is.
nsAuthURLParser::ParseAuthority(const char *auth, PRInt32 authLen,
476
PRUint32 *usernamePos, PRInt32 *usernameLen,
477
PRUint32 *passwordPos, PRInt32 *passwordLen,
478
PRUint32 *hostnamePos, PRInt32 *hostnameLen,
483
NS_PRECONDITION(auth, "null pointer");
486
// NOTE(review): presumably guarded by "authLen < 0" -- confirm.
authLen = strlen(auth);
489
// NOTE(review): the next three results look like the empty-authority case
// (presumably under "authLen == 0"), which would also keep the backward
// scan below from starting at auth - 1 -- confirm.
SET_RESULT(username, 0, -1);
490
SET_RESULT(password, 0, -1);
491
SET_RESULT(hostname, 0, 0);
497
// search backwards for @
498
const char *p = auth + authLen - 1;
499
for (; (*p != '@') && (p > auth); --p);
501
// auth = <user-info@server-info>
502
rv = ParseUserInfo(auth, p - auth,
503
usernamePos, usernameLen,
504
passwordPos, passwordLen);
505
if (NS_FAILED(rv)) return rv;
506
rv = ParseServerInfo(p + 1, authLen - (p - auth + 1),
507
hostnamePos, hostnameLen,
509
if (NS_FAILED(rv)) return rv;
510
OFFSET_RESULT(hostname, p + 1 - auth);
513
// auth = <server-info>
514
SET_RESULT(username, 0, -1);
515
SET_RESULT(password, 0, -1);
516
rv = ParseServerInfo(auth, authLen,
517
hostnamePos, hostnameLen,
519
if (NS_FAILED(rv)) return rv;
525
// Splits "<username>[:<password>]" at the FIRST ':' (located with memchr
// over userinfoLen bytes).  With a colon, the username is the text before
// it and the password is everything after it.  Without one, the whole
// input is the username and the password is absent (-1).  Positions are
// relative to |userinfo|.
nsAuthURLParser::ParseUserInfo(const char *userinfo, PRInt32 userinfoLen,
526
PRUint32 *usernamePos, PRInt32 *usernameLen,
527
PRUint32 *passwordPos, PRInt32 *passwordLen)
529
NS_PRECONDITION(userinfo, "null pointer");
532
// NOTE(review): presumably guarded by "userinfoLen < 0" -- confirm.
userinfoLen = strlen(userinfo);
534
const char *p = (const char *) memchr(userinfo, ':', userinfoLen);
536
// userinfo = <username:password>
537
SET_RESULT(username, 0, p - userinfo);
538
SET_RESULT(password, p - userinfo + 1, userinfoLen - (p - userinfo + 1));
541
// userinfo = <username>
542
SET_RESULT(username, 0, userinfoLen);
543
SET_RESULT(password, 0, -1);
549
// Splits "<hostname>[:<port>]".
//
// Visible logic: a negative serverinfoLen means "use strlen".  Empty input
// yields an empty hostname (length 0).  Otherwise the string is scanned
// backwards for a ':' while tracking a ']' (IPv6 literal delimiter); a
// malformed hostname makes the function return NS_ERROR_MALFORMED_URI.
// With a colon, the hostname is the text before it and the text after it
// is copied into an nsCAutoString and converted with ToInteger, the result
// being stored through |port|.  Without a colon, the whole input is the
// hostname.
// NOTE(review): |port| is not declared in the visible part of the signature
// -- presumably a trailing "PRInt32 *port" parameter; confirm.
nsAuthURLParser::ParseServerInfo(const char *serverinfo, PRInt32 serverinfoLen,
550
PRUint32 *hostnamePos, PRInt32 *hostnameLen,
553
NS_PRECONDITION(serverinfo, "null pointer");
555
if (serverinfoLen < 0)
556
serverinfoLen = strlen(serverinfo);
558
if (serverinfoLen == 0) {
559
SET_RESULT(hostname, 0, 0);
565
// search backwards for a ':' but stop on ']' (IPv6 address literal
566
// delimiter). check for illegal characters in the hostname.
567
const char *p = serverinfo + serverinfoLen - 1;
568
const char *colon = nsnull, *bracket = nsnull;
569
// NOTE(review): the condition "p > serverinfo" means serverinfo[0] is never
// examined by this loop -- confirm that is intended for the illegal
// character check.
for (; p > serverinfo; --p) {
575
if (bracket == nsnull)
579
// hostname must not contain a space
580
NS_WARNING("malformed hostname");
581
return NS_ERROR_MALFORMED_URI;
586
// serverinfo = <hostname:port>
587
SET_RESULT(hostname, 0, colon - serverinfo);
589
// XXX unfortunately ToInteger is not defined for substrings
590
nsCAutoString buf(colon+1, serverinfoLen - (colon + 1 - serverinfo));
592
*port = buf.ToInteger(&err);
598
// serverinfo = <hostname>
599
SET_RESULT(hostname, 0, serverinfoLen);
607
// Splits the text that follows "<scheme>:" into authority and path for the
// parser that always expects an authority.  Results are offsets/lengths
// relative to |spec|.
//
// Visible logic: all leading slashes are skipped; the authority then runs
// up to the first character found in "/?#;".  If such a character exists,
// the path starts at it and runs to the end of the spec.  Otherwise the
// authority covers the rest of the spec and the path is absent (-1).
nsAuthURLParser::ParseAfterScheme(const char *spec, PRInt32 specLen,
608
PRUint32 *authPos, PRInt32 *authLen,
609
PRUint32 *pathPos, PRInt32 *pathLen)
611
NS_PRECONDITION(specLen >= 0, "unexpected");
613
PRUint32 nslash = CountConsecutiveSlashes(spec, specLen);
615
// search for the end of the authority section
616
const char *end = spec + specLen;
618
for (p = spec + nslash; p < end; ++p) {
619
// strchr also matches the set's terminating NUL, so an embedded '\0' in
// the spec ends the authority as well
if (strchr("/?#;", *p))
623
// spec = [/]<auth><path>
624
SET_RESULT(auth, nslash, p - (spec + nslash));
625
SET_RESULT(path, p - spec, specLen - (p - spec));
629
SET_RESULT(auth, nslash, specLen - nslash);
630
SET_RESULT(path, 0, -1);
634
//----------------------------------------------------------------------------
635
// nsStdURLParser implementation
636
//----------------------------------------------------------------------------
639
// Splits the text that follows "<scheme>:" into authority and path for the
// standard parser.  Results are offsets/lengths relative to |spec|.
//
// The leading slashes are counted first.  Visible outcomes (which slash
// count selects which one is not shown in these lines -- confirm before
// relying on it):
//  - "(//)<auth><path>": the authority starts after the slashes and ends at
//    the first character found in "/?#;", where the path begins; when no
//    such character exists the authority covers the rest of the spec and
//    the path is absent (-1);
//  - no authority at all (-1), the entire spec being the path;
//  - "///[/]<path>": an empty authority at offset 2, with the path starting
//    at offset 2.
nsStdURLParser::ParseAfterScheme(const char *spec, PRInt32 specLen,
640
PRUint32 *authPos, PRInt32 *authLen,
641
PRUint32 *pathPos, PRInt32 *pathLen)
643
NS_PRECONDITION(specLen >= 0, "unexpected");
645
PRUint32 nslash = CountConsecutiveSlashes(spec, specLen);
647
// search for the end of the authority section
648
const char *end = spec + specLen;
650
for (p = spec + nslash; p < end; ++p) {
651
// strchr also matches the set's terminating NUL, so an embedded '\0' in
// the spec ends the authority as well
if (strchr("/?#;", *p))
658
// spec = (//)<auth><path>
659
SET_RESULT(auth, nslash, p - (spec + nslash));
660
SET_RESULT(path, p - spec, specLen - (p - spec));
664
SET_RESULT(auth, nslash, specLen - nslash);
665
SET_RESULT(path, 0, -1);
670
SET_RESULT(auth, 0, -1);
671
SET_RESULT(path, 0, specLen);
674
// spec = ///[/]<path>
675
SET_RESULT(auth, 2, 0);
676
SET_RESULT(path, 2, specLen - 2);