2
* uri.c: set of generic URI related routines
4
* Reference: RFCs 2396, 2732 and 2373
6
* See Copyright for the status of this software.
16
#include <libxml/xmlmemory.h>
17
#include <libxml/uri.h>
18
#include <libxml/globals.h>
19
#include <libxml/xmlerror.h>
21
/************************************************************************
23
* Macros to differentiate various character type *
24
* directly extracted from RFC 2396 *
26
************************************************************************/
29
* alpha = lowalpha | upalpha
31
/* IS_ALPHA: true when x is an ASCII letter of either case. */
#define IS_ALPHA(x) \
    ((IS_LOWALPHA(x)) || (IS_UPALPHA(x)))
35
* lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
36
* "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
37
* "u" | "v" | "w" | "x" | "y" | "z"
40
/* IS_LOWALPHA: true when x lies in the range 'a'..'z' (x is evaluated twice). */
#define IS_LOWALPHA(x) \
    (('a' <= (x)) && ((x) <= 'z'))
43
* upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
44
* "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
45
* "U" | "V" | "W" | "X" | "Y" | "Z"
47
/* IS_UPALPHA: true when x lies in the range 'A'..'Z' (x is evaluated twice). */
#define IS_UPALPHA(x) \
    (('A' <= (x)) && ((x) <= 'Z'))
53
* digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
55
/* IS_DIGIT: true when x lies in the range '0'..'9' (x is evaluated twice). */
#define IS_DIGIT(x) \
    (('0' <= (x)) && ((x) <= '9'))
58
* alphanum = alpha | digit
61
/* IS_ALPHANUM: true when x satisfies IS_ALPHA or IS_DIGIT. */
#define IS_ALPHANUM(x) \
    ((IS_ALPHA(x)) || (IS_DIGIT(x)))
64
* hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
65
* "a" | "b" | "c" | "d" | "e" | "f"
68
/*
 * IS_HEX: true when x is a hexadecimal digit: a decimal digit per
 * IS_DIGIT, or a letter in 'a'..'f' or 'A'..'F'.
 * The argument is evaluated more than once, so it must be free of
 * side effects.
 */
#define IS_HEX(x) ((IS_DIGIT(x)) || (((x) >= 'a') && ((x) <= 'f')) || \
	    (((x) >= 'A') && ((x) <= 'F')))
72
* mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
75
/*
 * IS_MARK: true when x is one of the nine "mark" punctuation characters
 *   - _ . ! ~ * ' ( )
 * The argument is evaluated more than once, so it must be free of
 * side effects.
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))
81
* reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
85
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
86
((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
87
((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
91
* unreserved = alphanum | mark
94
/* IS_UNRESERVED: true when x satisfies IS_ALPHANUM or IS_MARK. */
#define IS_UNRESERVED(x) \
    ((IS_ALPHANUM(x)) || (IS_MARK(x)))
97
* escaped = "%" hex hex
100
#define IS_ESCAPED(p) ((*(p) == '%') && (IS_HEX((p)[1])) && \
104
* uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
105
* "&" | "=" | "+" | "$" | ","
107
/*
 * IS_URIC_NO_SLASH: true when the character at p is accepted by
 * IS_UNRESERVED, when p is accepted by IS_ESCAPED, or when *p is one of
 *   ; ? : @ & = + $ ,
 * Note that '/' is not in that list.
 * p is a pointer and is evaluated more than once, so it must be free of
 * side effects.
 */
#define IS_URIC_NO_SLASH(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||\
	    ((*(p) == ';')) || ((*(p) == '?')) || ((*(p) == ':')) ||\
	    ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||\
	    ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ',')))
113
* pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
115
#define IS_PCHAR(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
116
((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||\
117
((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||\
121
* rel_segment = 1*( unreserved | escaped |
122
* ";" | "@" | "&" | "=" | "+" | "$" | "," )
125
#define IS_SEGMENT(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
126
((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || \
127
((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
131
* scheme = alpha *( alpha | digit | "+" | "-" | "." )
134
/*
 * IS_SCHEME: true when x may appear inside a scheme name: a letter
 * (IS_ALPHA), a digit (IS_DIGIT), or one of '+', '-', '.'.
 * The argument is evaluated more than once, so it must be free of
 * side effects.
 */
#define IS_SCHEME(x) ((IS_ALPHA(x)) || (IS_DIGIT(x)) ||			\
	              ((x) == '+') || ((x) == '-') || ((x) == '.'))
138
* reg_name = 1*( unreserved | escaped | "$" | "," |
139
* ";" | ":" | "@" | "&" | "=" | "+" )
142
/*
 * IS_REG_NAME: true when the character at p is accepted by
 * IS_UNRESERVED, when p is accepted by IS_ESCAPED, or when *p is one of
 *   $ , ; : @ & = +
 * p is a pointer and is evaluated more than once, so it must be free of
 * side effects.
 */
#define IS_REG_NAME(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) ||	\
       ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||		\
       ((*(p) == '=')) || ((*(p) == '+')))
148
* userinfo = *( unreserved | escaped | ";" | ":" | "&" | "=" |
151
#define IS_USERINFO(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
152
((*(p) == ';')) || ((*(p) == ':')) || ((*(p) == '&')) || \
153
((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || \
157
* uric = reserved | unreserved | escaped
160
#define IS_URIC(p) ((IS_UNRESERVED(*(p))) || (IS_ESCAPED(p)) || \
164
* unwise = "{" | "}" | "|" | "\" | "^" | "`"
167
/*
 * IS_UNWISE: true when the character at p is one of
 *   { } | \ ^ [ ] `
 * '[' and ']' are accepted here in addition to the six characters
 * named in the "unwise" production quoted above this macro.
 * p is a pointer and is evaluated more than once, so it must be free of
 * side effects.
 */
#define IS_UNWISE(p)							\
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||	\
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||	\
       ((*(p) == ']')) || ((*(p) == '`')))
173
* Skip to next pointer char, handle escaped sequences
176
/*
 * NEXT: advance the pointer lvalue p past one URI character: three
 * bytes when *p is '%', otherwise one byte.
 * The macro does not itself check that two more bytes follow the '%'.
 * The argument is fully parenthesized so that an lvalue expression such
 * as *pp advances the intended object; it is still evaluated more than
 * once.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
179
* Productions from the spec.
181
* authority = server | reg_name
182
* reg_name = 1*( unreserved | escaped | "$" | "," |
183
* ";" | ":" | "@" | "&" | "=" | "+" )
185
* path = [ abs_path | opaque_part ]
188
/*
 * STRNDUP: call xmlStrndup() on s (viewed as const xmlChar *) with
 * length n and cast the result to char *.
 * The whole expansion is parenthesized so that postfix operators
 * applied to STRNDUP(...) bind to the cast result rather than to the
 * xmlStrndup() call.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
190
/************************************************************************
192
* Generic URI structure functions *
194
************************************************************************/
199
* Simply creates an empty xmlURI
201
* Returns the new structure or NULL in case of error
207
ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
209
xmlGenericError(xmlGenericErrorContext,
210
"xmlCreateURI: out of memory\n");
213
memset(ret, 0, sizeof(xmlURI));
219
* @uri: pointer to an xmlURI
221
* Save the URI as an escaped string
223
* Returns a new string (to be deallocated by caller)
226
xmlSaveUri(xmlURIPtr uri) {
232
if (uri == NULL) return(NULL);
236
ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
238
xmlGenericError(xmlGenericErrorContext,
239
"xmlSaveUri: out of memory\n");
244
if (uri->scheme != NULL) {
249
ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
251
xmlGenericError(xmlGenericErrorContext,
252
"xmlSaveUri: out of memory\n");
260
ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
262
xmlGenericError(xmlGenericErrorContext,
263
"xmlSaveUri: out of memory\n");
269
if (uri->opaque != NULL) {
272
if (len + 3 >= max) {
274
ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
276
xmlGenericError(xmlGenericErrorContext,
277
"xmlSaveUri: out of memory\n");
281
if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
284
int val = *(unsigned char *)p++;
285
int hi = val / 0x10, lo = val % 0x10;
287
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
288
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
292
if (uri->server != NULL) {
293
if (len + 3 >= max) {
295
ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
297
xmlGenericError(xmlGenericErrorContext,
298
"xmlSaveUri: out of memory\n");
304
if (uri->user != NULL) {
307
if (len + 3 >= max) {
309
ret = (xmlChar *) xmlRealloc(ret,
310
(max + 1) * sizeof(xmlChar));
312
xmlGenericError(xmlGenericErrorContext,
313
"xmlSaveUri: out of memory\n");
317
if ((IS_UNRESERVED(*(p))) ||
318
((*(p) == ';')) || ((*(p) == ':')) ||
319
((*(p) == '&')) || ((*(p) == '=')) ||
320
((*(p) == '+')) || ((*(p) == '$')) ||
324
int val = *(unsigned char *)p++;
325
int hi = val / 0x10, lo = val % 0x10;
327
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
328
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
331
if (len + 3 >= max) {
333
ret = (xmlChar *) xmlRealloc(ret,
334
(max + 1) * sizeof(xmlChar));
336
xmlGenericError(xmlGenericErrorContext,
337
"xmlSaveUri: out of memory\n");
347
ret = (xmlChar *) xmlRealloc(ret,
348
(max + 1) * sizeof(xmlChar));
350
xmlGenericError(xmlGenericErrorContext,
351
"xmlSaveUri: out of memory\n");
358
if (len + 10 >= max) {
360
ret = (xmlChar *) xmlRealloc(ret,
361
(max + 1) * sizeof(xmlChar));
363
xmlGenericError(xmlGenericErrorContext,
364
"xmlSaveUri: out of memory\n");
368
len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
370
} else if (uri->authority != NULL) {
371
if (len + 3 >= max) {
373
ret = (xmlChar *) xmlRealloc(ret,
374
(max + 1) * sizeof(xmlChar));
376
xmlGenericError(xmlGenericErrorContext,
377
"xmlSaveUri: out of memory\n");
385
if (len + 3 >= max) {
387
ret = (xmlChar *) xmlRealloc(ret,
388
(max + 1) * sizeof(xmlChar));
390
xmlGenericError(xmlGenericErrorContext,
391
"xmlSaveUri: out of memory\n");
395
if ((IS_UNRESERVED(*(p))) ||
396
((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
397
((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
398
((*(p) == '=')) || ((*(p) == '+')))
401
int val = *(unsigned char *)p++;
402
int hi = val / 0x10, lo = val % 0x10;
404
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
405
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
408
} else if (uri->scheme != NULL) {
409
if (len + 3 >= max) {
411
ret = (xmlChar *) xmlRealloc(ret,
412
(max + 1) * sizeof(xmlChar));
414
xmlGenericError(xmlGenericErrorContext,
415
"xmlSaveUri: out of memory\n");
422
if (uri->path != NULL) {
425
* the colon in file:///d: should not be escaped or
426
* Windows accesses fail later.
428
if ((uri->scheme != NULL) &&
430
(((p[1] >= 'a') && (p[1] <= 'z')) ||
431
((p[1] >= 'A') && (p[1] <= 'Z'))) &&
433
(xmlStrEqual(uri->scheme, BAD_CAST "file"))) {
434
if (len + 3 >= max) {
436
ret = (xmlChar *) xmlRealloc(ret,
437
(max + 1) * sizeof(xmlChar));
439
xmlGenericError(xmlGenericErrorContext,
440
"xmlSaveUri: out of memory\n");
449
if (len + 3 >= max) {
451
ret = (xmlChar *) xmlRealloc(ret,
452
(max + 1) * sizeof(xmlChar));
454
xmlGenericError(xmlGenericErrorContext,
455
"xmlSaveUri: out of memory\n");
459
if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
460
((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
461
((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
465
int val = *(unsigned char *)p++;
466
int hi = val / 0x10, lo = val % 0x10;
468
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
469
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
473
if (uri->query_raw != NULL) {
474
if (len + 1 >= max) {
476
ret = (xmlChar *) xmlRealloc(ret,
477
(max + 1) * sizeof(xmlChar));
479
xmlGenericError(xmlGenericErrorContext,
480
"xmlSaveUri: out of memory\n");
487
if (len + 1 >= max) {
489
ret = (xmlChar *) xmlRealloc(ret,
490
(max + 1) * sizeof(xmlChar));
492
xmlGenericError(xmlGenericErrorContext,
493
"xmlSaveUri: out of memory\n");
499
} else if (uri->query != NULL) {
500
if (len + 3 >= max) {
502
ret = (xmlChar *) xmlRealloc(ret,
503
(max + 1) * sizeof(xmlChar));
505
xmlGenericError(xmlGenericErrorContext,
506
"xmlSaveUri: out of memory\n");
513
if (len + 3 >= max) {
515
ret = (xmlChar *) xmlRealloc(ret,
516
(max + 1) * sizeof(xmlChar));
518
xmlGenericError(xmlGenericErrorContext,
519
"xmlSaveUri: out of memory\n");
523
if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
526
int val = *(unsigned char *)p++;
527
int hi = val / 0x10, lo = val % 0x10;
529
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
530
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
535
if (uri->fragment != NULL) {
536
if (len + 3 >= max) {
538
ret = (xmlChar *) xmlRealloc(ret,
539
(max + 1) * sizeof(xmlChar));
541
xmlGenericError(xmlGenericErrorContext,
542
"xmlSaveUri: out of memory\n");
549
if (len + 3 >= max) {
551
ret = (xmlChar *) xmlRealloc(ret,
552
(max + 1) * sizeof(xmlChar));
554
xmlGenericError(xmlGenericErrorContext,
555
"xmlSaveUri: out of memory\n");
559
if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
562
int val = *(unsigned char *)p++;
563
int hi = val / 0x10, lo = val % 0x10;
565
ret[len++] = hi + (hi > 9? 'A'-10 : '0');
566
ret[len++] = lo + (lo > 9? 'A'-10 : '0');
572
ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
574
xmlGenericError(xmlGenericErrorContext,
575
"xmlSaveUri: out of memory\n");
585
* @stream: a FILE* for the output
586
* @uri: pointer to an xmlURI
588
* Prints the URI in the stream @stream.
591
xmlPrintURI(FILE *stream, xmlURIPtr uri) {
594
out = xmlSaveUri(uri);
596
fprintf(stream, "%s", (char *) out);
603
* @uri: pointer to an xmlURI
605
* Make sure the xmlURI struct is free of content
608
xmlCleanURI(xmlURIPtr uri) {
609
if (uri == NULL) return;
611
if (uri->scheme != NULL) xmlFree(uri->scheme);
613
if (uri->server != NULL) xmlFree(uri->server);
615
if (uri->user != NULL) xmlFree(uri->user);
617
if (uri->path != NULL) xmlFree(uri->path);
619
if (uri->fragment != NULL) xmlFree(uri->fragment);
620
uri->fragment = NULL;
621
if (uri->opaque != NULL) xmlFree(uri->opaque);
623
if (uri->authority != NULL) xmlFree(uri->authority);
624
uri->authority = NULL;
625
if (uri->query != NULL) xmlFree(uri->query);
627
if (uri->query_raw != NULL) xmlFree(uri->query_raw);
628
uri->query_raw = NULL;
633
* @uri: pointer to an xmlURI
635
* Free up the xmlURI struct
638
xmlFreeURI(xmlURIPtr uri) {
639
if (uri == NULL) return;
641
if (uri->scheme != NULL) xmlFree(uri->scheme);
642
if (uri->server != NULL) xmlFree(uri->server);
643
if (uri->user != NULL) xmlFree(uri->user);
644
if (uri->path != NULL) xmlFree(uri->path);
645
if (uri->fragment != NULL) xmlFree(uri->fragment);
646
if (uri->opaque != NULL) xmlFree(uri->opaque);
647
if (uri->authority != NULL) xmlFree(uri->authority);
648
if (uri->query != NULL) xmlFree(uri->query);
649
if (uri->query_raw != NULL) xmlFree(uri->query_raw);
653
/************************************************************************
657
************************************************************************/
660
* xmlNormalizeURIPath:
661
* @path: pointer to the path string
663
* Applies the 5 normalization steps to a path string--that is, RFC 2396
664
* Section 5.2, steps 6.c through 6.g.
666
* Normalization occurs directly on the string, no new allocation is done
668
* Returns 0 or an error code
671
xmlNormalizeURIPath(char *path) {
677
/* Skip all initial "/" chars. We want to get to the beginning of the
678
* first non-empty segment.
681
while (cur[0] == '/')
686
/* Keep everything we've seen so far. */
690
* Analyze each segment in sequence for cases (c) and (d).
692
while (cur[0] != '\0') {
694
* c) All occurrences of "./", where "." is a complete path segment,
695
* are removed from the buffer string.
697
if ((cur[0] == '.') && (cur[1] == '/')) {
699
/* '//' normalization should be done at this point too */
700
while (cur[0] == '/')
706
* d) If the buffer string ends with "." as a complete path segment,
707
* that "." is removed.
709
if ((cur[0] == '.') && (cur[1] == '\0'))
712
/* Otherwise keep the segment. */
713
while (cur[0] != '/') {
716
(out++)[0] = (cur++)[0];
719
while ((cur[0] == '/') && (cur[1] == '/'))
722
(out++)[0] = (cur++)[0];
727
/* Reset to the beginning of the first segment for the next sequence. */
729
while (cur[0] == '/')
735
* Analyze each segment in sequence for cases (e) and (f).
737
* e) All occurrences of "<segment>/../", where <segment> is a
738
* complete path segment not equal to "..", are removed from the
739
* buffer string. Removal of these path segments is performed
740
* iteratively, removing the leftmost matching pattern on each
741
* iteration, until no matching pattern remains.
743
* f) If the buffer string ends with "<segment>/..", where <segment>
744
* is a complete path segment not equal to "..", that
745
* "<segment>/.." is removed.
747
* To satisfy the "iterative" clause in (e), we need to collapse the
748
* string every time we find something that needs to be removed. Thus,
749
* we don't need to keep two pointers into the string: we only need a
750
* "current position" pointer.
755
/* At the beginning of each iteration of this loop, "cur" points to
756
* the first character of the segment we want to examine.
759
/* Find the end of the current segment. */
761
while ((segp[0] != '/') && (segp[0] != '\0'))
764
/* If this is the last segment, we're done (we need at least two
765
* segments to meet the criteria for the (e) and (f) cases).
770
/* If the first segment is "..", or if the next segment _isn't_ "..",
771
* keep this segment and try the next one.
774
if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
775
|| ((segp[0] != '.') || (segp[1] != '.')
776
|| ((segp[2] != '/') && (segp[2] != '\0')))) {
781
/* If we get here, remove this segment and the next one and back up
782
* to the previous segment (if there is one), to implement the
783
* "iteratively" clause. It's pretty much impossible to back up
784
* while maintaining two pointers into the buffer, so just compact
785
* the whole buffer now.
788
/* If this is the end of the buffer, we're done. */
789
if (segp[2] == '\0') {
793
/* Valgrind complained, strcpy(cur, segp + 3); */
794
/* string will overlap, do not use strcpy */
797
while ((*tmp++ = *segp++) != 0);
799
/* If there are no previous segments, then keep going from here. */
801
while ((segp > path) && ((--segp)[0] == '/'))
806
/* "segp" is pointing to the end of a previous segment; find its
807
* start. We need to back up to the previous segment and start
808
* over with that to handle things like "foo/bar/../..". If we
809
* don't do this, then on the first pass we'll remove the "bar/..",
810
* but be pointing at the second ".." so we won't realize we can also
811
* remove the "foo/..".
814
while ((cur > path) && (cur[-1] != '/'))
820
* g) If the resulting buffer string still begins with one or more
821
* complete path segments of "..", then the reference is
822
* considered to be in error. Implementations may handle this
823
* error by retaining these components in the resolved path (i.e.,
824
* treating them as part of the final URI), by removing them from
825
* the resolved path (i.e., discarding relative levels above the
826
* root), or by avoiding traversal of the reference.
828
* We discard them from the final path.
830
if (path[0] == '/') {
832
while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
833
&& ((cur[3] == '/') || (cur[3] == '\0')))
838
while (cur[0] != '\0')
839
(out++)[0] = (cur++)[0];
847
static int is_hex(char c) {
848
if (((c >= '0') && (c <= '9')) ||
849
((c >= 'a') && (c <= 'f')) ||
850
((c >= 'A') && (c <= 'F')))
856
* xmlURIUnescapeString:
857
* @str: the string to unescape
858
* @len: the length in bytes to unescape (or <= 0 to indicate full string)
859
* @target: optional destination buffer
861
* Unescaping routine, but does not check that the string is an URI. The
862
* output is a direct unsigned char translation of %XX values (no encoding)
863
* Note that the length of the result can only be smaller or same size as
866
* Returns a copy of the string, but unescaped, will return NULL only in case
870
xmlURIUnescapeString(const char *str, int len, char *target) {
876
if (len <= 0) len = strlen(str);
877
if (len < 0) return(NULL);
879
if (target == NULL) {
880
ret = (char *) xmlMallocAtomic(len + 1);
882
xmlGenericError(xmlGenericErrorContext,
883
"xmlURIUnescapeString: out of memory\n");
891
if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
893
if ((*in >= '0') && (*in <= '9'))
895
else if ((*in >= 'a') && (*in <= 'f'))
896
*out = (*in - 'a') + 10;
897
else if ((*in >= 'A') && (*in <= 'F'))
898
*out = (*in - 'A') + 10;
900
if ((*in >= '0') && (*in <= '9'))
901
*out = *out * 16 + (*in - '0');
902
else if ((*in >= 'a') && (*in <= 'f'))
903
*out = *out * 16 + (*in - 'a') + 10;
904
else if ((*in >= 'A') && (*in <= 'F'))
905
*out = *out * 16 + (*in - 'A') + 10;
920
* @str: string to escape
921
* @list: exception list string of chars not to escape
923
* This routine escapes a string to hex, ignoring unreserved characters, '@'
924
* and the characters in the exception list.
926
* Returns a new escaped string or NULL in case of error.
929
xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
933
unsigned int len, out;
938
return(xmlStrdup(str));
939
len = xmlStrlen(str);
940
if (!(len > 0)) return(NULL);
943
ret = (xmlChar *) xmlMallocAtomic(len);
945
xmlGenericError(xmlGenericErrorContext,
946
"xmlURIEscapeStr: out of memory\n");
949
in = (const xmlChar *) str;
952
if (len - out <= 3) {
954
ret = (xmlChar *) xmlRealloc(ret, len);
956
xmlGenericError(xmlGenericErrorContext,
957
"xmlURIEscapeStr: out of memory\n");
964
if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
969
ret[out++] = '0' + val;
971
ret[out++] = 'A' + val - 0xA;
974
ret[out++] = '0' + val;
976
ret[out++] = 'A' + val - 0xA;
989
* @str: the string of the URI to escape
991
* Escaping routine, does not do validity checks !
992
* It will try to escape the chars needing this, but this is heuristic
993
* based, so it's impossible to be sure.
995
* Returns a copy of the string, but escaped
998
* Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
999
* according to RFC2396.
1003
xmlURIEscape(const xmlChar * str)
1005
xmlChar *ret, *segment = NULL;
1009
#define NULLCHK(p) if(!p) { \
1010
xmlGenericError(xmlGenericErrorContext, \
1011
"xmlURIEscape: out of memory\n"); \
1017
uri = xmlCreateURI();
1020
* Allow escaping errors in the unescaped form
1023
ret2 = xmlParseURIReference(uri, (const char *)str);
1036
segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1038
ret = xmlStrcat(ret, segment);
1039
ret = xmlStrcat(ret, BAD_CAST ":");
1043
if (uri->authority) {
1045
xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1047
ret = xmlStrcat(ret, BAD_CAST "//");
1048
ret = xmlStrcat(ret, segment);
1053
segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1055
ret = xmlStrcat(ret,BAD_CAST "//");
1056
ret = xmlStrcat(ret, segment);
1057
ret = xmlStrcat(ret, BAD_CAST "@");
1062
segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1064
if (uri->user == NULL)
1065
ret = xmlStrcat(ret, BAD_CAST "//");
1066
ret = xmlStrcat(ret, segment);
1073
snprintf((char *) port, 10, "%d", uri->port);
1074
ret = xmlStrcat(ret, BAD_CAST ":");
1075
ret = xmlStrcat(ret, port);
1080
xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1082
ret = xmlStrcat(ret, segment);
1086
if (uri->query_raw) {
1087
ret = xmlStrcat(ret, BAD_CAST "?");
1088
ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1090
else if (uri->query) {
1092
xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1094
ret = xmlStrcat(ret, BAD_CAST "?");
1095
ret = xmlStrcat(ret, segment);
1100
segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1102
ret = xmlStrcat(ret, segment);
1106
if (uri->fragment) {
1107
segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1109
ret = xmlStrcat(ret, BAD_CAST "#");
1110
ret = xmlStrcat(ret, segment);
1120
/************************************************************************
1122
* Escaped URI parsing *
1124
************************************************************************/
1127
* xmlParseURIFragment:
1128
* @uri: pointer to an URI structure
1129
* @str: pointer to the string to analyze
1131
* Parse an URI fragment string and fills in the appropriate fields
1132
* of the @uri structure.
1136
* Returns 0 or the error code
1139
xmlParseURIFragment(xmlURIPtr uri, const char **str)
1148
while (IS_URIC(cur) || IS_UNWISE(cur))
1151
if (uri->fragment != NULL)
1152
xmlFree(uri->fragment);
1153
if (uri->cleanup & 2)
1154
uri->fragment = STRNDUP(*str, cur - *str);
1156
uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
1164
* @uri: pointer to an URI structure
1165
* @str: pointer to the string to analyze
1167
* Parse the query part of an URI
1171
* Returns 0 or the error code
1174
xmlParseURIQuery(xmlURIPtr uri, const char **str)
1183
while ((IS_URIC(cur)) ||
1184
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1187
if (uri->query != NULL)
1188
xmlFree(uri->query);
1189
if (uri->cleanup & 2)
1190
uri->query = STRNDUP(*str, cur - *str);
1192
uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
1194
/* Save the raw bytes of the query as well.
1195
* See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
1197
if (uri->query_raw != NULL)
1198
xmlFree (uri->query_raw);
1199
uri->query_raw = STRNDUP (*str, cur - *str);
1206
* xmlParseURIScheme:
1207
* @uri: pointer to an URI structure
1208
* @str: pointer to the string to analyze
1210
* Parse an URI scheme
1212
* scheme = alpha *( alpha | digit | "+" | "-" | "." )
1214
* Returns 0 or the error code
1217
xmlParseURIScheme(xmlURIPtr uri, const char **str) {
1224
if (!IS_ALPHA(*cur))
1227
while (IS_SCHEME(*cur)) cur++;
1229
if (uri->scheme != NULL) xmlFree(uri->scheme);
1230
uri->scheme = STRNDUP(*str, cur - *str);
1237
* xmlParseURIOpaquePart:
1238
* @uri: pointer to an URI structure
1239
* @str: pointer to the string to analyze
1241
* Parse an URI opaque part
1243
* opaque_part = uric_no_slash *uric
1245
* Returns 0 or the error code
1248
xmlParseURIOpaquePart(xmlURIPtr uri, const char **str)
1256
if (!((IS_URIC_NO_SLASH(cur)) ||
1257
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
1261
while ((IS_URIC(cur)) ||
1262
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1265
if (uri->opaque != NULL)
1266
xmlFree(uri->opaque);
1267
if (uri->cleanup & 2)
1268
uri->opaque = STRNDUP(*str, cur - *str);
1270
uri->opaque = xmlURIUnescapeString(*str, cur - *str, NULL);
1277
* xmlParseURIServer:
1278
* @uri: pointer to an URI structure
1279
* @str: pointer to the string to analyze
1281
* Parse a server subpart of an URI, it's a finer grain analysis
1282
* of the authority part.
1284
* server = [ [ userinfo "@" ] hostport ]
1285
* userinfo = *( unreserved | escaped |
1286
* ";" | ":" | "&" | "=" | "+" | "$" | "," )
1287
* hostport = host [ ":" port ]
1288
* host = hostname | IPv4address | IPv6reference
1289
* hostname = *( domainlabel "." ) toplabel [ "." ]
1290
* domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1291
* toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1292
* IPv6reference = "[" IPv6address "]"
1293
* IPv6address = hexpart [ ":" IPv4address ]
1294
* IPv4address = 1*3digit "." 1*3digit "." 1*3digit "." 1*3digit
1295
* hexpart = hexseq | hexseq "::" [ hexseq ]| "::" [ hexseq ]
1296
* hexseq = hex4 *( ":" hex4)
1300
* Returns 0 or the error code
1303
xmlParseURIServer(xmlURIPtr uri, const char **str) {
1305
const char *host, *tmp;
1306
const int IPV4max = 4;
1307
const int IPV6max = 8;
1316
* is there a userinfo ?
1318
while (IS_USERINFO(cur)) NEXT(cur);
1321
if (uri->user != NULL) xmlFree(uri->user);
1322
if (uri->cleanup & 2)
1323
uri->user = STRNDUP(*str, cur - *str);
1325
uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
1330
if (uri->user != NULL) xmlFree(uri->user);
1336
* This can be empty in the case where there is no server
1341
if (uri->authority != NULL) xmlFree(uri->authority);
1342
uri->authority = NULL;
1343
if (uri->server != NULL) xmlFree(uri->server);
1350
* host part of hostport can denote an IPV4 address, an IPV6 address
1351
* or an unresolved name. Check the IP first, it's easier to detect
1352
* errors if wrong one.
1353
* An IPV6 address must start with a '[' and end with a ']'.
1358
for (oct = 0; oct < IPV6max; ++oct) {
1361
return(3); /* multiple compression attempted */
1362
if (!oct) { /* initial char is compression */
1366
compress = 1; /* set compression-encountered flag */
1367
cur++; /* skip over the second ':' */
1370
while(IS_HEX(*cur)) cur++;
1371
if (oct == (IPV6max-1))
1377
if ((!compress) && (oct != IPV6max))
1382
if (uri->server != NULL) xmlFree(uri->server);
1383
uri->server = (char *)xmlStrndup((xmlChar *)host+1,
1389
* Not IPV6, maybe IPV4
1391
for (oct = 0; oct < IPV4max; ++oct) {
1393
return(3); /* e.g. http://.xml/ or http://18.29..30/ */
1394
while(IS_DIGIT(*cur)) cur++;
1395
if (oct == (IPV4max-1))
1402
if ((host[0] != '[') && (oct < IPV4max || (*cur == '.' && cur++) ||
1404
/* maybe host_name */
1405
if (!IS_ALPHANUM(*cur))
1406
return(4); /* e.g. http://xml.$oft */
1408
do ++cur; while (IS_ALPHANUM(*cur));
1412
return(5); /* e.g. http://xml.-soft */
1419
return(6); /* e.g. http://xml-.soft */
1421
return(7); /* e.g. http://xml..soft */
1429
--tmp; /* e.g. http://xml.$Oft/ */
1430
do --tmp; while (tmp >= host && IS_ALPHANUM(*tmp));
1431
if ((++tmp == host || tmp[-1] == '.') && !IS_ALPHA(*tmp))
1432
return(8); /* e.g. http://xmlsOft.0rg/ */
1435
if (uri->authority != NULL) xmlFree(uri->authority);
1436
uri->authority = NULL;
1437
if (host[0] != '[') { /* it's not an IPV6 addr */
1438
if (uri->server != NULL) xmlFree(uri->server);
1439
if (uri->cleanup & 2)
1440
uri->server = STRNDUP(host, cur - host);
1442
uri->server = xmlURIUnescapeString(host, cur - host, NULL);
1446
* finish by checking for a port presence.
1450
if (IS_DIGIT(*cur)) {
1453
while (IS_DIGIT(*cur)) {
1455
uri->port = uri->port * 10 + (*cur - '0');
1465
* xmlParseURIRelSegment:
1466
* @uri: pointer to an URI structure
1467
* @str: pointer to the string to analyze
1469
* Parse an URI relative segment
1471
* rel_segment = 1*( unreserved | escaped | ";" | "@" | "&" | "=" |
1474
* Returns 0 or the error code
1477
xmlParseURIRelSegment(xmlURIPtr uri, const char **str)
1485
if (!((IS_SEGMENT(cur)) ||
1486
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))) {
1490
while ((IS_SEGMENT(cur)) ||
1491
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1494
if (uri->path != NULL)
1496
if (uri->cleanup & 2)
1497
uri->path = STRNDUP(*str, cur - *str);
1499
uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
1506
* xmlParseURIPathSegments:
1507
* @uri: pointer to an URI structure
1508
* @str: pointer to the string to analyze
1509
* @slash: should we add a leading slash
1511
* Parse an URI set of path segments
1513
* path_segments = segment *( "/" segment )
1514
* segment = *pchar *( ";" param )
1517
* Returns 0 or the error code
1520
xmlParseURIPathSegments(xmlURIPtr uri, const char **str, int slash)
1530
while ((IS_PCHAR(cur)) ||
1531
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1533
while (*cur == ';') {
1535
while ((IS_PCHAR(cur)) ||
1536
((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
1548
* Concat the set of path segments to the current path
1554
if (uri->path != NULL) {
1555
len2 = strlen(uri->path);
1558
path = (char *) xmlMallocAtomic(len + 1);
1560
xmlGenericError(xmlGenericErrorContext,
1561
"xmlParseURIPathSegments: out of memory\n");
1565
if (uri->path != NULL)
1566
memcpy(path, uri->path, len2);
1572
if (cur - *str > 0) {
1573
if (uri->cleanup & 2) {
1574
memcpy(&path[len2], *str, cur - *str);
1575
path[len2 + (cur - *str)] = 0;
1577
xmlURIUnescapeString(*str, cur - *str, &path[len2]);
1579
if (uri->path != NULL)
1588
* xmlParseURIAuthority:
1589
* @uri: pointer to an URI structure
1590
* @str: pointer to the string to analyze
1592
* Parse the authority part of an URI.
1594
* authority = server | reg_name
1595
* server = [ [ userinfo "@" ] hostport ]
1596
* reg_name = 1*( unreserved | escaped | "$" | "," | ";" | ":" |
1597
* "@" | "&" | "=" | "+" )
1599
* Note : this is completely ambiguous since reg_name is allowed to
1600
* use the full set of chars in use by server:
1602
* 3.2.1. Registry-based Naming Authority
1604
* The structure of a registry-based naming authority is specific
1605
* to the URI scheme, but constrained to the allowed characters
1606
* for an authority component.
1608
* Returns 0 or the error code
1611
xmlParseURIAuthority(xmlURIPtr uri, const char **str) {
1621
* try first to parse it as a server string.
1623
ret = xmlParseURIServer(uri, str);
1624
if ((ret == 0) && (*str != NULL) &&
1625
((**str == 0) || (**str == '/') || (**str == '?')))
1630
* failed, fallback to reg_name
1632
if (!IS_REG_NAME(cur)) {
1636
while (IS_REG_NAME(cur)) NEXT(cur);
1638
if (uri->server != NULL) xmlFree(uri->server);
1640
if (uri->user != NULL) xmlFree(uri->user);
1642
if (uri->authority != NULL) xmlFree(uri->authority);
1643
if (uri->cleanup & 2)
1644
uri->authority = STRNDUP(*str, cur - *str);
1646
uri->authority = xmlURIUnescapeString(*str, cur - *str, NULL);
1653
* xmlParseURIHierPart:
1654
* @uri: pointer to an URI structure
1655
* @str: pointer to the string to analyze
1657
* Parse an URI hierarchical part
1659
* hier_part = ( net_path | abs_path ) [ "?" query ]
1660
* abs_path = "/" path_segments
1661
* net_path = "//" authority [ abs_path ]
1663
* Returns 0 or the error code
1666
xmlParseURIHierPart(xmlURIPtr uri, const char **str) {
1675
if ((cur[0] == '/') && (cur[1] == '/')) {
1677
ret = xmlParseURIAuthority(uri, &cur);
1680
if (cur[0] == '/') {
1682
ret = xmlParseURIPathSegments(uri, &cur, 1);
1684
} else if (cur[0] == '/') {
1686
ret = xmlParseURIPathSegments(uri, &cur, 1);
1694
ret = xmlParseURIQuery(uri, &cur);
1703
* xmlParseAbsoluteURI:
1704
* @uri: pointer to an URI structure
1705
* @str: pointer to the string to analyze
1707
* Parse an URI reference string and fills in the appropriate fields
1708
* of the @uri structure
1710
* absoluteURI = scheme ":" ( hier_part | opaque_part )
1712
* Returns 0 or the error code
1715
xmlParseAbsoluteURI(xmlURIPtr uri, const char **str) {
1724
ret = xmlParseURIScheme(uri, str);
1725
if (ret != 0) return(ret);
1732
return(xmlParseURIHierPart(uri, str));
1733
return(xmlParseURIOpaquePart(uri, str));
1737
* xmlParseRelativeURI:
1738
* @uri: pointer to an URI structure
1739
* @str: pointer to the string to analyze
1741
* Parse a relative URI string and fills in the appropriate fields
1742
* of the @uri structure
1744
* relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1745
* abs_path = "/" path_segments
1746
* net_path = "//" authority [ abs_path ]
1747
* rel_path = rel_segment [ abs_path ]
1749
* Returns 0 or the error code
1752
xmlParseRelativeURI(xmlURIPtr uri, const char **str) {
1760
if ((cur[0] == '/') && (cur[1] == '/')) {
1762
ret = xmlParseURIAuthority(uri, &cur);
1765
if (cur[0] == '/') {
1767
ret = xmlParseURIPathSegments(uri, &cur, 1);
1769
} else if (cur[0] == '/') {
1771
ret = xmlParseURIPathSegments(uri, &cur, 1);
1772
} else if (cur[0] != '#' && cur[0] != '?') {
1773
ret = xmlParseURIRelSegment(uri, &cur);
1776
if (cur[0] == '/') {
1778
ret = xmlParseURIPathSegments(uri, &cur, 1);
1785
ret = xmlParseURIQuery(uri, &cur);
1794
* xmlParseURIReference:
1795
* @uri: pointer to an URI structure
1796
* @str: the string to analyze
1798
* Parse an URI reference string and fills in the appropriate fields
1799
* of the @uri structure
1801
* URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1803
* Returns 0 or the error code
1806
xmlParseURIReference(xmlURIPtr uri, const char *str) {
1808
const char *tmp = str;
1815
* Try first to parse absolute refs, then fall back to relative if
1818
ret = xmlParseAbsoluteURI(uri, &str);
1822
ret = xmlParseRelativeURI(uri, &str);
1831
ret = xmlParseURIFragment(uri, &str);
1832
if (ret != 0) return(ret);
1843
* @str: the URI string to analyze
1847
* URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1849
* Returns a newly built xmlURIPtr or NULL in case of error
1852
xmlParseURI(const char *str) {
1858
uri = xmlCreateURI();
1860
ret = xmlParseURIReference(uri, str);
1871
* @str: the URI string to analyze
1872
* @raw: if 1, unescaping of URI pieces is disabled
1874
* Parse a URI but allow the original fragments to be kept intact.
1876
* URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1878
* Returns a newly built xmlURIPtr or NULL in case of error
1881
xmlParseURIRaw(const char *str, int raw) {
1887
uri = xmlCreateURI();
1892
ret = xmlParseURIReference(uri, str);
1901
/************************************************************************
1903
* Public functions *
1905
************************************************************************/
1909
* @URI: the URI instance found in the document
1910
* @base: the base value
1912
* Computes the final URI of the reference done by checking that
1913
* the given URI is valid, and building the final URI using the
1914
* base URI. This is processed according to section 5.2 of the
1917
* 5.2. Resolving Relative References to Absolute Form
1919
* Returns a new URI string (to be freed by the caller) or NULL in case
1923
xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1924
xmlChar *val = NULL;
1925
int ret, len, indx, cur, out;
1926
xmlURIPtr ref = NULL;
1927
xmlURIPtr bas = NULL;
1928
xmlURIPtr res = NULL;
1931
* 1) The URI reference is parsed into the potential four components and
1932
* fragment identifier, as described in Section 4.3.
1934
* NOTE that a completely empty URI is treated by modern browsers
1935
* as a reference to "." rather than as a synonym for the current
1936
* URI. Should we do that here?
1942
ref = xmlCreateURI();
1945
ret = xmlParseURIReference(ref, (const char *) URI);
1952
if ((ref != NULL) && (ref->scheme != NULL)) {
1954
* The URI is absolute don't modify.
1956
val = xmlStrdup(URI);
1962
bas = xmlCreateURI();
1965
ret = xmlParseURIReference(bas, (const char *) base);
1969
val = xmlSaveUri(ref);
1974
* the base fragment must be ignored
1976
if (bas->fragment != NULL) {
1977
xmlFree(bas->fragment);
1978
bas->fragment = NULL;
1980
val = xmlSaveUri(bas);
1985
* 2) If the path component is empty and the scheme, authority, and
1986
* query components are undefined, then it is a reference to the
1987
* current document and we are done. Otherwise, the reference URI's
1988
* query and fragment components are defined as found (or not found)
1989
* within the URI reference and not inherited from the base URI.
1991
* NOTE that in modern browsers, the parsing differs from the above
1992
* in the following aspect: the query component is allowed to be
1993
* defined while still treating this as a reference to the current
1996
res = xmlCreateURI();
1999
if ((ref->scheme == NULL) && (ref->path == NULL) &&
2000
((ref->authority == NULL) && (ref->server == NULL))) {
2001
if (bas->scheme != NULL)
2002
res->scheme = xmlMemStrdup(bas->scheme);
2003
if (bas->authority != NULL)
2004
res->authority = xmlMemStrdup(bas->authority);
2005
else if (bas->server != NULL) {
2006
res->server = xmlMemStrdup(bas->server);
2007
if (bas->user != NULL)
2008
res->user = xmlMemStrdup(bas->user);
2009
res->port = bas->port;
2011
if (bas->path != NULL)
2012
res->path = xmlMemStrdup(bas->path);
2013
if (ref->query_raw != NULL)
2014
res->query_raw = xmlMemStrdup (ref->query_raw);
2015
else if (ref->query != NULL)
2016
res->query = xmlMemStrdup(ref->query);
2017
else if (bas->query_raw != NULL)
2018
res->query_raw = xmlMemStrdup(bas->query_raw);
2019
else if (bas->query != NULL)
2020
res->query = xmlMemStrdup(bas->query);
2021
if (ref->fragment != NULL)
2022
res->fragment = xmlMemStrdup(ref->fragment);
2027
* 3) If the scheme component is defined, indicating that the reference
2028
* starts with a scheme name, then the reference is interpreted as an
2029
* absolute URI and we are done. Otherwise, the reference URI's
2030
* scheme is inherited from the base URI's scheme component.
2032
if (ref->scheme != NULL) {
2033
val = xmlSaveUri(ref);
2036
if (bas->scheme != NULL)
2037
res->scheme = xmlMemStrdup(bas->scheme);
2039
if (ref->query_raw != NULL)
2040
res->query_raw = xmlMemStrdup(ref->query_raw);
2041
else if (ref->query != NULL)
2042
res->query = xmlMemStrdup(ref->query);
2043
if (ref->fragment != NULL)
2044
res->fragment = xmlMemStrdup(ref->fragment);
2047
* 4) If the authority component is defined, then the reference is a
2048
* network-path and we skip to step 7. Otherwise, the reference
2049
* URI's authority is inherited from the base URI's authority
2050
* component, which will also be undefined if the URI scheme does not
2051
* use an authority component.
2053
if ((ref->authority != NULL) || (ref->server != NULL)) {
2054
if (ref->authority != NULL)
2055
res->authority = xmlMemStrdup(ref->authority);
2057
res->server = xmlMemStrdup(ref->server);
2058
if (ref->user != NULL)
2059
res->user = xmlMemStrdup(ref->user);
2060
res->port = ref->port;
2062
if (ref->path != NULL)
2063
res->path = xmlMemStrdup(ref->path);
2066
if (bas->authority != NULL)
2067
res->authority = xmlMemStrdup(bas->authority);
2068
else if (bas->server != NULL) {
2069
res->server = xmlMemStrdup(bas->server);
2070
if (bas->user != NULL)
2071
res->user = xmlMemStrdup(bas->user);
2072
res->port = bas->port;
2076
* 5) If the path component begins with a slash character ("/"), then
2077
* the reference is an absolute-path and we skip to step 7.
2079
if ((ref->path != NULL) && (ref->path[0] == '/')) {
2080
res->path = xmlMemStrdup(ref->path);
2086
* 6) If this step is reached, then we are resolving a relative-path
2087
* reference. The relative path needs to be merged with the base
2088
* URI's path. Although there are many ways to do this, we will
2089
* describe a simple method using a separate string buffer.
2091
* Allocate a buffer large enough for the result string.
2093
len = 2; /* extra / and 0 */
2094
if (ref->path != NULL)
2095
len += strlen(ref->path);
2096
if (bas->path != NULL)
2097
len += strlen(bas->path);
2098
res->path = (char *) xmlMallocAtomic(len);
2099
if (res->path == NULL) {
2100
xmlGenericError(xmlGenericErrorContext,
2101
"xmlBuildURI: out of memory\n");
2107
* a) All but the last segment of the base URI's path component is
2108
* copied to the buffer. In other words, any characters after the
2109
* last (right-most) slash character, if any, are excluded.
2113
if (bas->path != NULL) {
2114
while (bas->path[cur] != 0) {
2115
while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2117
if (bas->path[cur] == 0)
2122
res->path[out] = bas->path[out];
2130
* b) The reference's path component is appended to the buffer
2133
if (ref->path != NULL && ref->path[0] != 0) {
2136
* Ensure the path includes a '/'
2138
if ((out == 0) && (bas->server != NULL))
2139
res->path[out++] = '/';
2140
while (ref->path[indx] != 0) {
2141
res->path[out++] = ref->path[indx++];
2147
* Steps c) to h) are really path normalization steps
2149
xmlNormalizeURIPath(res->path);
2154
* 7) The resulting URI components, including any inherited from the
2155
* base URI, are recombined to give the absolute form of the URI
2158
val = xmlSaveUri(res);
2171
* xmlBuildRelativeURI:
2172
* @URI: the URI reference under consideration
2173
* @base: the base value
2175
* Expresses the URI of the reference in terms relative to the
2176
* base. Some examples of this operation include:
2177
* base = "http://site1.com/docs/book1.html"
2178
* URI input URI returned
2179
* docs/pic1.gif pic1.gif
2180
* docs/img/pic1.gif img/pic1.gif
2181
* img/pic1.gif ../img/pic1.gif
2182
* http://site1.com/docs/pic1.gif pic1.gif
2183
* http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2185
* base = "docs/book1.html"
2186
* URI input URI returned
2187
* docs/pic1.gif pic1.gif
2188
* docs/img/pic1.gif img/pic1.gif
2189
* img/pic1.gif ../img/pic1.gif
2190
* http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2193
* Note: if the URI reference is really weird or complicated, it may be
2194
* worthwhile to first convert it into a "nice" one by calling
2195
* xmlBuildURI (using 'base') before calling this routine,
2196
* since this routine (for reasonable efficiency) assumes URI has
2197
* already been through some validation.
2199
* Returns a new URI string (to be freed by the caller) or NULL in case
2203
xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2205
xmlChar *val = NULL;
2211
xmlURIPtr ref = NULL;
2212
xmlURIPtr bas = NULL;
2213
xmlChar *bptr, *uptr, *vptr;
2214
int remove_path = 0;
2216
if ((URI == NULL) || (*URI == 0))
2220
* First parse URI into a standard form
2222
ref = xmlCreateURI ();
2225
/* If URI not already in "relative" form */
2226
if (URI[0] != '.') {
2227
ret = xmlParseURIReference (ref, (const char *) URI);
2229
goto done; /* Error in URI, return NULL */
2231
ref->path = (char *)xmlStrdup(URI);
2234
* Next parse base into the same standard form
2236
if ((base == NULL) || (*base == 0)) {
2237
val = xmlStrdup (URI);
2240
bas = xmlCreateURI ();
2243
if (base[0] != '.') {
2244
ret = xmlParseURIReference (bas, (const char *) base);
2246
goto done; /* Error in base, return NULL */
2248
bas->path = (char *)xmlStrdup(base);
2251
* If the scheme / server on the URI differs from the base,
2252
* just return the URI
2254
if ((ref->scheme != NULL) &&
2255
((bas->scheme == NULL) ||
2256
(xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2257
(xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2258
val = xmlStrdup (URI);
2261
if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2262
val = xmlStrdup(BAD_CAST "");
2265
if (bas->path == NULL) {
2266
val = xmlStrdup((xmlChar *)ref->path);
2269
if (ref->path == NULL) {
2270
ref->path = (char *) "/";
2275
* At this point (at last!) we can compare the two paths
2277
* First we take care of the special case where either of the
2278
* two path components may be missing (bug 316224)
2280
if (bas->path == NULL) {
2281
if (ref->path != NULL) {
2282
uptr = (xmlChar *) ref->path;
2285
/* exception characters from xmlSaveUri */
2286
val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2290
bptr = (xmlChar *)bas->path;
2291
if (ref->path == NULL) {
2292
for (ix = 0; bptr[ix] != 0; ix++) {
2293
if (bptr[ix] == '/')
2297
len = 1; /* this is for a string terminator only */
2300
* Next we compare the two strings and find where they first differ
2302
if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2304
if ((*bptr == '.') && (bptr[1] == '/'))
2306
else if ((*bptr == '/') && (ref->path[pos] != '/'))
2308
while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2311
if (bptr[pos] == ref->path[pos]) {
2312
val = xmlStrdup(BAD_CAST "");
2313
goto done; /* (I can't imagine why anyone would do this) */
2317
* In URI, "back up" to the last '/' encountered. This will be the
2318
* beginning of the "unique" suffix of URI
2321
if ((ref->path[ix] == '/') && (ix > 0))
2323
else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2325
for (; ix > 0; ix--) {
2326
if (ref->path[ix] == '/')
2330
uptr = (xmlChar *)ref->path;
2333
uptr = (xmlChar *)&ref->path[ix];
2337
* In base, count the number of '/' from the differing point
2339
if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2340
for (; bptr[ix] != 0; ix++) {
2341
if (bptr[ix] == '/')
2345
len = xmlStrlen (uptr) + 1;
2350
/* exception characters from xmlSaveUri */
2351
val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2356
* Allocate just enough space for the returned string -
2357
* length of the remainder of the URI, plus enough space
2358
* for the "../" groups, plus one for the terminator
2360
val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2362
xmlGenericError(xmlGenericErrorContext,
2363
"xmlBuildRelativeURI: out of memory\n");
2368
* Put in as many "../" as needed
2370
for (; nbslash>0; nbslash--) {
2376
* Finish up with the end of the URI
2379
if ((vptr > val) && (len > 0) &&
2380
(uptr[0] == '/') && (vptr[-1] == '/')) {
2381
memcpy (vptr, uptr + 1, len - 1);
2384
memcpy (vptr, uptr, len);
2391
/* escape the freshly-built path */
2393
/* exception characters from xmlSaveUri */
2394
val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2399
* Free the working variables
2401
if (remove_path != 0)
2413
* @path: the resource locator in a filesystem notation
2415
* Constructs a canonic path from the specified path.
2417
* Returns a new canonic path, or a duplicate of the path parameter if the
2418
* construction fails. The caller is responsible for freeing the memory occupied
2419
* by the returned string. If there is insufficient memory available, or the
2420
* argument is NULL, the function returns NULL.
2422
#define IS_WINDOWS_PATH(p) \
2424
(((p[0] >= 'a') && (p[0] <= 'z')) || \
2425
((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2426
(p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2428
xmlCanonicPath(const xmlChar *path)
2431
* For Windows implementations, additional work needs to be done to
2432
* replace backslashes in pathnames with "forward slashes"
2434
#if defined(_WIN32) && !defined(__CYGWIN__)
2441
const xmlChar *absuri;
2446
/* sanitize filename starting with // so it can be used as URI */
2447
if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2450
if ((uri = xmlParseURI((const char *) path)) != NULL) {
2452
return xmlStrdup(path);
2455
/* Check if this is an "absolute uri" */
2456
absuri = xmlStrstr(path, BAD_CAST "://");
2457
if (absuri != NULL) {
2463
* this looks like an URI where some parts have not been
2464
* escaped leading to a parsing problem. Check that the first
2465
* part matches a protocol.
2468
/* Bypass if first part (part before the '://') is > 20 chars */
2469
if ((l <= 0) || (l > 20))
2470
goto path_processing;
2471
/* Bypass if any non-alpha characters are present in first part */
2472
for (j = 0;j < l;j++) {
2474
if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2475
goto path_processing;
2478
/* Escape all except the characters specified in the supplied path */
2479
escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2480
if (escURI != NULL) {
2481
/* Try parsing the escaped path */
2482
uri = xmlParseURI((const char *) escURI);
2483
/* If successful, return the escaped string */
2492
/* For Windows implementations, replace backslashes with 'forward slashes' */
2493
#if defined(_WIN32) && !defined(__CYGWIN__)
2495
* Create a URI structure
2497
uri = xmlCreateURI();
2498
if (uri == NULL) { /* Guard against 'out of memory' */
2502
len = xmlStrlen(path);
2503
if ((len > 2) && IS_WINDOWS_PATH(path)) {
2504
/* make the scheme 'file' */
2505
uri->scheme = xmlStrdup(BAD_CAST "file");
2506
/* allocate space for leading '/' + path + string terminator */
2507
uri->path = xmlMallocAtomic(len + 2);
2508
if (uri->path == NULL) {
2509
xmlFreeURI(uri); /* Guard agains 'out of memory' */
2512
/* Put in leading '/' plus path */
2515
strncpy(p, path, len + 1);
2517
uri->path = xmlStrdup(path);
2518
if (uri->path == NULL) {
2524
/* Now change all occurrences of '\' to '/' */
2525
while (*p != '\0') {
2531
if (uri->scheme == NULL) {
2532
ret = xmlStrdup((const xmlChar *) uri->path);
2534
ret = xmlSaveUri(uri);
2539
ret = xmlStrdup((const xmlChar *) path);
2546
* @path: the resource locator in a filesystem notation
2548
* Constructs an URI expressing the existing path
2550
* Returns a new URI, or a duplicate of the path parameter if the
2551
* construction fails. The caller is responsible for freeing the memory
2552
* occupied by the returned string. If there is insufficient memory available,
2553
* or the argument is NULL, the function returns NULL.
2556
xmlPathToURI(const xmlChar *path)
2565
if ((uri = xmlParseURI((const char *) path)) != NULL) {
2567
return xmlStrdup(path);
2569
cal = xmlCanonicPath(path);
2572
#if defined(_WIN32) && !defined(__CYGWIN__)
2573
/* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2574
If 'cal' is a valid URI already then we are done here, as continuing would make
2576
if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2580
/* 'cal' can contain a relative path with backslashes. If that is processed
2581
by xmlSaveURI, they will be escaped and the external entity loader machinery
2582
will fail. So convert them to slashes. Misuse 'ret' for walking. */
2584
while (*ret != '\0') {
2590
memset(&temp, 0, sizeof(temp));
2591
temp.path = (char *) cal;
2592
ret = xmlSaveUri(&temp);
2597
#include "elfgcchack.h"