2
* The Apache Software License, Version 1.1
5
* Copyright (c) 1999-2003 The Apache Software Foundation. All rights
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions
12
* 1. Redistributions of source code must retain the above copyright
13
* notice, this list of conditions and the following disclaimer.
15
* 2. Redistributions in binary form must reproduce the above copyright
16
* notice, this list of conditions and the following disclaimer in
17
* the documentation and/or other materials provided with the
20
* 3. The end-user documentation included with the redistribution,
21
* if any, must include the following acknowledgment:
22
* "This product includes software developed by the
23
* Apache Software Foundation (http://www.apache.org/)."
24
* Alternately, this acknowledgment may appear in the software itself,
25
* if and wherever such third-party acknowledgments normally appear.
27
* 4. The names "Xerces" and "Apache Software Foundation" must
28
* not be used to endorse or promote products derived from this
29
* software without prior written permission. For written
30
* permission, please contact apache@apache.org.
32
* 5. Products derived from this software may not be called "Apache",
33
* nor may "Apache" appear in their name, without prior written
34
* permission of the Apache Software Foundation.
36
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48
* ====================================================================
50
* This software consists of voluntary contributions made by many
51
* individuals on behalf of the Apache Software Foundation and was
52
* originally based on software copyright (c) 1999, iClick Inc.,
53
* http://www.apache.org. For more information on the Apache Software
54
* Foundation, please see <http://www.apache.org/>.
2
* Copyright 1999-2005 The Apache Software Foundation.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
57
17
package org.apache.xerces.util;
304
270
public URI(String p_uriSpec) throws MalformedURIException {
305
271
this((URI)null, p_uriSpec);
275
* Construct a new URI from a URI specification string. If the
276
* specification follows the "generic URI" syntax, (two slashes
277
* following the first colon), the specification will be parsed
278
* accordingly - setting the scheme, userinfo, host, port, path, query
279
* string and fragment fields as necessary. If the specification does
280
* not follow the "generic URI" syntax, the specification is parsed
281
* into a scheme and scheme-specific part (stored as the path) only.
282
* Construct a relative URI if boolean is assigned to "true"
283
* and p_uriSpec is not a valid absolute URI, instead of throwing an exception.
285
* @param p_uriSpec the URI specification string (cannot be null or
287
* @param allowNonAbsoluteURI true to permit non-absolute URIs,
290
* @exception MalformedURIException if p_uriSpec violates any syntax
293
public URI(String p_uriSpec, boolean allowNonAbsoluteURI) throws MalformedURIException {
294
this((URI)null, p_uriSpec, allowNonAbsoluteURI);
309
298
* Construct a new URI from a base URI and a URI specification string.
310
299
* The URI specification string may be a relative URI.
320
309
public URI(URI p_base, String p_uriSpec) throws MalformedURIException {
321
310
initialize(p_base, p_uriSpec);
314
* Construct a new URI from a base URI and a URI specification string.
315
* The URI specification string may be a relative URI.
316
* Construct a relative URI if boolean is assigned to "true"
317
* and p_uriSpec is not a valid absolute URI and p_base is null
318
* instead of throwing an exception.
320
* @param p_base the base URI (cannot be null if p_uriSpec is null or
322
* @param p_uriSpec the URI specification string (cannot be null or
323
* empty if p_base is null)
324
* @param allowNonAbsoluteURI true to permit non-absolute URIs,
327
* @exception MalformedURIException if p_uriSpec violates any syntax
330
public URI(URI p_base, String p_uriSpec, boolean allowNonAbsoluteURI) throws MalformedURIException {
331
initialize(p_base, p_uriSpec, allowNonAbsoluteURI);
325
335
* Construct a new URI that does not follow the generic URI syntax.
455
465
m_queryString = p_other.getQueryString();
456
466
m_fragment = p_other.getFragment();
470
* Initializes this URI from a base URI and a URI specification string.
471
* See RFC 2396 Section 4 and Appendix B for specifications on parsing
472
* the URI and Section 5 for specifications on resolving relative URIs
473
* and relative paths.
475
* @param p_base the base URI (may be null if p_uriSpec is an absolute
477
* @param p_uriSpec the URI spec string which may be an absolute or
478
* relative URI (can only be null/empty if p_base
480
* @param allowNonAbsoluteURI true to permit non-absolute URIs,
481
* in case of relative URI, false otherwise.
483
* @exception MalformedURIException if p_base is null and p_uriSpec
484
* is not an absolute URI or if
485
* p_uriSpec violates syntax rules
487
private void initialize(URI p_base, String p_uriSpec, boolean allowNonAbsoluteURI)
488
throws MalformedURIException {
490
String uriSpec = p_uriSpec;
491
int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0;
493
if (p_base == null && uriSpecLen == 0) {
494
if (allowNonAbsoluteURI) {
498
throw new MalformedURIException("Cannot initialize URI with empty parameters.");
501
// just make a copy of the base if spec is empty
502
if (uriSpecLen == 0) {
509
// Check for scheme, which must be before '/', '?' or '#'.
510
int colonIdx = uriSpec.indexOf(':');
511
if (colonIdx != -1) {
512
final int searchFrom = colonIdx - 1;
513
// search backwards starting from character before ':'.
514
int slashIdx = uriSpec.lastIndexOf('/', searchFrom);
515
int queryIdx = uriSpec.lastIndexOf('?', searchFrom);
516
int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom);
518
if (colonIdx == 0 || slashIdx != -1 ||
519
queryIdx != -1 || fragmentIdx != -1) {
520
// A standalone base is a valid URI according to spec
521
if (colonIdx == 0 || (p_base == null && fragmentIdx != 0 && !allowNonAbsoluteURI)) {
522
throw new MalformedURIException("No scheme found in URI.");
526
initializeScheme(uriSpec);
527
index = m_scheme.length()+1;
529
// Neither 'scheme:' nor 'scheme:#fragment' is a valid URI.
530
if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') {
531
throw new MalformedURIException("Scheme specific part cannot be empty.");
535
else if (p_base == null && uriSpec.indexOf('#') != 0 && !allowNonAbsoluteURI) {
536
throw new MalformedURIException("No scheme found in URI.");
539
// Two slashes means we may have authority, but definitely means we're either
540
// matching net_path or abs_path. These two productions are ambiguous in that
541
// every net_path (except those containing an IPv6Reference) is an abs_path.
542
// RFC 2396 resolves this ambiguity by applying a greedy leftmost matching rule.
543
// Try matching net_path first, and if that fails we don't have authority so
544
// then attempt to match abs_path.
546
// net_path = "//" authority [ abs_path ]
547
// abs_path = "/" path_segments
548
if (((index+1) < uriSpecLen) &&
549
(uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) {
551
int startPos = index;
553
// Authority will be everything up to path, query or fragment
554
char testChar = '\0';
555
while (index < uriSpecLen) {
556
testChar = uriSpec.charAt(index);
557
if (testChar == '/' || testChar == '?' || testChar == '#') {
563
// Attempt to parse authority. If the section is an empty string
564
// this is a valid server based authority, so set the host to this
566
if (index > startPos) {
567
// If we didn't find authority we need to back up. Attempt to
568
// match against abs_path next.
569
if (!initializeAuthority(uriSpec.substring(startPos, index))) {
570
index = startPos - 2;
578
initializePath(uriSpec, index);
580
// Resolve relative URI to base URI - see RFC 2396 Section 5.2
581
// In some cases, it might make more sense to throw an exception
582
// (when scheme is specified in the string spec and the base URI
583
// is also specified, for example), but we're just following the
584
// RFC specifications
585
if (p_base != null) {
460
591
* Initializes this URI from a base URI and a URI specification string.
494
// Check for scheme, which must be before '/', '?' or '#'. Also handle
495
// names with DOS drive letters ('D:'), so 1-character schemes are not
497
int colonIdx = uriSpec.indexOf(':');
498
int slashIdx = uriSpec.indexOf('/');
499
int queryIdx = uriSpec.indexOf('?');
500
int fragmentIdx = uriSpec.indexOf('#');
502
if ((colonIdx < 2) ||
503
(colonIdx > slashIdx && slashIdx != -1) ||
504
(colonIdx > queryIdx && queryIdx != -1) ||
505
(colonIdx > fragmentIdx && fragmentIdx != -1)) {
506
// A standalone base is a valid URI according to spec
507
if (colonIdx == 0 || (p_base == null && fragmentIdx != 0)) {
508
throw new MalformedURIException("No scheme found in URI.");
625
// Check for scheme, which must be before '/', '?' or '#'.
626
int colonIdx = uriSpec.indexOf(':');
627
if (colonIdx != -1) {
628
final int searchFrom = colonIdx - 1;
629
// search backwards starting from character before ':'.
630
int slashIdx = uriSpec.lastIndexOf('/', searchFrom);
631
int queryIdx = uriSpec.lastIndexOf('?', searchFrom);
632
int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom);
634
if (colonIdx == 0 || slashIdx != -1 ||
635
queryIdx != -1 || fragmentIdx != -1) {
636
// A standalone base is a valid URI according to spec
637
if (colonIdx == 0 || (p_base == null && fragmentIdx != 0)) {
638
throw new MalformedURIException("No scheme found in URI.");
642
initializeScheme(uriSpec);
643
index = m_scheme.length()+1;
645
// Neither 'scheme:' nor 'scheme:#fragment' is a valid URI.
646
if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') {
647
throw new MalformedURIException("Scheme specific part cannot be empty.");
512
initializeScheme(uriSpec);
513
index = m_scheme.length()+1;
515
// Neither 'scheme:' nor 'scheme:#fragment' is a valid URI.
516
if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') {
517
throw new MalformedURIException("Scheme specific part cannot be empty.");
651
else if (p_base == null && uriSpec.indexOf('#') != 0) {
652
throw new MalformedURIException("No scheme found in URI.");
521
655
// Two slashes means we may have authority, but definitely means we're either
575
719
// identified this as a bug in the RFC
576
720
if (m_path.length() == 0 && m_scheme == null &&
577
721
m_host == null && m_regAuthority == null) {
578
m_scheme = p_base.getScheme();
579
m_userinfo = p_base.getUserinfo();
580
m_host = p_base.getHost();
581
m_port = p_base.getPort();
582
m_regAuthority = p_base.getRegBasedAuthority();
583
m_path = p_base.getPath();
585
if (m_queryString == null) {
586
m_queryString = p_base.getQueryString();
722
m_scheme = p_base.getScheme();
723
m_userinfo = p_base.getUserinfo();
724
m_host = p_base.getHost();
725
m_port = p_base.getPort();
726
m_regAuthority = p_base.getRegBasedAuthority();
727
m_path = p_base.getPath();
729
if (m_queryString == null) {
730
m_queryString = p_base.getQueryString();
732
if (m_fragment == null) {
733
m_fragment = p_base.getFragment();
591
739
// check for scheme - RFC 2396 5.2 #3
592
740
// if we found a scheme, it means absolute URI, so we're done
593
741
if (m_scheme == null) {
594
m_scheme = p_base.getScheme();
742
m_scheme = p_base.getScheme();
600
748
// check for authority - RFC 2396 5.2 #4
601
749
// if we found a host, then we've got a network path, so we're done
602
750
if (m_host == null && m_regAuthority == null) {
603
m_userinfo = p_base.getUserinfo();
604
m_host = p_base.getHost();
605
m_port = p_base.getPort();
606
m_regAuthority = p_base.getRegBasedAuthority();
751
m_userinfo = p_base.getUserinfo();
752
m_host = p_base.getHost();
753
m_port = p_base.getPort();
754
m_regAuthority = p_base.getRegBasedAuthority();
612
760
// check for absolute path - RFC 2396 5.2 #5
613
761
if (m_path.length() > 0 &&
614
m_path.startsWith("/")) {
762
m_path.startsWith("/")) {
618
766
// if we get to this point, we need to resolve relative path
619
767
// RFC 2396 5.2 #6
620
768
String path = "";
621
769
String basePath = p_base.getPath();
623
771
// 6a - get all but the last segment of the base URI path
624
772
if (basePath != null && basePath.length() > 0) {
625
int lastSlash = basePath.lastIndexOf('/');
626
if (lastSlash != -1) {
627
path = basePath.substring(0, lastSlash+1);
773
int lastSlash = basePath.lastIndexOf('/');
774
if (lastSlash != -1) {
775
path = basePath.substring(0, lastSlash+1);
630
778
else if (m_path.length() > 0) {
634
782
// 6b - append the relative URI path
635
783
path = path.concat(m_path);
637
785
// 6c - remove all "./" where "." is a complete path segment
639
787
while ((index = path.indexOf("/./")) != -1) {
640
path = path.substring(0, index+1).concat(path.substring(index+3));
788
path = path.substring(0, index+1).concat(path.substring(index+3));
643
791
// 6d - remove "." if path ends with "." as a complete path segment
644
792
if (path.endsWith("/.")) {
645
path = path.substring(0, path.length()-1);
793
path = path.substring(0, path.length()-1);
648
796
// 6e - remove all "<segment>/../" where "<segment>" is a complete
649
797
// path segment not equal to ".."
651
799
int segIndex = -1;
652
800
String tempString = null;
654
802
while ((index = path.indexOf("/../", index)) > 0) {
655
tempString = path.substring(0, path.indexOf("/../"));
656
segIndex = tempString.lastIndexOf('/');
657
if (segIndex != -1) {
658
if (!tempString.substring(segIndex).equals("..")) {
659
path = path.substring(0, segIndex+1).concat(path.substring(index+4));
803
tempString = path.substring(0, path.indexOf("/../"));
804
segIndex = tempString.lastIndexOf('/');
805
if (segIndex != -1) {
806
if (!tempString.substring(segIndex).equals("..")) {
807
path = path.substring(0, segIndex+1).concat(path.substring(index+4));
669
819
// 6f - remove ending "<segment>/.." where "<segment>" is a
670
820
// complete path segment
671
821
if (path.endsWith("/..")) {
672
tempString = path.substring(0, path.length()-3);
673
segIndex = tempString.lastIndexOf('/');
674
if (segIndex != -1) {
675
path = path.substring(0, segIndex+1);
822
tempString = path.substring(0, path.length()-3);
823
segIndex = tempString.lastIndexOf('/');
824
if (segIndex != -1) {
825
path = path.substring(0, segIndex+1);