2
* The Apache Software License, Version 1.1
5
* Copyright (c) 1999-2004 The Apache Software Foundation.
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions
12
* 1. Redistributions of source code must retain the above copyright
13
* notice, this list of conditions and the following disclaimer.
15
* 2. Redistributions in binary form must reproduce the above copyright
16
* notice, this list of conditions and the following disclaimer in
17
* the documentation and/or other materials provided with the
20
* 3. The end-user documentation included with the redistribution,
21
* if any, must include the following acknowledgment:
22
* "This product includes software developed by the
23
* Apache Software Foundation (http://www.apache.org/)."
24
* Alternately, this acknowledgment may appear in the software itself,
25
* if and wherever such third-party acknowledgments normally appear.
27
* 4. The names "Xerces" and "Apache Software Foundation" must
28
* not be used to endorse or promote products derived from this
29
* software without prior written permission. For written
30
* permission, please contact apache@apache.org.
32
* 5. Products derived from this software may not be called "Apache",
33
* nor may "Apache" appear in their name, without prior written
34
* permission of the Apache Software Foundation.
36
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48
* ====================================================================
50
* This software consists of voluntary contributions made by many
51
* individuals on behalf of the Apache Software Foundation and was
52
* originally based on software copyright (c) 1999, International
53
* Business Machines, Inc., http://www.apache.org. For more
54
* information on the Apache Software Foundation, please see
55
* <http://www.apache.org/>.
2
* Copyright 1999-2006 The Apache Software Foundation.
4
* Licensed under the Apache License, Version 2.0 (the "License");
5
* you may not use this file except in compliance with the License.
6
* You may obtain a copy of the License at
8
* http://www.apache.org/licenses/LICENSE-2.0
10
* Unless required by applicable law or agreed to in writing, software
11
* distributed under the License is distributed on an "AS IS" BASIS,
12
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
* See the License for the specific language governing permissions and
14
* limitations under the License.
58
17
package org.apache.xerces.impl;
938
938
if (reader == null) {
939
939
stream = xmlInputSource.getByteStream();
940
if(stream != null && encoding != null)
941
declaredEncoding = true;
942
940
if (stream == null) {
943
941
URL location = new URL(expandedSystemId);
944
942
URLConnection connect = location.openConnection();
945
stream = connect.getInputStream();
947
// REVISIT: If the URLConnection has external encoding
948
// information, we should be reading it here. It's located
949
// in the charset parameter of Content-Type. -- mrglavas
950
if (connect instanceof HttpURLConnection) {
951
String redirect = connect.getURL().toString();
952
// E43: Check if the URL was redirected, and then
953
// update literal and expanded system IDs if needed.
954
if (!redirect.equals(expandedSystemId)) {
955
literalSystemId = redirect;
956
expandedSystemId = redirect;
943
if (!(connect instanceof HttpURLConnection)) {
944
stream = connect.getInputStream();
947
boolean followRedirects = true;
949
// setup URLConnection if we have an HTTPInputSource
950
if (xmlInputSource instanceof HTTPInputSource) {
951
final HttpURLConnection urlConnection = (HttpURLConnection) connect;
952
final HTTPInputSource httpInputSource = (HTTPInputSource) xmlInputSource;
954
// set request properties
955
Iterator propIter = httpInputSource.getHTTPRequestProperties();
956
while (propIter.hasNext()) {
957
Map.Entry entry = (Map.Entry) propIter.next();
958
urlConnection.setRequestProperty((String) entry.getKey(), (String) entry.getValue());
961
// set preference for redirection
962
followRedirects = httpInputSource.getFollowHTTPRedirects();
963
if (!followRedirects) {
964
setInstanceFollowRedirects(urlConnection, followRedirects);
968
stream = connect.getInputStream();
970
// REVISIT: If the URLConnection has external encoding
971
// information, we should be reading it here. It's located
972
// in the charset parameter of Content-Type. -- mrglavas
974
if (followRedirects) {
975
String redirect = connect.getURL().toString();
976
// E43: Check if the URL was redirected, and then
977
// update literal and expanded system IDs if needed.
978
if (!redirect.equals(expandedSystemId)) {
979
literalSystemId = redirect;
980
expandedSystemId = redirect;
1479
1552
gAfterEscaping2[ch] = gHexChs[ch & 0xf];
1556
private static PrivilegedAction GET_USER_DIR_SYSTEM_PROPERTY = new PrivilegedAction() {
1557
public Object run() {
1558
return System.getProperty("user.dir");
1482
1562
// To escape the "user.dir" system property, by using %HH to represent
1483
1563
// special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', '#', '%'
1484
1564
// and '"'. It's a static method, so needs to be synchronized.
1485
1565
// this method looks heavy, but since the system property isn't expected
1486
// to change often, so in most cases, we only need to return the string
1566
// to change often, so in most cases, we only need to return the URI
1487
1567
// that was escaped before.
1488
1568
// According to the URI spec, non-ASCII characters (whose value >= 128)
1489
1569
// need to be escaped too.
1490
1570
// REVISIT: don't know how to escape non-ASCII characters, especially
1491
1571
// which encoding to use. Leave them for now.
1492
private static synchronized String getUserDir() {
1572
private static synchronized URI getUserDir() throws URI.MalformedURIException {
1493
1573
// get the user.dir property
1494
1574
String userDir = "";
1496
userDir = System.getProperty("user.dir");
1498
catch (SecurityException se) {
1576
userDir = (String) AccessController.doPrivileged(GET_USER_DIR_SYSTEM_PROPERTY);
1578
catch (SecurityException se) {}
1501
1580
// return empty string if property value is empty string.
1502
if (userDir.length() == 0)
1581
if (userDir.length() == 0)
1582
return new URI("file", "", "", null, null);
1505
1584
// compute the new escaped value if the new property value doesn't
1506
1585
// match the previous one
1507
if (userDir.equals(gUserDir)) {
1508
return gEscapedUserDir;
1586
if (gUserDirURI != null && userDir.equals(gUserDir)) {
1511
1590
// record the new value as the global property value
1602
1693
boolean strict)
1603
1694
throws URI.MalformedURIException {
1696
// check if there is a system id before
1697
// trying to expand it.
1698
if (systemId == null) {
1605
1702
// system id has to be a valid URI
1608
// check if there is a system id before
1609
// trying to expand it.
1610
if (systemId == null) {
1615
// if it's already an absolute one, return it
1616
URI uri = new URI(systemId);
1619
catch (URI.MalformedURIException ex) {
1622
// if there isn't a base uri, use the working directory
1623
if (baseSystemId == null || baseSystemId.length() == 0) {
1624
base = new URI("file", "", getUserDir(), null, null);
1626
// otherwise, use the base uri
1629
base = new URI(baseSystemId);
1631
catch (URI.MalformedURIException e) {
1632
// assume "base" is also a relative uri
1633
String dir = getUserDir();
1634
dir = dir + baseSystemId;
1635
base = new URI("file", "", dir, null, null);
1638
// absolutize the system id using the base
1639
URI uri = new URI(base, systemId);
1640
// return the string rep of the new uri (an absolute one)
1641
return uri.toString();
1643
// if any exception is thrown, it'll get thrown to the caller.
1704
return expandSystemIdStrictOn(systemId, baseSystemId);
1646
// check for bad parameters id
1647
if (systemId == null || systemId.length() == 0) {
1650
// if id already expanded, return
1707
// Assume the URIs are well-formed. If it turns out they're not, try fixing them up.
1652
URI uri = new URI(systemId.trim());
1709
return expandSystemIdStrictOff(systemId, baseSystemId);
1657
1711
catch (URI.MalformedURIException e) {
1658
1712
// continue on...
1715
// check for bad parameters id
1716
if (systemId.length() == 0) {
1660
1720
// normalize id
1661
1721
String id = fixURI(systemId);
1700
1757
return uri.toString();
1702
} // expandSystemId(String,String):String
1759
} // expandSystemId(String,String,boolean):String
1762
* Helper method for expandSystemId(String,String,boolean):String
1764
private static String expandSystemIdStrictOn(String systemId, String baseSystemId)
1765
throws URI.MalformedURIException {
1767
URI systemURI = new URI(systemId, true);
1768
// If it's already an absolute one, return it
1769
if (systemURI.isAbsoluteURI()) {
1773
// If there isn't a base URI, use the working directory
1775
if (baseSystemId == null || baseSystemId.length() == 0) {
1776
baseURI = getUserDir();
1779
baseURI = new URI(baseSystemId, true);
1780
if (!baseURI.isAbsoluteURI()) {
1781
// assume "base" is also a relative uri
1782
baseURI.absolutize(getUserDir());
1786
// absolutize the system identifier using the base URI
1787
systemURI.absolutize(baseURI);
1789
// return the string rep of the new uri (an absolute one)
1790
return systemURI.toString();
1792
// if any exception is thrown, it'll get thrown to the caller.
1794
} // expandSystemIdStrictOn(String,String):String
1797
* Helper method for expandSystemId(String,String,boolean):String
1799
private static String expandSystemIdStrictOff(String systemId, String baseSystemId)
1800
throws URI.MalformedURIException {
1802
URI systemURI = new URI(systemId, true);
1803
// If it's already an absolute one, return it
1804
if (systemURI.isAbsoluteURI()) {
1805
if (systemURI.getScheme().length() > 1) {
1809
* If the scheme's length is only one character,
1810
* it's likely that this was intended as a file
1811
* path. Fixing this up in expandSystemId to
1812
* maintain backwards compatibility.
1814
throw new URI.MalformedURIException();
1817
// If there isn't a base URI, use the working directory
1819
if (baseSystemId == null || baseSystemId.length() == 0) {
1820
baseURI = getUserDir();
1823
baseURI = new URI(baseSystemId, true);
1824
if (!baseURI.isAbsoluteURI()) {
1825
// assume "base" is also a relative uri
1826
baseURI.absolutize(getUserDir());
1830
// absolutize the system identifier using the base URI
1831
systemURI.absolutize(baseURI);
1833
// return the string rep of the new uri (an absolute one)
1834
return systemURI.toString();
1836
// if any exception is thrown, it'll get thrown to the caller.
1838
} // expandSystemIdStrictOff(String,String):String
1841
* Attempt to set whether redirects will be followed for an <code>HttpURLConnection</code>.
1842
* This may fail on earlier JDKs which do not support setting this preference.
1844
public static void setInstanceFollowRedirects(HttpURLConnection urlCon, boolean followRedirects) {
1846
Method method = HttpURLConnection.class.getMethod("setInstanceFollowRedirects", new Class[] {Boolean.TYPE});
1847
method.invoke(urlCon, new Object[] {followRedirects ? Boolean.TRUE : Boolean.FALSE});
1849
// setInstanceFollowRedirects doesn't exist.
1850
catch (Exception exc) {}
1705
1854
// Protected methods
2433
2653
// return line number of position in most
2434
2654
// recent external entity
2435
2655
public int getLineNumber() {
2436
2656
// search for the first external entity on the stack
2437
2657
int size = fEntityStack.size();
2438
for (int i=size-1; i>0 ; i--) {
2439
ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2658
for (int i = size - 1; i >= 0 ; --i) {
2659
ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2440
2660
if (firstExternalEntity.isExternal()) {
2441
2661
return firstExternalEntity.lineNumber;
2447
2667
// return column number of position in most
2448
2668
// recent external entity
2449
2669
public int getColumnNumber() {
2450
2670
// search for the first external entity on the stack
2451
2671
int size = fEntityStack.size();
2452
for (int i=size-1; i>0 ; i--) {
2453
ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2672
for (int i = size - 1; i >= 0; --i) {
2673
ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2454
2674
if (firstExternalEntity.isExternal()) {
2455
2675
return firstExternalEntity.columnNumber;
2681
// return character offset of position in most
2682
// recent external entity
2683
public int getCharacterOffset() {
2684
// search for the first external entity on the stack
2685
int size = fEntityStack.size();
2686
for (int i = size - 1; i >= 0; --i) {
2687
ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2688
if (firstExternalEntity.isExternal()) {
2689
return firstExternalEntity.baseCharOffset + (firstExternalEntity.position - firstExternalEntity.startPosition);
2461
2695
// return encoding of most recent external entity
2462
2696
public String getEncoding() {
2463
2697
// search for the first external entity on the stack
2464
2698
int size = fEntityStack.size();
2465
for (int i=size-1; i>0 ; i--) {
2466
ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2699
for (int i = size - 1; i >= 0; --i) {
2700
ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2467
2701
if (firstExternalEntity.isExternal()) {
2468
2702
return firstExternalEntity.encoding;
2708
// return xml version of most recent external entity
2709
public String getXMLVersion() {
2710
// search for the first external entity on the stack
2711
int size = fEntityStack.size();
2712
for (int i = size - 1; i >= 0; --i) {
2713
ScannedEntity firstExternalEntity = (ScannedEntity)fEntityStack.elementAt(i);
2714
if (firstExternalEntity.isExternal()) {
2715
return firstExternalEntity.xmlVersion;
2721
/** Returns whether the encoding of this entity was externally specified. **/
2722
public boolean isEncodingExternallySpecified() {
2723
return externallySpecifiedEncoding;
2726
/** Sets whether the encoding of this entity was externally specified. **/
2727
public void setEncodingExternallySpecified(boolean value) {
2728
externallySpecifiedEncoding = value;
2475
2732
// Object methods
2478
2735
/** Returns a string representation of this object. */
2479
2736
public String toString() {
2481
2738
StringBuffer str = new StringBuffer();
2482
str.append("name=\""+name+'"');
2739
str.append("name=\"").append(name).append('"');
2483
2740
str.append(",ch=");
2484
2741
str.append(ch);
2485
str.append(",position="+position);
2486
str.append(",count="+count);
2742
str.append(",position=").append(position);
2743
str.append(",count=").append(count);
2744
str.append(",baseCharOffset=").append(baseCharOffset);
2745
str.append(",startPosition=").append(startPosition);
2487
2746
return str.toString();
2489
2748
} // toString():String
2491
public boolean isDeclaredEncoding() {
2492
return declaredEncoding;
2495
public void setDeclaredEncoding(boolean value) {
2496
declaredEncoding = value;
2499
2750
} // class ScannedEntity
2501
// This class wraps the byte inputstreams we're presented with.
2502
// We need it because java.io.InputStreams don't provide
2503
// functionality to reread processed bytes, and they have a habit
2504
// of reading more than one character when you call their read()
2505
// methods. This means that, once we discover the true (declared)
2506
// encoding of a document, we can neither backtrack to read the
2507
// whole doc again nor start reading where we are with a new
2510
// This class allows rewinding an inputStream by allowing a mark
2511
// to be set, and the stream reset to that position. <strong>The
2512
// class assumes that it needs to read one character per
2513
// invocation when its read() method is invoked, but uses the
2514
// underlying InputStream's read(byte[], offset, length) method--it
2515
// won't buffer data read this way!</strong>
2517
// @author Neil Graham, IBM
2518
// @author Glenn Marcy, IBM
2753
* Pool of byte buffers for the java.io.Readers.
2757
* @author Michael Glavassevich, IBM
2759
private static final class ByteBufferPool {
2761
private static final int DEFAULT_POOL_SIZE = 3;
2763
private int fPoolSize;
2764
private int fBufferSize;
2765
private byte[][] fByteBufferPool;
2768
public ByteBufferPool(int bufferSize) {
2769
this(DEFAULT_POOL_SIZE, bufferSize);
2772
public ByteBufferPool(int poolSize, int bufferSize) {
2773
fPoolSize = poolSize;
2774
fBufferSize = bufferSize;
2775
fByteBufferPool = new byte[fPoolSize][];
2779
/** Retrieves a byte buffer from the pool. **/
2780
public byte[] getBuffer() {
2781
return (fDepth > 0) ? fByteBufferPool[--fDepth] : new byte[fBufferSize];
2784
/** Returns byte buffer to pool. **/
2785
public void returnBuffer(byte[] buffer) {
2786
if (fDepth < fByteBufferPool.length) {
2787
fByteBufferPool[fDepth++] = buffer;
2791
/** Sets the size of the buffers and dumps the old pool. **/
2792
public void setBufferSize(int bufferSize) {
2793
fBufferSize = bufferSize;
2794
fByteBufferPool = new byte[fPoolSize][];
2800
* Buffer used in entity manager to reuse character arrays instead
2801
* of creating new ones every time.
2805
* @author Ankit Pasricha, IBM
2807
private static final class CharacterBuffer {
2809
/** character buffer */
2812
/** whether the buffer is for an external or internal scanned entity */
2813
private boolean isExternal;
2815
public CharacterBuffer(boolean isExternal, int size) {
2816
this.isExternal = isExternal;
2817
ch = new char[size];
2822
* Stores a number of character buffers and provides them to the entity
2823
* manager to use when an entity is seen.
2827
* @author Ankit Pasricha, IBM
2829
private static final class CharacterBufferPool {
2831
private static final int DEFAULT_POOL_SIZE = 3;
2833
private CharacterBuffer[] fInternalBufferPool;
2834
private CharacterBuffer[] fExternalBufferPool;
2836
private int fExternalBufferSize;
2837
private int fInternalBufferSize;
2838
private int fPoolSize;
2840
private int fInternalTop;
2841
private int fExternalTop;
2843
public CharacterBufferPool(int externalBufferSize, int internalBufferSize) {
2844
this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize);
2847
public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) {
2848
fExternalBufferSize = externalBufferSize;
2849
fInternalBufferSize = internalBufferSize;
2850
fPoolSize = poolSize;
2854
/** Initializes buffer pool. **/
2855
private void init() {
2856
fInternalBufferPool = new CharacterBuffer[fPoolSize];
2857
fExternalBufferPool = new CharacterBuffer[fPoolSize];
2862
/** Retrieves buffer from pool. **/
2863
public CharacterBuffer getBuffer(boolean external) {
2865
if (fExternalTop > -1) {
2866
return (CharacterBuffer)fExternalBufferPool[fExternalTop--];
2869
return new CharacterBuffer(true, fExternalBufferSize);
2873
if (fInternalTop > -1) {
2874
return (CharacterBuffer)fInternalBufferPool[fInternalTop--];
2877
return new CharacterBuffer(false, fInternalBufferSize);
2882
/** Returns buffer to pool. **/
2883
public void returnBuffer(CharacterBuffer buffer) {
2884
if (buffer.isExternal) {
2885
if (fExternalTop < fExternalBufferPool.length - 1) {
2886
fExternalBufferPool[++fExternalTop] = buffer;
2889
else if (fInternalTop < fInternalBufferPool.length - 1) {
2890
fInternalBufferPool[++fInternalTop] = buffer;
2894
/** Sets the size of external buffers and dumps the old pool. **/
2895
public void setExternalBufferSize(int bufferSize) {
2896
fExternalBufferSize = bufferSize;
2897
fExternalBufferPool = new CharacterBuffer[fPoolSize];
2903
* This class wraps the byte inputstreams we're presented with.
2904
* We need it because java.io.InputStreams don't provide
2905
* functionality to reread processed bytes, and they have a habit
2906
* of reading more than one character when you call their read()
2907
* methods. This means that, once we discover the true (declared)
2908
* encoding of a document, we can neither backtrack to read the
2909
* whole doc again nor start reading where we are with a new
2912
* This class allows rewinding an inputStream by allowing a mark
2913
* to be set, and the stream reset to that position. <strong>The
2914
* class assumes that it needs to read one character per
2915
* invocation when its read() method is invoked, but uses the
2916
* underlying InputStream's read(byte[], offset, length) method--it
2917
* won't buffer data read this way!</strong>
2921
* @author Neil Graham, IBM
2922
* @author Glenn Marcy, IBM
2520
2924
protected final class RewindableInputStream extends InputStream {
2522
2926
private InputStream fInputStream;