2
* The Apache Software License, Version 1.1
5
* Copyright (c) 1999-2004 The Apache Software Foundation.
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions
12
* 1. Redistributions of source code must retain the above copyright
13
* notice, this list of conditions and the following disclaimer.
15
* 2. Redistributions in binary form must reproduce the above copyright
16
* notice, this list of conditions and the following disclaimer in
17
* the documentation and/or other materials provided with the
20
* 3. The end-user documentation included with the redistribution,
21
* if any, must include the following acknowledgment:
22
* "This product includes software developed by the
23
* Apache Software Foundation (http://www.apache.org/)."
24
* Alternately, this acknowledgment may appear in the software itself,
25
* if and wherever such third-party acknowledgments normally appear.
27
* 4. The names "Xerces" and "Apache Software Foundation" must
28
* not be used to endorse or promote products derived from this
29
* software without prior written permission. For written
30
* permission, please contact apache@apache.org.
32
* 5. Products derived from this software may not be called "Apache",
33
* nor may "Apache" appear in their name, without prior written
34
* permission of the Apache Software Foundation.
36
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48
* ====================================================================
50
* This software consists of voluntary contributions made by many
51
* individuals on behalf of the Apache Software Foundation and was
52
* originally based on software copyright (c) 1999, International
53
* Business Machines, Inc., http://www.apache.org. For more
54
* information on the Apache Software Foundation, please see
55
* <http://www.apache.org/>.
58
package org.apache.xerces.impl;
60
import java.io.IOException;
62
import org.apache.xerces.impl.msg.XMLMessageFormatter;
63
import org.apache.xerces.util.SymbolTable;
64
import org.apache.xerces.util.XMLChar;
65
import org.apache.xerces.util.XMLResourceIdentifierImpl;
66
import org.apache.xerces.util.XMLStringBuffer;
67
import org.apache.xerces.xni.XMLAttributes;
68
import org.apache.xerces.xni.XMLResourceIdentifier;
69
import org.apache.xerces.xni.XMLString;
70
import org.apache.xerces.xni.XNIException;
71
import org.apache.xerces.xni.parser.XMLComponent;
72
import org.apache.xerces.xni.parser.XMLComponentManager;
73
import org.apache.xerces.xni.parser.XMLConfigurationException;
/**
 * This class is responsible for holding scanning methods common to
 * scanning the XML document structure and content as well as the DTD
 * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
 * from this base class.
 *
 * <p>
 * This component requires the following features and properties from the
 * component manager that uses it:
 * <ul>
 *  <li>http://xml.org/sax/features/validation</li>
 *  <li>http://xml.org/sax/features/namespaces</li>
 *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
 *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
 *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
 *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
 * </ul>
 *
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author Eric Ye, IBM
 *
 * @version $Id: XMLScanner.java,v 1.45 2004/02/11 18:55:20 mrglavas Exp $
 */
99
public abstract class XMLScanner
100
implements XMLComponent {
106
// feature identifiers
108
/** Feature identifier: validation. */
109
protected static final String VALIDATION =
110
Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;
112
/** Feature identifier: namespaces. */
113
protected static final String NAMESPACES =
114
Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;
116
/** Feature identifier: notify character references. */
117
protected static final String NOTIFY_CHAR_REFS =
118
Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;
120
protected static final String PARSER_SETTINGS =
121
Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
123
// property identifiers
125
/** Property identifier: symbol table. */
126
protected static final String SYMBOL_TABLE =
127
Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
129
/** Property identifier: error reporter. */
130
protected static final String ERROR_REPORTER =
131
Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
133
/** Property identifier: entity manager. */
134
protected static final String ENTITY_MANAGER =
135
Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;
139
/** Debug attribute normalization. */
140
protected static final boolean DEBUG_ATTR_NORMALIZATION = false;
150
* Validation. This feature identifier is:
151
* http://xml.org/sax/features/validation
153
protected boolean fValidation = false;
156
protected boolean fNamespaces;
158
/** Character references notification. */
159
protected boolean fNotifyCharRefs = false;
161
/** Internal parser-settings feature */
162
protected boolean fParserSettings = true;
167
protected SymbolTable fSymbolTable;
169
/** Error reporter. */
170
protected XMLErrorReporter fErrorReporter;
172
/** Entity manager. */
173
protected XMLEntityManager fEntityManager;
177
/** Entity scanner. */
178
protected XMLEntityScanner fEntityScanner;
181
protected int fEntityDepth;
183
/** Literal value of the last character refence scanned. */
184
protected String fCharRefLiteral = null;
186
/** Scanning attribute. */
187
protected boolean fScanningAttribute;
189
/** Report entity boundary. */
190
protected boolean fReportEntity;
194
/** Symbol: "version". */
195
protected final static String fVersionSymbol = "version".intern();
197
/** Symbol: "encoding". */
198
protected final static String fEncodingSymbol = "encoding".intern();
200
/** Symbol: "standalone". */
201
protected final static String fStandaloneSymbol = "standalone".intern();
203
/** Symbol: "amp". */
204
protected final static String fAmpSymbol = "amp".intern();
207
protected final static String fLtSymbol = "lt".intern();
210
protected final static String fGtSymbol = "gt".intern();
212
/** Symbol: "quot". */
213
protected final static String fQuotSymbol = "quot".intern();
215
/** Symbol: "apos". */
216
protected final static String fAposSymbol = "apos".intern();
218
// temporary variables
220
// NOTE: These objects are private to help prevent accidental modification
221
// of values by a subclass. If there were protected *and* the sub-
222
// modified the values, it would be difficult to track down the real
223
// cause of the bug. By making these private, we avoid this
227
private XMLString fString = new XMLString();
229
/** String buffer. */
230
private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
232
/** String buffer. */
233
private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
235
/** String buffer. */
236
private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
238
// temporary location for Resource identification information.
239
protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
242
// XMLComponent methods
248
* @param componentManager The component manager.
250
* @throws SAXException Throws exception if required features and
251
* properties cannot be found.
253
public void reset(XMLComponentManager componentManager)
254
throws XMLConfigurationException {
257
fParserSettings = componentManager.getFeature(PARSER_SETTINGS);
258
} catch (XMLConfigurationException e) {
259
fParserSettings = true;
262
if (!fParserSettings) {
263
// parser settings have not been changed
269
fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
270
fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
271
fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER);
275
fValidation = componentManager.getFeature(VALIDATION);
277
catch (XMLConfigurationException e) {
281
fNamespaces = componentManager.getFeature(NAMESPACES);
283
catch (XMLConfigurationException e) {
287
fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS);
289
catch (XMLConfigurationException e) {
290
fNotifyCharRefs = false;
295
} // reset(XMLComponentManager)
298
* Sets the value of a property during parsing.
303
public void setProperty(String propertyId, Object value)
304
throws XMLConfigurationException {
307
if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
308
final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
310
if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() &&
311
propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) {
312
fSymbolTable = (SymbolTable)value;
314
else if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() &&
315
propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) {
316
fErrorReporter = (XMLErrorReporter)value;
318
else if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() &&
319
propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
320
fEntityManager = (XMLEntityManager)value;
324
} // setProperty(String,Object)
327
* Sets the feature of the scanner.
329
public void setFeature(String featureId, boolean value)
330
throws XMLConfigurationException {
332
if (VALIDATION.equals(featureId)) {
334
} else if (NOTIFY_CHAR_REFS.equals(featureId)) {
335
fNotifyCharRefs = value;
340
* Gets the state of the feature of the scanner.
342
public boolean getFeature(String featureId)
343
throws XMLConfigurationException {
345
if (VALIDATION.equals(featureId)) {
347
} else if (NOTIFY_CHAR_REFS.equals(featureId)) {
348
return fNotifyCharRefs;
350
throw new XMLConfigurationException(XMLConfigurationException.NOT_RECOGNIZED, featureId);
357
// anybody calling this had better have set Symtoltable!
358
protected void reset() {
361
// DTD preparsing defaults:
363
fNotifyCharRefs = false;
367
// common scanning methods
370
* Scans an XML or text declaration.
373
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
374
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
375
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
376
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
377
* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
378
* | ('"' ('yes' | 'no') '"'))
380
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
383
* @param scanningTextDecl True if a text declaration is to
384
* be scanned instead of an XML
386
* @param pseudoAttributeValues An array of size 3 to return the version,
387
* encoding and standalone pseudo attribute values
390
* <strong>Note:</strong> This method uses fString, anything in it
391
* at the time of calling is lost.
393
protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
394
String[] pseudoAttributeValues)
395
throws IOException, XNIException {
397
// pseudo-attribute values
398
String version = null;
399
String encoding = null;
400
String standalone = null;
402
// scan pseudo-attributes
403
final int STATE_VERSION = 0;
404
final int STATE_ENCODING = 1;
405
final int STATE_STANDALONE = 2;
406
final int STATE_DONE = 3;
407
int state = STATE_VERSION;
409
boolean dataFoundForTarget = false;
410
boolean sawSpace = fEntityScanner.skipDeclSpaces();
411
// since pseudoattributes are *not* attributes,
412
// their quotes don't need to be preserved in external parameter entities.
413
// the XMLEntityScanner#scanLiteral method will continue to
414
// emit -1 in such cases when it finds a quote; this is
415
// fine for other methods that parse scanned entities,
416
// but not for the scanning of pseudoattributes. So,
417
// temporarily, we must mark the current entity as not being "literal"
418
XMLEntityManager.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
419
boolean currLiteral = currEnt.literal;
420
currEnt.literal = false;
421
while (fEntityScanner.peekChar() != '?') {
422
dataFoundForTarget = true;
423
String name = scanPseudoAttribute(scanningTextDecl, fString);
425
case STATE_VERSION: {
426
if (name == fVersionSymbol) {
428
reportFatalError(scanningTextDecl
429
? "SpaceRequiredBeforeVersionInTextDecl"
430
: "SpaceRequiredBeforeVersionInXMLDecl",
433
version = fString.toString();
434
state = STATE_ENCODING;
435
if (!versionSupported(version)) {
436
reportFatalError(getVersionNotSupportedKey(),
437
new Object[]{version});
440
else if (name == fEncodingSymbol) {
441
if (!scanningTextDecl) {
442
reportFatalError("VersionInfoRequired", null);
445
reportFatalError(scanningTextDecl
446
? "SpaceRequiredBeforeEncodingInTextDecl"
447
: "SpaceRequiredBeforeEncodingInXMLDecl",
450
encoding = fString.toString();
451
state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
454
if (scanningTextDecl) {
455
reportFatalError("EncodingDeclRequired", null);
458
reportFatalError("VersionInfoRequired", null);
463
case STATE_ENCODING: {
464
if (name == fEncodingSymbol) {
466
reportFatalError(scanningTextDecl
467
? "SpaceRequiredBeforeEncodingInTextDecl"
468
: "SpaceRequiredBeforeEncodingInXMLDecl",
471
encoding = fString.toString();
472
state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
473
// TODO: check encoding name; set encoding on
476
else if (!scanningTextDecl && name == fStandaloneSymbol) {
478
reportFatalError("SpaceRequiredBeforeStandalone",
481
standalone = fString.toString();
483
if (!standalone.equals("yes") && !standalone.equals("no")) {
484
reportFatalError("SDDeclInvalid", new Object[] {standalone});
488
reportFatalError("EncodingDeclRequired", null);
492
case STATE_STANDALONE: {
493
if (name == fStandaloneSymbol) {
495
reportFatalError("SpaceRequiredBeforeStandalone",
498
standalone = fString.toString();
500
if (!standalone.equals("yes") && !standalone.equals("no")) {
501
reportFatalError("SDDeclInvalid", new Object[] {standalone});
505
reportFatalError("EncodingDeclRequired", null);
510
reportFatalError("NoMorePseudoAttributes", null);
513
sawSpace = fEntityScanner.skipDeclSpaces();
515
// restore original literal value
517
currEnt.literal = true;
518
// REVISIT: should we remove this error reporting?
519
if (scanningTextDecl && state != STATE_DONE) {
520
reportFatalError("MorePseudoAttributes", null);
523
// If there is no data in the xml or text decl then we fail to report error
524
// for version or encoding info above.
525
if (scanningTextDecl) {
526
if (!dataFoundForTarget && encoding == null) {
527
reportFatalError("EncodingDeclRequired", null);
531
if (!dataFoundForTarget && version == null) {
532
reportFatalError("VersionInfoRequired", null);
537
if (!fEntityScanner.skipChar('?')) {
538
reportFatalError("XMLDeclUnterminated", null);
540
if (!fEntityScanner.skipChar('>')) {
541
reportFatalError("XMLDeclUnterminated", null);
545
// fill in return array
546
pseudoAttributeValues[0] = version;
547
pseudoAttributeValues[1] = encoding;
548
pseudoAttributeValues[2] = standalone;
550
} // scanXMLDeclOrTextDecl(boolean)
553
* Scans a pseudo attribute.
555
* @param scanningTextDecl True if scanning this pseudo-attribute for a
556
* TextDecl; false if scanning XMLDecl. This
557
* flag is needed to report the correct type of
559
* @param value The string to fill in with the attribute
562
* @return The name of the attribute
564
* <strong>Note:</strong> This method uses fStringBuffer2, anything in it
565
* at the time of calling is lost.
567
public String scanPseudoAttribute(boolean scanningTextDecl,
569
throws IOException, XNIException {
571
// REVISIT: This method is used for generic scanning of
572
// pseudo attributes, but since there are only three such
573
// attributes: version, encoding, and standalone there are
574
// for performant ways of scanning them. Every decl must
575
// have a version, and in TextDecls this version must
576
// be followed by an encoding declaration. Also the
577
// methods we invoke on the scanners allow non-ASCII
578
// characters to be parsed in the decls, but since
579
// we don't even know what the actual encoding of the
580
// document is until we scan the encoding declaration
581
// you cannot reliably read any characters outside
582
// of the ASCII range here. -- mrglavas
583
String name = fEntityScanner.scanName();
584
XMLEntityManager.print(fEntityManager.getCurrentEntity());
586
reportFatalError("PseudoAttrNameExpected", null);
588
fEntityScanner.skipDeclSpaces();
589
if (!fEntityScanner.skipChar('=')) {
590
reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
591
: "EqRequiredInXMLDecl", new Object[]{name});
593
fEntityScanner.skipDeclSpaces();
594
int quote = fEntityScanner.peekChar();
595
if (quote != '\'' && quote != '"') {
596
reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
597
: "QuoteRequiredInXMLDecl" , new Object[]{name});
599
fEntityScanner.scanChar();
600
int c = fEntityScanner.scanLiteral(quote, value);
602
fStringBuffer2.clear();
604
fStringBuffer2.append(value);
606
if (c == '&' || c == '%' || c == '<' || c == ']') {
607
fStringBuffer2.append((char)fEntityScanner.scanChar());
609
// REVISIT: Even if you could reliably read non-ASCII chars
610
// why bother scanning for surrogates here? Only ASCII chars
611
// match the productions in XMLDecls and TextDecls. -- mrglavas
612
else if (XMLChar.isHighSurrogate(c)) {
613
scanSurrogates(fStringBuffer2);
615
else if (isInvalidLiteral(c)) {
616
String key = scanningTextDecl
617
? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
618
reportFatalError(key,
619
new Object[] {Integer.toString(c, 16)});
620
fEntityScanner.scanChar();
623
c = fEntityScanner.scanLiteral(quote, value);
624
} while (c != quote);
625
fStringBuffer2.append(value);
626
value.setValues(fStringBuffer2);
628
if (!fEntityScanner.skipChar(quote)) {
629
reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
630
: "CloseQuoteMissingInXMLDecl",
637
} // scanPseudoAttribute(XMLString):String
640
* Scans a processing instruction.
643
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
644
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
646
* <strong>Note:</strong> This method uses fString, anything in it
647
* at the time of calling is lost.
649
protected void scanPI() throws IOException, XNIException {
652
fReportEntity = false;
653
String target = null;
655
target = fEntityScanner.scanNCName();
657
target = fEntityScanner.scanName();
659
if (target == null) {
660
reportFatalError("PITargetRequired", null);
664
scanPIData(target, fString);
665
fReportEntity = true;
670
* Scans a processing data. This is needed to handle the situation
671
* where a document starts with a processing instruction whose
672
* target name <em>starts with</em> "xml". (e.g. xmlfoo)
674
* <strong>Note:</strong> This method uses fStringBuffer, anything in it
675
* at the time of calling is lost.
677
* @param target The PI target
678
* @param data The string to fill in with the data
680
protected void scanPIData(String target, XMLString data)
681
throws IOException, XNIException {
684
if (target.length() == 3) {
685
char c0 = Character.toLowerCase(target.charAt(0));
686
char c1 = Character.toLowerCase(target.charAt(1));
687
char c2 = Character.toLowerCase(target.charAt(2));
688
if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
689
reportFatalError("ReservedPITarget", null);
694
if (!fEntityScanner.skipSpaces()) {
695
if (fEntityScanner.skipString("?>")) {
696
// we found the end, there is no data
701
if(fNamespaces && fEntityScanner.peekChar() == ':') {
702
fEntityScanner.scanChar();
703
XMLStringBuffer colonName = new XMLStringBuffer(target);
704
colonName.append(":");
705
String str = fEntityScanner.scanName();
707
colonName.append(str);
708
reportFatalError("ColonNotLegalWithNS", new Object[] {colonName.toString()});
709
fEntityScanner.skipSpaces();
711
// if there is data there should be some space
712
reportFatalError("SpaceRequiredInPI", null);
717
fStringBuffer.clear();
719
if (fEntityScanner.scanData("?>", fStringBuffer)) {
721
int c = fEntityScanner.peekChar();
723
if (XMLChar.isHighSurrogate(c)) {
724
scanSurrogates(fStringBuffer);
726
else if (isInvalidLiteral(c)) {
727
reportFatalError("InvalidCharInPI",
728
new Object[]{Integer.toHexString(c)});
729
fEntityScanner.scanChar();
732
} while (fEntityScanner.scanData("?>", fStringBuffer));
734
data.setValues(fStringBuffer);
736
} // scanPIData(String,XMLString)
742
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
745
* <strong>Note:</strong> Called after scanning past '<!--'
746
* <strong>Note:</strong> This method uses fString, anything in it
747
* at the time of calling is lost.
749
* @param text The buffer to fill in with the text.
751
protected void scanComment(XMLStringBuffer text)
752
throws IOException, XNIException {
755
// REVISIT: handle invalid character, eof
757
while (fEntityScanner.scanData("--", text)) {
758
int c = fEntityScanner.peekChar();
760
if (XMLChar.isHighSurrogate(c)) {
761
scanSurrogates(text);
763
else if (isInvalidLiteral(c)) {
764
reportFatalError("InvalidCharInComment",
765
new Object[] { Integer.toHexString(c) });
766
fEntityScanner.scanChar();
770
if (!fEntityScanner.skipChar('>')) {
771
reportFatalError("DashDashInComment", null);
777
* Scans an attribute value and normalizes whitespace converting all
778
* whitespace characters to space characters.
780
* [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
782
* @param value The XMLString to fill in with the value.
783
* @param nonNormalizedValue The XMLString to fill in with the
784
* non-normalized value.
785
* @param atName The name of the attribute being parsed (for error msgs).
786
* @param checkEntities true if undeclared entities should be reported as VC violation,
787
* false if undeclared entities should be reported as WFC violation.
788
* @param eleName The name of element to which this attribute belongs.
790
* <strong>Note:</strong> This method uses fStringBuffer2, anything in it
791
* at the time of calling is lost.
793
protected void scanAttributeValue(XMLString value,
794
XMLString nonNormalizedValue,
796
boolean checkEntities,String eleName)
797
throws IOException, XNIException
800
int quote = fEntityScanner.peekChar();
801
if (quote != '\'' && quote != '"') {
802
reportFatalError("OpenQuoteExpected", new Object[]{eleName,atName});
805
fEntityScanner.scanChar();
806
int entityDepth = fEntityDepth;
808
int c = fEntityScanner.scanLiteral(quote, value);
809
if (DEBUG_ATTR_NORMALIZATION) {
810
System.out.println("** scanLiteral -> \""
811
+ value.toString() + "\"");
813
fStringBuffer2.clear();
814
fStringBuffer2.append(value);
815
normalizeWhitespace(value);
816
if (DEBUG_ATTR_NORMALIZATION) {
817
System.out.println("** normalizeWhitespace -> \""
818
+ value.toString() + "\"");
821
fScanningAttribute = true;
822
fStringBuffer.clear();
824
fStringBuffer.append(value);
825
if (DEBUG_ATTR_NORMALIZATION) {
826
System.out.println("** value2: \""
827
+ fStringBuffer.toString() + "\"");
830
fEntityScanner.skipChar('&');
831
if (entityDepth == fEntityDepth) {
832
fStringBuffer2.append('&');
834
if (fEntityScanner.skipChar('#')) {
835
if (entityDepth == fEntityDepth) {
836
fStringBuffer2.append('#');
838
int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
840
if (DEBUG_ATTR_NORMALIZATION) {
841
System.out.println("** value3: \""
842
+ fStringBuffer.toString()
848
String entityName = fEntityScanner.scanName();
849
if (entityName == null) {
850
reportFatalError("NameRequiredInReference", null);
852
else if (entityDepth == fEntityDepth) {
853
fStringBuffer2.append(entityName);
855
if (!fEntityScanner.skipChar(';')) {
856
reportFatalError("SemicolonRequiredInReference",
857
new Object []{entityName});
859
else if (entityDepth == fEntityDepth) {
860
fStringBuffer2.append(';');
862
if (entityName == fAmpSymbol) {
863
fStringBuffer.append('&');
864
if (DEBUG_ATTR_NORMALIZATION) {
865
System.out.println("** value5: \""
866
+ fStringBuffer.toString()
870
else if (entityName == fAposSymbol) {
871
fStringBuffer.append('\'');
872
if (DEBUG_ATTR_NORMALIZATION) {
873
System.out.println("** value7: \""
874
+ fStringBuffer.toString()
878
else if (entityName == fLtSymbol) {
879
fStringBuffer.append('<');
880
if (DEBUG_ATTR_NORMALIZATION) {
881
System.out.println("** value9: \""
882
+ fStringBuffer.toString()
886
else if (entityName == fGtSymbol) {
887
fStringBuffer.append('>');
888
if (DEBUG_ATTR_NORMALIZATION) {
889
System.out.println("** valueB: \""
890
+ fStringBuffer.toString()
894
else if (entityName == fQuotSymbol) {
895
fStringBuffer.append('"');
896
if (DEBUG_ATTR_NORMALIZATION) {
897
System.out.println("** valueD: \""
898
+ fStringBuffer.toString()
903
if (fEntityManager.isExternalEntity(entityName)) {
904
reportFatalError("ReferenceToExternalEntity",
905
new Object[] { entityName });
908
if (!fEntityManager.isDeclaredEntity(entityName)) {
909
//WFC & VC: Entity Declared
912
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
914
new Object[]{entityName},
915
XMLErrorReporter.SEVERITY_ERROR);
919
reportFatalError("EntityNotDeclared",
920
new Object[]{entityName});
923
fEntityManager.startEntity(entityName, true);
929
reportFatalError("LessthanInAttValue",
930
new Object[] { eleName, atName });
931
fEntityScanner.scanChar();
932
if (entityDepth == fEntityDepth) {
933
fStringBuffer2.append((char)c);
936
else if (c == '%' || c == ']') {
937
fEntityScanner.scanChar();
938
fStringBuffer.append((char)c);
939
if (entityDepth == fEntityDepth) {
940
fStringBuffer2.append((char)c);
942
if (DEBUG_ATTR_NORMALIZATION) {
943
System.out.println("** valueF: \""
944
+ fStringBuffer.toString() + "\"");
947
else if (c == '\n' || c == '\r') {
948
fEntityScanner.scanChar();
949
fStringBuffer.append(' ');
950
if (entityDepth == fEntityDepth) {
951
fStringBuffer2.append('\n');
954
else if (c != -1 && XMLChar.isHighSurrogate(c)) {
955
fStringBuffer3.clear();
956
if (scanSurrogates(fStringBuffer3)) {
957
fStringBuffer.append(fStringBuffer3);
958
if (entityDepth == fEntityDepth) {
959
fStringBuffer2.append(fStringBuffer3);
961
if (DEBUG_ATTR_NORMALIZATION) {
962
System.out.println("** valueI: \""
963
+ fStringBuffer.toString()
968
else if (c != -1 && isInvalidLiteral(c)) {
969
reportFatalError("InvalidCharInAttValue",
970
new Object[] {eleName, atName, Integer.toString(c, 16)});
971
fEntityScanner.scanChar();
972
if (entityDepth == fEntityDepth) {
973
fStringBuffer2.append((char)c);
976
c = fEntityScanner.scanLiteral(quote, value);
977
if (entityDepth == fEntityDepth) {
978
fStringBuffer2.append(value);
980
normalizeWhitespace(value);
981
} while (c != quote || entityDepth != fEntityDepth);
982
fStringBuffer.append(value);
983
if (DEBUG_ATTR_NORMALIZATION) {
984
System.out.println("** valueN: \""
985
+ fStringBuffer.toString() + "\"");
987
value.setValues(fStringBuffer);
988
fScanningAttribute = false;
990
nonNormalizedValue.setValues(fStringBuffer2);
993
int cquote = fEntityScanner.scanChar();
994
if (cquote != quote) {
995
reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName});
997
} // scanAttributeValue()
1001
* Scans External ID and return the public and system IDs.
1003
* @param identifiers An array of size 2 to return the system id,
1004
* and public id (in that order).
1005
* @param optionalSystemId Specifies whether the system id is optional.
1007
* <strong>Note:</strong> This method uses fString and fStringBuffer,
1008
* anything in them at the time of calling is lost.
1010
protected void scanExternalID(String[] identifiers,
1011
boolean optionalSystemId)
1012
throws IOException, XNIException {
1014
String systemId = null;
1015
String publicId = null;
1016
if (fEntityScanner.skipString("PUBLIC")) {
1017
if (!fEntityScanner.skipSpaces()) {
1018
reportFatalError("SpaceRequiredAfterPUBLIC", null);
1020
scanPubidLiteral(fString);
1021
publicId = fString.toString();
1023
if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
1024
reportFatalError("SpaceRequiredBetweenPublicAndSystem", null);
1028
if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
1029
if (publicId == null && !fEntityScanner.skipSpaces()) {
1030
reportFatalError("SpaceRequiredAfterSYSTEM", null);
1032
int quote = fEntityScanner.peekChar();
1033
if (quote != '\'' && quote != '"') {
1034
if (publicId != null && optionalSystemId) {
1035
// looks like we don't have any system id
1036
// simply return the public id
1037
identifiers[0] = null;
1038
identifiers[1] = publicId;
1041
reportFatalError("QuoteRequiredInSystemID", null);
1043
fEntityScanner.scanChar();
1044
XMLString ident = fString;
1045
if (fEntityScanner.scanLiteral(quote, ident) != quote) {
1046
fStringBuffer.clear();
1048
fStringBuffer.append(ident);
1049
int c = fEntityScanner.peekChar();
1050
if (XMLChar.isMarkup(c) || c == ']') {
1051
fStringBuffer.append((char)fEntityScanner.scanChar());
1053
} while (fEntityScanner.scanLiteral(quote, ident) != quote);
1054
fStringBuffer.append(ident);
1055
ident = fStringBuffer;
1057
systemId = ident.toString();
1058
if (!fEntityScanner.skipChar(quote)) {
1059
reportFatalError("SystemIDUnterminated", null);
1063
// store result in array
1064
identifiers[0] = systemId;
1065
identifiers[1] = publicId;
1070
* Scans public ID literal.
1072
* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1073
* [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
1075
* The returned string is normalized according to the following rule,
1076
* from http://www.w3.org/TR/REC-xml#dt-pubid:
1078
* Before a match is attempted, all strings of white space in the public
1079
* identifier must be normalized to single space characters (#x20), and
1080
* leading and trailing white space must be removed.
1082
* @param literal The string to fill in with the public ID literal.
1083
* @return True on success.
1085
* <strong>Note:</strong> This method uses fStringBuffer, anything in it at
1086
* the time of calling is lost.
1088
protected boolean scanPubidLiteral(XMLString literal)
1089
throws IOException, XNIException
1091
int quote = fEntityScanner.scanChar();
1092
if (quote != '\'' && quote != '"') {
1093
reportFatalError("QuoteRequiredInPublicID", null);
1097
fStringBuffer.clear();
1098
// skip leading whitespace
1099
boolean skipSpace = true;
1100
boolean dataok = true;
1102
int c = fEntityScanner.scanChar();
1103
if (c == ' ' || c == '\n' || c == '\r') {
1105
// take the first whitespace as a space and skip the others
1106
fStringBuffer.append(' ');
1110
else if (c == quote) {
1112
// if we finished on a space let's trim it
1113
fStringBuffer.length--;
1115
literal.setValues(fStringBuffer);
1118
else if (XMLChar.isPubid(c)) {
1119
fStringBuffer.append((char)c);
1123
reportFatalError("PublicIDUnterminated", null);
1128
reportFatalError("InvalidCharInPublicID",
1129
new Object[]{Integer.toHexString(c)});
1137
* Normalize whitespace in an XMLString converting all whitespace
1138
* characters to space characters.
1140
protected void normalizeWhitespace(XMLString value) {
1141
int end = value.offset + value.length;
1142
for (int i = value.offset; i < end; i++) {
1143
int c = value.ch[i];
1144
// Performance: For XML 1.0 documents take advantage of
1145
// the fact that the only legal characters below 0x20
1146
// are 0x09 (TAB), 0x0A (LF) and 0x0D (CR). Since we've
1147
// already determined the well-formedness of these
1148
// characters it is sufficient (and safe) to check
1149
// against 0x20. -- mrglavas
//
// XMLEntityHandler methods
//
1161
* This method notifies of the start of an entity. The document entity
1162
* has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1163
* parameter entity names start with '%'; and general entities are just
1164
* specified by their name.
1166
* @param name The name of the entity.
1167
* @param identifier The resource identifier.
1168
* @param encoding The auto-detected IANA encoding name of the entity
1169
* stream. This value will be null in those situations
1170
* where the entity encoding is not auto-detected (e.g.
1171
* internal entities or a document entity that is
1172
* parsed from a java.io.Reader).
1174
* @throws XNIException Thrown by handler to signal an error.
1176
public void startEntity(String name,
1177
XMLResourceIdentifier identifier,
1178
String encoding) throws XNIException {
1180
// keep track of the entity depth
1182
// must reset entity scanner
1183
fEntityScanner = fEntityManager.getEntityScanner();
1185
} // startEntity(String,XMLResourceIdentifier,String)
1188
* This method notifies the end of an entity. The document entity has
1189
* the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1190
* parameter entity names start with '%'; and general entities are just
1191
* specified by their name.
1193
* @param name The name of the entity.
1195
* @throws XNIException Thrown by handler to signal an error.
1197
public void endEntity(String name) throws XNIException {
1199
// keep track of the entity depth
1202
} // endEntity(String)
1205
* Scans a character reference and append the corresponding chars to the
1210
* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1213
* <strong>Note:</strong> This method uses fStringBuffer, anything in it
1214
* at the time of calling is lost.
1216
* @param buf the character buffer to append chars to
1217
* @param buf2 the character buffer to append non-normalized chars to
1219
* @return the character value or (-1) on conversion failure
1221
protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)
1222
throws IOException, XNIException {
1224
// scan hexadecimal value
1225
boolean hex = false;
1226
if (fEntityScanner.skipChar('x')) {
1227
if (buf2 != null) { buf2.append('x'); }
1229
fStringBuffer3.clear();
1230
boolean digit = true;
1232
int c = fEntityScanner.peekChar();
1233
digit = (c >= '0' && c <= '9') ||
1234
(c >= 'a' && c <= 'f') ||
1235
(c >= 'A' && c <= 'F');
1237
if (buf2 != null) { buf2.append((char)c); }
1238
fEntityScanner.scanChar();
1239
fStringBuffer3.append((char)c);
1242
c = fEntityScanner.peekChar();
1243
digit = (c >= '0' && c <= '9') ||
1244
(c >= 'a' && c <= 'f') ||
1245
(c >= 'A' && c <= 'F');
1247
if (buf2 != null) { buf2.append((char)c); }
1248
fEntityScanner.scanChar();
1249
fStringBuffer3.append((char)c);
1254
reportFatalError("HexdigitRequiredInCharRef", null);
1258
// scan decimal value
1260
fStringBuffer3.clear();
1261
boolean digit = true;
1263
int c = fEntityScanner.peekChar();
1264
digit = c >= '0' && c <= '9';
1266
if (buf2 != null) { buf2.append((char)c); }
1267
fEntityScanner.scanChar();
1268
fStringBuffer3.append((char)c);
1271
c = fEntityScanner.peekChar();
1272
digit = c >= '0' && c <= '9';
1274
if (buf2 != null) { buf2.append((char)c); }
1275
fEntityScanner.scanChar();
1276
fStringBuffer3.append((char)c);
1281
reportFatalError("DigitRequiredInCharRef", null);
1286
if (!fEntityScanner.skipChar(';')) {
1287
reportFatalError("SemicolonRequiredInCharRef", null);
1289
if (buf2 != null) { buf2.append(';'); }
1291
// convert string to number
1294
value = Integer.parseInt(fStringBuffer3.toString(),
1297
// character reference must be a valid XML character
1298
if (isInvalid(value)) {
1299
StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
1300
if (hex) errorBuf.append('x');
1301
errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1302
reportFatalError("InvalidCharRef",
1303
new Object[]{errorBuf.toString()});
1306
catch (NumberFormatException e) {
1307
// Conversion failed, let -1 value drop through.
1308
// If we end up here, the character reference was invalid.
1309
StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
1310
if (hex) errorBuf.append('x');
1311
errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1312
reportFatalError("InvalidCharRef",
1313
new Object[]{errorBuf.toString()});
1316
// append corresponding chars to the given buffer
1317
if (!XMLChar.isSupplemental(value)) {
1318
buf.append((char) value);
1321
// character is supplemental, split it into surrogate chars
1322
buf.append(XMLChar.highSurrogate(value));
1323
buf.append(XMLChar.lowSurrogate(value));
1326
// char refs notification code
1327
if (fNotifyCharRefs && value != -1) {
1328
String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString();
1329
if (!fScanningAttribute) {
1330
fCharRefLiteral = literal;
1337
// returns true if the given character is not
1338
// valid with respect to the version of
1339
// XML understood by this scanner.
1340
protected boolean isInvalid(int value) {
1341
return (XMLChar.isInvalid(value));
1342
} // isInvalid(int): boolean
1344
// returns true if the given character is not
1345
// valid or may not be used outside a character reference
1346
// with respect to the version of XML understood by this scanner.
1347
protected boolean isInvalidLiteral(int value) {
1348
return (XMLChar.isInvalid(value));
1349
} // isInvalidLiteral(int): boolean
1351
// returns true if the given character is
1352
// a valid nameChar with respect to the version of
1353
// XML understood by this scanner.
1354
protected boolean isValidNameChar(int value) {
1355
return (XMLChar.isName(value));
1356
} // isValidNameChar(int): boolean
1358
// returns true if the given character is
1359
// a valid nameStartChar with respect to the version of
1360
// XML understood by this scanner.
1361
protected boolean isValidNameStartChar(int value) {
1362
return (XMLChar.isNameStart(value));
1363
} // isValidNameStartChar(int): boolean
1365
// returns true if the given character is
1366
// a valid NCName character with respect to the version of
1367
// XML understood by this scanner.
1368
protected boolean isValidNCName(int value) {
1369
return (XMLChar.isNCName(value));
1370
} // isValidNCName(int): boolean
1372
// returns true if the given character is
1373
// a valid high surrogate for a nameStartChar
1374
// with respect to the version of XML understood
1376
protected boolean isValidNameStartHighSurrogate(int value) {
1378
} // isValidNameStartHighSurrogate(int): boolean
1380
protected boolean versionSupported(String version ) {
1381
return version.equals("1.0");
1382
} // version Supported
1384
// returns the error message key for unsupported
1385
// versions of XML with respect to the version of
1386
// XML understood by this scanner.
1387
protected String getVersionNotSupportedKey () {
1388
return "VersionNotSupported";
1389
} // getVersionNotSupportedKey: String
1392
* Scans surrogates and append them to the specified buffer.
1394
* <strong>Note:</strong> This assumes the current char has already been
1395
* identified as a high surrogate.
1397
* @param buf The StringBuffer to append the read surrogates to.
1398
* @return True if it succeeded.
1400
protected boolean scanSurrogates(XMLStringBuffer buf)
1401
throws IOException, XNIException {
1403
int high = fEntityScanner.scanChar();
1404
int low = fEntityScanner.peekChar();
1405
if (!XMLChar.isLowSurrogate(low)) {
1406
reportFatalError("InvalidCharInContent",
1407
new Object[] {Integer.toString(high, 16)});
1410
fEntityScanner.scanChar();
1412
// convert surrogates to supplemental character
1413
int c = XMLChar.supplemental((char)high, (char)low);
1415
// supplemental character must be a valid XML character
1417
reportFatalError("InvalidCharInContent",
1418
new Object[]{Integer.toString(c, 16)});
1422
// fill in the buffer
1423
buf.append((char)high);
1424
buf.append((char)low);
1428
} // scanSurrogates():boolean
1432
* Convenience function used in all XML scanners.
1434
protected void reportFatalError(String msgId, Object[] args)
1435
throws XNIException {
1436
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1438
XMLErrorReporter.SEVERITY_FATAL_ERROR);
1442
private void init() {
1443
fEntityScanner = null;
1446
fReportEntity = true;
1447
fResourceIdentifier.clear();
1450
} // class XMLScanner