1
/* Copyright 2002-2005 Elliotte Rusty Harold
3
This library is free software; you can redistribute it and/or modify
4
it under the terms of version 2.1 of the GNU Lesser General Public
5
License as published by the Free Software Foundation.
7
This library is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU Lesser General Public License for more details.
12
You should have received a copy of the GNU Lesser General Public
13
License along with this library; if not, write to the
14
Free Software Foundation, Inc., 59 Temple Place, Suite 330,
15
Boston, MA 02111-1307 USA
17
You can contact Elliotte Rusty Harold by sending e-mail to
18
elharo@metalab.unc.edu. Please include the word "XOM" in the
19
subject line. The XOM home page is located at http://www.xom.nu/
25
import java.util.ArrayList;
27
import org.xml.sax.ContentHandler;
28
import org.xml.sax.DTDHandler;
29
import org.xml.sax.Locator;
30
import org.xml.sax.ext.DeclHandler;
31
import org.xml.sax.ext.LexicalHandler;
34
* @author Elliotte Rusty Harold
39
implements ContentHandler, LexicalHandler, DeclHandler, DTDHandler {
41
protected Document document;
42
protected String documentBaseURI;
44
// parent is never null. It is the node we're adding children
45
// to. current corresponds to the most recent startElement()
46
// method and may be null if we've skipped it (makeElement
47
// returned null.) If we didn't skip it, then parent and
48
// current should be the same node.
49
protected ParentNode parent;
50
protected ParentNode current;
51
protected ArrayList parents;
52
protected boolean inProlog;
53
protected boolean inDTD;
54
protected int position; // current number of items in prolog
55
protected Locator locator;
56
protected DocType doctype;
57
protected StringBuffer internalDTDSubset;
58
protected NodeFactory factory;
59
boolean usingCrimson = false;
62
XOMHandler(NodeFactory factory) {
63
this.factory = factory;
67
public void setDocumentLocator(Locator locator) {
68
this.locator = locator;
72
Document getDocument() {
77
// See http://www.servlets.com/archive/servlet/ReadMsg?msgId=554071&listName=jdom-interest
78
// This method is called to avoid leaking document-sized amounts of memory
79
// when a Builder is not immediately reused
87
internalDTDSubset = null;
91
public void startDocument() {
94
document = factory.startMakingDocument();
97
parents = new ArrayList();
98
parents.add(document);
103
if (locator != null) {
104
documentBaseURI = locator.getSystemId();
105
// According to the XML spec,
106
// "It is an error for a fragment identifier
107
// (beginning with a # character) to be part of a system identifier"
108
// but some parsers including Xerces seem to get this wrong, so we'll
109
document.setBaseURI(documentBaseURI);
116
public void endDocument() {
117
factory.finishMakingDocument(document);
118
parents.remove(parents.size()-1);
122
public void startElement(String namespaceURI, String localName,
123
String qualifiedName, org.xml.sax.Attributes attributes) {
127
if (parent != document) {
128
element = factory.startMakingElement(qualifiedName, namespaceURI);
131
element = factory.makeRootElement(qualifiedName, namespaceURI);
132
if (element == null) { // null root; that's a no-no
133
throw new NullPointerException(
134
"Factory failed to create root element."
137
document.setRootElement(element);
142
// Need to push this, even if it's null
143
parents.add(element);
145
if (element != null) { // wasn't filtered out
146
if (parent != document) {
147
// a.k.a. parent not instanceof Document
148
parent.appendChild(element);
150
// This is optimized for the very common case where
151
// everything in the document has the same actual base URI.
152
// It may add redundant base URIs in cases like XInclude
153
// where different parts of the document have different
155
if (locator != null) {
156
String baseURI = locator.getSystemId();
157
if (baseURI != null && !baseURI.equals(documentBaseURI)) {
158
element.setActualBaseURI(baseURI);
162
// Attach the attributes; this must be done before the
163
// namespaces are attached.
164
// XXX pull out length
166
// XXX we've got a pretty good guess at how many attributes there
167
// will be here; we should ensureCapacity up to that length
168
for (int i = 0; i < attributes.getLength(); i++) {
169
String qName = attributes.getQName(i);
170
if (qName.startsWith("xmlns:") || qName.equals("xmlns")) {
174
String namespace = attributes.getURI(i);
175
String value = attributes.getValue(i);
176
Nodes nodes = factory.makeAttribute(
180
convertStringToType(attributes.getType(i))
182
int numberChildren = 0;
183
for (int j=0; j < nodes.size(); j++) {
184
Node node = nodes.get(j);
185
if (node.isAttribute()) {
186
factory.addAttribute(element, (Attribute) node);
189
factory.insertChild(element, node, numberChildren++);
195
// Attach the namespaces
196
for (int i = 0; i < attributes.getLength(); i++) {
197
String qName = attributes.getQName(i);
198
if (qName.startsWith("xmlns:")) {
199
String namespaceName = attributes.getValue(i);
200
String namespacePrefix = qName.substring(6);
202
= element.getNamespaceURI(namespacePrefix);
203
if (!namespaceName.equals(currentValue) && ! namespacePrefix.equals(element.getNamespacePrefix())) {
204
element.addNamespaceDeclaration(
205
namespacePrefix, namespaceName);
208
else if (qName.equals("xmlns")) {
209
String namespaceName = attributes.getValue(i);
210
String namespacePrefix = "";
212
= element.getNamespaceURI(namespacePrefix);
213
if (!namespaceName.equals(currentValue) && ! "".equals(element.getNamespacePrefix())) {
214
element.addNamespaceDeclaration(namespacePrefix,
220
// this is the new parent
227
public void endElement(
228
String namespaceURI, String localName, String qualifiedName) {
230
// If we're immediately inside a skipped element
231
// we need to reset current to null, not to the parent
232
current = (ParentNode) parents.remove(parents.size()-1);
235
if (current != null) {
236
parent = current.getParent();
237
Nodes result = factory.finishMakingElement((Element) current);
239
// Optimization for default case where result only contains current
240
if (result.size() != 1 || result.get(0) != current) {
241
if (!parent.isDocument()) {
242
// allow factories to detach the element itself in
243
// finishMakingElement
244
int childCount = parent.getChildCount();
246
parent.removeChild(childCount - 1);
248
catch (IndexOutOfBoundsException ex) {
249
throw new XMLException(
250
"Factory detached element in finishMakingElement()",
253
for (int i=0; i < result.size(); i++) {
254
Node node = result.get(i);
255
if (node.isAttribute()) {
256
((Element) parent).addAttribute((Attribute) node);
259
parent.appendChild(node);
263
else { // root element
264
Document doc = (Document) parent;
265
Element currentRoot = doc.getRootElement();
266
boolean beforeRoot = true;
267
for (int i=0; i < result.size(); i++) {
268
Node node = result.get(i);
269
if (node.isElement()) {
270
if (node != currentRoot) {
272
// already set root, oops
273
throw new IllegalAddException("Factory returned multiple roots");
275
doc.setRootElement((Element) node);
279
else if (beforeRoot) {
280
doc.insertChild(node, doc.indexOf(doc.getRootElement()));
283
doc.appendChild(node);
287
// somebody tried to replace the root element with
288
// no element at all. That's a no-no
289
throw new WellformednessException(
290
"Factory attempted to remove the root element");
299
static Attribute.Type convertStringToType(String saxType) {
301
if (saxType.equals("CDATA")) return Attribute.Type.CDATA;
302
if (saxType.equals("ID")) return Attribute.Type.ID;
303
if (saxType.equals("IDREF")) return Attribute.Type.IDREF;
304
if (saxType.equals("IDREFS")) return Attribute.Type.IDREFS;
305
if (saxType.equals("NMTOKEN")) return Attribute.Type.NMTOKEN;
306
if (saxType.equals("NMTOKENS")) return Attribute.Type.NMTOKENS;
307
if (saxType.equals("ENTITY")) return Attribute.Type.ENTITY;
308
if (saxType.equals("ENTITIES")) return Attribute.Type.ENTITIES;
309
if (saxType.equals("NOTATION")) return Attribute.Type.NOTATION;
311
// non-standard but some parsers use this
312
if (saxType.equals("ENUMERATION")) {
313
return Attribute.Type.ENUMERATION;
315
if (saxType.startsWith("(")) return Attribute.Type.ENUMERATION;
317
return Attribute.Type.UNDECLARED;
322
protected String textString = null;
323
protected StringBuffer buffer = null;
325
public void characters(char[] text, int start, int length) {
327
if (length <= 0) return;
328
if (textString == null) textString = new String(text, start, length);
330
if (buffer == null) buffer = new StringBuffer(textString);
331
buffer.append(text, start, length);
333
if (finishedCDATA) inCDATA = false;
338
// accumulate all text that's in the buffer into a text node
339
protected void flushText() {
341
if (buffer != null) {
342
textString = buffer.toString();
346
if (textString != null) {
349
result = factory.makeText(textString);
352
result = factory.makeCDATASection(textString);
354
for (int i=0; i < result.size(); i++) {
355
Node node = result.get(i);
356
if (node.isAttribute()) {
357
((Element) parent).addAttribute((Attribute) node);
360
parent.appendChild(node);
366
finishedCDATA = false;
371
public void ignorableWhitespace(
372
char[] text, int start, int length) {
373
characters(text, start, length);
377
public void processingInstruction(String target, String data) {
379
if (!inDTD) flushText();
380
if (inDTD && !inInternalSubset()) return;
381
Nodes result = factory.makeProcessingInstruction(target, data);
383
for (int i = 0; i < result.size(); i++) {
384
Node node = result.get(i);
387
parent.insertChild(node, position);
391
if (node.isAttribute()) {
392
((Element) parent).addAttribute((Attribute) node);
394
else parent.appendChild(node);
398
if (node.isProcessingInstruction() || node.isComment()) {
399
internalDTDSubset.append(" ");
400
internalDTDSubset.append(node.toXML());
401
internalDTDSubset.append("\n");
404
throw new XMLException("Factory tried to put a "
405
+ node.getClass().getName()
406
+ " in the internal DTD subset");
414
// XOM handles this with attribute values; not prefix mappings
415
public void startPrefixMapping(String prefix, String uri) {}
416
public void endPrefixMapping(String prefix) {}
418
public void skippedEntity(String name) {
420
// Xerces 2.7 now calls this method in the DTD
421
// for parameter entities it doesn't resolve. We can ignore these.
422
if (name.startsWith("%")) return;
424
throw new XMLException("Could not resolve entity " + name);
429
// LexicalHandler events
430
public void startDTD(String rootName, String publicID,
434
Nodes result = factory.makeDocType(rootName, publicID, systemID);
435
for (int i = 0; i < result.size(); i++) {
436
Node node = result.get(i);
437
document.insertChild(node, position);
439
if (node.isDocType()) {
440
DocType doctype = (DocType) node;
441
internalDTDSubset = new StringBuffer();
442
this.doctype = doctype;
449
public void endDTD() {
452
if (doctype != null) {
453
doctype.fastSetInternalDTDSubset(internalDTDSubset.toString());
459
protected boolean inExternalSubset = false;
461
// We have a problem here. Xerces gets this right,
462
// but Crimson and possibly other parsers don't properly
463
// report these entities, or perhaps just don't tag them
464
// with [dtd] like they're supposed to.
465
public void startEntity(String name) {
466
if (name.equals("[dtd]")) inExternalSubset = true;
470
public void endEntity(String name) {
471
if (name.equals("[dtd]")) inExternalSubset = false;
475
protected boolean inCDATA = false;
476
protected boolean finishedCDATA = false;
478
public void startCDATA() {
479
if (textString == null) inCDATA = true;
480
finishedCDATA = false;
484
public void endCDATA() {
485
finishedCDATA = true;
489
public void comment(char[] text, int start, int length) {
491
if (!inDTD) flushText();
492
if (inDTD && !inInternalSubset()) return;
494
Nodes result = factory.makeComment(new String(text, start, length));
496
for (int i = 0; i < result.size(); i++) {
497
Node node = result.get(i);
500
parent.insertChild(node, position);
504
if (node instanceof Attribute) {
505
((Element) parent).addAttribute((Attribute) node);
507
else parent.appendChild(node);
511
if (node.isComment() || node.isProcessingInstruction()) {
512
internalDTDSubset.append(" ");
513
internalDTDSubset.append(node.toXML());
514
internalDTDSubset.append("\n");
517
throw new XMLException("Factory tried to put a "
518
+ node.getClass().getName()
519
+ " in the internal DTD subset");
527
public void elementDecl(String name, String model) {
529
if (inInternalSubset() && doctype != null) {
530
internalDTDSubset.append(" <!ELEMENT ");
531
internalDTDSubset.append(name);
532
internalDTDSubset.append(' ');
533
internalDTDSubset.append(model);
534
// workaround for Crimson bug
535
if (model.indexOf("#PCDATA") > 0 && model.indexOf('|') > 0) {
536
if (model.endsWith(")")) {
537
internalDTDSubset.append('*');
540
internalDTDSubset.append(">\n");
546
// This method only behaves properly when called from the DeclHandler
547
// and DTDHandler callbacks, i.e. from inside the DTD.
548
// It is not intended for use anywhere else in the document.
549
protected boolean inInternalSubset() {
551
if (!usingCrimson && !inExternalSubset) return true;
552
String currentURI = locator.getSystemId();
553
if (currentURI == this.documentBaseURI) return true;
554
if (currentURI.equals(this.documentBaseURI)) return true;
560
public void attributeDecl(String elementName,
561
String attributeName, String type, String mode,
562
String defaultValue) {
564
// workaround for Crimson bug
565
if (type.startsWith("NOTATION ")) {
566
if (type.indexOf('(') == -1 && ! type.endsWith(")")) {
567
type = "NOTATION (" + type.substring("NOTATION ".length()) + ")";
571
if (inInternalSubset() && doctype != null) {
572
internalDTDSubset.append(" <!ATTLIST ");
573
internalDTDSubset.append(elementName);
574
internalDTDSubset.append(' ');
575
internalDTDSubset.append(attributeName);
576
internalDTDSubset.append(' ');
577
internalDTDSubset.append(type);
579
internalDTDSubset.append(' ');
580
internalDTDSubset.append(mode);
582
if (defaultValue != null) {
583
internalDTDSubset.append(' ');
584
internalDTDSubset.append('"');
585
internalDTDSubset.append(escapeReservedCharactersInDefaultAttributeValues(defaultValue));
586
internalDTDSubset.append("\"");
588
internalDTDSubset.append(">\n");
594
public void internalEntityDecl(String name,
597
if (inInternalSubset() && doctype != null) {
598
internalDTDSubset.append(" <!ENTITY ");
599
if (name.startsWith("%")) {
600
internalDTDSubset.append("% ");
601
internalDTDSubset.append(name.substring(1));
604
internalDTDSubset.append(name);
606
internalDTDSubset.append(" \"");
607
internalDTDSubset.append(escapeReservedCharactersInDeclarations(value));
608
internalDTDSubset.append("\">\n");
614
public void externalEntityDecl(String name,
615
String publicID, String systemID) {
617
if (inInternalSubset() && doctype != null) {
618
internalDTDSubset.append(" <!ENTITY ");
619
if (name.startsWith("%")) {
620
internalDTDSubset.append("% ");
621
internalDTDSubset.append(name.substring(1));
624
internalDTDSubset.append(name);
627
if (locator != null && URIUtil.isAbsolute(systemID)) {
628
String documentURL = locator.getSystemId();
629
// work around Crimson style file:/root URLs
630
if (documentURL != null) {
631
if (documentURL.startsWith("file:/") && !documentURL.startsWith("file:///")) {
632
documentURL = "file://" + documentURL.substring(5);
634
if (systemID.startsWith("file:/") && !systemID.startsWith("file:///")) {
635
systemID = "file://" + systemID.substring(5);
637
systemID = URIUtil.relativize(documentURL, systemID);
641
if (publicID != null) {
642
internalDTDSubset.append(" PUBLIC \"");
643
internalDTDSubset.append(publicID);
644
internalDTDSubset.append("\" \"");
645
internalDTDSubset.append(systemID);
648
// TODO: does the system ID need to be escaped here?
649
internalDTDSubset.append(" SYSTEM \"");
650
internalDTDSubset.append(systemID);
652
internalDTDSubset.append("\">\n");
659
public void notationDecl(String name, String publicID,
662
if (systemID != null) {
663
systemID = escapeReservedCharactersInDeclarations(systemID);
666
if (inInternalSubset() && doctype != null) {
667
internalDTDSubset.append(" <!NOTATION ");
668
internalDTDSubset.append(name);
669
if (publicID != null) {
670
internalDTDSubset.append(" PUBLIC \"");
671
internalDTDSubset.append(publicID);
672
internalDTDSubset.append('"');
673
if (systemID != null) {
674
internalDTDSubset.append(" \"");
675
internalDTDSubset.append(systemID);
676
internalDTDSubset.append('"');
680
internalDTDSubset.append(" SYSTEM \"");
681
internalDTDSubset.append(systemID);
682
internalDTDSubset.append('"');
684
internalDTDSubset.append(">\n");
690
public void unparsedEntityDecl(String name, String publicID,
691
String systemID, String notationName) {
693
// TODO: do these values contain characters that need escaping?
694
if (inInternalSubset() && doctype != null) {
695
internalDTDSubset.append(" <!ENTITY ");
696
if (publicID != null) {
697
internalDTDSubset.append(name);
698
internalDTDSubset.append(" PUBLIC \"");
699
internalDTDSubset.append(publicID);
700
internalDTDSubset.append("\" \"");
701
internalDTDSubset.append(systemID);
702
internalDTDSubset.append("\" NDATA ");
703
internalDTDSubset.append(notationName);
706
internalDTDSubset.append(name);
707
internalDTDSubset.append(" SYSTEM \"");
708
internalDTDSubset.append(systemID);
709
internalDTDSubset.append("\" NDATA ");
710
internalDTDSubset.append(notationName);
712
internalDTDSubset.append(">\n");
718
private static String escapeReservedCharactersInDeclarations(String s) {
720
int length = s.length();
721
StringBuffer result = new StringBuffer(length);
722
for (int i = 0; i < length; i++) {
723
char c = s.charAt(i);
726
result.append("
");
729
// placeholder for table lookup
732
// placeholder for table lookup
735
// placeholder for table lookup
738
// placeholder for table lookup
741
// placeholder for table lookup
744
// placeholder for table lookup
747
// placeholder for table lookup
750
// placeholder for table lookup
753
// placeholder for table lookup
756
// placeholder for table lookup
759
// placeholder for table lookup
762
// placeholder for table lookup
765
// placeholder for table lookup
768
// placeholder for table lookup
771
// placeholder for table lookup
774
// placeholder for table lookup
777
// placeholder for table lookup
780
// placeholder for table lookup
789
result.append(""");
798
result.append("%");
801
result.append("&");
808
return result.toString();
813
private static String escapeReservedCharactersInDefaultAttributeValues(String s) {
815
int length = s.length();
816
StringBuffer result = new StringBuffer(length);
817
for (int i = 0; i < length; i++) {
818
char c = s.charAt(i);
821
result.append("
");
824
// placeholder for table lookup
827
// placeholder for table lookup
830
// placeholder for table lookup
833
// placeholder for table lookup
836
// placeholder for table lookup
839
// placeholder for table lookup
842
// placeholder for table lookup
845
// placeholder for table lookup
848
// placeholder for table lookup
851
// placeholder for table lookup
854
// placeholder for table lookup
857
// placeholder for table lookup
860
// placeholder for table lookup
863
// placeholder for table lookup
866
// placeholder for table lookup
869
// placeholder for table lookup
872
// placeholder for table lookup
875
// placeholder for table lookup
884
result.append(""");
893
result.append("%");
896
result.append("&");
962
result.append("<");
969
return result.toString();
b'\\ No newline at end of file'