/*
 * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
 *
 * This software is open source.
 * See the bottom of this file for the licence.
 */
10
import java.io.InputStream;
11
import java.io.Reader;
12
import java.util.Iterator;
14
import javax.xml.namespace.QName;
15
import javax.xml.stream.XMLEventReader;
16
import javax.xml.stream.XMLInputFactory;
17
import javax.xml.stream.XMLStreamConstants;
18
import javax.xml.stream.XMLStreamException;
19
import javax.xml.stream.events.Attribute;
20
import javax.xml.stream.events.Characters;
21
import javax.xml.stream.events.Comment;
22
import javax.xml.stream.events.EndElement;
23
import javax.xml.stream.events.EntityReference;
24
import javax.xml.stream.events.Namespace;
25
import javax.xml.stream.events.ProcessingInstruction;
26
import javax.xml.stream.events.StartDocument;
27
import javax.xml.stream.events.StartElement;
28
import javax.xml.stream.events.XMLEvent;
30
import org.dom4j.CharacterData;
31
import org.dom4j.Document;
32
import org.dom4j.DocumentFactory;
33
import org.dom4j.Element;
34
import org.dom4j.Entity;
35
import org.dom4j.Node;
38
* Reads a DOM4J {@link Document}, as well as other {@link Node}s, from a StAX
39
* {@link XMLEventReader}.
41
* @author Christian Niles
43
public class STAXEventReader {
44
/** Reference to the DocumentFactory used to build DOM4J nodes. */
45
private DocumentFactory factory;
47
/** A StAX input factory, used to construct streams from IO streams. */
48
private XMLInputFactory inputFactory = XMLInputFactory.newInstance();
51
* Constructs a default <code>STAXEventReader</code> instance with a
52
* default {@link DocumentFactory}.
54
public STAXEventReader() {
55
this.factory = DocumentFactory.getInstance();
59
* Constructs a <code>STAXEventReader</code> instance that uses the
60
* specified {@link DocumentFactory}to construct DOM4J {@link Node}s.
63
* The DocumentFactory to use when constructing DOM4J nodes, or
64
* <code>null</code> if a default should be used.
66
public STAXEventReader(DocumentFactory factory) {
67
if (factory != null) {
68
this.factory = factory;
70
this.factory = DocumentFactory.getInstance();
75
* Sets the DocumentFactory to be used when constructing DOM4J nodes.
77
* @param documentFactory
78
* The DocumentFactory to use when constructing DOM4J nodes, or
79
* <code>null</code> if a default should be used.
81
public void setDocumentFactory(DocumentFactory documentFactory) {
82
if (documentFactory != null) {
83
this.factory = documentFactory;
85
this.factory = DocumentFactory.getInstance();
90
* Constructs a StAX event stream from the provided I/O stream and reads a
91
* DOM4J document from it.
94
* The I/O stream from which the Document will be read.
96
* @return The Document that was read from the stream.
98
* @throws XMLStreamException
99
* If an error occurs reading content from the stream.
101
public Document readDocument(InputStream is) throws XMLStreamException {
102
return readDocument(is, null);
106
* Constructs a StAX event stream from the provided I/O character stream and
107
* reads a DOM4J document from it.
110
* The character stream from which the Document will be read.
112
* @return The Document that was read from the stream.
114
* @throws XMLStreamException
115
* If an error occurs reading content from the stream.
117
public Document readDocument(Reader reader) throws XMLStreamException {
118
return readDocument(reader, null);
122
* Constructs a StAX event stream from the provided I/O stream and reads a
123
* DOM4J document from it.
126
* The I/O stream from which the Document will be read.
128
* A system id used to resolve entities.
130
* @return The Document that was read from the stream.
132
* @throws XMLStreamException
133
* If an error occurs reading content from the stream.
135
public Document readDocument(InputStream is, String systemId)
136
throws XMLStreamException {
137
XMLEventReader eventReader = inputFactory.createXMLEventReader(
141
return readDocument(eventReader);
148
* Constructs a StAX event stream from the provided I/O character stream and
149
* reads a DOM4J document from it.
152
* The character stream from which the Document will be read.
154
* A system id used to resolve entities.
156
* @return The Document that was read from the stream.
158
* @throws XMLStreamException
159
* If an error occurs reading content from the stream.
161
public Document readDocument(Reader reader, String systemId)
162
throws XMLStreamException {
163
XMLEventReader eventReader = inputFactory.createXMLEventReader(
167
return readDocument(eventReader);
174
* Reads a {@link Node}from the event stream. If the next event is a
175
* {@link StartElement}, all events until the closing {@link EndElement}
176
* will be read, and the resulting nodes will be added to the returned
180
* <strong>Pre-Conditions </strong>: The stream must be positioned before an
181
* event other than an <code>EndElement</code>,<code>EndDocument</code>,
182
* or any DTD-related events, which are not currently supported.
186
* The reader from which events will be read.
188
* @return A DOM4J {@link Node}constructed from the read events.
190
* @throws XMLStreamException
191
* If an error occurs reading from the stream, or the stream was
192
* positioned before an unsupported event.
194
public Node readNode(XMLEventReader reader) throws XMLStreamException {
195
XMLEvent event = reader.peek();
197
if (event.isStartElement()) {
198
return readElement(reader);
199
} else if (event.isCharacters()) {
200
return readCharacters(reader);
201
} else if (event.isStartDocument()) {
202
return readDocument(reader);
203
} else if (event.isProcessingInstruction()) {
204
return readProcessingInstruction(reader);
205
} else if (event.isEntityReference()) {
206
return readEntityReference(reader);
207
} else if (event.isAttribute()) {
208
return readAttribute(reader);
209
} else if (event.isNamespace()) {
210
return readNamespace(reader);
212
throw new XMLStreamException("Unsupported event: " + event);
217
* Reads a DOM4J {@link Document}from the provided stream. The stream
218
* should be positioned at the start of a document, or before a {@link
219
* StartElement} event.
222
* The event stream from which to read the {@link Document}.
224
* @return The {@link Document}that was read from the stream.
226
* @throws XMLStreamException
227
* If an error occurs reading events from the stream.
229
public Document readDocument(XMLEventReader reader)
230
throws XMLStreamException {
233
while (reader.hasNext()) {
234
XMLEvent nextEvent = reader.peek();
235
int type = nextEvent.getEventType();
238
case XMLStreamConstants.START_DOCUMENT:
240
StartDocument event = (StartDocument) reader.nextEvent();
244
if (event.encodingSet()) {
245
String encodingScheme = event
246
.getCharacterEncodingScheme();
247
doc = factory.createDocument(encodingScheme);
249
doc = factory.createDocument();
252
// duplicate or misplaced xml declaration
253
String msg = "Unexpected StartDocument event";
254
throw new XMLStreamException(msg, event.getLocation());
259
case XMLStreamConstants.END_DOCUMENT:
260
case XMLStreamConstants.SPACE:
261
case XMLStreamConstants.CHARACTERS:
263
// skip end document and space outside the root element
272
doc = factory.createDocument();
275
Node n = readNode(reader);
284
* Reads a DOM4J Element from the provided event stream. The stream must be
285
* positioned before an {@link StartElement}event. In addition to the
286
* initial start event, all events up to and including the closing {@link
287
* EndElement} will be read, and included with the returned element.
290
* The event stream from which to read the Element.
292
* @return The Element that was read from the stream.
294
* @throws XMLStreamException
295
* If an error occured reading events from the stream, or the
296
* stream was not positioned before a {@linkStartElement}event.
298
public Element readElement(XMLEventReader eventReader)
299
throws XMLStreamException {
300
XMLEvent event = eventReader.peek();
302
if (event.isStartElement()) {
303
// advance the reader and get the StartElement event
304
StartElement startTag = eventReader.nextEvent().asStartElement();
305
Element elem = createElement(startTag);
307
// read element content
309
if (!eventReader.hasNext()) {
310
String msg = "Unexpected end of stream while reading"
311
+ " element content";
312
throw new XMLStreamException(msg);
315
XMLEvent nextEvent = eventReader.peek();
317
if (nextEvent.isEndElement()) {
318
EndElement endElem = eventReader.nextEvent().asEndElement();
320
if (!endElem.getName().equals(startTag.getName())) {
321
throw new XMLStreamException("Expected "
322
+ startTag.getName() + " end-tag, but found"
323
+ endElem.getName());
329
Node child = readNode(eventReader);
335
throw new XMLStreamException("Expected Element event, found: "
341
* Constructs a DOM4J Attribute from the provided event stream. The stream
342
* must be positioned before an {@link Attribute}event.
345
* The event stream from which to read the Attribute.
347
* @return The Attribute that was read from the stream.
349
* @throws XMLStreamException
350
* If an error occured reading events from the stream, or the
351
* stream was not positioned before an {@linkAttribute}event.
353
public org.dom4j.Attribute readAttribute(XMLEventReader reader)
354
throws XMLStreamException {
355
XMLEvent event = reader.peek();
357
if (event.isAttribute()) {
358
Attribute attr = (Attribute) reader.nextEvent();
360
return createAttribute(null, attr);
362
throw new XMLStreamException("Expected Attribute event, found: "
368
* Constructs a DOM4J Namespace from the provided event stream. The stream
369
* must be positioned before a {@link Namespace}event.
372
* The event stream from which to read the Namespace.
374
* @return The Namespace that was read from the stream.
376
* @throws XMLStreamException
377
* If an error occured reading events from the stream, or the
378
* stream was not positioned before a {@linkNamespace}event.
380
public org.dom4j.Namespace readNamespace(XMLEventReader reader)
381
throws XMLStreamException {
382
XMLEvent event = reader.peek();
384
if (event.isNamespace()) {
385
Namespace ns = (Namespace) reader.nextEvent();
387
return createNamespace(ns);
389
throw new XMLStreamException("Expected Namespace event, found: "
395
* Constructs a DOM4J Text or CDATA section from the provided event stream.
396
* The stream must be positioned before a {@link Characters}event.
399
* The event stream from which to read the Text or CDATA.
401
* @return The Text or CDATA that was read from the stream.
403
* @throws XMLStreamException
404
* If an error occured reading events from the stream, or the
405
* stream was not positioned before a {@linkCharacters}event.
407
public CharacterData readCharacters(XMLEventReader reader)
408
throws XMLStreamException {
409
XMLEvent event = reader.peek();
411
if (event.isCharacters()) {
412
Characters characters = reader.nextEvent().asCharacters();
414
return createCharacterData(characters);
416
throw new XMLStreamException("Expected Characters event, found: "
422
* Constructs a DOM4J Comment from the provided event stream. The stream
423
* must be positioned before a {@link Comment}event.
426
* The event stream from which to read the Comment.
428
* @return The Comment that was read from the stream.
430
* @throws XMLStreamException
431
* If an error occured reading events from the stream, or the
432
* stream was not positioned before a {@linkComment}event.
434
public org.dom4j.Comment readComment(XMLEventReader reader)
435
throws XMLStreamException {
436
XMLEvent event = reader.peek();
438
if (event instanceof Comment) {
439
return createComment((Comment) reader.nextEvent());
441
throw new XMLStreamException("Expected Comment event, found: "
447
* Constructs a DOM4J Entity from the provided event stream. The stream must
448
* be positioned before an {@link EntityReference}event.
451
* The event stream from which to read the {@link
454
* @return The {@link org.dom4j.Entity}that was read from the stream.
456
* @throws XMLStreamException
457
* If an error occured reading events from the stream, or the
458
* stream was not positioned before an {@linkEntityReference}
461
public Entity readEntityReference(XMLEventReader reader)
462
throws XMLStreamException {
463
XMLEvent event = reader.peek();
465
if (event.isEntityReference()) {
466
EntityReference entityRef = (EntityReference) reader.nextEvent();
468
return createEntity(entityRef);
470
throw new XMLStreamException("Expected EntityRef event, found: "
476
* Constructs a DOM4J ProcessingInstruction from the provided event stream.
477
* The stream must be positioned before a {@link ProcessingInstruction}
481
* The event stream from which to read the ProcessingInstruction.
483
* @return The ProcessingInstruction that was read from the stream.
485
* @throws XMLStreamException
486
* If an error occured reading events from the stream, or the
487
* stream was not positioned before a {@link
488
* ProcessingInstruction} event.
490
public org.dom4j.ProcessingInstruction readProcessingInstruction(
491
XMLEventReader reader) throws XMLStreamException {
492
XMLEvent event = reader.peek();
494
if (event.isProcessingInstruction()) {
495
ProcessingInstruction pi = (ProcessingInstruction) reader
498
return createProcessingInstruction(pi);
500
throw new XMLStreamException("Expected PI event, found: " + event);
505
* Constructs a new DOM4J Element from the provided StartElement event. All
506
* attributes and namespaces will be added to the returned element.
509
* The StartElement event from which to construct the new DOM4J
512
* @return The Element constructed from the provided StartElement event.
514
public Element createElement(StartElement startEvent) {
515
QName qname = startEvent.getName();
516
org.dom4j.QName elemName = createQName(qname);
518
Element elem = factory.createElement(elemName);
521
for (Iterator i = startEvent.getAttributes(); i.hasNext();) {
522
Attribute attr = (Attribute) i.next();
523
elem.addAttribute(createQName(attr.getName()), attr.getValue());
527
for (Iterator i = startEvent.getNamespaces(); i.hasNext();) {
528
Namespace ns = (Namespace) i.next();
529
elem.addNamespace(ns.getPrefix(), ns.getNamespaceURI());
536
* Constructs a new DOM4J Attribute from the provided StAX Attribute event.
541
* The Attribute event from which to construct the new DOM4J
544
* @return The Attribute constructed from the provided Attribute event.
546
public org.dom4j.Attribute createAttribute(Element elem, Attribute attr) {
547
return factory.createAttribute(elem, createQName(attr.getName()), attr
552
* Constructs a new DOM4J Namespace from the provided StAX Namespace event.
555
* The Namespace event from which to construct the new DOM4J
558
* @return The Namespace constructed from the provided Namespace event.
560
public org.dom4j.Namespace createNamespace(Namespace ns) {
561
return factory.createNamespace(ns.getPrefix(), ns.getNamespaceURI());
565
* Constructs a new DOM4J Text or CDATA object from the provided Characters
569
* The Characters event from which to construct the new DOM4J
570
* Text or CDATA object.
572
* @return The Text or CDATA object constructed from the provided Characters
575
public CharacterData createCharacterData(Characters characters) {
576
String data = characters.getData();
578
if (characters.isCData()) {
579
return factory.createCDATA(data);
581
return factory.createText(data);
586
* Constructs a new DOM4J Comment from the provided StAX Comment event.
589
* The Comment event from which to construct the new DOM4J
592
* @return The Comment constructed from the provided Comment event.
594
public org.dom4j.Comment createComment(Comment comment) {
595
return factory.createComment(comment.getText());
599
* Constructs a new DOM4J Entity from the provided StAX EntityReference
603
* The EntityReference event from which to construct the new
606
* @return The Entity constructed from the provided EntityReference event.
608
public org.dom4j.Entity createEntity(EntityReference entityRef) {
609
return factory.createEntity(entityRef.getName(), entityRef
610
.getDeclaration().getReplacementText());
614
* Constructs a new DOM4J ProcessingInstruction from the provided StAX
615
* ProcessingInstruction event.
618
* The ProcessingInstruction event from which to construct the
619
* new DOM4J ProcessingInstruction.
621
* @return The ProcessingInstruction constructed from the provided
622
* ProcessingInstruction event.
624
public org.dom4j.ProcessingInstruction createProcessingInstruction(
625
ProcessingInstruction pi) {
627
.createProcessingInstruction(pi.getTarget(), pi.getData());
631
* Constructs a new DOM4J QName from the provided JAXP QName.
634
* The JAXP QName from which to create a DOM4J QName.
636
* @return The newly constructed DOM4J QName.
638
public org.dom4j.QName createQName(QName qname) {
639
return factory.createQName(qname.getLocalPart(), qname.getPrefix(),
640
qname.getNamespaceURI());
/*
 * Redistribution and use of this software and associated documentation
 * ("Software"), with or without modification, are permitted provided that the
 * following conditions are met:
 *
 * 1. Redistributions of source code must retain copyright statements and
 * notices. Redistributions must also contain a copy of this document.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. The name "DOM4J" must not be used to endorse or promote products derived
 * from this Software without prior written permission of MetaStuff, Ltd. For
 * written permission, please contact dom4j-info@metastuff.com.
 *
 * 4. Products derived from this Software may not be called "DOM4J" nor may
 * "DOM4J" appear in their names without prior written permission of MetaStuff,
 * Ltd. DOM4J is a registered trademark of MetaStuff, Ltd.
 *
 * 5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
 *
 * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
 */