/* Copyright 2002-2005 Elliotte Rusty Harold

   This library is free software; you can redistribute it and/or modify
   it under the terms of version 2.1 of the GNU Lesser General Public
   License as published by the Free Software Foundation.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place, Suite 330,
   Boston, MA 02111-1307 USA

   You can contact Elliotte Rusty Harold by sending e-mail to
   elharo@metalab.unc.edu. Please include the word "XOM" in the
   subject line. The XOM home page is located at http://www.xom.nu/
*/
package nu.xom.converters;

import nu.xom.Attribute;
import nu.xom.Comment;
import nu.xom.DocType;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.Node;
import nu.xom.Nodes;
import nu.xom.ParentNode;
import nu.xom.ProcessingInstruction;
import nu.xom.Text;

import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.LocatorImpl;

43
* Feeds a XOM <code>Document</code> into a
44
* SAX2 <code>ContentHandler</code>.
47
* @author Elliotte Rusty Harold
50
public class SAXConverter {
53
private ContentHandler contentHandler;
54
private LexicalHandler lexicalHandler;
55
private LocatorImpl locator;
56
private boolean stripBaseAttributes = true;
61
* Creates a new <code>SAXConverter</code>.
64
* @param handler the SAX2 content handler
65
* that receives the data
67
* @throws NullPointerException if handler is null
70
public SAXConverter(ContentHandler handler) {
71
setContentHandler(handler);
77
* Set the content handler for this converter.
80
* @param handler SAX2 content handler that
83
* @throws NullPointerException if handler is null
86
public void setContentHandler(ContentHandler handler) {
88
if (handler == null) {
89
throw new NullPointerException(
90
"ContentHandler must be non-null."
93
// unbelievably skanky hack to allow xml:base attributes
94
// to be passed to XSL transforms without mucking with the
95
// public API. This would be so much easier if Java had friend
97
else if ("nu.xom.xslt.XSLTHandler".equals(handler.getClass().getName())) {
98
this.stripBaseAttributes = false;
101
this.contentHandler = handler;
109
* Returns the content handler.
112
* @return SAX2 content handler that receives the data
114
public ContentHandler getContentHandler() {
115
return this.contentHandler;
121
* Sets the optional lexical handler for this converter.
122
* The only lexical events the converter supplies
126
* @param handler the lexical handler;
127
* may be null to turn off lexical events
129
public void setLexicalHandler(LexicalHandler handler) {
130
this.lexicalHandler = handler;
136
* Returns the <code>LexicalHandler</code> for this
137
* converter. This is only used for comments.
140
* @return SAX2 lexical handler that receives
143
public LexicalHandler getLexicalHandler() {
144
return this.lexicalHandler;
148
// Not necessary to worry about parser exceptions passed to
149
// fatalError() because we're starting with a known good document.
150
// Only exceptions that can arise are thrown by
151
// the supplied ContentHandler, and we don't want to pass those
152
// to the ErrorHandler, or call endDocument() if such an exception
156
* Feed a document through this converter.
159
* @param doc the document to pass to SAX
161
* @throws SAXException if the content handler
162
* or lexical handler throws an exception
164
public void convert(Document doc) throws SAXException {
166
locator = new LocatorImpl();
167
locator.setSystemId(doc.getBaseURI());
168
contentHandler.setDocumentLocator(locator);
169
contentHandler.startDocument();
170
for (int i = 0; i < doc.getChildCount(); i++) {
171
process(doc.getChild(i));
173
contentHandler.endDocument();
178
private void process(Node node) throws SAXException {
180
if (node instanceof Element) {
181
convertElement((Element) node);
183
else if (node instanceof Text) {
184
String data = node.getValue();
185
contentHandler.characters(
186
data.toCharArray(), 0, data.length());
188
else if (node instanceof ProcessingInstruction) {
189
ProcessingInstruction instruction
190
= (ProcessingInstruction) node;
192
contentHandler.processingInstruction(
193
instruction.getTarget(), instruction.getValue());
195
else if (node instanceof Comment && lexicalHandler != null) {
196
String data = node.getValue();
197
lexicalHandler.comment(
198
data.toCharArray(), 0, data.length());
200
else if (node instanceof DocType && lexicalHandler != null) {
201
DocType type = (DocType) node;
202
lexicalHandler.startDTD(type.getRootElementName(),
203
type.getPublicID(), type.getSystemID());
204
lexicalHandler.endDTD();
206
// all other types are ignored
212
* @param element the context in which the prefix is mapped
213
* @param prefix the prefix to pass to statPrefixMapping
214
* @return true if and only if startPrefixMapping was called
215
* @throws SAXException if the ContentHandler throws an exception
217
private boolean convertNamespace(Element element, String prefix)
218
throws SAXException {
220
String uri = element.getNamespaceURI(prefix);
221
ParentNode parentNode = element.getParent();
222
Element parent = null;
223
if (parentNode instanceof Element) {
224
parent = (Element) parentNode;
227
if (parent != null && uri.equals(parent.getNamespaceURI(prefix))) {
230
else if (parent == null && "".equals(uri)) {
231
// Do not fire startPrefixMapping event for no namespace
235
contentHandler.startPrefixMapping(prefix, uri);
236
return true; // i.e. converted
241
private void convertElement(Element element) throws SAXException {
243
locator.setSystemId(element.getBaseURI());
245
// start prefix mapping
246
int namespaceCount = element.getNamespaceDeclarationCount();
247
String[] prefixes = new String[namespaceCount];
249
for (int i = 0; i < namespaceCount; i++) {
250
String prefix = element.getNamespacePrefix(i);
251
boolean converted = convertNamespace(element, prefix);
253
prefixes[prefixCount] = prefix;
258
// prepare attributes
259
AttributesImpl saxAttributes = new AttributesImpl();
260
int attributeCount = element.getAttributeCount();
261
for (int i = 0; i < attributeCount; i++) {
262
Attribute attribute = element.getAttribute(i);
263
// The base URIs provided by the locator have already
264
// accounted for any xml:base attributes. We do not
265
// also pass in xml:base attributes or some relative base
266
// URIs could be applied twice.
267
if ("base".equals(attribute.getLocalName())
268
&& "http://www.w3.org/XML/1998/namespace".equals(attribute.getNamespaceURI())
269
&& stripBaseAttributes) {
272
saxAttributes.addAttribute(attribute.getNamespaceURI(),
273
attribute.getLocalName(),
274
attribute.getQualifiedName(),
275
getSAXType(attribute),
276
attribute.getValue());
279
contentHandler.startElement(
280
element.getNamespaceURI(),
281
element.getLocalName(),
282
element.getQualifiedName(),
284
int childCount = element.getChildCount();
285
for (int i = 0; i < childCount; i++) {
286
process(element.getChild(i));
288
contentHandler.endElement(element.getNamespaceURI(),
289
element.getLocalName(), element.getQualifiedName());
291
// end prefix mappings
292
for (int i = 0; i < prefixCount; i++) {
293
contentHandler.endPrefixMapping(prefixes[i]);
299
private static String getSAXType(Attribute attribute) {
301
Attribute.Type type = attribute.getType();
302
if (type.equals(Attribute.Type.UNDECLARED)) return "CDATA";
303
if (type.equals(Attribute.Type.CDATA)) return "CDATA";
304
if (type.equals(Attribute.Type.ID)) return "ID";
305
if (type.equals(Attribute.Type.IDREF)) return "IDREF";
306
if (type.equals(Attribute.Type.IDREFS)) return "IDREFS";
307
if (type.equals(Attribute.Type.NMTOKEN)) return "NMTOKEN";
308
if (type.equals(Attribute.Type.NMTOKENS)) return "NMTOKENS";
309
if (type.equals(Attribute.Type.ENTITY)) return "ENTITY";
310
if (type.equals(Attribute.Type.ENTITIES)) return "ENTITIES";
311
if (type.equals(Attribute.Type.NOTATION)) return "NOTATION";
312
return "NMTOKEN"; // ENUMERATED
319
* Converts a <code>Nodes</code> list into SAX by firing events
320
* into the registered handlers. This method calls
321
* <code>startDocument</code> before processing the list
322
* of nodes, and calls <code>endDocument</code> after processing
326
* @param nodes the nodes to pass to SAX
328
* @throws SAXException if the content handler
329
* or lexical handler throws an exception
331
public void convert(Nodes nodes) throws SAXException {
333
if (nodes.size() == 1 && nodes.get(0) instanceof Document) {
334
convert((Document) nodes.get(0));
337
locator = new LocatorImpl();
338
contentHandler.setDocumentLocator(locator);
339
contentHandler.startDocument();
340
for (int i = 0; i < nodes.size(); i++) {
341
process(nodes.get(i));
343
contentHandler.endDocument();