2
* Licensed to the Apache Software Foundation (ASF) under one or more
3
* contributor license agreements. See the NOTICE file distributed with
4
* this work for additional information regarding copyright ownership.
5
* The ASF licenses this file to You under the Apache License, Version 2.0
6
* (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
9
* http://www.apache.org/licenses/LICENSE-2.0
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
19
// Fixed comments to preserve whitespaces and add a line break
20
// when indenting. Reported by Gervase Markham <gerv@gerv.net>
22
// Fixed serializer to report IO exception directly, instead at
23
// the end of document processing.
24
// Reported by Patrick Higgins <phiggins@transzap.com>
26
// CR in character data will print as &#xD;
28
// Fixed processing instruction printing inside element content
29
// to not escape content. Reported by Mikael Staldal
32
// Added ability to omit comments.
33
// Contributed by Anupam Bagchi <abagchi@jtcsv.com>
35
// Fixed bug in newline handling when preserving spaces.
36
// Contributed by Mike Dusseault <mdusseault@home.com>
38
// Fixed state.unescaped not being set to false when
39
// entering element state.
40
// Reported by Lowell Vaughn <lvaughn@agillion.com>
42
package org.apache.xml.serialize;
44
import java.io.IOException;
45
import java.io.OutputStream;
46
import java.io.Writer;
47
import java.util.Hashtable;
48
import java.util.Vector;
50
import org.apache.xerces.dom.DOMErrorImpl;
51
import org.apache.xerces.dom.DOMLocatorImpl;
52
import org.apache.xerces.dom.DOMMessageFormatter;
53
import org.apache.xerces.util.XMLChar;
54
import org.w3c.dom.DOMError;
55
import org.w3c.dom.DOMErrorHandler;
56
import org.w3c.dom.Document;
57
import org.w3c.dom.DocumentFragment;
58
import org.w3c.dom.DocumentType;
59
import org.w3c.dom.Element;
60
import org.w3c.dom.Node;
61
import org.w3c.dom.ls.LSException;
62
import org.w3c.dom.ls.LSSerializer;
63
import org.w3c.dom.ls.LSSerializerFilter;
64
import org.w3c.dom.traversal.NodeFilter;
65
import org.xml.sax.ContentHandler;
66
import org.xml.sax.DTDHandler;
67
import org.xml.sax.DocumentHandler;
68
import org.xml.sax.Locator;
69
import org.xml.sax.SAXException;
70
import org.xml.sax.ext.DeclHandler;
71
import org.xml.sax.ext.LexicalHandler;
74
* Base class for a serializer supporting both DOM and SAX pretty
75
* serializing of XML/HTML/XHTML documents. Derived classes perform
76
* the method-specific serializing, this class provides the common
77
* serializing mechanisms.
79
* The serializer must be initialized with the proper writer and
80
* output format before it can be used by calling {@link #setOutputCharStream}
81
* or {@link #setOutputByteStream} for the writer and {@link #setOutputFormat}
82
* for the output format.
84
* The serializer can be reused any number of times, but cannot
85
* be used concurrently by two threads.
87
* If an output stream is used, the encoding is taken from the
88
* output format (defaults to <tt>UTF-8</tt>). If a writer is
89
* used, make sure the writer uses the same encoding (if applicable)
90
* as specified in the output format.
92
* The serializer supports both DOM and SAX. DOM serializing is done
93
* by calling {@link #serialize(Document)} and SAX serializing is done by firing
94
* SAX events and using the serializer as a document handler.
95
* This also applies to derived classes.
97
* If an I/O exception occurs while serializing, the serializer
98
* will not throw an exception directly, but only throw it
99
* at the end of serializing (either DOM or SAX's {@link
100
* org.xml.sax.DocumentHandler#endDocument}).
102
* For elements that are not specified as whitespace preserving,
103
* the serializer will potentially break long text lines at space
104
* boundaries, indent lines, and serialize elements on separate
105
* lines. Line terminators will be regarded as spaces, and
106
* spaces at beginning of line will be stripped.
108
* When indenting, the serializer is capable of detecting seemingly
109
* element content, and serializing these elements indented on separate
110
* lines. An element is serialized indented when it is the first or
111
* last child of an element, or immediately following or preceding
114
* @deprecated This class was deprecated in Xerces 2.9.0. It is recommended
115
* that new applications use the DOM Level 3 LSSerializer or JAXP's Transformation
116
* API for XML (TrAX) for serializing XML. See the Xerces documentation for more
118
* @version $Revision: 1.2 $ $Date: 2009/12/10 03:18:31 $
119
* @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
120
* @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
121
* @author Elena Litani, IBM
123
* @see org.w3c.dom.ls.LSSerializer
125
public abstract class BaseMarkupSerializer
126
implements ContentHandler, DocumentHandler, LexicalHandler,
127
DTDHandler, DeclHandler, DOMSerializer, Serializer
130
// DOM L3 implementation
131
// Bit mask of DOM L3 serializer feature flags; this class tests it with
// expressions of the form (features & DOMSerializerImpl.X) != 0.
// The int literal 0xFFFFFFFF (-1) narrows to a short with every bit set,
// so every flag that fits in the 16 bits of a short starts out enabled.
protected short features = 0xFFFFFFFF;
132
protected DOMErrorHandler fDOMErrorHandler;
133
protected final DOMErrorImpl fDOMError = new DOMErrorImpl();
134
protected LSSerializerFilter fDOMFilter;
136
protected EncodingInfo _encodingInfo;
140
* Holds array of all element states that have been entered.
141
* The array is automatically resized. When leaving an element,
142
* its state is not removed but reused when later returning
143
* to the same nesting level.
145
private ElementState[] _elementStates;
149
* The index of the next state to place in the array,
150
* or one plus the index of the current state. When zero,
151
* we are in no state.
153
private int _elementStateCount;
157
* Vector holding comments and PIs that come before the root
158
* element (even after it), see {@link #serializePreRoot}.
160
private Vector _preRoot;
164
* If the document has been started (header serialized), this
165
* flag is set to true so it's not started twice.
167
protected boolean _started;
171
* True if the serializer has been prepared. This flag is set
172
* to false when the serializer is reset prior to using it,
173
* and to true after it has been prepared for usage.
175
private boolean _prepared;
179
* Association between namespace URIs (keys) and prefixes (values).
180
* Accumulated here prior to starting an element and placing this
181
* list in the element state.
183
protected Hashtable _prefixes;
187
* The public identifier of the document type, if known.
189
protected String _docTypePublicId;
193
* The system identifier of the document type, if known.
195
protected String _docTypeSystemId;
199
* The output format associated with this serializer. This will never
200
* be a null reference. If no format was passed to the constructor,
201
* the default one for this document type will be used. The format
202
* object is never changed by the serializer.
204
protected OutputFormat _format;
208
* The printer used for printing text parts.
210
protected Printer _printer;
214
* True if indenting printer.
216
protected boolean _indenting;
218
/** Temporary buffer to store character data */
219
protected final StringBuffer fStrBuffer = new StringBuffer(40);
222
* The underlying writer.
224
private Writer _writer;
230
private OutputStream _output;
232
/** Current node that is being processed */
233
protected Node fCurrentNode = null;
237
//--------------------------------//
238
// Constructor and initialization //
239
//--------------------------------//
243
* Protected constructor can only be used by derived class.
244
* Must initialize the serializer before serializing any document,
245
* by calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
248
protected BaseMarkupSerializer( OutputFormat format )
252
_elementStates = new ElementState[ 10 ];
253
for ( i = 0 ; i < _elementStates.length ; ++i )
254
_elementStates[ i ] = new ElementState();
259
public DocumentHandler asDocumentHandler()
267
public ContentHandler asContentHandler()
275
public DOMSerializer asDOMSerializer()
283
public void setOutputByteStream( OutputStream output )
285
if ( output == null ) {
286
String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
287
"ArgumentIsNull", new Object[]{"output"});
288
throw new NullPointerException(msg);
296
public void setOutputCharStream( Writer writer )
298
if ( writer == null ) {
299
String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
300
"ArgumentIsNull", new Object[]{"writer"});
301
throw new NullPointerException(msg);
309
public void setOutputFormat( OutputFormat format )
311
if ( format == null ) {
312
String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
313
"ArgumentIsNull", new Object[]{"format"});
314
throw new NullPointerException(msg);
321
public boolean reset()
323
if ( _elementStateCount > 1 ) {
324
String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
325
"ResetInMiddle", null);
326
throw new IllegalStateException(msg);
330
fStrBuffer.setLength(0);
335
protected void prepare()
341
if ( _writer == null && _output == null ) {
342
String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
343
"NoWriterSupplied", null);
344
throw new IOException(msg);
346
// If the output stream has been set, use it to construct
347
// the writer. It is possible that the serializer has been
348
// reused with the same output stream and different encoding.
350
_encodingInfo = _format.getEncodingInfo();
352
if ( _output != null ) {
353
_writer = _encodingInfo.getWriter(_output);
356
if ( _format.getIndenting() ) {
358
_printer = new IndentPrinter( _writer, _format );
361
_printer = new Printer( _writer, _format );
366
_elementStateCount = 0;
367
state = _elementStates[ 0 ];
368
state.namespaceURI = null;
369
state.localName = null;
370
state.rawName = null;
371
state.preserveSpace = _format.getPreserveSpace();
373
state.afterElement = false;
374
state.afterComment = false;
375
state.doCData = state.inCData = false;
376
state.prefixes = null;
378
_docTypePublicId = _format.getDoctypePublic();
379
_docTypeSystemId = _format.getDoctypeSystem();
386
//----------------------------------//
387
// DOM document serializing methods //
388
//----------------------------------//
392
* Serializes the DOM element using the previously specified
393
* writer and output format. Throws an exception only if
394
* an I/O exception occurred while serializing.
396
* @param elem The element to serialize
397
* @throws IOException An I/O exception occurred while
400
public void serialize( Element elem )
405
serializeNode( elem );
407
if ( _printer.getException() != null )
408
throw _printer.getException();
413
* Serializes the DOM document fragment using the previously specified
414
* writer and output format. Throws an exception only if
415
* an I/O exception occurred while serializing.
417
* @param frag The document fragment to serialize
418
* @throws IOException An I/O exception occurred while
421
public void serialize( DocumentFragment frag )
426
serializeNode( frag );
428
if ( _printer.getException() != null )
429
throw _printer.getException();
434
* Serializes the DOM document using the previously specified
435
* writer and output format. Throws an exception only if
436
* an I/O exception occurred while serializing.
438
* @param doc The document to serialize
439
* @throws IOException An I/O exception occurred while
442
public void serialize( Document doc )
447
serializeNode( doc );
450
if ( _printer.getException() != null )
451
throw _printer.getException();
455
//------------------------------------------//
456
// SAX document handler serializing methods //
457
//------------------------------------------//
460
public void startDocument()
465
} catch ( IOException except ) {
466
throw new SAXException( except.toString() );
468
// Nothing to do here. All the magic happens in startDocument(String)
472
public void characters( char[] chars, int start, int length )
480
// Check if text should be printed as CDATA section or unescaped
481
// based on elements listed in the output format (the element
482
// state) or whether we are inside a CDATA section or entity.
484
if ( state.inCData || state.doCData ) {
487
// Print a CDATA section. The text is not escaped, but ']]>'
488
// appearing in the code must be identified and dealt with.
489
// The contents of a text node is considered space preserving.
490
if ( ! state.inCData ) {
491
_printer.printText( "<![CDATA[" );
492
state.inCData = true;
494
saveIndent = _printer.getNextIndent();
495
_printer.setNextIndent( 0 );
497
final int end = start + length;
498
for ( int index = start ; index < end; ++index ) {
500
if ( ch == ']' && index + 2 < end &&
501
chars[ index + 1 ] == ']' && chars[ index + 2 ] == '>' ) {
502
_printer.printText("]]]]><![CDATA[>");
506
if (!XMLChar.isValid(ch)) {
507
// check if it is surrogate
509
surrogates(ch, chars[index], true);
512
fatalError("The character '"+ch+"' is an invalid XML character");
516
if ( ( ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0xF7 ) ||
517
ch == '\n' || ch == '\r' || ch == '\t' ) {
518
_printer.printText(ch);
521
// The character is not printable -- split CDATA section
522
_printer.printText("]]>&#x");
523
_printer.printText(Integer.toHexString(ch));
524
_printer.printText(";<![CDATA[");
527
_printer.setNextIndent( saveIndent );
533
if ( state.preserveSpace ) {
534
// If preserving space then hold off indentation so no
535
// excessive spaces are printed at line breaks, escape
536
// the text content without replacing spaces and print
537
// the text breaking only at line breaks.
538
saveIndent = _printer.getNextIndent();
539
_printer.setNextIndent( 0 );
540
printText( chars, start, length, true, state.unescaped );
541
_printer.setNextIndent( saveIndent );
543
printText( chars, start, length, false, state.unescaped );
546
} catch ( IOException except ) {
547
throw new SAXException( except );
552
public void ignorableWhitespace( char[] chars, int start, int length )
560
// Print ignorable whitespaces only when indenting, after
561
// all they are indentation. Cancel the indentation to
564
_printer.setThisIndent( 0 );
565
for ( i = start ; length-- > 0 ; ++i )
566
_printer.printText( chars[ i ] );
568
} catch ( IOException except ) {
569
throw new SAXException( except );
574
public final void processingInstruction( String target, String code )
578
processingInstructionIO( target, code );
579
} catch ( IOException except ) {
580
throw new SAXException( except );
584
public void processingInstructionIO( String target, String code )
592
// Create the processing instruction textual representation.
593
// Make sure we don't have '?>' inside either target or code.
594
index = target.indexOf( "?>" );
596
fStrBuffer.append( "<?" ).append( target.substring( 0, index ) );
598
fStrBuffer.append( "<?" ).append( target );
599
if ( code != null ) {
600
fStrBuffer.append( ' ' );
601
index = code.indexOf( "?>" );
603
fStrBuffer.append( code.substring( 0, index ) );
605
fStrBuffer.append( code );
607
fStrBuffer.append( "?>" );
609
// If before the root element (or after it), do not print
610
// the PI directly but place it in the pre-root vector.
611
if ( isDocumentState() ) {
612
if ( _preRoot == null )
613
_preRoot = new Vector();
614
_preRoot.addElement( fStrBuffer.toString() );
617
printText( fStrBuffer.toString(), true, true );
620
state.afterElement = true;
623
fStrBuffer.setLength(0);
627
public void comment( char[] chars, int start, int length )
631
comment( new String( chars, start, length ) );
632
} catch ( IOException except ) {
633
throw new SAXException( except );
638
public void comment( String text )
644
if ( _format.getOmitComments() )
648
// Create the comment textual representation.
649
// Make sure we don't have '-->' inside the comment.
650
index = text.indexOf( "-->" );
652
fStrBuffer.append( "<!--" ).append( text.substring( 0, index ) ).append( "-->" );
654
fStrBuffer.append( "<!--" ).append( text ).append( "-->" );
656
// If before the root element (or after it), do not print
657
// the comment directly but place it in the pre-root vector.
658
if ( isDocumentState() ) {
659
if ( _preRoot == null )
660
_preRoot = new Vector();
661
_preRoot.addElement( fStrBuffer.toString() );
663
// Indent this element on a new line if the first
664
// content of the parent element or immediately
665
// following an element.
666
if ( _indenting && ! state.preserveSpace)
667
_printer.breakLine();
669
printText( fStrBuffer.toString(), true, true );
672
state.afterElement = true;
675
fStrBuffer.setLength(0);
676
state.afterComment = true;
677
state.afterElement = false;
681
public void startCDATA()
685
state = getElementState();
686
state.doCData = true;
690
public void endCDATA()
694
state = getElementState();
695
state.doCData = false;
699
public void startNonEscaping()
703
state = getElementState();
704
state.unescaped = true;
708
public void endNonEscaping()
712
state = getElementState();
713
state.unescaped = false;
717
public void startPreserving()
721
state = getElementState();
722
state.preserveSpace = true;
726
public void endPreserving()
730
state = getElementState();
731
state.preserveSpace = false;
736
* Called at the end of the document to wrap it up.
737
* Will flush the output stream and throw an exception
738
* if any I/O error occurred while serializing.
740
* @throws SAXException An I/O exception occurred during
743
public void endDocument()
747
// Print all the elements accumulated outside of
750
// Flush the output, this is necessary for buffered output.
752
} catch ( IOException except ) {
753
throw new SAXException( except );
758
public void startEntity( String name )
764
public void endEntity( String name )
770
public void setDocumentLocator( Locator locator )
776
//-----------------------------------------//
777
// SAX content handler serializing methods //
778
//-----------------------------------------//
781
public void skippedEntity ( String name )
787
_printer.printText( '&' );
788
_printer.printText( name );
789
_printer.printText( ';' );
790
} catch ( IOException except ) {
791
throw new SAXException( except );
796
public void startPrefixMapping( String prefix, String uri )
799
if ( _prefixes == null )
800
_prefixes = new Hashtable();
801
_prefixes.put( uri, prefix == null ? "" : prefix );
805
public void endPrefixMapping( String prefix )
811
//------------------------------------------//
812
// SAX DTD/Decl handler serializing methods //
813
//------------------------------------------//
816
public final void startDTD( String name, String publicId, String systemId )
821
_docTypePublicId = publicId;
822
_docTypeSystemId = systemId;
823
} catch ( IOException except ) {
824
throw new SAXException( except );
831
// Nothing to do here, all the magic occurs in startDocument(String).
835
public void elementDecl( String name, String model )
840
_printer.printText( "<!ELEMENT " );
841
_printer.printText( name );
842
_printer.printText( ' ' );
843
_printer.printText( model );
844
_printer.printText( '>' );
846
_printer.breakLine();
847
} catch ( IOException except ) {
848
throw new SAXException( except );
853
public void attributeDecl( String eName, String aName, String type,
854
String valueDefault, String value )
859
_printer.printText( "<!ATTLIST " );
860
_printer.printText( eName );
861
_printer.printText( ' ' );
862
_printer.printText( aName );
863
_printer.printText( ' ' );
864
_printer.printText( type );
865
if ( valueDefault != null ) {
866
_printer.printText( ' ' );
867
_printer.printText( valueDefault );
869
if ( value != null ) {
870
_printer.printText( " \"" );
871
printEscaped( value );
872
_printer.printText( '"' );
874
_printer.printText( '>' );
876
_printer.breakLine();
877
} catch ( IOException except ) {
878
throw new SAXException( except );
883
public void internalEntityDecl( String name, String value )
888
_printer.printText( "<!ENTITY " );
889
_printer.printText( name );
890
_printer.printText( " \"" );
891
printEscaped( value );
892
_printer.printText( "\">" );
894
_printer.breakLine();
895
} catch ( IOException except ) {
896
throw new SAXException( except );
901
public void externalEntityDecl( String name, String publicId, String systemId )
906
unparsedEntityDecl( name, publicId, systemId, null );
907
} catch ( IOException except ) {
908
throw new SAXException( except );
913
public void unparsedEntityDecl( String name, String publicId,
914
String systemId, String notationName )
919
if ( publicId == null ) {
920
_printer.printText( "<!ENTITY " );
921
_printer.printText( name );
922
_printer.printText( " SYSTEM " );
923
printDoctypeURL( systemId );
925
_printer.printText( "<!ENTITY " );
926
_printer.printText( name );
927
_printer.printText( " PUBLIC " );
928
printDoctypeURL( publicId );
929
_printer.printText( ' ' );
930
printDoctypeURL( systemId );
932
if ( notationName != null ) {
933
_printer.printText( " NDATA " );
934
_printer.printText( notationName );
936
_printer.printText( '>' );
938
_printer.breakLine();
939
} catch ( IOException except ) {
940
throw new SAXException( except );
945
public void notationDecl( String name, String publicId, String systemId )
950
if ( publicId != null ) {
951
_printer.printText( "<!NOTATION " );
952
_printer.printText( name );
953
_printer.printText( " PUBLIC " );
954
printDoctypeURL( publicId );
955
if ( systemId != null ) {
956
_printer.printText( ' ' );
957
printDoctypeURL( systemId );
960
_printer.printText( "<!NOTATION " );
961
_printer.printText( name );
962
_printer.printText( " SYSTEM " );
963
printDoctypeURL( systemId );
965
_printer.printText( '>' );
967
_printer.breakLine();
968
} catch ( IOException except ) {
969
throw new SAXException( except );
974
//------------------------------------------//
975
// Generic node serializing methods         //
976
//------------------------------------------//
980
* Serialize the DOM node. This method is shared across XML, HTML and XHTML
981
* serializers and the differences are masked out in a separate {@link
982
* #serializeElement}.
984
* @param node The node to serialize
985
* @see #serializeElement
986
* @throws IOException An I/O exception occurred while
989
protected void serializeNode( Node node )
994
// Based on the node type call the suitable SAX handler.
995
// Only comments, entities and documents which are not
996
// handled by SAX are serialized directly.
997
switch ( node.getNodeType() ) {
998
case Node.TEXT_NODE : {
1001
text = node.getNodeValue();
1002
if ( text != null ) {
1003
if (fDOMFilter !=null &&
1004
(fDOMFilter.getWhatToShow() & NodeFilter.SHOW_TEXT)!= 0) {
1005
short code = fDOMFilter.acceptNode(node);
1007
case NodeFilter.FILTER_REJECT:
1008
case NodeFilter.FILTER_SKIP: {
1016
else if ( !_indenting || getElementState().preserveSpace
1017
|| (text.replace('\n',' ').trim().length() != 0))
1024
case Node.CDATA_SECTION_NODE : {
1025
String text = node.getNodeValue();
1026
if ((features & DOMSerializerImpl.CDATA) != 0) {
1028
if (fDOMFilter != null
1029
&& (fDOMFilter.getWhatToShow()
1030
& NodeFilter.SHOW_CDATA_SECTION)
1032
short code = fDOMFilter.acceptNode(node);
1034
case NodeFilter.FILTER_REJECT :
1035
case NodeFilter.FILTER_SKIP :
1037
// skip the CDATA node
1051
// transform into a text node
1056
case Node.COMMENT_NODE : {
1059
if ( ! _format.getOmitComments() ) {
1060
text = node.getNodeValue();
1061
if ( text != null ) {
1063
if (fDOMFilter !=null &&
1064
(fDOMFilter.getWhatToShow() & NodeFilter.SHOW_COMMENT)!= 0) {
1065
short code = fDOMFilter.acceptNode(node);
1067
case NodeFilter.FILTER_REJECT:
1068
case NodeFilter.FILTER_SKIP: {
1069
// skip the comment node
1083
case Node.ENTITY_REFERENCE_NODE : {
1089
if (((features & DOMSerializerImpl.ENTITIES) != 0)
1090
|| (node.getFirstChild() == null)) {
1091
if (fDOMFilter !=null &&
1092
(fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ENTITY_REFERENCE)!= 0) {
1093
short code = fDOMFilter.acceptNode(node);
1095
case NodeFilter.FILTER_REJECT:{
1096
return; // remove the node
1098
case NodeFilter.FILTER_SKIP: {
1099
child = node.getFirstChild();
1100
while ( child != null ) {
1101
serializeNode( child );
1102
child = child.getNextSibling();
1112
checkUnboundNamespacePrefixedNode(node);
1114
_printer.printText("&");
1115
_printer.printText(node.getNodeName());
1116
_printer.printText(";");
1119
child = node.getFirstChild();
1120
while ( child != null ) {
1121
serializeNode( child );
1122
child = child.getNextSibling();
1129
case Node.PROCESSING_INSTRUCTION_NODE : {
1131
if (fDOMFilter !=null &&
1132
(fDOMFilter.getWhatToShow() & NodeFilter.SHOW_PROCESSING_INSTRUCTION)!= 0) {
1133
short code = fDOMFilter.acceptNode(node);
1135
case NodeFilter.FILTER_REJECT:
1136
case NodeFilter.FILTER_SKIP: {
1137
return; // skip this node
1139
default: { // fall through
1143
processingInstructionIO( node.getNodeName(), node.getNodeValue() );
1146
case Node.ELEMENT_NODE : {
1148
if (fDOMFilter !=null &&
1149
(fDOMFilter.getWhatToShow() & NodeFilter.SHOW_ELEMENT)!= 0) {
1150
short code = fDOMFilter.acceptNode(node);
1152
case NodeFilter.FILTER_REJECT: {
1155
case NodeFilter.FILTER_SKIP: {
1156
Node child = node.getFirstChild();
1157
while ( child != null ) {
1158
serializeNode( child );
1159
child = child.getNextSibling();
1161
return; // skip this node
1164
default: { // fall through
1168
serializeElement( (Element) node );
1171
case Node.DOCUMENT_NODE : {
1172
DocumentType docType;
1174
// If there is a document type, use the SAX events to
1176
docType = ( (Document) node ).getDoctype();
1177
if (docType != null) {
1178
// DOM Level 2 (or higher)
1182
_printer.enterDTD();
1183
_docTypePublicId = docType.getPublicId();
1184
_docTypeSystemId = docType.getSystemId();
1185
internal = docType.getInternalSubset();
1186
if ( internal != null && internal.length() > 0 )
1187
_printer.printText( internal );
1190
// DOM Level 1 -- does implementation have methods?
1191
catch (NoSuchMethodError nsme) {
1192
Class docTypeClass = docType.getClass();
1194
String docTypePublicId = null;
1195
String docTypeSystemId = null;
1197
java.lang.reflect.Method getPublicId = docTypeClass.getMethod("getPublicId", (Class[]) null);
1198
if (getPublicId.getReturnType().equals(String.class)) {
1199
docTypePublicId = (String)getPublicId.invoke(docType, (Object[]) null);
1202
catch (Exception e) {
1206
java.lang.reflect.Method getSystemId = docTypeClass.getMethod("getSystemId", (Class[]) null);
1207
if (getSystemId.getReturnType().equals(String.class)) {
1208
docTypeSystemId = (String)getSystemId.invoke(docType, (Object[]) null);
1211
catch (Exception e) {
1214
_printer.enterDTD();
1215
_docTypePublicId = docTypePublicId;
1216
_docTypeSystemId = docTypeSystemId;
1222
case Node.DOCUMENT_FRAGMENT_NODE : {
1225
// By definition this will happen if the node is a document,
1226
// document fragment, etc. Just serialize its contents. It will
1227
// work well for other nodes that we do not know how to serialize.
1228
child = node.getFirstChild();
1229
while ( child != null ) {
1230
serializeNode( child );
1231
child = child.getNextSibling();
1243
* Must be called by a method about to print any type of content.
1244
* If the element was just opened, the opening tag is closed and
1245
* will be matched to a closing tag. Returns the current element
1246
* state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
1248
* @return The current element state
1249
* @throws IOException An I/O exception occurred while
1252
protected ElementState content()
1257
state = getElementState();
1258
if ( ! isDocumentState() ) {
1259
// Need to close CData section first
1260
if ( state.inCData && ! state.doCData ) {
1261
_printer.printText( "]]>" );
1262
state.inCData = false;
1264
// If this is the first content in the element,
1265
// change the state to not-empty and close the
1266
// opening element tag.
1267
if ( state.empty ) {
1268
_printer.printText( '>' );
1269
state.empty = false;
1271
// Except for one content type, all of them
1272
// are not last element. That one content
1273
// type will take care of itself.
1274
state.afterElement = false;
1275
// Except for one content type, all of them
1276
// are not last comment. That one content
1277
// type will take care of itself.
1278
state.afterComment = false;
1285
* Called to print the text contents in the prevailing element format.
1286
* Since this method is capable of printing text as CDATA, it is used
1287
* for that purpose as well. White space handling is determined by the
1288
* current element state. In addition, the output format can dictate
1289
* whether the text is printed as CDATA or unescaped.
1291
* @param text The text to print
1292
* @throws IOException An I/O exception occurred while
1295
protected void characters( String text )
1301
// Check if text should be printed as CDATA section or unescaped
1302
// based on elements listed in the output format (the element
1303
// state) or whether we are inside a CDATA section or entity.
1305
if ( state.inCData || state.doCData ) {
1306
// Print a CDATA section. The text is not escaped, but ']]>'
1307
// appearing in the code must be identified and dealt with.
1308
// The contents of a text node is considered space preserving.
1309
if ( ! state.inCData ) {
1310
_printer.printText("<![CDATA[");
1311
state.inCData = true;
1313
int saveIndent = _printer.getNextIndent();
1314
_printer.setNextIndent( 0 );
1315
printCDATAText( text);
1316
_printer.setNextIndent( saveIndent );
1322
if ( state.preserveSpace ) {
1323
// If preserving space then hold off indentation so no
1324
// excessive spaces are printed at line breaks, escape
1325
// the text content without replacing spaces and print
1326
// the text breaking only at line breaks.
1327
saveIndent = _printer.getNextIndent();
1328
_printer.setNextIndent( 0 );
1329
printText( text, true, state.unescaped );
1330
_printer.setNextIndent( saveIndent );
1332
printText( text, false, state.unescaped );
1339
* Returns the suitable entity reference for this character value,
1340
* or null if no such entity exists. Calling this method with <tt>'&'</tt>
1341
* will return <tt>"&amp;"</tt>.
1343
* @param ch Character value
1344
* @return Character entity name, or null
1346
protected abstract String getEntityRef( int ch );
1350
* Called to serialize the DOM element. The element is serialized based on
1351
* the serializer's method (XML, HTML, XHTML).
1353
* @param elem The element to serialize
1354
* @throws IOException An I/O exception occurred while
1357
protected abstract void serializeElement( Element elem )
1362
* Comments and PIs cannot be serialized before the root element,
1363
* because the root element serializes the document type, which
1364
* generally comes first. Instead such PIs and comments are
1365
* accumulated inside a vector and serialized by calling this
1366
* method. Will be called when the root element is serialized
1367
* and when the document finished serializing.
1369
* @throws IOException An I/O exception occurred while
1372
protected void serializePreRoot()
1377
if ( _preRoot != null ) {
1378
for ( i = 0 ; i < _preRoot.size() ; ++i ) {
1379
printText( (String) _preRoot.elementAt( i ), true, true );
1381
_printer.breakLine();
1383
_preRoot.removeAllElements();
1388
//---------------------------------------------//
1389
// Text pretty printing and formatting methods //
1390
//---------------------------------------------//
1392
// Writes the content of a CDATA section one char at a time. Three cases get
// special treatment: a literal "]]>", chars that are not valid XML chars
// (possibly half of a surrogate pair), and chars the output encoding cannot print.
protected void printCDATAText( String text ) throws IOException {
1393
int length = text.length();
1396
for ( int index = 0 ; index < length; ++index ) {
1397
ch = text.charAt( index );
1399
&& index + 2 < length
1400
&& text.charAt(index + 1) == ']'
1401
&& text.charAt(index + 2) == '>') { // check for ']]>'
1402
// Reporting below only happens when a DOM error handler is registered.
if (fDOMErrorHandler != null) {
1403
// REVISIT: this means that if DOM Error handler is not registered we don't report any
1404
// fatal errors and might serialize not wellformed document
1405
// SPLITCDATA bit is clear in the feature flags.
if ((features & DOMSerializerImpl.SPLITCDATA) == 0) {
1406
String msg = DOMMessageFormatter.formatMessage(
1407
DOMMessageFormatter.SERIALIZER_DOMAIN,
1410
// WELLFORMED bit set: report a fatal error, then abort with an LSException
// whatever the handler returns.
if ((features & DOMSerializerImpl.WELLFORMED) != 0) {
1411
// issue fatal error
1412
modifyDOMError(msg, DOMError.SEVERITY_FATAL_ERROR, "wf-invalid-character", fCurrentNode);
1413
fDOMErrorHandler.handleError(fDOMError);
1414
throw new LSException(LSException.SERIALIZE_ERR, msg);
1417
// Reported with SEVERITY_ERROR: serialization is aborted only if the handler returns false.
modifyDOMError(msg, DOMError.SEVERITY_ERROR, "cdata-section-not-splitted", fCurrentNode);
1418
if (!fDOMErrorHandler.handleError(fDOMError)) {
1419
throw new LSException(LSException.SERIALIZE_ERR, msg);
1424
DOMMessageFormatter.formatMessage(
1425
DOMMessageFormatter.SERIALIZER_DOMAIN,
1430
DOMError.SEVERITY_WARNING,
1431
null, fCurrentNode);
1432
fDOMErrorHandler.handleError(fDOMError);
1435
// split CDATA section
1436
// Output is "]]" + "]]>" + "<![CDATA[" + ">": the current section is closed
// right after the two brackets and a new section is opened for the '>'.
_printer.printText("]]]]><![CDATA[>");
1441
// Not a valid XML char on its own; it may still be one half of a surrogate pair.
if (!XMLChar.isValid(ch)) {
1442
// check if it is surrogate
1443
// Advance to the following char and let surrogates() validate and print the pair.
if (++index <length) {
1444
surrogates(ch, text.charAt(index), true);
1447
fatalError("The character '"+ch+"' is an invalid XML character");
1451
// Printable in the output encoding (0xF7 is explicitly excluded), or one of
// LF / CR / TAB: the char is written unchanged.
if ( ( ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0xF7 ) ||
1452
ch == '\n' || ch == '\r' || ch == '\t' ) {
1453
_printer.printText(ch);
1457
// The character is not printable -- split CDATA section
1458
// Close the section, write the char as &#x<hex>; and reopen the section.
_printer.printText("]]>&#x");
1459
_printer.printText(Integer.toHexString(ch));
1460
_printer.printText(";<![CDATA[");
1466
// Validates a surrogate pair (high, low) and prints the code point it encodes as
// a hexadecimal character reference. When inContent is true and the current
// content state is inside a CDATA section, the section is closed around the
// reference and reopened afterwards. Invalid input is passed to fatalError().
protected void surrogates(int high, int low, boolean inContent) throws IOException{
1467
if (XMLChar.isHighSurrogate(high)) {
1468
if (!XMLChar.isLowSurrogate(low)) {
1470
fatalError("The character '"+(char)low+"' is an invalid XML character");
1473
// Combine the two UTF-16 code units into one code point.
int supplemental = XMLChar.supplemental((char)high, (char)low);
1474
if (!XMLChar.isValid(supplemental)) {
1476
// NOTE(review): the (char) cast keeps only the low 16 bits of the int, so for
// a value above 0xFFFF the character shown in this message is not the
// offending code point.
fatalError("The character '"+(char)supplemental+"' is an invalid XML character");
1479
if (inContent && content().inCData) {
1480
// Same close / reference / reopen sequence that printCDATAText uses for
// chars it cannot print directly.
_printer.printText("]]>&#x");
1481
_printer.printText(Integer.toHexString(supplemental));
1482
_printer.printText(";<![CDATA[");
1485
printHex(supplemental);
1490
fatalError("The character '"+(char)high+"' is an invalid XML character");
1496
* Called to print additional text with whitespace handling.
1497
* If spaces are preserved, the text is printed as if by calling
1498
* {@link #printText(String,boolean,boolean)} with a call to {@link Printer#breakLine}
1499
* for each new line. If spaces are not preserved, the text is
1500
* broken at space boundaries if longer than the line width;
1501
* Multiple spaces are printed as such, but spaces at beginning
1502
* of line are removed.
1504
* @param chars The text to print
1505
* @param start The start offset
1506
* @param length The number of characters
1507
* @param preserveSpace Space preserving flag
1508
* @param unescaped Print unescaped
1510
// char[] variant of printText: processes chars read from the array beginning
// at index start, with the loops counting length down to zero. preserveSpace
// selects between verbatim output and whitespace-collapsing output; unescaped
// makes the visible branches write chars through _printer.printText unchanged.
protected void printText( char[] chars, int start, int length,
1511
boolean preserveSpace, boolean unescaped )
1515
if ( preserveSpace ) {
1516
// Preserving spaces: the text must print exactly as it is,
1517
// without breaking when spaces appear in the text and without
1518
// consolidating spaces. If a line terminator is used, a line
1519
// break will occur.
1520
while ( length-- > 0 ) {
1521
char ch = chars[ start ];
1523
// LF and CR, or any char when unescaped is set, are handed to the printer as-is.
if ( ch == '\n' || ch == '\r' || unescaped ) {
1524
_printer.printText( ch );
1531
// Not preserving spaces: print one part at a time, and
1532
// use spaces between parts to break them into different
1533
// lines. Spaces at beginning of line will be stripped
1534
// by printing mechanism. Line terminator is treated
1535
// no different than other text part.
1536
while ( length-- > 0 ) {
1537
char ch = chars[ start ];
1539
// Space, form feed, tab, LF and CR are all reported to the printer via printSpace().
if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) {
1540
_printer.printSpace();
1542
else if ( unescaped ) {
1543
_printer.printText( ch );
1553
// String variant of printText: applies the same per-character tests as the
// char[] variant to every char of text, from index 0 to text.length() - 1.
protected void printText( String text, boolean preserveSpace, boolean unescaped )
1559
if ( preserveSpace ) {
1560
// Preserving spaces: the text must print exactly as it is,
1561
// without breaking when spaces appear in the text and without
1562
// consolidating spaces. If a line terminator is used, a line
1563
// break will occur.
1564
for ( index = 0 ; index < text.length() ; ++index ) {
1565
ch = text.charAt( index );
1566
// LF and CR, or any char when unescaped is set, are handed to the printer as-is.
if ( ch == '\n' || ch == '\r' || unescaped )
1567
_printer.printText( ch );
1572
// Not preserving spaces: print one part at a time, and
1573
// use spaces between parts to break them into different
1574
// lines. Spaces at beginning of line will be stripped
1575
// by printing mechanism. Line terminator is treated
1576
// no different than other text part.
1577
for ( index = 0 ; index < text.length() ; ++index ) {
1578
ch = text.charAt( index );
1579
// Space, form feed, tab, LF and CR are all reported to the printer via printSpace().
if ( ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r' ) {
1580
_printer.printSpace();
1582
else if ( unescaped ) {
1583
_printer.printText( ch );
1594
* Print a document type public or system identifier URL.
1595
* Encapsulates the URL in double quotes, escapes non-printing
1596
* characters and prints it in the same way as {@link #printText}.
1598
* @param url The document type url to print
1600
// Writes url between double quotes. A '"', any char below 0x20 and any char
// above 0x7F is replaced by '%' followed by Integer.toHexString of the char;
// every other char is written unchanged.
// NOTE(review): toHexString is not zero-padded and is applied to the UTF-16
// code unit, so U+0009 becomes "%9" and chars above 0xFF produce more than two
// hex digits; this is not standard %HH escaping -- confirm that is intended.
// 0x7F itself is not covered by the test and passes through unescaped.
protected void printDoctypeURL( String url )
1605
_printer.printText( '"' );
1606
for( i = 0 ; i < url.length() ; ++i ) {
1607
if ( url.charAt( i ) == '"' || url.charAt( i ) < 0x20 || url.charAt( i ) > 0x7F ) {
1608
_printer.printText( '%' );
1609
_printer.printText( Integer.toHexString( url.charAt( i ) ) );
1611
_printer.printText( url.charAt( i ) );
1613
_printer.printText( '"' );
1617
// Prints one character value. An entity reference from getEntityRef() is
// preferred; otherwise a printable char (or LF / CR / TAB) is written literally,
// with values handled by the last two statements split into two UTF-16 units.
protected void printEscaped( int ch )
1621
// If there is a suitable entity reference for this
1622
// character, print it. The list of available entity
1623
// references is almost but not identical between
1625
charRef = getEntityRef( ch );
1626
if ( charRef != null ) {
1627
// Written as '&' + name + ';'.
_printer.printText( '&' );
1628
_printer.printText( charRef );
1629
_printer.printText( ';' );
1630
// NOTE(review): ch is narrowed to char before the isPrintable() check, so for a
// value of 0x10000 or more the check sees only its low 16 bits -- confirm
// that this is acceptable for supplementary code points.
} else if ( ( ch >= ' ' && _encodingInfo.isPrintable((char)ch) && ch != 0xF7 ) ||
1631
ch == '\n' || ch == '\r' || ch == '\t' ) {
1632
// Non printables are below ASCII space but not tab or line
1633
// terminator, ASCII delete, or above a certain Unicode threshold.
1635
_printer.printText((char)ch );
1637
// Rebuild the UTF-16 pair: high unit from the top 10 bits of (ch - 0x10000),
// low unit from the bottom 10 bits.
_printer.printText((char)(((ch-0x10000)>>10)+0xd800));
1638
_printer.printText((char)(((ch-0x10000)&0x3ff)+0xdc00));
1648
// Writes ch as a hexadecimal numeric character reference: "&#x", the digits
// produced by Integer.toHexString(ch), then ';'.
final void printHex( int ch) throws IOException {
1649
_printer.printText( "&#x" );
1650
_printer.printText(Integer.toHexString(ch));
1651
_printer.printText( ';' );
1657
* Escapes a string so it may be printed as text content or attribute
1658
* value. Non printable characters are escaped using character references.
1659
* Where the format specifies a default entity reference, that reference
1660
* is used (e.g. <tt>&amp;lt;</tt>).
1662
* @param source The string to escape
1664
// Walks source one UTF-16 unit at a time; a high surrogate that is directly
// followed by a low surrogate is merged into a single code point value.
protected void printEscaped( String source )
1667
for ( int i = 0 ; i < source.length() ; ++i ) {
1668
int ch = source.charAt(i);
1669
// (ch & 0xfc00) == 0xd800 matches 0xD800-0xDBFF (high surrogate); the look-ahead
// is only done when another char exists.
if ((ch & 0xfc00) == 0xd800 && i+1 < source.length()) {
1670
int lowch = source.charAt(i+1);
1671
// (lowch & 0xfc00) == 0xdc00 matches 0xDC00-0xDFFF (low surrogate).
if ((lowch & 0xfc00) == 0xdc00) {
1672
// Code point = 0x10000 + (high offset << 10) + low offset.
ch = 0x10000 + ((ch-0xd800)<<10) + lowch-0xdc00;
1681
//--------------------------------//
1682
// Element state handling methods //
1683
//--------------------------------//
1687
* Return the state of the current element.
1689
* @return Current element state
1691
// Returns the entry of _elementStates at index _elementStateCount, i.e. the slot
// last selected by enterElementState() / leaveElementState().
protected ElementState getElementState()
1693
return _elementStates[ _elementStateCount ];
1698
* Enter a new element state for the specified element.
1699
* Tag name and space preserving is specified, element
1700
* state is initially empty.
1702
* @return Current element state, or null
1704
// Moves to the next slot of the _elementStates array, growing the array when
// needed, and initialises that slot for the element described by the arguments.
protected ElementState enterElementState( String namespaceURI, String localName,
1705
String rawName, boolean preserveSpace )
1709
// The next index would fall outside the array: grow it by 10 slots.
if ( _elementStateCount + 1 == _elementStates.length ) {
1710
ElementState[] newStates;
1712
// Need to create a larger array of states. This does not happen
1713
// often, unless the document is really deep.
1714
newStates = new ElementState[ _elementStates.length + 10 ];
1715
// Existing ElementState objects are carried over; the added slots are
// pre-filled with fresh instances, so slots are reused rather than
// allocated per element.
for ( int i = 0 ; i < _elementStates.length ; ++i )
1716
newStates[ i ] = _elementStates[ i ];
1717
for ( int i = _elementStates.length ; i < newStates.length ; ++i )
1718
newStates[ i ] = new ElementState();
1719
_elementStates = newStates;
1722
++_elementStateCount;
1723
state = _elementStates[ _elementStateCount ];
1724
state.namespaceURI = namespaceURI;
1725
state.localName = localName;
1726
state.rawName = rawName;
1727
state.preserveSpace = preserveSpace;
1729
// The slot may hold values from an earlier element, so the flags are reset.
state.afterElement = false;
1730
state.afterComment = false;
1731
state.doCData = state.inCData = false;
1732
state.unescaped = false;
1733
// The state takes a reference to the current _prefixes table.
state.prefixes = _prefixes;
1741
* Leave the current element state and return to the
1742
* state of the parent element. If this was the root
1743
* element, return to the state of the document.
1745
* @return Previous element state
1747
// Steps back one slot in _elementStates and returns the state at the new index.
// When the index is already 0 there is nothing to leave and an
// IllegalStateException with the formatted "Internal" message is thrown.
protected ElementState leaveElementState()
1749
if ( _elementStateCount > 0 ) {
1750
/*Corrected by David Blondeau (blondeau@intalio.com)*/
1752
//_prefixes = _elementStates[ _elementStateCount ].prefixes;
1753
-- _elementStateCount;
1754
return _elementStates[ _elementStateCount ];
1756
String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "Internal", null);
1757
throw new IllegalStateException(msg);
1762
* Returns true if in the state of the document.
1763
* Returns true before entering any element and after
1764
* leaving the root element.
1766
* @return True if in the state of the document
1768
// True exactly when the element-state index is 0, the value it has before any
// enterElementState() call and after clearDocumentState().
protected boolean isDocumentState() {
1769
return _elementStateCount == 0;
1772
/** Clears document state by resetting the element-state index to 0, the value isDocumentState() tests for. **/
1773
final void clearDocumentState() {
1774
_elementStateCount = 0;
1778
* Returns the namespace prefix for the specified URI.
1779
* If the URI has been mapped to a prefix, returns the
1780
* prefix, otherwise returns null.
1782
* @param namespaceURI The namespace URI
1783
* @return The namespace prefix if known, or null
1785
// Looks namespaceURI up first in the _prefixes table (when present) and then in
// the prefixes tables of the element states, from the current index down to 1.
protected String getPrefix( String namespaceURI )
1789
if ( _prefixes != null ) {
1790
prefix = (String) _prefixes.get( namespaceURI );
1791
if ( prefix != null )
1794
// Index 0 means no element state has been entered.
if ( _elementStateCount == 0 ) {
1797
// Innermost state first; the loop stops before index 0, so that slot is never consulted.
for ( int i = _elementStateCount ; i > 0 ; --i ) {
1798
if ( _elementStates[ i ].prefixes != null ) {
1799
prefix = (String) _elementStates[ i ].prefixes.get( namespaceURI );
1800
if ( prefix != null )
1808
* Modifies the shared DOM error object (fDOMError) in place.
1813
* @return a DOMError
1815
// Overwrites the message, type and severity of the single fDOMError instance and
// gives it a new locator that references node.
// NOTE(review): the three -1 arguments and the trailing null are presumably
// "unknown" line / column / offset and URI -- confirm against DOMLocatorImpl.
protected DOMError modifyDOMError(String message, short severity, String type, Node node){
1817
fDOMError.fMessage = message;
1818
fDOMError.fType = type;
1819
fDOMError.fSeverity = severity;
1820
fDOMError.fLocator = new DOMLocatorImpl(-1, -1, -1, node, null);
1826
// Reports message as a SEVERITY_FATAL_ERROR DOM error (with a null error type)
// for fCurrentNode when a DOM error handler is registered; an IOException
// carrying the same message is the non-handler way this method signals failure.
protected void fatalError(String message) throws IOException{
1827
if (fDOMErrorHandler != null) {
1828
modifyDOMError(message, DOMError.SEVERITY_FATAL_ERROR, null, fCurrentNode);
1829
// The boolean returned by the handler is not inspected here.
fDOMErrorHandler.handleError(fDOMError);
1832
throw new IOException(message);
1838
* Check a node to determine if it contains unbound namespace prefixes.
1840
* @param node The node to check for unbound namespace prefixes
1842
protected void checkUnboundNamespacePrefixedNode (Node node) throws IOException{