2
* The Apache Software License, Version 1.1
5
* Copyright (c) 1999,2000 The Apache Software Foundation. All rights
8
* Redistribution and use in source and binary forms, with or without
9
* modification, are permitted provided that the following conditions
12
* 1. Redistributions of source code must retain the above copyright
13
* notice, this list of conditions and the following disclaimer.
15
* 2. Redistributions in binary form must reproduce the above copyright
16
* notice, this list of conditions and the following disclaimer in
17
* the documentation and/or other materials provided with the
20
* 3. The end-user documentation included with the redistribution,
21
* if any, must include the following acknowledgment:
22
* "This product includes software developed by the
23
* Apache Software Foundation (http://www.apache.org/)."
24
* Alternately, this acknowledgment may appear in the software itself,
25
* if and wherever such third-party acknowledgments normally appear.
27
* 4. The names "Xerces" and "Apache Software Foundation" must
28
* not be used to endorse or promote products derived from this
29
* software without prior written permission. For written
30
* permission, please contact apache@apache.org.
32
* 5. Products derived from this software may not be called "Apache",
33
* nor may "Apache" appear in their name, without prior written
34
* permission of the Apache Software Foundation.
36
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48
* ====================================================================
50
* This software consists of voluntary contributions made by many
51
* individuals on behalf of the Apache Software Foundation and was
52
* originally based on software copyright (c) 1999, International
53
* Business Machines, Inc., http://www.apache.org. For more
54
* information on the Apache Software Foundation, please see
55
* <http://www.apache.org/>.
57
package org.apache.html.dom;
60
import java.io.StringWriter;
61
import java.lang.reflect.Constructor;
62
import java.util.Hashtable;
63
import java.util.Locale;
65
import org.apache.xerces.dom.DocumentImpl;
66
import org.apache.xerces.dom.NodeImpl;
67
import org.w3c.dom.Attr;
68
import org.w3c.dom.DOMException;
69
import org.w3c.dom.Element;
70
import org.w3c.dom.Node;
71
import org.w3c.dom.NodeList;
72
import org.w3c.dom.html.HTMLBodyElement;
73
import org.w3c.dom.html.HTMLCollection;
74
import org.w3c.dom.html.HTMLDocument;
75
import org.w3c.dom.html.HTMLElement;
76
import org.w3c.dom.html.HTMLFrameSetElement;
77
import org.w3c.dom.html.HTMLHeadElement;
78
import org.w3c.dom.html.HTMLHtmlElement;
79
import org.w3c.dom.html.HTMLTitleElement;
83
* Implements an HTML document. Provides access to the top level element in the
84
* document, its body and title.
86
* Several methods create new nodes of all basic types (comment, text, element,
87
* etc.). These methods create new nodes but do not place them in the document
88
* tree. The nodes may be placed in the document tree using {@link
89
* org.w3c.dom.Node#appendChild} or {@link org.w3c.dom.Node#insertBefore}, or
90
* they may be placed in some other document tree.
92
* Note: <FRAMESET> documents are not supported at the moment, neither
93
* are direct document writing ({@link #open}, {@link #write}) and HTTP attribute
94
* methods ({@link #getURL}, {@link #getCookie}).
97
* @version $Revision: 1.18 $ $Date: 2004/02/17 07:14:48 $
98
* @author <a href="mailto:arkin@exoffice.com">Assaf Arkin</a>
99
* @see org.w3c.dom.html.HTMLDocument
101
public class HTMLDocumentImpl
103
implements HTMLDocument
108
* Holds {@link HTMLCollectionImpl} object with live collection of all
109
* anchors in document. This reference is created on demand, only once.
111
private HTMLCollectionImpl _anchors;
115
* Holds {@link HTMLCollectionImpl} object with live collection of all
116
* forms in document. This reference is created on demand, only once.
118
private HTMLCollectionImpl _forms;
122
* Holds {@link HTMLCollectionImpl} object with live collection of all
123
* images in document. This reference is created on demand, only once.
125
private HTMLCollectionImpl _images;
129
* Holds {@link HTMLCollectionImpl} object with live collection of all
130
* links in document. This reference is created on demand, only once.
132
private HTMLCollectionImpl _links;
136
* Holds {@link HTMLCollectionImpl} object with live collection of all
137
* applets in document. This reference is created on demand, only once.
139
private HTMLCollectionImpl _applets;
143
* Holds the string writer used by direct manipulation operations ({@link #open},
144
* {@link #write}, etc) to write new contents into the document and parse
145
* that text into a document tree.
147
private StringWriter _writer;
151
* Holds names and classes of HTML element types. When an element with a
152
* particular tag name is created, the matching {@link java.lang.Class}
153
* is used to create the element object. For example, <A> matches
154
* {@link HTMLAnchorElementImpl}. This static table is shared across all
157
* @see #createElement
159
private static Hashtable _elementTypesHTML;
163
* Signature used to locate constructor of HTML element classes. This
164
* static array is shared across all HTML documents.
166
* @see #createElement
168
private static final Class[] _elemClassSigHTML =
169
new Class[] { HTMLDocumentImpl.class, String.class };
174
// Default constructor. Calls populateElementTypes(), which sets up the
// shared element type table (_elementTypesHTML).
public HTMLDocumentImpl()
177
populateElementTypes();
181
// Returns the top-level HTML element of this document. The top-level nodes
// are scanned for an HTMLHtmlElement; when the scan finds none, a new
// HTMLHtmlElementImpl named "HTML" is created and the document's existing
// top-level nodes are appended to it.
public synchronized Element getDocumentElement()
187
// The document element is the top-level HTML element of the HTML
188
// document. Only this element should exist at the top level.
189
// If the HTML element is found, all other elements that might
190
// precede it are placed inside the HTML element.
191
html = getFirstChild();
192
// Walk the document's direct children looking for the HTML element.
while ( html != null )
194
if ( html instanceof HTMLHtmlElement )
196
// REVISIT: [Q] Why is this code even here? In fact, the
197
// original code is in error because it will
198
// try to move ALL nodes to be children of the
199
// HTML tag. This is not the intended behavior
200
// for comments and processing instructions
201
// outside the root element; it will throw a
202
// hierarchy request error exception for doctype
203
// nodes; *and* this code shouldn't even be
204
// needed because the parser should never build
205
// a document that contains more than a single
206
// root element, anyway! -Ac
208
synchronized ( html )
210
child = getFirstChild();
211
while ( child != null && child != html )
213
// Remember the sibling first: appending child to html re-parents it.
next = child.getNextSibling();
214
html.appendChild( child );
219
return (HTMLElement) html;
221
html = html.getNextSibling();
224
// HTML element must exist. Create a new element and dump the
225
// entire contents of the document into it in the same order.
227
html = new HTMLHtmlElementImpl( this, "HTML" );
228
child = getFirstChild();
229
while ( child != null )
231
// Remember the sibling first: appending child to html re-parents it.
next = child.getNextSibling();
232
html.appendChild( child );
236
return (HTMLElement) html;
241
* Obtains the <HEAD> element in the document, creating one if it does
242
* not exist before. The <HEAD> element is the first element in the
243
* <HTML> in the document. The <HTML> element is obtained by
244
* calling {@link #getDocumentElement}. If the element does not exist, one
247
* Called by {@link #getTitle}, {@link #setTitle}, {@link #getBody} and
248
* {@link #setBody} to assure the document has the <HEAD> element
251
* @return The <HEAD> element
253
public synchronized HTMLElement getHead()
260
// Call getDocumentElement() to get the HTML element that is also the
261
// top-level element in the document. Get the first element in the
262
// document that is called HEAD. Work with that.
263
html = getDocumentElement();
264
synchronized ( html )
266
// Scan the direct children of HTML for the first HTMLHeadElement.
head = html.getFirstChild();
267
while ( head != null && ! ( head instanceof HTMLHeadElement ) )
268
head = head.getNextSibling();
269
// HEAD exists but might not be first element in HTML: make sure
270
// it is and return it.
273
synchronized ( head )
275
// Visit the children of HTML that come before HEAD.
child = html.getFirstChild();
276
while ( child != null && child != head )
278
// Remember the sibling first: the insert below re-parents child.
next = child.getNextSibling();
279
// Insert the preceding node into HEAD, ahead of HEAD's current first child.
head.insertBefore( child, head.getFirstChild() );
283
return (HTMLElement) head;
286
// Head does not exist, create a new one, place it at the top of the
287
// HTML element and return it.
288
head = new HTMLHeadElementImpl( this, "HEAD" );
289
html.insertBefore( head, html.getFirstChild() );
291
return (HTMLElement) head;
295
public synchronized String getTitle()
301
// Get the HEAD element and look for the TITLE element within.
302
// When found, make sure the TITLE is a direct child of HEAD,
303
// and return the title's text (the Text node contained within).
305
title = head.getElementsByTagName( "TITLE" ).item( 0 );
306
list = head.getElementsByTagName( "TITLE" );
307
if ( list.getLength() > 0 ) {
308
title = list.item( 0 );
309
return ( (HTMLTitleElement) title ).getText();
311
// No TITLE found, return an empty string.
316
// Sets the document title to newTitle, reusing the first TITLE element found
// under HEAD or creating a new HTMLTitleElementImpl when there is none.
public synchronized void setTitle( String newTitle )
322
// Get the HEAD element and look for the TITLE element within.
323
// When found, make sure the TITLE is a direct child of HEAD,
324
// and set the title's text (the Text node contained within).
326
list = head.getElementsByTagName( "TITLE" );
327
if ( list.getLength() > 0 ) {
328
title = list.item( 0 );
329
// A TITLE nested deeper than HEAD is re-attached as a direct child of HEAD.
if ( title.getParentNode() != head )
330
head.appendChild( title );
331
( (HTMLTitleElement) title ).setText( newTitle );
335
// No TITLE found, create a new element and place it at the end
336
// of the HEAD element.
337
title = new HTMLTitleElementImpl( this, "TITLE" );
338
( (HTMLTitleElement) title ).setText( newTitle );
339
head.appendChild( title );
344
// Returns the document's BODY (or FRAMESET) element, creating a new
// HTMLBodyElementImpl named "BODY" when none follows the HEAD.
public synchronized HTMLElement getBody()
352
// Call getDocumentElement() to get the HTML element that is also the
353
// top-level element in the document. Get the first element in the
354
// document that is called BODY. Work with that.
355
html = getDocumentElement();
357
synchronized ( html )
359
// Search the siblings that follow HEAD for the first BODY or FRAMESET.
body = head.getNextSibling();
360
while ( body != null && ! ( body instanceof HTMLBodyElement )
361
&& ! ( body instanceof HTMLFrameSetElement ) )
362
body = body.getNextSibling();
364
// BODY/FRAMESET exists but might not be second element in HTML
365
// (after HEAD): make sure it is and return it.
368
synchronized ( body )
370
// Visit the nodes located between HEAD and BODY.
child = head.getNextSibling();
371
while ( child != null && child != body )
373
// Remember the sibling first: the insert below re-parents child.
next = child.getNextSibling();
374
// Insert the node into BODY, ahead of BODY's current first child.
body.insertBefore( child, body.getFirstChild() );
378
return (HTMLElement) body;
381
// BODY does not exist, create a new one, place it in the HTML element
382
// right after the HEAD and return it.
// NOTE(review): appendChild adds the new BODY as the last child of HTML;
// that is "right after the HEAD" only if nothing else follows HEAD - confirm.
383
body = new HTMLBodyElementImpl( this, "BODY" );
384
html.appendChild( body );
386
return (HTMLElement) body;
390
// Installs newBody as the document body. When a BODY element already exists
// it is located through getElementsByTagName( "BODY" ); otherwise newBody is
// appended to the HTML element.
public synchronized void setBody( HTMLElement newBody )
398
synchronized ( newBody )
400
// Call getDocumentElement() to get the HTML element that is also the
401
// top-level element in the document. Get the first element in the
402
// document that is called BODY. Work with that.
403
html = getDocumentElement();
405
synchronized ( html )
407
list = this.getElementsByTagName( "BODY" );
408
if ( list.getLength() > 0 ) {
409
// BODY exists but might not follow HEAD in HTML. If not,
410
// make it so and replace it. Start with the HEAD and make
411
// sure the BODY is the first element after the HEAD.
412
body = list.item( 0 );
413
synchronized ( body )
416
while ( child != null )
418
// Only Element nodes are considered; other node types are stepped over.
if ( child instanceof Element )
421
html.insertBefore( newBody, child );
423
html.replaceChild( newBody, body );
426
child = child.getNextSibling();
428
// Reached when the loop above runs out of siblings.
html.appendChild( newBody );
432
// BODY does not exist, place it in the HTML element
433
// right after the HEAD.
434
html.appendChild( newBody );
440
// Looks up an element by id, starting the search at this document node;
// the work is done by the two-argument getElementById( String, Node ).
public synchronized Element getElementById( String elementId )
442
return getElementById( elementId, this );
446
// Returns a new NameNodeListImpl constructed from this document and the
// requested element name.
public NodeList getElementsByName( String elementName )
448
return new NameNodeListImpl( this, elementName );
452
// Upper-cases the tag name (Locale.ENGLISH rules) and delegates to the
// superclass lookup, so callers may pass the tag name in any case.
public final NodeList getElementsByTagName( String tagName )
454
return super.getElementsByTagName( tagName.toUpperCase(Locale.ENGLISH) );
458
// Namespace-aware lookup. With a non-empty namespace URI the call goes to the
// superclass getElementsByTagNameNS; otherwise it goes to the plain
// superclass getElementsByTagName. The local name is upper-cased
// (Locale.ENGLISH) in both cases.
public final NodeList getElementsByTagNameNS( String namespaceURI,
461
if ( namespaceURI != null && namespaceURI.length() > 0 )
462
return super.getElementsByTagNameNS( namespaceURI, localName.toUpperCase(Locale.ENGLISH) );
464
return super.getElementsByTagName( localName.toUpperCase(Locale.ENGLISH) );
469
* Xerces-specific constructor. "localName" is passed in, so we don't need
470
* to create a new String for it.
472
* @param namespaceURI The namespace URI of the element to
474
* @param qualifiedName The qualified name of the element type to
476
* @param localName The local name of the element to instantiate.
477
* @return Element A new Element object with the following attributes:
478
* @throws DOMException INVALID_CHARACTER_ERR: Raised if the specified
479
* name contains an invalid character.
481
public Element createElementNS(String namespaceURI, String qualifiedName,
485
// Delegates to the two-argument overload; only namespaceURI and
// qualifiedName are passed on.
return createElementNS(namespaceURI, qualifiedName);
488
// Creates an element for a namespace URI / qualified name pair. A null or
// empty namespace URI is routed to createElement( qualifiedName ); any other
// namespace URI is handled by the superclass createElementNS.
public Element createElementNS( String namespaceURI, String qualifiedName )
490
if ( namespaceURI == null || namespaceURI.length() == 0 )
491
return createElement( qualifiedName );
493
return super.createElementNS( namespaceURI, qualifiedName );
498
// Creates an element for the given tag name. The name is upper-cased
// (Locale.ENGLISH) and looked up in _elementTypesHTML; a registered class is
// instantiated reflectively through its (HTMLDocumentImpl, String)
// constructor, and an unregistered name yields a generic HTMLElementImpl.
public Element createElement( String tagName )
504
// First, make sure tag name is all upper case, next get the associated
505
// element class. If no class is found, generate a generic HTML element.
506
// Do so also if an unexpected exception occurs.
// NOTE(review): the catch path below throws IllegalStateException instead of
// falling back to a generic element, contrary to the sentence above - confirm
// which behavior is intended.
507
tagName = tagName.toUpperCase(Locale.ENGLISH);
508
elemClass = (Class) _elementTypesHTML.get( tagName );
509
if ( elemClass != null )
511
// Get the constructor for the element. The signature specifies an
512
// owner document and a tag name. Use the constructor to instantiate
513
// a new object and return it.
516
cnst = elemClass.getConstructor( _elemClassSigHTML );
517
return (Element) cnst.newInstance( new Object[] { this, tagName } );
519
catch ( Exception except )
523
// For a failure inside the invoked constructor, unwrap the real cause.
if ( except instanceof java.lang.reflect.InvocationTargetException )
524
thrw = ( (java.lang.reflect.InvocationTargetException) except ).getTargetException();
527
// System.out.println( "Exception " + thrw.getClass().getName() );
528
// System.out.println( thrw.getMessage() );
530
// NOTE(review): neither except nor thrw is attached to the exception thrown
// here, so the original failure is lost to the caller; the message also
// repeats tagName after the newline.
throw new IllegalStateException( "HTM15 Tag '" + tagName + "' associated with an Element class that failed to construct.\n" + tagName);
533
// No class registered for this tag name: use the generic element class.
return new HTMLElementImpl( this, tagName );
538
* Creates an Attribute having this Document as its OwnerDoc.
539
* Overrides {@link DocumentImpl#createAttribute} and returns
540
* an attribute whose name is lower case.
542
* @param name The name of the attribute
543
* @return An attribute whose name is all lower case
544
* @throws DOMException(INVALID_NAME_ERR) if the attribute name
547
public Attr createAttribute( String name )
550
// Lower-case the name (Locale.ENGLISH rules) before delegating to the superclass.
return super.createAttribute( name.toLowerCase(Locale.ENGLISH) );
554
// Referrer accessor; per the note below this implementation has no referrer
// information to report.
public String getReferrer()
556
// Information not available on server side.
561
// Domain accessor; per the note below this implementation has no domain
// information to report.
public String getDomain()
563
// Information not available on server side.
568
// URL accessor; per the note below this implementation has no URL
// information to report.
public String getURL()
570
// Information not available on server side.
575
// Cookie accessor; per the note below this implementation has no cookie
// information to report.
public String getCookie()
577
// Information not available on server side.
582
// Cookie setter; per the note below cookie information is not handled by
// this implementation.
public void setCookie( String cookie )
584
// Information not available on server side.
588
// Collection of the document's images. The HTMLCollectionImpl is built over
// getBody() with the IMAGE lookup constant the first time it is needed and
// kept in _images afterwards.
public HTMLCollection getImages()
590
// For more information see HTMLCollection#collectionMatch
591
if ( _images == null )
592
_images = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.IMAGE );
597
// Collection of the document's applets. The HTMLCollectionImpl is built over
// getBody() with the APPLET lookup constant the first time it is needed and
// kept in _applets afterwards.
public HTMLCollection getApplets()
599
// For more information see HTMLCollection#collectionMatch
600
if ( _applets == null )
601
_applets = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.APPLET );
606
// Collection of the document's links. The HTMLCollectionImpl is built over
// getBody() with the LINK lookup constant the first time it is needed and
// kept in _links afterwards.
public HTMLCollection getLinks()
608
// For more information see HTMLCollection#collectionMatch
609
if ( _links == null )
610
_links = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.LINK );
615
// Collection of the document's forms. The HTMLCollectionImpl is built over
// getBody() with the FORM lookup constant the first time it is needed and
// kept in _forms afterwards.
public HTMLCollection getForms()
617
// For more information see HTMLCollection#collectionMatch
618
if ( _forms == null )
619
_forms = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.FORM );
624
// Collection of the document's anchors. The HTMLCollectionImpl is built over
// getBody() with the ANCHOR lookup constant the first time it is needed and
// kept in _anchors afterwards.
public HTMLCollection getAnchors()
626
// For more information see HTMLCollection#collectionMatch
627
if ( _anchors == null )
628
_anchors = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.ANCHOR );
635
// When called, an in-memory writer is prepared. The document tree is still
636
// accessible the old way, until this writer is closed.
637
if ( _writer == null )
638
_writer = new StringWriter();
644
// ! NOT IMPLEMENTED, REQUIRES PARSER !
645
if ( _writer != null )
652
// Appends text to the in-memory writer. The call has no effect while
// _writer is null.
public void write( String text )
654
// Write a string into the in-memory writer.
655
if ( _writer != null )
656
_writer.write( text );
660
// Appends text followed by a "\n" line terminator to the in-memory writer.
// The call has no effect while _writer is null.
public void writeln( String text )
662
// Write a line into the in-memory writer.
663
if ( _writer != null )
664
_writer.write( text + "\n" );
668
// Clones this document into a freshly constructed HTMLDocumentImpl.
public Node cloneNode( boolean deep )
670
HTMLDocumentImpl clone;
673
clone = new HTMLDocumentImpl();
675
// Copy each top-level child into the clone. importNode is always called
// with deep == true, so whole subtrees are imported.
node = (NodeImpl) getFirstChild();
676
while ( node != null ) {
677
clone.appendChild( clone.importNode( node, true ) );
678
node = (NodeImpl) node.getNextSibling();
686
* Recursive method that retrieves an element by its <code>id</code> attribute.
687
* Called by {@link #getElementById(String)}.
689
* @param elementId The <code>id</code> value to look for
690
* @param node The node in which to look for the element
692
private Element getElementById( String elementId, Node node )
697
// Examine each child of node in document order.
child = node.getFirstChild();
698
while ( child != null )
700
// Only Element children are tested and descended into.
if ( child instanceof Element )
702
// A child whose "id" attribute equals elementId is the answer.
if ( elementId.equals( ( (Element) child ).getAttribute( "id" ) ) )
703
return (Element) child;
704
// Otherwise search the child's own subtree recursively.
result = getElementById( elementId, child );
705
if ( result != null )
708
child = child.getNextSibling();
715
* Called by the constructor to populate the element types list (see {@link
716
* #_elementTypesHTML}). Will be called multiple times but populate the list
717
* only the first time. Replacement for static constructor.
719
// Builds the shared _elementTypesHTML table. Each populateElementType call
// below pairs an upper-case HTML tag name with the simple name of the class
// that implements it; several tags share one implementation class (for
// example H1-H6, INS/DEL, TD/TH).
private synchronized static void populateElementTypes()
721
// This class looks like it is due to some strange
722
// (read: inconsistent) JVM bugs.
723
// Initially all this code was placed in the static constructor,
724
// but that caused some early JVMs (1.1) to go mad, and if a
725
// class could not be found (as happened during development),
726
// the JVM would die.
727
// Bertrand Delacretaz <bdelacretaz@worldcom.ch> pointed out
728
// several configurations where HTMLAnchorElementImpl.class
729
// failed, forcing me to revert back to Class.forName().
731
// A non-null table means an earlier call already created it.
if ( _elementTypesHTML != null )
733
_elementTypesHTML = new Hashtable( 63 );
734
populateElementType( "A", "HTMLAnchorElementImpl" );
735
populateElementType( "APPLET", "HTMLAppletElementImpl" );
736
populateElementType( "AREA", "HTMLAreaElementImpl" );
737
populateElementType( "BASE", "HTMLBaseElementImpl" );
738
populateElementType( "BASEFONT", "HTMLBaseFontElementImpl" );
739
populateElementType( "BLOCKQUOTE", "HTMLQuoteElementImpl" );
740
populateElementType( "BODY", "HTMLBodyElementImpl" );
741
populateElementType( "BR", "HTMLBRElementImpl" );
742
populateElementType( "BUTTON", "HTMLButtonElementImpl" );
743
populateElementType( "DEL", "HTMLModElementImpl" );
744
populateElementType( "DIR", "HTMLDirectoryElementImpl" );
745
populateElementType( "DIV", "HTMLDivElementImpl" );
746
populateElementType( "DL", "HTMLDListElementImpl" );
747
populateElementType( "FIELDSET", "HTMLFieldSetElementImpl" );
748
populateElementType( "FONT", "HTMLFontElementImpl" );
749
populateElementType( "FORM", "HTMLFormElementImpl" );
750
populateElementType( "FRAME","HTMLFrameElementImpl" );
751
populateElementType( "FRAMESET", "HTMLFrameSetElementImpl" );
752
populateElementType( "HEAD", "HTMLHeadElementImpl" );
753
populateElementType( "H1", "HTMLHeadingElementImpl" );
754
populateElementType( "H2", "HTMLHeadingElementImpl" );
755
populateElementType( "H3", "HTMLHeadingElementImpl" );
756
populateElementType( "H4", "HTMLHeadingElementImpl" );
757
populateElementType( "H5", "HTMLHeadingElementImpl" );
758
populateElementType( "H6", "HTMLHeadingElementImpl" );
759
populateElementType( "HR", "HTMLHRElementImpl" );
760
populateElementType( "HTML", "HTMLHtmlElementImpl" );
761
populateElementType( "IFRAME", "HTMLIFrameElementImpl" );
762
populateElementType( "IMG", "HTMLImageElementImpl" );
763
populateElementType( "INPUT", "HTMLInputElementImpl" );
764
populateElementType( "INS", "HTMLModElementImpl" );
765
populateElementType( "ISINDEX", "HTMLIsIndexElementImpl" );
766
populateElementType( "LABEL", "HTMLLabelElementImpl" );
767
populateElementType( "LEGEND", "HTMLLegendElementImpl" );
768
populateElementType( "LI", "HTMLLIElementImpl" );
769
populateElementType( "LINK", "HTMLLinkElementImpl" );
770
populateElementType( "MAP", "HTMLMapElementImpl" );
771
populateElementType( "MENU", "HTMLMenuElementImpl" );
772
populateElementType( "META", "HTMLMetaElementImpl" );
773
populateElementType( "OBJECT", "HTMLObjectElementImpl" );
774
populateElementType( "OL", "HTMLOListElementImpl" );
775
populateElementType( "OPTGROUP", "HTMLOptGroupElementImpl" );
776
populateElementType( "OPTION", "HTMLOptionElementImpl" );
777
populateElementType( "P", "HTMLParagraphElementImpl" );
778
populateElementType( "PARAM", "HTMLParamElementImpl" );
779
populateElementType( "PRE", "HTMLPreElementImpl" );
780
populateElementType( "Q", "HTMLQuoteElementImpl" );
781
populateElementType( "SCRIPT", "HTMLScriptElementImpl" );
782
populateElementType( "SELECT", "HTMLSelectElementImpl" );
783
populateElementType( "STYLE", "HTMLStyleElementImpl" );
784
populateElementType( "TABLE", "HTMLTableElementImpl" );
785
populateElementType( "CAPTION", "HTMLTableCaptionElementImpl" );
786
populateElementType( "TD", "HTMLTableCellElementImpl" );
787
populateElementType( "TH", "HTMLTableCellElementImpl" );
788
populateElementType( "COL", "HTMLTableColElementImpl" );
789
populateElementType( "COLGROUP", "HTMLTableColElementImpl" );
790
populateElementType( "TR", "HTMLTableRowElementImpl" );
791
populateElementType( "TBODY", "HTMLTableSectionElementImpl" );
792
populateElementType( "THEAD", "HTMLTableSectionElementImpl" );
793
populateElementType( "TFOOT", "HTMLTableSectionElementImpl" );
794
populateElementType( "TEXTAREA", "HTMLTextAreaElementImpl" );
795
populateElementType( "TITLE", "HTMLTitleElementImpl" );
796
populateElementType( "UL", "HTMLUListElementImpl" );
800
private static void populateElementType( String tagName, String className )
803
_elementTypesHTML.put( tagName,
804
ObjectFactory.findProviderClass("org.apache.html.dom." + className,
805
HTMLDocumentImpl.class.getClassLoader(), true) );
806
} catch ( Exception except ) {
807
new RuntimeException( "HTM019 OpenXML Error: Could not find or execute class " + className + " implementing HTML element " + tagName
808
+ "\n" + className + "\t" + tagName);