1
/* Copyright 2002, 2003 Elliotte Rusty Harold

   This library is free software; you can redistribute it and/or modify
   it under the terms of version 2.1 of the GNU Lesser General Public
   License as published by the Free Software Foundation.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place, Suite 330,
   Boston, MA 02111-1307 USA

   You can contact Elliotte Rusty Harold by sending e-mail to
   elharo@metalab.unc.edu. Please include the word "XOM" in the
   subject line. The XOM home page is located at http://www.xom.nu/
*/
22
package nu.xom.samples;
24
import java.io.IOException;
26
import nu.xom.Attribute;
27
import nu.xom.Builder;
28
import nu.xom.Document;
29
import nu.xom.Element;
30
import nu.xom.Elements;
31
import nu.xom.ParsingException;
35
/**
 * Demonstrates the removal of elements and
 * their content from a document.
 *
 * @author Elliotte Rusty Harold
 */
43
public class XHTMLPurifier {
45
/**
 * Namespace URI string that main and strip compare an element's
 * namespace URI against to decide whether to process it.
 */
public final static String XHTML_NAMESPACE
46
= "http://www.w3.org/1999/xhtml";
48
/**
 * Command-line entry point.
 *
 * Prints a usage message when no argument is supplied. Otherwise it builds
 * a document from args[0] with a XOM Builder, inspects the root element's
 * namespace URI, and prints the document's XML form.
 *
 * NOTE(review): this copy of the method is incomplete. The bare numbers
 * between statements look like residue of the original line numbering,
 * and several lines (closing braces, the try keyword that the catch
 * clauses need, the bodies of some branches) are not present. Restore
 * them from the original file before compiling.
 *
 * @param args command-line arguments; args[0] is handed to Builder.build
 *             (the usage message describes it as a URL)
 */
public static void main(String[] args) {
50
// No argument given: show how the program is meant to be invoked.
if (args.length <= 0) {
51
System.out.println("Usage: java nu.xom.samples.XHTMLPurifier URL");
56
// Parse the document named by the first argument.
Builder parser = new Builder();
57
Document doc = parser.build(args[0]);
58
Element root = doc.getRootElement();
59
// Only documents whose root element is in the XHTML namespace are handled.
// NOTE(review): the statement(s) for the matching case are not visible here;
// presumably strip(root) is called and the message below belongs to an
// else branch - confirm against the original file.
if (root.getNamespaceURI().equals(XHTML_NAMESPACE)) {
63
System.out.println(args[0]
64
+ " does not appear to be an XHTML document");
69
// Write the document out as XML text.
System.out.println(doc.toXML());
71
// Reports a ParsingException as a well-formedness problem, with the parser's message.
catch (ParsingException ex) {
72
System.out.println(args[0] + " is not well-formed.");
73
System.out.println(ex.getMessage());
75
// NOTE(review): only the message literal of this handler is visible; the
// call that prints it and the rest of the expression are missing.
catch (IOException ex) {
77
"Due to an IOException, the parser could not read "
84
/**
 * Cleans up an element that is in the XHTML namespace and then recurses
 * into its child elements.
 *
 * For such an element the visible code walks its attributes looking for
 * ones in a non-empty namespace other than the xml prefix, removes the
 * namespace declarations reported by the element, and calls itself on
 * every child element.
 *
 * NOTE(review): this copy of the method is incomplete. The bare numbers
 * between statements look like residue of the original line numbering.
 * The body of the innermost attribute check, all closing braces, and
 * whatever handles elements outside the XHTML namespace are not present.
 *
 * @param element the element to examine
 */
public static void strip(Element element) {
86
if (element.getNamespaceURI().equals(XHTML_NAMESPACE)) {
88
// Strip out non XHTML attributes
89
for (int i = 0; i < element.getAttributeCount(); i++) {
90
Attribute attribute = element.getAttribute(i);
92
// Attributes with an empty namespace URI are left alone.
if (!"".equals(attribute.getNamespaceURI())) {
93
// Attributes using the xml prefix are left alone as well.
// NOTE(review): the action taken on the remaining attributes is not
// visible here; presumably they are removed. If so, removing while
// counting upward could skip the following attribute - confirm.
if (!"xml".equals(attribute.getNamespacePrefix())) {
99
// Strip out additional namespaces
// NOTE(review): declarations are removed while i advances and the count
// is re-read on every pass; if a removal shifts later entries down,
// every other declaration may be skipped. Verify against the XOM
// Element documentation.
100
for (int i = 0; i < element.getNamespaceDeclarationCount(); i++) {
101
String prefix = element.getNamespacePrefix(i);
102
element.removeNamespaceDeclaration(prefix);
105
// Apply the same treatment to each child element.
Elements elements = element.getChildElements();
106
for (int i = 0; i < elements.size(); i++) {
107
strip(elements.get(i));
b'\\ No newline at end of file'