1
package org.htmlcleaner;
3
import com.sun.org.apache.xml.internal.serialize.*;
5
import org.jdom.Document;
6
import org.jdom.output.*;
9
import javax.xml.parsers.ParserConfigurationException;
10
import java.io.IOException;
17
public class Working {
19
public static void main(String[] args) throws IOException, XPatherException, ParserConfigurationException {
20
String html = "<script src=\"a\" type=\"text/javascript\" /><script src=\"b\" type=\"text/javascript\"/>";
21
final HtmlCleaner cleaner = new HtmlCleaner();
22
final CleanerProperties props = cleaner.getProperties();
24
// final String resources[] = {
25
// "http://www.b92.net",
26
// "http://www.nba.com",
27
// "http://www.naslovi.net/",
28
// "http://www.theserverside.com/",
29
// "http://www.yahoo.com",
31
// final String resources[] = {
32
// "c:/temp/htmlcleanertest/1.htm",
33
// "c:/temp/htmlcleanertest/2.htm",
34
// "c:/temp/htmlcleanertest/3.htm",
35
// "c:/temp/htmlcleanertest/4.htm",
36
// "c:/temp/htmlcleanertest/5.htm",
39
props.setTransResCharsToNCR(false);
40
// props.setIgnoreQuestAndExclam(true);
41
props.setUseCdataForScriptAndStyle(false);
42
props.setRecognizeUnicodeChars(true);
43
props.setTranslateSpecialEntities(true);
44
props.setTransSpecialEntitiesToNCR(false);
45
props.setUseEmptyElementTags(false);
46
props.setOmitXmlDeclaration(true);
47
props.setOmitDoctypeDeclaration(false);
48
props.setNamespacesAware(true);
50
long start = System.currentTimeMillis();
51
TagNode node = cleaner.clean(new File("c:/temp/htmlcleanertest/b92.htm"), "UTF-8");
52
System.out.println("Cleanup time: " + (System.currentTimeMillis() - start));
b'\\ No newline at end of file'