10
15
* To change this template use File | Settings | File Templates.
12
17
public class HTMLConverter implements LayoutFormatter {
18
private HashMap<String, String> escapedSymbols = new HashMap<String, String>();
20
public HTMLConverter() {
22
escapedSymbols.put("“", "``");
23
escapedSymbols.put("”", "''");
24
escapedSymbols.put("‘", "``");
25
escapedSymbols.put("’", "''");
26
escapedSymbols.put(" ", " ");
27
escapedSymbols.put(""", "\"");
28
escapedSymbols.put("&", "&");
29
escapedSymbols.put("<", "<");
30
escapedSymbols.put(">", ">");
14
32
public String format(String text) {
18
text = text.replaceAll("“", "``");
19
text = text.replaceAll("”", "''");
20
text = text.replaceAll("‘", "`");
21
text = text.replaceAll("’", "'");
22
35
StringBuffer sb = new StringBuffer();
23
36
for (int i=0; i<text.length(); i++) {
25
38
int c = text.charAt(i);
28
i = readHtmlChar(text, sb, i);
30
} else if (c == '<') {
31
41
i = readTag(text, sb, i);
33
43
sb.append((char)c);
47
Set<String> patterns = escapedSymbols.keySet();
48
for (String pattern: patterns) {
49
text = text.replaceAll(pattern, escapedSymbols.get(pattern));
52
Pattern escapedPattern = Pattern.compile("&#([x]*\\d+);");
53
Matcher m = escapedPattern.matcher(text);
55
int num = Integer.decode(m.group(1).replace("x", "#"));
58
text = text.replaceAll("&#" + m.group(1) + ";", "%");
61
text = text.replaceAll("&#" + m.group(1) + ";", "&");
64
text = text.replaceAll("&#" + m.group(1) + ";", "$\\delta$");
67
text = text.replaceAll("&#" + m.group(1) + ";", "$\\mu$");
70
text = text.replaceAll("&#" + m.group(1) + ";", "-");
73
text = text.replaceAll("&#" + m.group(1) + ";", "--");
76
text = text.replaceAll("&#" + m.group(1) + ";", "---");
79
text = text.replaceAll("&#" + m.group(1) + ";", "'");
82
System.err.println("HTML escaped char not converted " + m.group(1) + ": " + Integer.toString(num));
40
private final int MAX_TAG_LENGTH = 20;
41
private final int MAX_CHAR_LENGTH = 10;
89
private final int MAX_TAG_LENGTH = 30;
90
/*private final int MAX_CHAR_LENGTH = 10;
43
92
private int readHtmlChar(String text, StringBuffer sb, int position) {
44
93
// Have just read the < character that starts the tag.