1
/* Copyright 2002-2004 Elliotte Rusty Harold
3
This library is free software; you can redistribute it and/or modify
4
it under the terms of version 2.1 of the GNU Lesser General Public
5
License as published by the Free Software Foundation.
7
This library is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU Lesser General Public License for more details.
12
You should have received a copy of the GNU Lesser General Public
13
License along with this library; if not, write to the
14
Free Software Foundation, Inc., 59 Temple Place, Suite 330,
15
Boston, MA 02111-1307 USA
17
You can contact Elliotte Rusty Harold by sending e-mail to
18
elharo@metalab.unc.edu. Please include the word "XOM" in the
19
subject line. The XOM home page is located at http://www.xom.nu/
24
import java.io.ByteArrayOutputStream;
25
import java.io.IOException;
26
import java.io.OutputStreamWriter;
27
import java.io.UnsupportedEncodingException;
28
import java.io.Writer;
29
import java.util.Locale;
33
* <code>GenericWriter</code> is a hack that figures out whether a
34
* character is or is not available in a particular encoding by writing
35
* it onto an OutputStream and seeing whether or not the character
36
* written is a question mark (Java's substitution character).
37
* There's a more straightforward way to do this using
38
* <code>java.nio.Charset</code> in Java 1.4, but I'm not willing to
42
* @author Elliotte Rusty Harold
46
class GenericWriter extends TextWriter {
49
private final ByteArrayOutputStream bout;
50
private final OutputStreamWriter wout;
51
private final boolean isJapanese;
54
GenericWriter(Writer out, String encoding)
55
throws UnsupportedEncodingException {
58
bout = new ByteArrayOutputStream(32);
59
wout = new OutputStreamWriter(bout, encoding);
60
encoding = encoding.toUpperCase(Locale.ENGLISH);
61
if (encoding.indexOf("EUC-JP") > -1
62
|| encoding.startsWith("EUC_JP")
63
|| encoding.equals("SHIFT_JIS")
64
|| encoding.equals("SJIS")
65
|| encoding.equals("ISO-2022-JP")) {
75
boolean needsEscaping(char c) {
77
// assume everything has at least the ASCII characters
78
if (c <= 127) return false;
79
// work around various bugs in Japanese encodings
81
if (c == 0xA5) return true; // Yen symbol
82
if (c == 0x203E) return true; // Sun bugs in EUC-JP and SJIS
85
boolean result = false;
89
byte[] data = bout.toByteArray();
90
if (data.length == 0) result = true; // surrogate pair
91
else if (data[0] == '?') result = true;
92
// work around various bugs in Japanese encodings
93
// especially in JDK 1.4.2_05
94
else if (isJapanese && data[0] == 0x21) result = true;
96
catch (IOException ex) {
97
// There really shouldn't be any IOException here.
98
// However character conversion bugs in Java 1.2
99
// sometimes throw one. In this case, we just say
104
// This appears to be a wrapper around an undocumented
105
// sun.io.UnknownCharacterException or some such. In any
106
// case Java doesn't know how to output this character.