/* Copyright 2002-2004 Elliotte Rusty Harold

   This library is free software; you can redistribute it and/or modify
   it under the terms of version 2.1 of the GNU Lesser General Public
   License as published by the Free Software Foundation.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place, Suite 330,
   Boston, MA 02111-1307 USA

   You can contact Elliotte Rusty Harold by sending e-mail to
   elharo@metalab.unc.edu. Please include the word "XOM" in the
   subject line. The XOM home page is located at http://www.xom.nu/
*/
24
import java.io.UnsupportedEncodingException;
25
import java.io.Writer;
26
import java.util.Locale;
/**
 * @author Elliotte Rusty Harold
 */
class TextWriterFactory {
34
public static TextWriter getTextWriter(
35
Writer out, String encoding) {
37
// Not all encoding names are case-insensitive in Java, even
38
// though they should be. For instance, MacRoman isn't.
39
String encodingUpperCase = encoding.toUpperCase(Locale.ENGLISH);
40
if (encodingUpperCase.startsWith("UTF")
41
|| encodingUpperCase.startsWith("UNICODE")
43
return new UnicodeWriter(out, encoding);
45
else if (encodingUpperCase.startsWith("ISO-10646-UCS")
46
|| encodingUpperCase.startsWith("UCS")
47
|| encodingUpperCase.equals("GB18030")) {
48
// GB18030 has a 1-1 mapping to Unicode. However, the Sun
49
// GB18030 VM is buggy with non-BMP characters. The IBM VM
50
// gets this right, but for safety we'll escape all non-BMP
52
return new UCSWriter(out, encoding);
54
else if (encodingUpperCase.equals("ISO-8859-1")) {
55
return new Latin1Writer(out, encoding);
57
else if (encodingUpperCase.equals("ISO-8859-2")) {
58
return new Latin2Writer(out, encodingUpperCase);
60
else if (encodingUpperCase.equals("ISO-8859-3")) {
61
return new Latin3Writer(out, encodingUpperCase);
63
else if (encodingUpperCase.equals("ISO-8859-4")) {
64
return new Latin4Writer(out, encodingUpperCase);
66
else if (encodingUpperCase.equals("ISO-8859-5")) {
67
return new ISOCyrillicWriter(out, encodingUpperCase);
69
else if (encodingUpperCase.equals("ISO-8859-6")) {
70
return new ISOArabicWriter(out, encodingUpperCase);
72
else if (encodingUpperCase.equals("ISO-8859-7")) {
73
return new ISOGreekWriter(out, encodingUpperCase);
75
else if (encodingUpperCase.equals("ISO-8859-8")) {
76
return new ISOHebrewWriter(out, encodingUpperCase);
78
else if (encodingUpperCase.equals("ISO-8859-9")
79
|| encodingUpperCase.equals("EBCDIC-CP-TR")
80
|| encodingUpperCase.equals("CP1037")) {
81
return new Latin5Writer(out, encodingUpperCase);
83
else if (encoding.equals("ISO-8859-10")) {
84
return new Latin6Writer(out, encoding);
86
else if (encodingUpperCase.equals("ISO-8859-11")
87
|| encodingUpperCase.equals("TIS-620")
88
|| encodingUpperCase.equals("TIS620")) {
89
return new ISOThaiWriter(out, encodingUpperCase);
91
// There's no such thing as ISO-8859-12
92
// nor is there likely to be one in the future.
93
else if (encodingUpperCase.equals("ISO-8859-13")) {
94
return new Latin7Writer(out, encodingUpperCase);
96
else if (encoding.equals("ISO-8859-14")) {
97
return new Latin8Writer(out, encoding);
99
else if (encodingUpperCase.equals("ISO-8859-15")) {
100
return new Latin9Writer(out, encodingUpperCase);
102
else if (encoding.equals("ISO-8859-16")) {
103
return new Latin10Writer(out, encoding);
105
else if (encodingUpperCase.endsWith("ASCII")) {
106
return new ASCIIWriter(out, encodingUpperCase);
108
else if (encodingUpperCase.equals("IBM037")
109
|| encodingUpperCase.equals("CP037")
110
|| encodingUpperCase.equals("EBCDIC-CP-US")
111
|| encodingUpperCase.equals("EBCDIC-CP-CA")
112
|| encodingUpperCase.equals("EBCDIC-CP-WA")
113
|| encodingUpperCase.equals("EBCDIC-CP-NL")
114
|| encodingUpperCase.equals("CSIBM037")) {
115
// EBCDIC-37 has same character set as ISO-8859-1;
116
// just at different code points.
117
return new Latin1Writer(out, encodingUpperCase);
121
return new GenericWriter(out, encoding);
123
catch (UnsupportedEncodingException ex) {
124
return new ASCIIWriter(out, encoding);