1
/****************************************************************************
3
** Copyright (C) 1992-2005 Trolltech AS. All rights reserved.
5
** This file is part of the internationalization module of the Qt Toolkit.
7
** This file may be distributed under the terms of the Q Public License
8
** as defined by Trolltech AS of Norway and appearing in the file
9
** LICENSE.QPL included in the packaging of this file.
11
** This file may be distributed and/or modified under the terms of the
12
** GNU General Public License version 2 as published by the Free Software
13
** Foundation and appearing in the file LICENSE.GPL included in the
14
** packaging of this file.
16
** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
17
** information about Qt Commercial License Agreements.
18
** See http://www.trolltech.com/qpl/ for QPL licensing information.
19
** See http://www.trolltech.com/gpl/ for GPL licensing information.
21
** Contact info@trolltech.com if any conditions of this licensing are
24
** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
25
** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27
****************************************************************************/
29
// Most of the code here was originally written by Serika Kurusugawa,
30
// a.k.a. Junji Takagi, and is included in Qt with the author's permission
31
// and the grateful thanks of the Trolltech team.
37
\brief The QJisCodec class provides conversion to and from JIS character sets.
39
More precisely, the QJisCodec class subclasses QTextCodec to
40
provide support for JIS X 0201 Latin, JIS X 0201 Kana, JIS X 0208
43
The environment variable UNICODEMAP_JP can be used to fine-tune
44
QJisCodec, QSjisCodec and QEucJpCodec. The mapping names are as for
45
the Japanese XML working group's \link
46
http://www.y-adagio.com/public/standards/tr_xml_jpf/toc.htm XML
47
Japanese Profile\endlink, because it names and explains all the
48
widely used mappings. Here are brief descriptions, written by
53
\i "unicode-0.9" or "unicode-0201" for Unicode style. This assumes
54
JISX0201 for 0x00-0x7f. (0.9 is a table version of jisx02xx mapping
55
used for Unicode spec version 1.1.)
57
\i "unicode-ascii" This assumes US-ASCII for 0x00-0x7f; some
58
chars (JISX0208 0x2140 and JISX0212 0x2237) are different from
59
Unicode 1.1 to avoid conflict.
61
\i "open-19970715-0201" ("open-0201" for convenience) or
62
"jisx0221-1995" for JISX0221-JISX0201 style. JIS X 0221 is JIS
63
version of Unicode, but a few chars (0x5c, 0x7e, 0x2140, 0x216f,
64
0x2131) are different from Unicode 1.1. This is used when 0x5c is
67
\i "open-19970715-ascii" ("open-ascii" for convenience) for
68
JISX0221-ASCII style. This is used when 0x5c is treated as REVERSE
71
\i "open-19970715-ms" ("open-ms" for convenience) or "cp932" for
72
Microsoft Windows style. Windows Code Page 932. Some chars (0x2140,
73
0x2141, 0x2142, 0x215d, 0x2171, 0x2172) are different from Unicode
76
\i "jdk1.1.7" for Sun's JDK style. Same as Unicode 1.1, except that
77
JIS 0x2140 is mapped to U+FF3C. Either ASCII or JISX0201 can be used
82
In addition, the extensions "nec-vdc", "ibm-vdc" and "udc" are
85
For example, if you want to use Unicode style conversion but with
86
NEC's extension, set \c UNICODEMAP_JP to
87
<nobr>\c {unicode-0.9, nec-vdc}.</nobr> (You will probably
88
need to quote that in a shell command.)
90
Most of the code here was written by Serika Kurusugawa,
91
a.k.a. Junji Takagi, and is included in Qt with the author's
92
permission and the grateful thanks of the Trolltech team. Here is
93
the copyright statement for that code:
97
Copyright (C) 1999 Serika Kurusugawa. All rights reserved.
99
Redistribution and use in source and binary forms, with or without
100
modification, are permitted provided that the following conditions
103
\i Redistributions of source code must retain the above copyright
104
notice, this list of conditions and the following disclaimer.
105
\i Redistributions in binary form must reproduce the above copyright
106
notice, this list of conditions and the following disclaimer in the
107
documentation and/or other materials provided with the distribution.
110
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS".
111
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
112
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
113
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
114
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
115
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
116
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
117
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
118
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
119
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
123
#include "qjiscodec.h"
128
So = 0x0e, // Shift Out
129
Si = 0x0f, // Shift In
131
ReverseSolidus = 0x5c,
137
// Evaluates to true when c lies in the inclusive range 0xa1..0xdf
// (presumably the 8-bit JIS X 0201 Kana byte range -- confirm against callers).
// Note: the argument may be evaluated more than once.
#define IsKana(c) (!(((c) < 0xa1) || ((c) > 0xdf)))
138
// Evaluates to true when c lies in the inclusive range 0x21..0x7e.
// Note: the argument may be evaluated more than once.
#define IsJisChar(c) (!(((c) < 0x21) || ((c) > 0x7e)))
140
// Builds a QChar from u (cast to ushort); when u is zero the result is
// QChar::ReplacementCharacter instead. The argument is evaluated twice
// when it is non-zero.
#define QValidChar(u) (((u) == 0) ? QChar(QChar::ReplacementCharacter) : QChar((ushort)(u)))
142
enum Iso2022State{ Ascii, MinState = Ascii,
143
JISX0201_Latin, JISX0201_Kana,
144
JISX0208_1978, JISX0208_1983,
145
JISX0212, MaxState = JISX0212,
148
// Set of characters that convertToUnicode() looks up with strchr() while it
// is processing the bytes of an escape sequence.
static const char Esc_CHARS[] = "()*+-./";
150
// NUL-terminated escape sequence "Esc ( B"; convertToUnicode() switches to
// the Ascii state when it sees this sequence.
static const char Esc_Ascii[] = {Esc, '(', 'B', 0 };
151
// NUL-terminated escape sequence "Esc ( J"; convertToUnicode() switches to
// the JISX0201_Latin state when it sees this sequence.
static const char Esc_JISX0201_Latin[] = {Esc, '(', 'J', 0 };
152
// NUL-terminated escape sequence "Esc ( I"; convertToUnicode() switches to
// the JISX0201_Kana state when it sees this sequence.
static const char Esc_JISX0201_Kana[] = {Esc, '(', 'I', 0 };
153
// NUL-terminated escape sequence "Esc $ @"; convertToUnicode() switches to
// the JISX0208_1978 state when it sees this sequence.
static const char Esc_JISX0208_1978[] = {Esc, '$', '@', 0 };
154
// NUL-terminated escape sequence "Esc $ B"; convertToUnicode() switches to
// the JISX0208_1983 state when it sees this sequence.
static const char Esc_JISX0208_1983[] = {Esc, '$', 'B', 0 };
155
// NUL-terminated escape sequence "Esc $ ( D"; convertToUnicode() switches to
// the JISX0212 state when it sees this sequence.
static const char Esc_JISX0212[] = {Esc, '$', '(', 'D', 0 };
156
static const char * const Esc_SEQ[] = { Esc_Ascii,
164
QJisCodec::QJisCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
170
QJisCodec::~QJisCodec()
172
delete (QJpUnicodeConv*)conv;
176
QByteArray QJisCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *cs) const
178
char replacement = '?';
180
if (cs->flags & ConvertInvalidToNull)
186
Iso2022State state = Ascii;
187
Iso2022State prev = Ascii;
188
for (int i = 0; i < len; i++) {
191
if (ch.row() == 0x00 && ch.cell() < 0x80) {
193
if (state != JISX0201_Latin ||
194
ch.cell() == ReverseSolidus || ch.cell() == Tilde) {
198
} else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
201
if (state != Ascii ||
202
ch.cell() == YenSign || ch.cell() == Overline) {
203
state = JISX0201_Latin;
207
state = JISX0201_Kana;
210
} else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
212
state = JISX0208_1983;
213
} else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
218
state = UnknownState;
223
if (state == UnknownState) {
226
result += Esc_SEQ[state - MinState];
233
result += (j >> 8) & 0xff;
242
cs->invalidChars += invalid;
247
QString QJisCodec::convertToUnicode(const char* chars, int len, ConverterState *cs) const
251
Iso2022State state = Ascii, prev = Ascii;
253
QChar replacement = QChar::ReplacementCharacter;
255
if (cs->flags & ConvertInvalidToNull)
256
replacement = QChar::Null;
257
nbuf = cs->remainingChars;
258
buf[0] = (cs->state_data[0] >> 24) & 0xff;
259
buf[1] = (cs->state_data[0] >> 16) & 0xff;
260
buf[2] = (cs->state_data[0] >> 8) & 0xff;
261
buf[3] = (cs->state_data[0] >> 0) & 0xff;
262
state = (Iso2022State)((cs->state_data[1] >> 0) & 0xff);
263
prev = (Iso2022State)((cs->state_data[1] >> 8) & 0xff);
264
esc = cs->state_data[2];
269
for (int i=0; i<len; i++) {
273
state = UnknownState;
276
if (ch == '$' || strchr(Esc_CHARS, ch)) {
285
if (strchr(Esc_CHARS, ch)) {
290
state = JISX0208_1978; // Esc $ @
293
state = JISX0208_1983; // Esc $ B
303
state = Ascii; // Esc (B
306
state = JISX0201_Kana; // Esc (I
309
state = JISX0201_Latin; // Esc (J
321
state = JISX0212; // Esc $ (D
334
} else if (ch == So) {
337
state = JISX0201_Kana;
339
} else if (ch == Si) {
341
if (prev == Ascii || prev == JISX0201_Latin) {
354
result += QLatin1Char(ch);
359
u = conv->jisx0201ToUnicode(ch);
360
result += QValidChar(u);
363
u = conv->jisx0201ToUnicode(ch | 0x80);
364
result += QValidChar(u);
372
result += QChar::ReplacementCharacter;
380
u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
381
result += QValidChar(u);
384
u = conv->jisx0212ToUnicode(buf[0] & 0x7f, ch & 0x7f);
385
result += QValidChar(u);
388
result += replacement;
400
cs->remainingChars = nbuf;
401
cs->invalidChars += invalid;
402
cs->state_data[0] = (buf[0] << 24) + (buf[1] << 16) + (buf[2] << 8) + buf[3];
403
cs->state_data[1] = (prev << 8) + state;
404
cs->state_data[2] = esc;
413
int QJisCodec::_mibEnum()
419
QByteArray QJisCodec::_name()
421
return "ISO-2022-JP";
425
Returns the codec's mime name.
427
QList<QByteArray> QJisCodec::_aliases()
429
QList<QByteArray> list;
430
list << "JIS7"; // Qt 3 compat