1
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* ***** BEGIN LICENSE BLOCK *****
3
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
5
* The contents of this file are subject to the Netscape Public License
6
* Version 1.1 (the "License"); you may not use this file except in
7
* compliance with the License. You may obtain a copy of the License at
8
* http://www.mozilla.org/NPL/
10
* Software distributed under the License is distributed on an "AS IS" basis,
11
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
* for the specific language governing rights and limitations under the
15
* The Original Code is mozilla.org code.
17
* The Initial Developer of the Original Code is
18
* Netscape Communications Corporation.
19
* Portions created by the Initial Developer are Copyright (C) 1998
20
* the Initial Developer. All Rights Reserved.
25
* Alternatively, the contents of this file may be used under the terms of
26
* either the GNU General Public License Version 2 or later (the "GPL"), or
27
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28
* in which case the provisions of the GPL or the LGPL are applicable instead
29
* of those above. If you wish to allow use of your version of this file only
30
* under the terms of either the GPL or the LGPL, and not to allow others to
31
* use your version of this file under the terms of the NPL, indicate your
32
* decision by deleting the provisions above and replace them with the notice
33
* and other provisions required by the GPL or the LGPL. If you do not delete
34
* the provisions above, a recipient may use your version of this file under
35
* the terms of any one of the NPL, the GPL or the LGPL.
37
* ***** END LICENSE BLOCK ***** */
39
* A character set converter from GBK to Unicode.
42
* @created 07/Sept/1999
43
* @author Yueheng Xu, Yueheng.Xu@intel.com
46
#include "nsGBKToUnicode.h"
47
#include "nsUCvCnDll.h"
51
// Shift table handed to nsTableDecoderSupport by both 2-byte "unique"
// decoders defined below (the GBK one and the GB18030 one).
static const PRInt16 g_2BytesShiftTable[] = {
53
ShiftCell(0,0,0,0,0,0,0,0)
55
//------------------------------------------------------------
56
// nsGBKUnique2BytesToUnicode
57
//------------------------------------------------------------
58
// Table-driven decoder for the 2-byte code points that are specific to GBK.
// nsGBKToUnicode::CreateExtensionDecoder() instantiates it; it is consulted
// by TryExtensionDecoder() when the common GBK mapping gives no result.
class nsGBKUnique2BytesToUnicode : public nsTableDecoderSupport
61
nsGBKUnique2BytesToUnicode();
62
virtual ~nsGBKUnique2BytesToUnicode()
67
// Mapping data for the decoder above, included from gbkuniq2b.ut.
static const PRUint16 g_utGBKUnique2Bytes[] = {
68
#include "gbkuniq2b.ut"
70
// Passes the shared 2-byte shift table and the GBK-unique mapping table to
// the nsTableDecoderSupport base class.
// NOTE(review): the meaning of the trailing 1 is defined by
// nsTableDecoderSupport, which is not visible here -- confirm.
nsGBKUnique2BytesToUnicode::nsGBKUnique2BytesToUnicode()
71
: nsTableDecoderSupport((uShiftTable*) &g_2BytesShiftTable,
72
(uMappingTable*) &g_utGBKUnique2Bytes, 1)
76
//------------------------------------------------------------
77
// nsGB18030Unique2BytesToUnicode
78
//------------------------------------------------------------
79
// Table-driven decoder for the 2-byte code points that are specific to
// GB18030. nsGB18030ToUnicode::CreateExtensionDecoder() instantiates it as
// the extension decoder for that charset.
class nsGB18030Unique2BytesToUnicode : public nsTableDecoderSupport
82
nsGB18030Unique2BytesToUnicode();
83
virtual ~nsGB18030Unique2BytesToUnicode()
88
// Mapping data for the decoder above, included from gb18030uniq2b.ut.
static const PRUint16 g_utGB18030Unique2Bytes[] = {
89
#include "gb18030uniq2b.ut"
91
// Passes the shared 2-byte shift table and the GB18030-unique mapping table
// to the nsTableDecoderSupport base class.
// NOTE(review): the meaning of the trailing 1 is defined by
// nsTableDecoderSupport, which is not visible here -- confirm.
nsGB18030Unique2BytesToUnicode::nsGB18030Unique2BytesToUnicode()
92
: nsTableDecoderSupport((uShiftTable*) &g_2BytesShiftTable,
93
(uMappingTable*) &g_utGB18030Unique2Bytes, 1)
97
//------------------------------------------------------------
98
// nsGB18030Unique4BytesToUnicode
99
//------------------------------------------------------------
100
// Shift table for the 4-byte GB18030 decoder below; it names the
// u4BytesGB18030Charset scanner.
static const PRInt16 g_GB18030_4BytesShiftTable[] = {
101
0, u4BytesGB18030Charset,
102
ShiftCell(0,0,0,0,0,0,0,0)
105
// Table-driven decoder for 4-byte GB18030 sequences.
// nsGB18030ToUnicode::Create4BytesDecoder() instantiates it; ConvertNoBuff()
// reaches it through Try4BytesDecoder() when the first byte is outside the
// FIRST_BYTE_IS_SURROGATE range.
class nsGB18030Unique4BytesToUnicode : public nsTableDecoderSupport
108
nsGB18030Unique4BytesToUnicode();
109
virtual ~nsGB18030Unique4BytesToUnicode()
114
// Mapping data for the decoder above, included from gb180304bytes.ut.
static const PRUint16 g_utGB18030Unique4Bytes[] = {
115
#include "gb180304bytes.ut"
117
// Passes the 4-byte shift table and the 4-byte mapping table to the
// nsTableDecoderSupport base class.
// NOTE(review): the meaning of the trailing 1 is defined by
// nsTableDecoderSupport, which is not visible here -- confirm.
nsGB18030Unique4BytesToUnicode::nsGB18030Unique4BytesToUnicode()
118
: nsTableDecoderSupport((uShiftTable*) &g_GB18030_4BytesShiftTable,
119
(uMappingTable*) &g_utGB18030Unique4Bytes, 1)
124
//----------------------------------------------------------------------
125
// Class nsGBKToUnicode [implementation]
127
//----------------------------------------------------------------------
128
// Subclassing of nsTablesDecoderSupport class [implementation]
130
// Byte-range predicates used by the GBK / GB18030 decoding code below.
// Each one expands to UINT8_IN_RANGE(low, c, high) with the bounds shown.
//
// Lead byte of any multi-byte sequence: 0x81..0xFE.
#define LEGAL_GBK_MULTIBYTE_FIRST_BYTE(c) \
131
(UINT8_IN_RANGE(0x81, (c), 0xFE))
132
// Lead byte 0x90..0xFE: a 4-byte sequence starting with such a byte is
// routed to DecodeToSurrogate() instead of the 4-byte table decoder.
#define FIRST_BYTE_IS_SURROGATE(c) \
133
(UINT8_IN_RANGE(0x90, (c), 0xFE))
134
// Second byte of a 2-byte sequence: 0x40..0x7E or 0x80..0xFE.
#define LEGAL_GBK_2BYTE_SECOND_BYTE(c) \
135
(UINT8_IN_RANGE(0x40, (c), 0x7E)|| UINT8_IN_RANGE(0x80, (c), 0xFE))
136
// Second byte of a 4-byte sequence: 0x30..0x39.
#define LEGAL_GBK_4BYTE_SECOND_BYTE(c) \
137
(UINT8_IN_RANGE(0x30, (c), 0x39))
138
// Third byte of a 4-byte sequence: 0x81..0xFE.
#define LEGAL_GBK_4BYTE_THIRD_BYTE(c) \
139
(UINT8_IN_RANGE(0x81, (c), 0xFE))
140
// Fourth byte of a 4-byte sequence: 0x30..0x39.
#define LEGAL_GBK_4BYTE_FORTH_BYTE(c) \
141
(UINT8_IN_RANGE(0x30, (c), 0x39))
143
// Decodes a run of GBK / GB18030 bytes into PRUnichar output.
//
// aSrc        - input bytes to decode.
// aSrcLength  - in: number of input bytes available (read once into
//               iSrcLength).
// aDestLength - in: capacity of the output buffer in PRUnichars;
//               out: set to iDestlen, the count of units produced.
// The output is written through aDest.
//
// Per lead byte the visible logic is:
//   - lead byte in 0x81..0xFE, legal 2-byte trail byte: common GBK mapping
//     via mUtil.GBKCharToUnicode(), then TryExtensionDecoder() if that
//     yields UCS2_NO_MAPPING;
//   - lead byte in 0x81..0xFE, second byte in 0x30..0x39: 4-byte GB18030,
//     handled by Try4BytesDecoder() or DecodeToSurrogate();
//   - stand-alone 0xA0: passed through unchanged;
//   - anything else in the multi-byte range: UCS2_NO_MAPPING;
//   - otherwise the byte is copied as ASCII, with a special case for
//     IS_GBK_EURO.
// rv is set to NS_OK_UDEC_MOREOUTPUT when the output buffer is full and to
// NS_OK_UDEC_MOREINPUT when a multi-byte sequence is cut off by the end of
// the input.
NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc,
144
PRInt32 * aSrcLength,
146
PRInt32 * aDestLength)
149
PRInt32 iSrcLength = (*aSrcLength);
150
PRInt32 iDestlen = 0;
154
for (i=0;i<iSrcLength;i++)
156
// Output buffer exhausted: report that more output space is needed.
if ( iDestlen >= (*aDestLength) )
158
rv = NS_OK_UDEC_MOREOUTPUT;
161
// The valid range for the 1st byte is [0x81,0xFE]
162
if(LEGAL_GBK_MULTIBYTE_FIRST_BYTE(*aSrc))
164
// A lead byte with no following byte: the sequence is incomplete.
if(i+1 >= iSrcLength)
166
rv = NS_OK_UDEC_MOREINPUT;
169
// To make sure, the second byte has to be checked as well.
170
// In GBK, the second byte range is [0x40,0x7E] and [0x80,0xFE]
171
if(LEGAL_GBK_2BYTE_SECOND_BYTE(aSrc[1]))
174
*aDest = mUtil.GBKCharToUnicode(aSrc[0], aSrc[1]);
175
if(UCS2_NO_MAPPING == *aDest)
177
// The common mapping has no entry for this pair, so call the
178
// delegate 2-byte decoder for the GBK- or GB18030-unique code points.
180
if(! TryExtensionDecoder(aSrc, aDest))
182
*aDest = UCS2_NO_MAPPING;
188
else if (LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]))
190
// From the first 2 bytes, it looks like a 4-byte GB18030 sequence.
191
if(i+3 >= iSrcLength) // make sure we got 4 bytes
193
rv = NS_OK_UDEC_MOREINPUT;
197
// [0x81-0xfe][0x30-0x39][0x81-0xfe][0x30-0x39]
200
if (LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]) &&
201
LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]))
203
if ( ! FIRST_BYTE_IS_SURROGATE(aSrc[0]))
205
// Call the delegated 4-byte GB18030 converter to convert it.
206
if(! Try4BytesDecoder(aSrc, aDest))
207
*aDest = UCS2_NO_MAPPING;
209
// Lead byte is in the surrogate range: try the supplementary mapping.
// NOTE(review): the check below only guarantees room for ONE output
// unit (iDestlen+1 <= *aDestLength), yet
// nsGB18030ToUnicode::DecodeToSurrogate() stores two PRUnichars
// (*aOut++ and *aOut). A surrogate pair would seem to need
// iDestlen+2 <= *aDestLength -- confirm and tighten if so.
210
NS_ASSERTION(( (iDestlen+1) <= (*aDestLength) ), "no enouth output memory");
211
if ( (iDestlen+1) <= (*aDestLength) )
213
if(DecodeToSurrogate(aSrc, aDest))
215
// surrogate pair: two PRUnichars
219
*aDest = UCS2_NO_MAPPING;
222
*aDest = UCS2_NO_MAPPING;
226
*aDest = UCS2_NO_MAPPING;
231
else if ((PRUint8) aSrc[0] == (PRUint8)0xA0 )
233
// Stand-alone 0xA0 (not followed by a valid second byte):
234
// treat it as valid, a la Netscape 4.x.
235
*aDest = CAST_CHAR_TO_UNICHAR(*aSrc);
238
// Invalid GBK code point (second byte should be 0x40 or higher)
239
*aDest = UCS2_NO_MAPPING;
245
// The source byte is ASCII; copy it through.
246
*aDest = CAST_CHAR_TO_UNICHAR(*aSrc);
249
if(IS_GBK_EURO(*aSrc)) {
252
*aDest = UCS2_NO_MAPPING;
261
// Report how many PRUnichars were produced.
*aDestLength = iDestlen;
266
// Instantiates the GBK-specific 2-byte decoder and stores it in
// mExtensionDecoder; TryExtensionDecoder() calls this when the member is
// still null.
void nsGBKToUnicode::CreateExtensionDecoder()
268
mExtensionDecoder = new nsGBKUnique2BytesToUnicode();
270
// Plain GBK gets no 4-byte decoder: m4BytesDecoder is set to nsnull.
void nsGBKToUnicode::Create4BytesDecoder()
272
m4BytesDecoder = nsnull;
274
// GB18030 variant: stores the GB18030-specific 2-byte decoder in
// mExtensionDecoder.
void nsGB18030ToUnicode::CreateExtensionDecoder()
276
mExtensionDecoder = new nsGB18030Unique2BytesToUnicode();
278
// GB18030 variant: stores the table-driven 4-byte decoder in m4BytesDecoder.
void nsGB18030ToUnicode::Create4BytesDecoder()
280
m4BytesDecoder = new nsGB18030Unique4BytesToUnicode();
282
// Converts one 4-byte GB18030 sequence into a UTF-16 surrogate pair.
//
// aSrc - points at the four input bytes (aSrc[0] .. aSrc[3]).
// aOut - receives two PRUnichars: the high surrogate in aOut[0] and the
//        low surrogate in aOut[1].
//
// All four bytes are asserted and then re-tested at run time against the
// FIRST_BYTE_IS_SURROGATE / LEGAL_GBK_4BYTE_* ranges.
PRBool nsGB18030ToUnicode::DecodeToSurrogate(const char* aSrc, PRUnichar* aOut)
284
NS_ASSERTION(FIRST_BYTE_IS_SURROGATE(aSrc[0]), "illegal first byte");
285
NS_ASSERTION(LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]), "illegal second byte");
286
NS_ASSERTION(LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]), "illegal third byte");
287
NS_ASSERTION(LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]), "illegal forth byte");
288
// Run-time versions of the checks asserted above.
if(! FIRST_BYTE_IS_SURROGATE(aSrc[0]))
290
if(! LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]))
292
if(! LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]))
294
if(! LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]))
297
// Work on unsigned copies so bytes >= 0x80 are not sign-extended.
PRUint8 a1 = (PRUint8) aSrc[0];
298
PRUint8 a2 = (PRUint8) aSrc[1];
299
PRUint8 a3 = (PRUint8) aSrc[2];
300
PRUint8 a4 = (PRUint8) aSrc[3];
305
// Combine the four bytes into one linear index, mixed radix 10/126/10.
// NOTE(review): no upper bound on idx is checked here; anything above
// 0xFFFFF would be silently truncated by the 10-bit masks below -- confirm
// whether such input should be rejected instead.
PRUint32 idx = (((a1 * 10 + a2 ) * 126 + a3) * 10) + a4;
307
// High surrogate carries bits 10..19 of idx, low surrogate bits 0..9.
*aOut++ = 0xD800 | (0x000003FF & (idx >> 10));
308
*aOut = 0xDC00 | (0x000003FF & idx);
312
// Decodes one 2-byte sequence with the charset-specific extension decoder.
//
// aSrc - points at the 2-byte sequence to decode.
// aOut - receives the decoded PRUnichar.
//
// The extension decoder is created on first use through
// CreateExtensionDecoder() and is Reset() before every conversion.
PRBool nsGBKToUnicode::TryExtensionDecoder(const char* aSrc, PRUnichar* aOut)
314
// Create the decoder on first use.
if(!mExtensionDecoder)
315
CreateExtensionDecoder();
316
NS_ASSERTION(mExtensionDecoder, "cannot creqte 2 bytes unique converter");
317
if(mExtensionDecoder)
319
nsresult res = mExtensionDecoder->Reset();
320
NS_ASSERTION(NS_SUCCEEDED(res), "2 bytes unique conversoin reset failed");
323
res = mExtensionDecoder->Convert(aSrc,&len, aOut, &dstlen);
324
// A successful conversion is expected to consume exactly 2 bytes and
// produce exactly 1 PRUnichar.
NS_ASSERTION(NS_FAILED(res) || ((len==2) && (dstlen == 1)),
325
"some strange conversion result");
326
// If the conversion failed, the caller falls back to 0xFFFD,
327
// so the specific failure code in res is not propagated.
328
if(NS_SUCCEEDED(res))
333
// Surrogate-pair decoding hook as seen by plain GBK; ConvertNoBuff() calls
// it for 4-byte sequences whose lead byte is in the FIRST_BYTE_IS_SURROGATE
// range. nsGB18030ToUnicode supplies its own DecodeToSurrogate().
// NOTE(review): presumably this version just reports failure, since
// nsGBKToUnicode also leaves m4BytesDecoder null -- confirm.
PRBool nsGBKToUnicode::DecodeToSurrogate(const char* aSrc, PRUnichar* aOut)
337
PRBool nsGBKToUnicode::Try4BytesDecoder(const char* aSrc, PRUnichar* aOut)
340
Create4BytesDecoder();
343
nsresult res = m4BytesDecoder->Reset();
344
NS_ASSERTION(NS_SUCCEEDED(res), "4 bytes unique conversoin reset failed");
347
res = m4BytesDecoder->Convert(aSrc,&len, aOut, &dstlen);
348
NS_ASSERTION(NS_FAILED(res) || ((len==4) && (dstlen == 1)),
349
"some strange conversion result");
350
// if we failed, we then just use the 0xfffd
351
// therefore, we ignore the res here.
352
if(NS_SUCCEEDED(res))