1
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* ***** BEGIN LICENSE BLOCK *****
3
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
5
* The contents of this file are subject to the Mozilla Public License Version
6
* 1.1 (the "License"); you may not use this file except in compliance with
7
* the License. You may obtain a copy of the License at
8
* http://www.mozilla.org/MPL/
10
* Software distributed under the License is distributed on an "AS IS" basis,
11
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
* for the specific language governing rights and limitations under the
15
* The Original Code is mozilla.org code.
17
* The Initial Developer of the Original Code is
18
* Netscape Communications Corporation.
19
* Portions created by the Initial Developer are Copyright (C) 1998
20
* the Initial Developer. All Rights Reserved.
24
* Alternatively, the contents of this file may be used under the terms of
25
* either of the GNU General Public License Version 2 or later (the "GPL"),
26
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27
* in which case the provisions of the GPL or the LGPL are applicable instead
28
* of those above. If you wish to allow use of your version of this file only
29
* under the terms of either the GPL or the LGPL, and not to allow others to
30
* use your version of this file under the terms of the MPL, indicate your
31
* decision by deleting the provisions above and replace them with the notice
32
* and other provisions required by the GPL or the LGPL. If you do not delete
33
* the provisions above, a recipient may use your version of this file under
34
* the terms of any one of the MPL, the GPL or the LGPL.
36
* ***** END LICENSE BLOCK ***** */
38
* A character set converter from GBK to Unicode.
41
* @created 07/Sept/1999
42
* @author Yueheng Xu, Yueheng.Xu@intel.com
45
#include "nsGBKToUnicode.h"
46
#include "nsUCvCnDll.h"
50
//------------------------------------------------------------
51
// nsGBKUnique2BytesToUnicode
52
//------------------------------------------------------------
53
// Delegate decoder that nsGBKToUnicode::CreateExtensionDecoder() instantiates.
// ConvertNoBuff falls back to it (through TryExtensionDecoder) when the common
// GBK mapping yields UCS2_NO_MAPPING for a 2-byte sequence.
// As far as shown here the class declares only a constructor and a virtual
// destructor; the decoding itself comes from nsTableDecoderSupport.
class nsGBKUnique2BytesToUnicode : public nsTableDecoderSupport
56
nsGBKUnique2BytesToUnicode();
57
virtual ~nsGBKUnique2BytesToUnicode()
62
// Mapping table for this decoder; its contents are pulled in verbatim from
// gbkuniq2b.ut.
static const PRUint16 g_utGBKUnique2Bytes[] = {
63
#include "gbkuniq2b.ut"
65
// Forwards u2BytesCharset, nsnull, the table above and 1 to the base class.
// NOTE(review): the meaning of each argument is defined by
// nsTableDecoderSupport, which is not shown here -- confirm before changing.
nsGBKUnique2BytesToUnicode::nsGBKUnique2BytesToUnicode()
66
: nsTableDecoderSupport(u2BytesCharset, nsnull,
67
(uMappingTable*) &g_utGBKUnique2Bytes, 1)
71
//------------------------------------------------------------
72
// nsGB18030Unique2BytesToUnicode
73
//------------------------------------------------------------
74
// Delegate decoder that nsGB18030ToUnicode::CreateExtensionDecoder()
// instantiates; it plays the same fallback role for 2-byte sequences as the
// GBK variant but is backed by a different mapping table.
// As far as shown here the class declares only a constructor and a virtual
// destructor; the decoding itself comes from nsTableDecoderSupport.
class nsGB18030Unique2BytesToUnicode : public nsTableDecoderSupport
77
nsGB18030Unique2BytesToUnicode();
78
virtual ~nsGB18030Unique2BytesToUnicode()
83
// Mapping table for this decoder; its contents are pulled in verbatim from
// gb18030uniq2b.ut.
static const PRUint16 g_utGB18030Unique2Bytes[] = {
84
#include "gb18030uniq2b.ut"
86
// Forwards u2BytesCharset, nsnull, the table above and 1 to the base class.
// NOTE(review): the meaning of each argument is defined by
// nsTableDecoderSupport, which is not shown here -- confirm before changing.
nsGB18030Unique2BytesToUnicode::nsGB18030Unique2BytesToUnicode()
87
: nsTableDecoderSupport(u2BytesCharset, nsnull,
88
(uMappingTable*) &g_utGB18030Unique2Bytes, 1)
92
//------------------------------------------------------------
93
// nsGB18030Unique4BytesToUnicode
94
//------------------------------------------------------------
95
// Delegate decoder that nsGB18030ToUnicode::Create4BytesDecoder()
// instantiates. ConvertNoBuff uses it (through Try4BytesDecoder) for 4-byte
// sequences whose first byte is outside the FIRST_BYTE_IS_SURROGATE range.
// As far as shown here the class declares only a constructor and a virtual
// destructor; the decoding itself comes from nsTableDecoderSupport.
class nsGB18030Unique4BytesToUnicode : public nsTableDecoderSupport
98
nsGB18030Unique4BytesToUnicode();
99
virtual ~nsGB18030Unique4BytesToUnicode()
104
// Mapping table for this decoder; its contents are pulled in verbatim from
// gb180304bytes.ut.
static const PRUint16 g_utGB18030Unique4Bytes[] = {
105
#include "gb180304bytes.ut"
107
// Forwards u4BytesGB18030Charset, nsnull, the table above and 1 to the base
// class.
// NOTE(review): the meaning of each argument is defined by
// nsTableDecoderSupport, which is not shown here -- confirm before changing.
nsGB18030Unique4BytesToUnicode::nsGB18030Unique4BytesToUnicode()
108
: nsTableDecoderSupport(u4BytesGB18030Charset, nsnull,
109
(uMappingTable*) &g_utGB18030Unique4Bytes, 1)
114
//----------------------------------------------------------------------
115
// Class nsGBKToUnicode [implementation]
117
//----------------------------------------------------------------------
118
// Subclassing of nsTableDecoderSupport class [implementation]
120
// Byte-range predicates used while scanning GBK / GB18030 input.
// Each one expands to a single UINT8_IN_RANGE(low, value, high) test (two for
// the 2-byte trail byte), so the argument may be a plain `char`.
// The argument is evaluated once per range test: do not pass expressions with
// side effects.

// Lead byte of any multi-byte sequence: [0x81,0xFE].
#define LEGAL_GBK_MULTIBYTE_FIRST_BYTE(c)  \
      (UINT8_IN_RANGE(0x81, (c), 0xFE))

// Lead byte [0x90,0xFE]: a 4-byte sequence starting with such a byte is
// decoded through DecodeToSurrogate() rather than the 4-byte table decoder.
#define FIRST_BYTE_IS_SURROGATE(c)  \
      (UINT8_IN_RANGE(0x90, (c), 0xFE))

// Trail byte of a 2-byte sequence: [0x40,0x7E] or [0x80,0xFE] (0x7F excluded).
#define LEGAL_GBK_2BYTE_SECOND_BYTE(c) \
      (UINT8_IN_RANGE(0x40, (c), 0x7E) || UINT8_IN_RANGE(0x80, (c), 0xFE))

// Second byte of a 4-byte sequence: [0x30,0x39].
#define LEGAL_GBK_4BYTE_SECOND_BYTE(c) \
      (UINT8_IN_RANGE(0x30, (c), 0x39))

// Third byte of a 4-byte sequence: [0x81,0xFE].
#define LEGAL_GBK_4BYTE_THIRD_BYTE(c) \
      (UINT8_IN_RANGE(0x81, (c), 0xFE))

// Fourth byte of a 4-byte sequence: [0x30,0x39].
#define LEGAL_GBK_4BYTE_FOURTH_BYTE(c) \
      (UINT8_IN_RANGE(0x30, (c), 0x39))

// Historical spelling, kept because existing code refers to it.
#define LEGAL_GBK_4BYTE_FORTH_BYTE(c) \
      LEGAL_GBK_4BYTE_FOURTH_BYTE(c)
133
/**
 * Decode a run of GBK / GB18030 bytes into PRUnichars.
 *
 * Input is consumed one character at a time:
 *   - a lead byte in [0x81,0xFE] followed by a legal 2-byte trail byte goes
 *     through mUtil.GBKCharToUnicode(), then TryExtensionDecoder() if the
 *     common mapping has no entry;
 *   - a lead byte followed by [0x30,0x39] is treated as a 4-byte sequence and
 *     goes to Try4BytesDecoder() or, when FIRST_BYTE_IS_SURROGATE, to
 *     DecodeToSurrogate(), which produces two PRUnichars;
 *   - a stand-alone 0xA0 and bytes below 0x80 are passed through;
 *   - the GBK euro byte maps to U+20AC;
 *   - everything else becomes UCS2_NO_MAPPING.
 *
 * @param aSrc        input bytes
 * @param aSrcLength  in: bytes available at aSrc;
 *                    out: bytes consumed (always a whole number of characters)
 * @param aDest       output buffer
 * @param aDestLength in: capacity of aDest in PRUnichars;
 *                    out: PRUnichars written
 * @return NS_OK when all input was consumed, NS_OK_UDEC_MOREOUTPUT when aDest
 *         has no room for the next character, NS_OK_UDEC_MOREINPUT when the
 *         input ends inside a multi-byte sequence.
 */
NS_IMETHODIMP nsGBKToUnicode::ConvertNoBuff(const char* aSrc,
                                            PRInt32 * aSrcLength,
                                            PRUnichar *aDest,
                                            PRInt32 * aDestLength)
{
  const PRUnichar kEuroSign = 0x20AC;  // U+20AC EURO SIGN

  PRInt32 i = 0;
  PRInt32 iSrcLength = (*aSrcLength);
  PRInt32 iDestlen = 0;
  nsresult rv = NS_OK;
  *aSrcLength = 0;

  // i indexes the first byte of the current character; aSrc is advanced in
  // step with it, so aSrc[0] is always that byte.
  for (i = 0; i < iSrcLength; i++)
  {
    if ( iDestlen >= (*aDestLength) )
    {
      rv = NS_OK_UDEC_MOREOUTPUT;
      break;
    }
    // The valid range for the 1st byte is [0x81,0xFE]
    if (LEGAL_GBK_MULTIBYTE_FIRST_BYTE(*aSrc))
    {
      if (i+1 >= iSrcLength)
      {
        rv = NS_OK_UDEC_MOREINPUT;
        break;
      }
      // To make sure, the second byte has to be checked as well.
      // In GBK, the second byte range is [0x40,0x7E] and [0x80,0xFE]
      if (LEGAL_GBK_2BYTE_SECOND_BYTE(aSrc[1]))
      {
        // Valid GBK code
        *aDest = mUtil.GBKCharToUnicode(aSrc[0], aSrc[1]);
        if (UCS2_NO_MAPPING == *aDest)
        {
          // We cannot map in the common mapping, let's call the
          // delegate 2 byte decoder to decode the gbk or gb18030 unique
          // 2 byte mapping
          if (! TryExtensionDecoder(aSrc, aDest))
          {
            *aDest = UCS2_NO_MAPPING;
          }
        }
        aSrc += 2;
        i++;
      }
      else if (LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]))
      {
        // from the first 2 bytes, it looks like a 4 byte GB18030
        if (i+3 >= iSrcLength)  // make sure we got 4 bytes
        {
          rv = NS_OK_UDEC_MOREINPUT;
          break;
        }
        // 4 byte pattern:
        // [0x81-0xfe][0x30-0x39][0x81-0xfe][0x30-0x39]
        if (LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]) &&
            LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]))
        {
          if ( ! FIRST_BYTE_IS_SURROGATE(aSrc[0]))
          {
            // let's call the delegated 4 byte gb18030 converter to convert it
            if (! Try4BytesDecoder(aSrc, aDest))
              *aDest = UCS2_NO_MAPPING;
          }
          else if ( (iDestlen+1) < (*aDestLength) )
          {
            // Supplementary character: DecodeToSurrogate() stores TWO
            // PRUnichars, so two free slots are required. (The loop-top
            // check only guarantees one.)
            if (DecodeToSurrogate(aSrc, aDest))
            {
              // account for the extra PRUnichar of the surrogate pair
              iDestlen++;
              aDest++;
            }
            else
            {
              *aDest = UCS2_NO_MAPPING;
            }
          }
          else if ( (*aDestLength) >= 2 )
          {
            // Only one slot left. Leave these four bytes unconsumed and let
            // the caller come back with more room instead of writing past
            // the end of aDest.
            rv = NS_OK_UDEC_MOREOUTPUT;
            break;
          }
          else
          {
            // The whole buffer can never hold a pair; emit a replacement so
            // that the conversion still makes progress.
            NS_ASSERTION((*aDestLength) >= 2,
                         "output buffer too small for a surrogate pair");
            *aDest = UCS2_NO_MAPPING;
          }
        }
        else
        {
          *aDest = UCS2_NO_MAPPING;
        }
        aSrc += 4;
        i += 3;
      }
      else if ((PRUint8) aSrc[0] == (PRUint8)0xA0 )
      {
        // stand-alone (not followed by a valid second byte) 0xA0 !
        // treat it as valid a la Netscape 4.x
        *aDest = CAST_CHAR_TO_UNICHAR(*aSrc);
        aSrc++;
      }
      else
      {
        // Invalid GBK code point (second byte should be 0x40 or higher)
        *aDest = UCS2_NO_MAPPING;
        aSrc++;
      }
    }
    else
    {
      if (((PRUint8) *aSrc) < (PRUint8)0x80)
      {
        // The source is an ASCII character
        *aDest = CAST_CHAR_TO_UNICHAR(*aSrc);
      }
      else if (IS_GBK_EURO(*aSrc))
      {
        *aDest = kEuroSign;
      }
      else
      {
        *aDest = UCS2_NO_MAPPING;
      }
      aSrc++;
    }
    iDestlen++;
    aDest++;
    // Report consumption only after a whole character has been handled, so
    // an early break never leaves the caller in the middle of a sequence.
    *aSrcLength = i+1;
  }
  *aDestLength = iDestlen;
  return rv;
}
256
// Stores a newly allocated nsGBKUnique2BytesToUnicode in mExtensionDecoder.
// TryExtensionDecoder() calls this when mExtensionDecoder is still null.
void nsGBKToUnicode::CreateExtensionDecoder()
258
mExtensionDecoder = new nsGBKUnique2BytesToUnicode();
260
// This class provides no 4-byte delegate: m4BytesDecoder is set to nsnull.
// nsGB18030ToUnicode::Create4BytesDecoder() is the variant that supplies one.
void nsGBKToUnicode::Create4BytesDecoder()
262
m4BytesDecoder = nsnull;
264
// Stores a newly allocated nsGB18030Unique2BytesToUnicode in
// mExtensionDecoder, so the 2-byte fallback uses the GB18030 table.
void nsGB18030ToUnicode::CreateExtensionDecoder()
266
mExtensionDecoder = new nsGB18030Unique2BytesToUnicode();
268
// Stores a newly allocated nsGB18030Unique4BytesToUnicode in m4BytesDecoder,
// the delegate that Try4BytesDecoder() drives for 4-byte sequences.
void nsGB18030ToUnicode::Create4BytesDecoder()
270
m4BytesDecoder = new nsGB18030Unique4BytesToUnicode();
272
/**
 * Convert one 4-byte GB18030 sequence that encodes a supplementary-plane
 * character into a UTF-16 surrogate pair.
 *
 * Four-byte GB18030 codes are laid out linearly: the second and fourth bytes
 * have 10 legal values each ([0x30,0x39]) and the third has 126
 * ([0x81,0xFE]). Counting from the sequence 0x90 0x30 0x81 0x30, which
 * stands for U+10000, the linear index is therefore
 *     ((b1 * 10 + b2) * 126 + b3) * 10 + b4
 * with every byte first reduced by the start of its range, and that index
 * equals (code point - 0x10000).
 *
 * @param aSrc  at least four readable bytes
 * @param aOut  room for two PRUnichars; written only on success
 * @return PR_TRUE when a pair was stored; PR_FALSE when a byte is outside its
 *         legal range or the sequence lies beyond U+10FFFF.
 */
PRBool nsGB18030ToUnicode::DecodeToSurrogate(const char* aSrc, PRUnichar* aOut)
{
  NS_ASSERTION(FIRST_BYTE_IS_SURROGATE(aSrc[0]),       "illegal first byte");
  NS_ASSERTION(LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]),   "illegal second byte");
  NS_ASSERTION(LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]),    "illegal third byte");
  NS_ASSERTION(LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]),    "illegal forth byte");

  // The assertions vanish in release builds, so validate for real as well.
  if (! FIRST_BYTE_IS_SURROGATE(aSrc[0]) ||
      ! LEGAL_GBK_4BYTE_SECOND_BYTE(aSrc[1]) ||
      ! LEGAL_GBK_4BYTE_THIRD_BYTE(aSrc[2]) ||
      ! LEGAL_GBK_4BYTE_FORTH_BYTE(aSrc[3]))
    return PR_FALSE;

  // Reduce every byte to its offset from the start of its legal range.
  const PRUint32 a1 = (PRUint32)((PRUint8) aSrc[0] - (PRUint8)0x90);
  const PRUint32 a2 = (PRUint32)((PRUint8) aSrc[1] - (PRUint8)0x30);
  const PRUint32 a3 = (PRUint32)((PRUint8) aSrc[2] - (PRUint8)0x81);
  const PRUint32 a4 = (PRUint32)((PRUint8) aSrc[3] - (PRUint8)0x30);

  // idx == code point - 0x10000
  const PRUint32 idx = (((a1 * 10 + a2) * 126 + a3) * 10) + a4;

  // A surrogate pair carries 20 bits. First bytes above 0xE3 yield larger
  // indices; masking those would silently produce the wrong character.
  if (idx > 0x000FFFFF)
    return PR_FALSE;

  aOut[0] = (PRUnichar)(0xD800 | (idx >> 10));          // high (lead) surrogate
  aOut[1] = (PRUnichar)(0xDC00 | (0x000003FF & idx));   // low (trail) surrogate

  return PR_TRUE;
}
302
// Fallback for a 2-byte sequence that the common GBK mapping could not
// resolve: runs the sequence at aSrc through mExtensionDecoder and lets that
// decoder store its result at aOut.
// ConvertNoBuff substitutes UCS2_NO_MAPPING when this reports failure.
PRBool nsGBKToUnicode::TryExtensionDecoder(const char* aSrc, PRUnichar* aOut)
304
// Create the delegate on first use.
if(!mExtensionDecoder)
305
CreateExtensionDecoder();
306
NS_ASSERTION(mExtensionDecoder, "cannot creqte 2 bytes unique converter");
307
// The assertion above is debug-only, so the pointer is tested again here.
if(mExtensionDecoder)
309
// Reset the delegate before converting this sequence.
nsresult res = mExtensionDecoder->Reset();
310
NS_ASSERTION(NS_SUCCEEDED(res), "2 bytes unique conversoin reset failed");
313
// NOTE(review): the declarations of len and dstlen are not visible here;
// the assertion below expects 2 bytes consumed and 1 PRUnichar produced.
res = mExtensionDecoder->Convert(aSrc,&len, aOut, &dstlen);
314
NS_ASSERTION(NS_FAILED(res) || ((len==2) && (dstlen == 1)),
315
"some strange conversion result");
316
// If the conversion failed the caller simply uses 0xfffd,
317
// so the specific error code in res is not propagated.
318
if(NS_SUCCEEDED(res))
323
// Hook that ConvertNoBuff calls for a 4-byte sequence whose first byte
// satisfies FIRST_BYTE_IS_SURROGATE; nsGB18030ToUnicode supplies its own
// version that produces a surrogate pair.
// NOTE(review): the body of this nsGBKToUnicode version is not visible here.
PRBool nsGBKToUnicode::DecodeToSurrogate(const char* aSrc, PRUnichar* aOut)
327
PRBool nsGBKToUnicode::Try4BytesDecoder(const char* aSrc, PRUnichar* aOut)
330
Create4BytesDecoder();
333
nsresult res = m4BytesDecoder->Reset();
334
NS_ASSERTION(NS_SUCCEEDED(res), "4 bytes unique conversoin reset failed");
337
res = m4BytesDecoder->Convert(aSrc,&len, aOut, &dstlen);
338
NS_ASSERTION(NS_FAILED(res) || ((len==4) && (dstlen == 1)),
339
"some strange conversion result");
340
// if we failed, we then just use the 0xfffd
341
// therefore, we ignore the res here.
342
if(NS_SUCCEEDED(res))