1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim:expandtab:shiftwidth=2:tabstop=2:
4
/* ***** BEGIN LICENSE BLOCK *****
5
** Version: MPL 1.1/GPL 2.0/LGPL 2.1
7
** The contents of this file are subject to the Mozilla Public License Version
8
** 1.1 (the "License"); you may not use this file except in compliance with
9
** the License. You may obtain a copy of the License at
10
** http://www.mozilla.org/MPL/
12
** Software distributed under the License is distributed on an "AS IS" basis,
13
** WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14
** for the specific language governing rights and limitations under the
17
** The Original Code is Mozilla Communicator client code.
19
** The Initial Developer of the Original Code is
20
** Netscape Communications Corp.
21
** Portions created by the Initial Developer are Copyright (C) 2003
22
** the Initial Developer. All Rights Reserved.
25
** Jungshik Shin <jshin@mailaps.org>
26
** Frank Tang <ftang@netscape.com>
27
** Jin-Hwan Cho <chofchof@ktug.or.kr>
28
** Won-Kyu Park <wkpark@chem.skku.ac.kr>
30
** Alternatively, the contents of this file may be used under the terms of
31
** either the GNU General Public License Version 2 or later (the "GPL"), or
32
** the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
33
** in which case the provisions of the GPL or the LGPL are applicable instead
34
** of those above. If you wish to allow use of your version of this file only
35
** under the terms of either the GPL or the LGPL, and not to allow others to
36
** use your version of this file under the terms of the MPL, indicate your
37
** decision by deleting the provisions above and replace them with the notice
38
** and other provisions required by the GPL or the LGPL. If you do not delete
39
** the provisions above, a recipient may use your version of this file under
40
** the terms of any one of the MPL, the GPL or the LGPL.
42
** ***** END LICENSE BLOCK ***** */
46
* 1. Enable rendering over 1.5 million Hangul syllables with
47
* UnBatang and other fonts made available by UN KoaungHi
50
#include "nsUCvKODll.h"
51
#include "nsUnicodeToJamoTTF.h"
53
#include "nsXPIDLString.h"
56
#include "nsISupportsUtils.h"
58
#include "nsIUnicodeDecoder.h"
59
#include "nsIServiceManagerUtils.h"
60
#include "nsICharsetConverterManager.h"
61
#include "nsICharRepresentable.h"
70
#include "jamoclusters.h"
72
// Constants for Hangul Jamo/syllable handling taken from Unicode 3.0
84
#define SCOUNT (LCOUNT * VCOUNT * TCOUNT)
85
#define SEND (SBASE + SCOUNT - 1)
91
#define IS_LC(wc) (LBASE <= (wc) && (wc) < VFILL)
92
#define IS_VO(wc) (VFILL <= (wc) && (wc) < TSTART)
93
#define IS_TC(wc) (TSTART <= (wc) && (wc) <= 0x11FF)
94
#define IS_JAMO(wc) (IS_LC(wc) || IS_VO(wc) || IS_TC(wc))
96
// Jamos used in modern precomposed syllables
97
#define IS_SYL_LC(wc) (LBASE <= (wc) && (wc) < LBASE + LCOUNT)
98
#define IS_SYL_VO(wc) (VBASE <= (wc) && (wc) < VBASE + VCOUNT)
99
#define IS_SYL_TC(wc) (TBASE < (wc) && (wc) <= TBASE + TCOUNT)
101
// Modern precomposed syllables.
102
#define IS_SYL(wc) (SBASE <= (wc) && (wc) <= SEND)
103
#define IS_SYL_WO_TC(wc) (((wc) - SBASE) % TCOUNT == 0)
104
#define IS_SYL_WITH_TC(wc) (((wc) - SBASE) % TCOUNT)
106
// Compose precomposed syllables out of L, V, and T.
107
#define SYL_FROM_LVT(l,v,t) (SBASE + \
108
(((l) - LBASE) * VCOUNT + (v) - VBASE) * TCOUNT + \
112
#define HTONE1 0x302E
113
#define HTONE2 0x302F
115
#define IS_TONE(wc) ((wc) == HTONE1 || (wc) == HTONE2)
117
// Below are constants for rendering with UnBatang-like fonts.
119
#define LC_TMPPOS 0xF000 // temp. block for leading consonants
120
#define VO_TMPPOS 0xF100 // temp. block for vowels
121
#define TC_TMPPOS 0xF200 // temp. block for trailing consonants
122
#define LC_OFFSET (LC_TMPPOS-LBASE)
123
#define VO_OFFSET (VO_TMPPOS-VFILL)
124
#define TC_OFFSET (TC_TMPPOS-TSTART)
126
// Jamo class of *temporary* code points in PUA for UnBatang-like fonts.
127
#define IS_LC_EXT(wc) ( ((wc) & 0xFF00) == LC_TMPPOS )
128
#define IS_VO_EXT(wc) ( ((wc) & 0xFF00) == VO_TMPPOS )
129
#define IS_TC_EXT(wc) ( ((wc) & 0xFF00) == TC_TMPPOS )
131
// Glyph code point bases for L,V, and T in UnBatang-like fonts
132
#define UP_LBASE 0xE000 // 0xE000 = Lfill, 0xE006 = Kiyeok
133
#define UP_VBASE 0xE300 // 0xE300 = Vfill, 0xE302 = Ah
134
#define UP_TBASE 0xE404 // 0xE400 = Tfill, 0xE404 = Kiyeok
136
// EUC-KR decoder for FillInfo.
137
static nsCOMPtr<nsIUnicodeDecoder> gDecoder = 0;
139
static inline void FillInfoRange (PRUint32* aInfo, PRUint32 aStart,
141
static nsresult JamoNormalize (const PRUnichar* aInSeq,
142
PRUnichar** aOutSeq, PRInt32* aLength);
143
static void JamosToExtJamos (PRUnichar* aInSeq, PRInt32* aLength);
144
static const JamoNormMap* JamoClusterSearch(JamoNormMap aKey,
145
const JamoNormMap* aClusters,
146
PRInt16 aClustersSize);
147
static nsresult FillInfoEUCKR (PRUint32 *aInfo, PRUint16 aHigh1,
150
static PRInt32 JamoNormMapComp (const JamoNormMap& p1,
151
const JamoNormMap& p2);
152
static PRInt16 JamoSrchReplace (const JamoNormMap* aCluster,
153
PRUint16 aSize, PRUnichar *aIn,
154
PRInt32* aLength, PRUint16 aOffset);
155
static nsresult GetDecoder (nsIUnicodeDecoder** aDecoder);
156
static nsresult ScanDecomposeSyllable (PRUnichar *aIn, PRInt32* aLength,
157
const PRInt32 aMaxLen);
159
//----------------------------------------------------------------------
160
// Class nsUnicodeToJamoTTF [implementation]
162
NS_IMPL_ISUPPORTS2(nsUnicodeToJamoTTF, nsIUnicodeEncoder, nsICharRepresentable)
165
nsUnicodeToJamoTTF::SetOutputErrorBehavior(PRInt32 aBehavior,
166
nsIUnicharEncoder *aEncoder,
169
if (aBehavior == kOnError_CallBack && aEncoder == nsnull)
170
return NS_ERROR_NULL_POINTER;
171
NS_IF_RELEASE(aEncoder);
172
mErrEncoder = aEncoder;
173
NS_IF_ADDREF(aEncoder);
175
mErrBehavior = aBehavior;
180
// constructor and destructor
182
nsUnicodeToJamoTTF::nsUnicodeToJamoTTF()
188
nsUnicodeToJamoTTF::~nsUnicodeToJamoTTF()
190
if (mJamos != nsnull && mJamos != mJamosStatic)
198
KO_CHAR_CLASS_SYL1, // modern precomposed syllable w/o TC (LV type syl.)
199
KO_CHAR_CLASS_SYL2, // modern precomposed syllable with TC (LVT type syl.)
200
KO_CHAR_CLASS_TONE, // Tone marks
201
KO_CHAR_CLASS_NOHANGUL, // Non-Hangul characters.
205
#define CHAR_CLASS(ch) \
206
(IS_LC(ch) ? KO_CHAR_CLASS_LC : \
207
IS_VO(ch) ? KO_CHAR_CLASS_VO : \
208
IS_TC(ch) ? KO_CHAR_CLASS_TC : \
210
(IS_SYL_WITH_TC(ch) ? KO_CHAR_CLASS_SYL2 : KO_CHAR_CLASS_SYL1) : \
211
IS_TONE(ch) ? KO_CHAR_CLASS_TONE : \
212
KO_CHAR_CLASS_NOHANGUL)
215
// Grapheme boundary checker : See UTR #29 and Unicode 3.2 section 3.11
216
const static PRBool gIsBoundary[KO_CHAR_CLASS_NUM][KO_CHAR_CLASS_NUM] =
218
{ 0, 0, 1, 0, 0, 0, 1 }, // L
219
{ 1, 0, 0, 1, 1, 0, 1 }, // V
220
{ 1, 1, 0, 1, 1, 0, 1 }, // T
221
{ 1, 0, 0, 1, 1, 0, 1 }, // S1
222
{ 1, 1, 0, 1, 1, 0, 1 }, // S2
223
{ 1, 1, 1, 1, 1, 0, 1 }, // M
224
{ 1, 1, 1, 1, 1, 0, 1 } // X
229
nsUnicodeToJamoTTF::Convert(const PRUnichar * aSrc,
230
PRInt32 * aSrcLength, char * aDest,
231
PRInt32 * aDestLength)
236
// This should never happen, but it happens under MS Windows, somehow...
237
if (mJamoCount > mJamosMaxLength)
239
NS_WARNING("mJamoCount > mJamoMaxLength on entering Convert()");
243
for (PRInt32 charOff = 0; charOff < *aSrcLength; charOff++)
245
PRUnichar ch = aSrc[charOff];
247
// Syllable boundary check. Ref. : Unicode 3.2 section 3.11
248
if (mJamoCount != 0 &&
249
gIsBoundary[CHAR_CLASS(mJamos[mJamoCount - 1])][CHAR_CLASS(ch)])
251
composeHangul(aDest);
254
// Ignore tone marks other than the first in a sequence of tone marks.
255
else if (mJamoCount != 0 && IS_TONE(mJamos[mJamoCount - 1]) && IS_TONE(ch))
258
composeHangul(aDest);
261
// skip over tone marks from the second on in a series.
262
while (IS_TONE(ch) && ++charOff < *aSrcLength)
267
mJamos[mJamoCount++] = ch;
274
if (mJamoCount == mJamosMaxLength)
277
if (mJamos == mJamosStatic)
279
mJamos = (PRUnichar *) PR_Malloc(sizeof(PRUnichar) * mJamosMaxLength);
281
return NS_ERROR_OUT_OF_MEMORY;
282
memcpy(mJamos, mJamosStatic, sizeof(PRUnichar) * mJamoCount);
286
mJamos = (PRUnichar *) PR_Realloc(mJamos,
287
sizeof(PRUnichar) * mJamosMaxLength);
289
return NS_ERROR_OUT_OF_MEMORY;
293
mJamos[mJamoCount++] = ch;
297
composeHangul(aDest);
299
*aDestLength = mByteOff;
305
nsUnicodeToJamoTTF::Finish(char* aDest, PRInt32* aDestLength)
309
composeHangul(aDest);
311
*aDestLength = mByteOff;
318
//================================================================
320
nsUnicodeToJamoTTF::Reset()
323
if (mJamos != nsnull && mJamos != mJamosStatic)
325
mJamos = mJamosStatic;
326
mJamosMaxLength = sizeof(mJamosStatic) / sizeof(PRUnichar);
327
memset(mJamos, sizeof(mJamosStatic), 0);
335
nsUnicodeToJamoTTF::GetMaxLength(const PRUnichar * aSrc, PRInt32 aSrcLength,
336
PRInt32 * aDestLength)
338
// a precomposed Hangul syllable can be decomposed into 3 Jamos, each of
339
// which takes 2bytes.
340
*aDestLength = aSrcLength * 6;
346
nsUnicodeToJamoTTF::FillInfo(PRUint32* aInfo)
348
FillInfoRange(aInfo, SBASE, SEND);
352
// Hangul Conjoining Jamos
353
for(i = 0x1100; i<= 0x1159; i++)
354
SET_REPRESENTABLE(aInfo, i);
355
SET_REPRESENTABLE(aInfo, 0x115f);
356
for(i = 0x1160; i <= 0x11a2; i++)
357
SET_REPRESENTABLE(aInfo, i);
358
for(i = 0x11a8; i <= 0x11f9; i++)
359
SET_REPRESENTABLE(aInfo, i);
362
SET_REPRESENTABLE(aInfo, HTONE1);
363
SET_REPRESENTABLE(aInfo, HTONE2);
365
// UnPark fonts have US-ASCII chars.
366
for(i=0x20; i < 0x7f; i++)
367
SET_REPRESENTABLE(aInfo, i);
371
// UnPark fonts have Hanjas and symbols defined in KS X 1001 as well.
373
// XXX: Do we need to exclude Cyrillic, Greek letters and some Latin letters
374
// included in KS X 1001 as 'symbol characters'?
375
// KS X 1001 has only a subset of Greek and Cyrillic alphabets and
376
// Latin letters with diacritic marks so that including them may
377
// result in ransom-note like effect if it is listed *before*
378
// any genuine Greek/Russian/Latin fonts in CSS.
380
// Lead byte range for symbol chars. in EUC-KR : 0xA1 - 0xAF
381
rv = FillInfoEUCKR(aInfo, 0xA1, 0xAF);
382
NS_ENSURE_SUCCESS(rv, rv);
384
// Lead byte range for Hanja in EUC-KR : 0xCA - 0xFD.
385
return FillInfoEUCKR(aInfo, 0xCA, 0xFD);
389
* Copied from mslvt.otp by Jin-Hwan Cho <chofchof@ktug.or.kr>.
390
* Extended by Jungshik Shin <jshin@mailaps.org> to support
391
* additional Jamo clusters not encoded in U+1100 Jamo block
392
* as precomposed Jamo clusters.
393
* Corrected by Won-Kyu Park <wkpark@chem.skku.ac.kr>.
394
* See http://www.ktug.or.kr for its use in Lambda and swindow/SFontTTF.cpp at
395
* http://www.yudit.org for its use in Yudit.
396
* A patch with the same set of tables was submitted for
397
* inclusion in Pango (http://www.pango.org).
401
* Mapping from LC code points to glyph indices in UnPark fonts.
402
* UnPark fonts have the same glyph arrangement as Ogulim font, but
403
* they have them in BMP PUA (beginning at U+E000) to be proper Unicode
404
* fonts unlike Ogulim font with Jamo glyphs in CJK ideograph code points.
405
* Glyph indices for 90 LCs encoded in U+1100 block are followed by 6 reserved
406
* code points and glyph indices for 34 additional consonant clusters
407
* (not assigned code points of their own) for which separate glyphs exist in
409
* The first element is for Kiyeok and UP_LBASE is set to Lfill glyph(0xe000)
410
* so that the first element is '1' to map it to glyph for Kiyeok at 0xe006.
411
* (there are six glyphs for each LC in UnPark fonts.)
413
const static PRUint8 gUnParkLcGlyphMap[130] = {
414
1, 2, 4, 12, 14, 20, 36, 42, 46, 62, 70, 85,100,102,108,113,
415
114,116,120, 5, 6, 7, 8, 13, 23, 26, 34, 35, 39, 41, 43, 44,
416
45, 47, 48, 49, 50, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 65,
417
66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83,
418
84, 86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99,101,104,105,
419
106,107,109,110,111,112,117,119,122,123, 0, 0, 0, 0, 0, 0,
420
3, 9, 10, 11, 15, 16, 17, 18, 19, 21, 22, 24, 25, 27, 28, 29,
421
30, 31, 32, 33, 37, 38, 40, 53, 56, 59, 71, 88, 98,103,115,118,
426
* Mapping from vowel code points to glyph indices in UnPark/Oxxx font.
427
* Glyphs for 28 additional vowel clusters (not given separate
428
* code points in U+1100 block) are available in O*ttf fonts.
429
* Total count: 95 = 1(Vfill) + 66 (in U+1100 block) + 28 (extra.)
431
const static PRUint8 gUnParkVoGlyphMap[95] = {
432
0, 1, 5, 6, 10, 11, 15, 16, 20, 21, 22, 23, 33, 34, 43, 46,
433
48, 52, 54, 64, 71, 73, 2, 3, 7, 8, 12, 13, 14, 18, 19, 26,
434
27, 29, 30, 32, 37, 38, 40, 41, 42, 44, 45, 47, 50, 51, 55, 57,
435
58, 59, 60, 62, 63, 69, 70, 72, 74, 75, 80, 83, 85, 87, 88, 90,
436
92, 93, 94, 4, 9, 17, 24, 25, 28, 31, 35, 36, 39, 49, 53, 56,
437
61, 65, 66, 67, 68, 76, 77, 78, 79, 81, 82, 84, 86, 89, 91
441
* Mapping from TC code points to glyph indices in UnPark/Oxxx font.
442
* glyphs for 59 additional trailing consonant clusters (not given separate
443
* code points in U+1100 blocks) are available in O*ttf fonts.
444
* Total count: 141 = 82 (in U+1100 block) + 59 (extra.)
445
* The first element is Kiyeok and UP_TBASE is set to 0xE404 (Kiyeok).
447
const static PRUint8 gUnParkTcGlyphMap[141] = {
448
0, 1, 5, 10, 17, 20, 21, 32, 33, 42, 46, 52, 57, 58, 59, 63,
449
78, 84, 91, 98,109,123,127,128,129,130,135, 3, 6, 11, 13, 15,
450
16, 19, 22, 25, 35, 37, 38, 39, 40, 43, 44, 48, 50, 51, 53, 54,
451
56, 60, 64, 67, 69, 71, 72, 73, 75, 76, 77, 80, 88, 89, 90, 92,
452
93, 94, 96,106,110,111,114,115,117,119,120,131,134,136,137,138,
453
139,140, 2, 4, 7, 8, 9, 12, 14, 18, 23, 24, 26, 27, 28, 29,
454
30, 31, 34, 36, 41, 45, 47, 49, 55, 61, 62, 65, 66, 68, 70, 74,
455
79, 81, 82, 83, 85, 86, 87, 95, 97, 99,100,101,102,103,104,105,
456
107,108,112,113,116,118,121,122,124,125,126,132,133
459
/* Which of six glyphs to use for choseong(L) depends on
460
the following vowel and whether or not jongseong(T) is present
461
in a syllable. Note that The first(0th) element is for Vfill.
463
shape Number of choseong(L) w.r.t. jungseong(V) without jongseong(T)
465
95 = 1(Vfill) + 66 + 28 (extra)
468
const static PRUint8 gUnParkVo2LcMap[95] = {
469
0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 1, 2, 2, 1,
470
1, 1, 2, 2, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
471
1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1,
472
1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1, 1, 1, 0, 2, 1,
473
2, 1, 2, 1, 1, 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
474
2, 1, 1, 1, 2, 1, 0, 0, 0, 1, 1, 1, 0, 2, 2
477
/* shape Number of choseong(L) w.r.t. jungseong(V) with jongseong(T) */
479
const static PRUint8 gUnParkVo2LcMap2[95] = {
480
3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 4, 4, 4, 5, 5, 4,
481
4, 4, 5, 5, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
482
4, 4, 5, 5, 4, 4, 4, 5, 4, 4, 4, 4, 4, 5, 4, 4,
483
4, 4, 4, 5, 4, 5, 5, 4, 3, 3, 4, 4, 4, 3, 5, 4,
484
5, 4, 5, 4, 4, 3, 4, 4, 4, 5, 4, 4, 4, 4, 4, 4,
485
5, 4, 4, 4, 5, 4, 3, 3, 3, 4, 4, 4, 3, 5, 5
488
/* shape Number of jongseong(T) w.r.t. jungseong(V)
489
Which of four glyphs to use for jongseong(T) depends on
490
the preceding vowel. */
492
const static PRUint8 gUnParkVo2TcMap[95] = {
493
3, 0, 2, 0, 2, 1, 2, 1, 2, 3, 0, 2, 1, 3, 3, 1,
494
2, 1, 3, 3, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
495
2, 2, 3, 3, 0, 2, 1, 3, 1, 0, 2, 1, 2, 3, 0, 1,
496
2, 1, 2, 3, 1, 3, 3, 1, 2, 2, 1, 1, 1, 1, 3, 1,
497
3, 1, 3, 0, 1, 0, 0, 0, 2, 3, 0, 2, 1, 1, 2, 2,
498
3, 0, 0, 0, 3, 0, 2, 2, 2, 1, 0, 1, 2, 1, 1
502
nsUnicodeToJamoTTF::composeHangul(char* aResult)
504
PRInt32 length = mJamoCount, i;
509
NS_WARNING("composeHangul() : zero length string comes in ! \n");
510
return NS_ERROR_UNEXPECTED;
514
return NS_ERROR_NULL_POINTER;
516
// Put Hangul tone mark first as it should be to the left of
517
// the character it follows.
518
// XXX : What should we do when a tone mark comes by itself?
520
if (IS_TONE(mJamos[length - 1]))
522
aResult[mByteOff++] = PRUint8(mJamos[length - 1] >> 8);
523
aResult[mByteOff++] = PRUint8(mJamos[length - 1] & 0xff);
528
// no more processing is necessary for precomposed modern Hangul syllables.
529
if (length == 1 && IS_SYL(mJamos[0]))
531
aResult[mByteOff++] = PRUint8(mJamos[0] >> 8);
532
aResult[mByteOff++] = PRUint8(mJamos[0] & 0xff);
536
if (CHAR_CLASS(mJamos[0]) == KO_CHAR_CLASS_NOHANGUL)
538
NS_ASSERTION(length == 1, "A non-Hangul should come by itself !!\n");
539
aResult[mByteOff++] = PRUint8(mJamos[0] >> 8);
540
aResult[mByteOff++] = PRUint8(mJamos[0] & 0xff);
544
nsXPIDLString buffer;
546
rv = JamoNormalize(mJamos, getter_Copies(buffer), &length);
548
// safe to cast away const.
549
PRUnichar* text = buffer.BeginWriting();
550
NS_ENSURE_SUCCESS(rv, rv);
552
text += RenderAsPrecompSyllable(text, &length, aResult);
557
// convert to extended Jamo sequence
558
JamosToExtJamos(text, &length);
561
// Check if not in LV or LVT form after the conversion
562
if (length != 2 && length != 3 ||
563
(!IS_LC_EXT(text[0]) || !IS_VO_EXT(text[1]) ||
564
(length == 3 && !IS_TC_EXT(text[2]))))
567
// Now that text[0..2] are identified as L,V, and T, it's safe to
568
// shift them back to U+1100 block although their ranges overlap each other.
570
text[0] -= LC_OFFSET;
571
text[1] -= VO_OFFSET;
573
text[2] -= TC_OFFSET;
577
text[0] = gUnParkLcGlyphMap[text[0] - LBASE] * 6 +
578
gUnParkVo2LcMap[text[1] - VFILL] + UP_LBASE;
579
text[1] = gUnParkVoGlyphMap[text[1] - VFILL] * 2 + UP_VBASE;
583
text[0] = gUnParkLcGlyphMap[text[0] - LBASE] * 6 +
584
gUnParkVo2LcMap2[text[1] - VFILL] + UP_LBASE;
585
text[2] = gUnParkTcGlyphMap[text[2] - TSTART] * 4 +
586
gUnParkVo2TcMap[text[1] - VFILL] + UP_TBASE;
587
text[1] = gUnParkVoGlyphMap[text[1] - VFILL] * 2 + UP_VBASE + 1;
590
// Xft doesn't like blank glyphs at code points other than listed in
591
// the blank glyph list. Replace Lfill glyph code points of UnPark
592
// fonts with standard LFILL code point (U+115F).
594
if (UP_LBASE <= text[0] && text[0] < UP_LBASE + 6)
597
// The same is true of glyph code points corresponding to VFILL
598
// in UnBatang-like fonts. VFILL is not only blank but also non-advancing
599
// so that we can just skip it.
600
if (UP_VBASE <= text[1] && text[1] < UP_VBASE + 2)
607
for (i = 0 ; i < length; i++)
609
aResult[mByteOff++] = PRUint8(text[i] >> 8);
610
aResult[mByteOff++] = PRUint8(text[i] & 0xff);
616
/* If jamo sequence is not convertible to a jamo cluster,
617
* just enumerate stand-alone jamos. Prepend V and T with Lf.
619
* XXX: It might be better to search for a sub-sequence (not just at the
620
* beginning of a cluster but also in the middle or at the end.)
621
* that can be rendered as precomposed and render it as such and enumerate
622
* jamos in the rest. This approach is useful when a simple Xkb-based input
627
for (i = 0; i < length; i++)
629
PRUnichar wc=0, wc2=0;
630
/* skip Lfill and Vfill if they're not the sole char. in a cluster */
632
(text[i] - LC_OFFSET == LFILL || text[i] - VO_OFFSET == VFILL))
634
else if (IS_LC_EXT (text[i]))
635
wc = gUnParkLcGlyphMap[text[i] - LC_OFFSET - LBASE] * 6 + UP_LBASE;
638
/* insert Lfill glyph to advance cursor pos. for V and T */
640
/* don't have to draw Vfill. Drawing Lfill is sufficient. */
641
if (text[i] - VO_OFFSET != VFILL)
642
wc2 = IS_VO_EXT (text[i]) ?
643
gUnParkVoGlyphMap[text[i] - VO_OFFSET - VFILL] * 2 + UP_VBASE:
644
gUnParkTcGlyphMap[text[i] - TC_OFFSET - TSTART] * 4 + UP_TBASE + 3;
646
aResult[mByteOff++] = PRUint8(wc >> 8);
647
aResult[mByteOff++] = PRUint8(wc & 0xff);
651
aResult[mByteOff++] = wc2 >> 8;
652
aResult[mByteOff++] = wc2 & 0xff;
660
nsUnicodeToJamoTTF::RenderAsPrecompSyllable (PRUnichar* aSrc,
661
PRInt32* aSrcLength, char* aResult)
666
if (*aSrcLength == 3 && IS_SYL_LC(aSrc[0]) && IS_SYL_VO(aSrc[1]) &&
669
else if (*aSrcLength == 2 && IS_SYL_LC(aSrc[0]) && IS_SYL_VO(aSrc[1]))
678
wc = SYL_FROM_LVT(aSrc[0], aSrc[1], aSrc[2]);
680
wc = SYL_FROM_LVT(aSrc[0], aSrc[1], TBASE);
681
aResult[mByteOff++] = PRUint8(wc >> 8);
682
aResult[mByteOff++] = PRUint8(wc & 0xff);
685
*aSrcLength -= composed;
690
// Fill up Cmap array quickly for a rather large range.
692
/**
 * Mark every code point in [aStart, aEnd] as representable in the bitmap
 * aInfo, which holds 32 code points per PRUint32 word.
 *
 * Works a word at a time so that large ranges are filled quickly.  A range
 * that starts and ends inside the same word only sets the bits between the
 * two ends; an empty range (aStart > aEnd) is ignored.
 */
inline void FillInfoRange(PRUint32* aInfo, PRUint32 aStart, PRUint32 aEnd)
{
  if (aStart > aEnd)
    return;

  const PRUint32 kAllBits = 0xFFFFFFFFU;
  PRUint32 word = aStart >> 5;
  const PRUint32 lastWord = aEnd >> 5;

  // Bits from the start position up to bit 31, and from bit 0 up to the end
  // position.  Both shift counts stay within 0..31.
  const PRUint32 headMask = kAllBits << (aStart & 0x1f);
  const PRUint32 tailMask = kAllBits >> (31 - (aEnd & 0x1f));

  if (word == lastWord)
  {
    // Both ends are in one word: only the overlap belongs to the range.
    aInfo[word] |= headMask & tailMask;
    return;
  }

  aInfo[word++] |= headMask;
  for ( ; word < lastWord; word++)
    aInfo[word] |= kAllBits;
  aInfo[lastWord] |= tailMask;
}
709
#define IS_GR94(x) (0xA0 < (x) && (x) < 0xFF)
711
// Given a range [aHigh1, aHigh2] in high bytes of EUC-KR, convert
712
// rows of 94 characters in the range (row by row) to Unicode and set
713
// representability if the result is not 0xFFFD (Unicode replacement char.).
715
nsresult FillInfoEUCKR (PRUint32 *aInfo, PRUint16 aHigh1, PRUint16 aHigh2)
717
char row[ROWLEN * 2];
718
PRUnichar dest[ROWLEN];
721
NS_ENSURE_TRUE(aInfo, NS_ERROR_NULL_POINTER);
722
NS_ENSURE_TRUE(IS_GR94(aHigh1) && IS_GR94(aHigh2), NS_ERROR_INVALID_ARG);
724
nsCOMPtr<nsIUnicodeDecoder> decoder;
725
rv = GetDecoder(getter_AddRefs(decoder));
726
NS_ENSURE_SUCCESS(rv,rv);
728
for (PRUint16 i = aHigh1 ; i <= aHigh2; i++)
731
// handle a row of 94 char. at a time.
732
for (j = 0 ; j < ROWLEN; j++)
734
row[j * 2] = char(i);
735
row[j * 2 + 1] = char(j + 0xa1);
737
PRInt32 srcLen = ROWLEN * 2;
738
PRInt32 destLen = ROWLEN;
739
rv = decoder->Convert(row, &srcLen, dest, &destLen);
740
NS_ENSURE_SUCCESS(rv, rv);
742
// set representability according to the conversion result.
743
for (j = 0 ; j < ROWLEN; j++)
744
if (dest[j] != 0xFFFD)
745
SET_REPRESENTABLE(aInfo, dest[j]);
751
/**
 * Hand out an AddRef'ed pointer to the shared EUC-KR decoder, creating it
 * through the charset converter manager the first time it is requested.
 *
 * @param aDecoder  receives the decoder; the caller owns the reference.
 * @return NS_OK, or the failure code from obtaining the service or decoder.
 */
nsresult GetDecoder(nsIUnicodeDecoder** aDecoder)
{
  if (!gDecoder)
  {
    nsresult rv;
    nsCOMPtr<nsICharsetConverterManager> converterManager =
      do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
    NS_ENSURE_SUCCESS(rv, rv);

    rv = converterManager->GetUnicodeDecoderRaw("EUC-KR",
                                                getter_AddRefs(gDecoder));
    NS_ENSURE_SUCCESS(rv, rv);
  }

  *aDecoder = gDecoder.get();
  NS_ADDREF(*aDecoder);
  return NS_OK;
}
774
/**
 * Three-way comparison of two cluster entries by their seq[] elements,
 * most significant first.
 *
 * @return the difference of the first pair of elements that differ, or 0
 *         when all three elements are equal.
 */
PRInt32 JamoNormMapComp (const JamoNormMap& p1, const JamoNormMap& p2)
{
  for (PRInt32 pos = 0; pos < 3; pos++)
  {
    if (p1.seq[pos] != p2.seq[pos])
      return p1.seq[pos] - p2.seq[pos];
  }
  return 0;
}
784
/**
 * Look up aKey in the cluster table aClusters (aClustersSize entries).
 *
 * The table is halved around its middle entry for as long as at least nine
 * candidates remain; the remaining handful is scanned linearly.
 *
 * @return pointer to the matching entry, or nsnull if there is none or the
 *         arguments are invalid.
 */
const JamoNormMap* JamoClusterSearch (JamoNormMap aKey,
                                      const JamoNormMap* aClusters,
                                      PRInt16 aClustersSize)
{
  if (!aClusters || aClustersSize <= 0)
  {
    NS_WARNING("aClustersSize <= 0 || !aClusters");
    return nsnull;
  }

  const JamoNormMap* window = aClusters;
  PRInt16 remaining = aClustersSize;

  // Bisect: each step keeps either the part below or above the middle entry.
  while (remaining >= 9)
  {
    const PRUint16 middle = (remaining - 1) / 2;
    const PRInt32 order = JamoNormMapComp (aKey, window[middle]);

    if (order == 0)
      return window + middle;

    if (order < 0)
    {
      remaining = PRInt16(middle);
    }
    else
    {
      window += middle + 1;
      remaining = PRInt16(remaining - 1 - middle);
    }
  }

  // Few entries left: a plain scan is enough.
  for (PRInt16 n = 0; n < remaining; n++)
  {
    if (JamoNormMapComp (aKey, window[n]) == 0)
      return window + n;
  }
  return nsnull;
}
818
* look up cluster array for all possible matching Jamo sequences
819
* in 'aIn' and replace all matching substrings with match->liga in place.
820
* returns the difference in aLength between before and after the replacement.
821
* XXX : 1. Do we need caching here?
825
PRInt16 JamoSrchReplace (const JamoNormMap* aClusters,
826
PRUint16 aClustersSize, PRUnichar* aIn,
827
PRInt32* aLength, PRUint16 aOffset)
829
PRInt32 origLen = *aLength;
831
// non-zero third element => clusterLen = 3. otherwise, it's 2.
832
PRUint16 clusterLen = aClusters[0].seq[2] ? 3 : 2;
834
PRInt32 start = 0, end;
836
// identify the substring of aIn with values in [aOffset, aOffset + 0x100).
837
while (start < origLen && (aIn[start] & 0xff00) != aOffset)
839
for (end=start; end < origLen && (aIn[end] & 0xff00) == aOffset; ++end);
841
// now process the substring aIn[start] .. aIn[end]
842
// we don't need a separate range check here because the one in
843
// for-loop is sufficient.
844
for (PRInt32 i = start; i <= end - clusterLen; i++)
846
const JamoNormMap *match;
849
// cluster array is made up of PRUint8's to save memory
850
// and we have to subtract aOffset from the input before looking it up.
851
key.seq[0] = aIn[i] - aOffset;
852
key.seq[1] = aIn[i + 1] - aOffset;
853
key.seq[2] = clusterLen == 3 ? (aIn[i + 2] - aOffset) : 0;
855
match = JamoClusterSearch (key, aClusters, aClustersSize);
859
aIn[i] = match->liga + aOffset; // add back aOffset.
861
// move up the 'tail'
862
for (PRInt32 j = i + clusterLen ; j < *aLength; j++)
863
aIn[j - clusterLen + 1] = aIn[j];
865
end -= (clusterLen - 1);
866
*aLength -= (clusterLen - 1);
870
return *aLength - origLen;
874
/**
 * Find the first modern precomposed syllable in aIn and expand it in place
 * into its leading consonant, vowel and (if present) trailing consonant.
 *
 * @param aIn        buffer holding *aLength characters.
 * @param aLength    in: number of characters; out: grown by 1 (LV) or
 *                   2 (LVT) when a syllable was expanded.
 * @param maxLength  capacity of aIn; must leave room for two more characters.
 * @return NS_ERROR_INVALID_ARG on bad arguments, NS_OK otherwise (also when
 *         no syllable is present).
 */
nsresult ScanDecomposeSyllable(PRUnichar* aIn, PRInt32 *aLength,
                               const PRInt32 maxLength)
{
  if (!aIn || !aLength || *aLength < 1 || maxLength < *aLength + 2)
    return NS_ERROR_INVALID_ARG;

  PRInt32 i = 0;
  while (i < *aLength && !IS_SYL(aIn[i]))
    i++;

  // Convert a precomposed syllable to an LV or LVT sequence.
  if (i < *aLength)
  {
    const PRUint16 hasTC = IS_SYL_WITH_TC(aIn[i]) ? 1 : 0;
    const PRUnichar sylIndex = aIn[i] - SBASE;

    // Shift the characters after the syllable to open 1 or 2 slots.
    // memmove counts bytes, so the character count must be scaled by the
    // character size; otherwise only half of the tail is moved.
    memmove(aIn + i + 2 + hasTC, aIn + i + 1,
            (*aLength - i - 1) * sizeof(PRUnichar));

    aIn[i] = sylIndex / (TCOUNT * VCOUNT) + LBASE;
    aIn[i + 1] = (sylIndex / TCOUNT) % VCOUNT + VBASE;
    if (hasTC)
      aIn[i + 2] = sylIndex % TCOUNT + TBASE;

    *aLength += 1 + hasTC;
  }

  return NS_OK;
}
903
* 1. Normalize (regularize) a jamo sequence to the regular
904
* syllable form defined in Unicode 3.2 section 3.11 to the extent
905
* that it's useful in rendering by render_func's().
907
* 2. Replace a compatibly decomposed Jamo sequence (unicode 2.0
908
* definition) with a 'precomposed' Jamo cluster (with codepoint
909
* of its own in U+1100 block). For instance, a seq.
910
* of U+1100, U+1100 is replaced by U+1101. It actually does
911
* more than Unicode 2.0 decomposition map suggests.
912
* For a Jamo cluster made up of three basic Jamos
913
* (e.g. U+1133 : Sios, Piup, Kiyeok), not only
914
* a sequence of Sios(U+1109), Piup(U+1107) and
915
* Kiyeok(U+1100) but also two more sequences,
916
* {U+1132(Sios-Pieup), U+1100(Kiyeok) and {Sios(U+1109),
917
* U+111E(Piup-Kiyeok)} are mapped to U+1133.
919
* 3. the result is returned in a newly malloced
920
* PRUnichar*. Callers have to delete it, which
921
* is taken care of by using nsXPIDLString in caller.
925
nsresult JamoNormalize(const PRUnichar* aInSeq, PRUnichar** aOutSeq,
928
if (!aInSeq || !aOutSeq || *aLength <= 0)
929
return NS_ERROR_INVALID_ARG;
931
// 4 more slots : 2 for Lf and Vf, 2 for decomposing a modern precomposed
932
// syllable into a Jamo sequence of LVT?.
933
*aOutSeq = new PRUnichar[*aLength + 4];
935
return NS_ERROR_OUT_OF_MEMORY;
936
memcpy(*aOutSeq, aInSeq, *aLength * sizeof(PRUnichar));
938
nsresult rv = ScanDecomposeSyllable(*aOutSeq, aLength, *aLength + 4);
939
NS_ENSURE_SUCCESS(rv, rv);
941
// LV or LVT : no need to search for and replace jamo sequences
942
if ((*aLength == 2 && IS_LC((*aOutSeq)[0]) && IS_VO((*aOutSeq)[1])) ||
943
(*aLength == 3 && IS_LC((*aOutSeq)[0]) && IS_VO((*aOutSeq)[1]) &&
944
IS_TC((*aOutSeq)[2])))
947
// remove Lf in LfL sequence that may occur in an interim cluster during
948
// a simple Xkb-based input.
949
if ((*aOutSeq)[0] == LFILL && *aLength > 1 && IS_LC((*aOutSeq)[1]))
951
memmove (*aOutSeq, *aOutSeq + 1, (*aLength - 1) * sizeof(PRUnichar));
957
JamoSrchReplace (gJamoClustersGroup1,
958
sizeof(gJamoClustersGroup1) / sizeof(gJamoClustersGroup1[0]),
959
*aOutSeq, aLength, LBASE);
960
JamoSrchReplace (gJamoClustersGroup234,
961
sizeof(gJamoClustersGroup234) / sizeof(gJamoClustersGroup234[0]),
962
*aOutSeq, aLength, LBASE);
965
// prepend a leading V with Lf
966
if (IS_VO((*aOutSeq)[0]))
968
memmove(*aOutSeq + 1, *aOutSeq, *aLength * sizeof(PRUnichar));
969
(*aOutSeq)[0] = LFILL;
972
/* prepend a leading T with LfVf */
973
else if (IS_TC((*aOutSeq)[0]))
975
memmove (*aOutSeq + 2, *aOutSeq, *aLength * sizeof(PRUnichar));
976
(*aOutSeq)[0] = LFILL;
977
(*aOutSeq)[1] = VFILL;
984
/* JamosToExtJamos() :
985
* 1. shift jamo sequences to three disjoint code blocks in
986
* PUA (0xF000 for LC, 0xF100 for VO, 0xF200 for TC).
987
* 2. replace a jamo sequence with a precomposed extended
988
* cluster jamo code point in PUA
989
* 3. this replacement is done 'in place'
993
void JamosToExtJamos (PRUnichar* aInSeq, PRInt32* aLength)
995
// translate jamo code points to temporary code points in PUA
996
for (PRInt32 i = 0; i < *aLength; i++)
998
if (IS_LC(aInSeq[i]))
999
aInSeq[i] += LC_OFFSET;
1000
else if (IS_VO(aInSeq[i]))
1001
aInSeq[i] += VO_OFFSET;
1002
else if (IS_TC(aInSeq[i]))
1003
aInSeq[i] += TC_OFFSET;
1006
// LV or LVT : no need to search for and replace jamo sequences
1007
if ((*aLength == 2 && IS_LC_EXT(aInSeq[0]) && IS_VO_EXT(aInSeq[1])) ||
1008
(*aLength == 3 && IS_LC_EXT(aInSeq[0]) && IS_VO_EXT(aInSeq[1]) &&
1009
IS_TC_EXT(aInSeq[2])))
1012
// replace a sequence of Jamos with the corresponding precomposed
1013
// Jamo cluster in PUA
1015
JamoSrchReplace (gExtLcClustersGroup1,
1016
sizeof (gExtLcClustersGroup1) / sizeof (gExtLcClustersGroup1[0]),
1017
aInSeq, aLength, LC_TMPPOS);
1018
JamoSrchReplace (gExtLcClustersGroup2,
1019
sizeof (gExtLcClustersGroup2) / sizeof (gExtLcClustersGroup2[0]),
1020
aInSeq, aLength, LC_TMPPOS);
1021
JamoSrchReplace (gExtVoClustersGroup1,
1022
sizeof (gExtVoClustersGroup1) / sizeof (gExtVoClustersGroup1[0]),
1023
aInSeq, aLength, VO_TMPPOS);
1024
JamoSrchReplace (gExtVoClustersGroup2,
1025
sizeof (gExtVoClustersGroup2) / sizeof (gExtVoClustersGroup2[0]),
1026
aInSeq, aLength, VO_TMPPOS);
1027
JamoSrchReplace (gExtTcClustersGroup1,
1028
sizeof (gExtTcClustersGroup1) / sizeof (gExtTcClustersGroup1[0]),
1029
aInSeq, aLength, TC_TMPPOS);
1030
JamoSrchReplace (gExtTcClustersGroup2,
1031
sizeof (gExtTcClustersGroup2) / sizeof (gExtTcClustersGroup2[0]),
1032
aInSeq, aLength, TC_TMPPOS);