~macslow/nux/nux.fix-839476

« back to all changes in this revision

Viewing changes to Nux/TextView/Unicode.cpp

  • Committer: Neil Jagdish Patel
  • Date: 2010-09-01 22:11:16 UTC
  • Revision ID: neil.patel@canonical.com-20100901221116-4hb351fcg6s5nka0
Initial Nux integration

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*
 
2
 * Copyright 2010 Inalogic Inc.
 
3
 *
 
4
 * This program is free software: you can redistribute it and/or modify it 
 
5
 * under the terms of the GNU Lesser General Public License version 3, as
 
6
 * published by the  Free Software Foundation.
 
7
 *
 
8
 * This program is distributed in the hope that it will be useful, but 
 
9
 * WITHOUT ANY WARRANTY; without even the implied warranties of 
 
10
 * MERCHANTABILITY, SATISFACTORY QUALITY or FITNESS FOR A PARTICULAR 
 
11
 * PURPOSE.  See the applicable version of the GNU Lesser General Public 
 
12
 * License for more details.
 
13
 * 
 
14
 * You should have received a copy of both the GNU Lesser General Public 
 
15
 * License version 3 along with this program.  If not, see 
 
16
 * <http://www.gnu.org/licenses/>
 
17
 *
 
18
 * Authored by: Jay Taoko <jay.taoko_AT_gmail_DOT_com>
 
19
 *
 
20
 */
 
21
 
 
22
 
 
23
#include "Nux.h"
 
24
#include "Unicode.h"
 
25
 
 
26
NAMESPACE_BEGIN_GUI
 
27
 
 
28
//
 
29
//      utf8_to_utf32
 
30
//
 
31
//      Converts a single codepoint in the specified UTF-8 stream of text
 
32
//      into a UTF-32 value
 
33
//
 
34
//      Illegal sequences are converted to the unicode replacement character
 
35
//      
 
36
//      utf8str         - [in]   buffer containing UTF-8 text
 
37
//      utf8len         - [in]   number of code-units (bytes) available in buffer
 
38
//      pch32           - [out]  single UTF-32 value
 
39
//
 
40
//      Returns number of bytes processed from utf8str
 
41
//
 
42
size_t utf8_to_utf32(t_UTF8 *utf8str, size_t utf8len, t_UTF32 *pch32)
 
43
{
 
44
        t_UTF8   ch       = *utf8str++;
 
45
        t_UTF32  val32    = 0;  
 
46
        size_t trailing = 0;
 
47
        size_t len      = 1;
 
48
        size_t i;
 
49
        
 
50
        static t_UTF32 nonshortest[] = 
 
51
        { 
 
52
                0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff 
 
53
        };
 
54
 
 
55
        // validate parameters
 
56
        if(utf8str == 0 || utf8len <= 0 || pch32 == 0)
 
57
                return 0;
 
58
 
 
59
        // look for plain ASCII first as this is most likely
 
60
        if(ch < 0x80)
 
61
        {
 
62
                *pch32 = (t_UTF32)ch;
 
63
                return 1;
 
64
        }
 
65
        // LEAD-byte of 2-byte seq: 110xxxxx 10xxxxxx
 
66
        else if((ch & 0xE0) == 0xC0)                    
 
67
        {
 
68
                trailing = 1;
 
69
                val32    = ch & 0x1F;
 
70
        }
 
71
        // LEAD-byte of 3-byte seq: 1110xxxx 10xxxxxx 10xxxxxx
 
72
        else if((ch & 0xF0) == 0xE0)    
 
73
        {
 
74
                trailing = 2;
 
75
                val32    = ch & 0x0F;
 
76
        }
 
77
        // LEAD-byte of 4-byte seq: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 
78
        else if((ch & 0xF8) == 0xF0)    
 
79
        {
 
80
                trailing = 3;
 
81
                val32    = ch & 0x07;
 
82
        }
 
83
        // ILLEGAL 5-byte seq: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 
84
        else if((ch & 0xFC) == 0xF8)    
 
85
        {
 
86
                // range-checking the t_UTF32 result will catch this
 
87
                trailing = 4;
 
88
                val32    = ch & 0x03;
 
89
        }
 
90
        // ILLEGAL 6-byte seq: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 
91
        else if((ch & 0xFE) == 0xFC)    
 
92
        {
 
93
                // range-checking the t_UTF32 result will catch this
 
94
                trailing = 5;
 
95
                val32    = ch & 0x01;
 
96
        }
 
97
        // ILLEGAL continuation (trailing) byte by itself
 
98
        else if((ch & 0xC0) == 0x80)
 
99
        {
 
100
                *pch32 = UNI_REPLACEMENT_CHAR;
 
101
                return 1;
 
102
        }
 
103
        // any other ILLEGAL form.
 
104
        else                                                    
 
105
        {
 
106
                *pch32 = UNI_REPLACEMENT_CHAR;
 
107
                return 1;
 
108
        }
 
109
 
 
110
        // process trailing bytes
 
111
        for(i = 0; i < trailing && len < utf8len; i++)
 
112
        {
 
113
                ch = *utf8str++;
 
114
 
 
115
                // Valid trail-byte: 10xxxxxx
 
116
                if((ch & 0xC0) == 0x80)
 
117
                {
 
118
                        val32 = (val32 << 6) + (ch & 0x7f);
 
119
                        len++;
 
120
                }
 
121
                // Anything else is an error
 
122
                else
 
123
                {
 
124
                        *pch32 = UNI_REPLACEMENT_CHAR;
 
125
                        return len;
 
126
                }
 
127
        }
 
128
 
 
129
        // did we decode a full utf-8 sequence?
 
130
        if(val32 < nonshortest[trailing] || i != trailing)
 
131
                *pch32 = UNI_REPLACEMENT_CHAR;
 
132
        else
 
133
                *pch32 = val32;
 
134
 
 
135
        return len;
 
136
}
 
137
 
 
138
//
 
139
//      utf32_to_utf8
 
140
//
 
141
//      Converts the specified UTF-32 value to UTF-8
 
142
//
 
143
//      ch32            - [in]          single utf-32 value
 
144
//      utf8str         - [out]         buffer to receive UTF-8 text
 
145
//      utf8len         - [in]          size of utf8 buffer in bytes
 
146
//      
 
147
//      Returns number of bytes stored in utf8str
 
148
//
 
149
size_t utf32_to_utf8(t_UTF8 *utf8str, size_t utf8len, t_UTF32 ch32)
 
150
{
 
151
        size_t len = 0;
 
152
 
 
153
        // validate parameters
 
154
        if(utf8str == 0 || utf8len == 0)
 
155
                return 0;
 
156
 
 
157
        // ASCII is the easiest
 
158
        if(ch32 < 0x80)
 
159
        {
 
160
                *utf8str = (t_UTF8)ch32;
 
161
                return 1;
 
162
        }
 
163
 
 
164
        // make sure we have a legal utf32 char
 
165
        if(ch32 > UNI_MAX_LEGAL_UTF32)
 
166
                ch32 = UNI_REPLACEMENT_CHAR;
 
167
 
 
168
        // cannot encode the surrogate range
 
169
        if(ch32 >= UNI_SUR_HIGH_START && ch32 <= UNI_SUR_LOW_END)
 
170
                ch32 = UNI_REPLACEMENT_CHAR;
 
171
 
 
172
        // 2-byte sequence
 
173
        if(ch32 < 0x800 && utf8len >= 2)
 
174
        {
 
175
                *utf8str++ = (t_UTF8)((ch32 >> 6)                       | 0xC0);
 
176
                *utf8str++ = (t_UTF8)((ch32 & 0x3f)             | 0x80);
 
177
                len = 2;
 
178
        }
 
179
        // 3-byte sequence
 
180
        else if(ch32 < 0x10000 && utf8len >= 3)
 
181
        {
 
182
                *utf8str++ = (t_UTF8)((ch32 >> 12)        | 0xE0);
 
183
                *utf8str++ = (t_UTF8)((ch32 >> 6) & 0x3f  | 0x80);
 
184
                *utf8str++ = (t_UTF8)((ch32 & 0x3f)       | 0x80);
 
185
                len = 3;
 
186
        }
 
187
        // 4-byte sequence
 
188
        else if(ch32 <= UNI_MAX_LEGAL_UTF32 && utf8len >= 4)
 
189
        {
 
190
                *utf8str++ = (t_UTF8)((ch32 >> 18)        | 0xF0);
 
191
                *utf8str++ = (t_UTF8)((ch32 >> 12) & 0x3f | 0x80);
 
192
                *utf8str++ = (t_UTF8)((ch32 >> 6) & 0x3f  | 0x80);
 
193
                *utf8str++ = (t_UTF8)((ch32 & 0x3f)       | 0x80);
 
194
                len = 4;
 
195
        }
 
196
 
 
197
        // 5/6 byte sequences never occur because we limit using UNI_MAX_LEGAL_UTF32
 
198
 
 
199
        return len;
 
200
}
 
201
 
 
202
//
 
203
//      utf8_to_utf16
 
204
//
 
205
//      Convert the specified UTF-8 stream of text to UTF-16
 
206
//
 
207
//      1. The maximum number possible of whole UTF-16 characters are stored in wstr
 
208
//      2. Illegal sequences are converted to the unicode replacement character
 
209
//      3. Returns the number of bytes processeed from utf8str
 
210
//
 
211
//      utf8str         - [in]          buffer containing utf-8 text
 
212
//      utf8len         - [in]          number of code-units (bytes) in buffer
 
213
//      utf16str        - [out]         receives resulting utf-16 text
 
214
//      utf16len        - [in/out]      on input, specifies the size (in UTF16s) of utf16str
 
215
//                                                      on output, holds actual number of UTF16s stored in utf16str
 
216
//
 
217
//      Returns the number of bytes processed from utf8str
 
218
//
 
219
size_t utf8_to_utf16(t_UTF8 *utf8str, size_t utf8len, t_UTF16 *utf16str, size_t *utf16len)
 
220
{
 
221
        t_UTF16 *utf16start = utf16str;
 
222
        t_UTF8  *utf8start  = utf8str;
 
223
 
 
224
        size_t len;
 
225
        size_t tmp16len;
 
226
        t_UTF32  ch32;
 
227
 
 
228
        while(utf8len > 0 && *utf16len > 0)
 
229
        {
 
230
                // convert to utf-32
 
231
                len                  = utf8_to_utf32(utf8str, utf8len, &ch32);
 
232
                utf8str     += len;
 
233
                utf8len     -= len;
 
234
 
 
235
                // convert to utf-16
 
236
                tmp16len     = *utf16len;
 
237
                len          = utf32_to_utf16(&ch32, 1, utf16str, &tmp16len);
 
238
                utf16str    += len;
 
239
                (*utf16len) -= len;
 
240
        }
 
241
 
 
242
        *utf16len = utf16str - utf16start;
 
243
        return utf8str - utf8start;
 
244
}
 
245
 
 
246
//
 
247
//      utf16_to_utf8
 
248
//
 
249
//      Convert the specified UTF-16 stream of text to UTF-8
 
250
//
 
251
//      1. As many whole codepoints as possible are stored in utf8str 
 
252
//      2. Illegal sequences are converted to the unicode replacement character
 
253
//
 
254
//      utf16str                - [in]          buffer containing utf-16 text
 
255
//      utf16len                - [in]          number of code-units (UTF16s) in buffer
 
256
//      utf8str                 - [out]         receives resulting utf-8 text
 
257
//      utf8len                 - [in/out]      on input, specifies the size (in bytes) of utf8str
 
258
//                                                              on output, holds actual number of bytes stored in utf8str
 
259
//
 
260
//      Returns the number of characters (UTF16s) processed from utf16str
 
261
//
 
262
size_t utf16_to_utf8(t_UTF16 *utf16str, size_t utf16len, t_UTF8 *utf8str, size_t *utf8len)
 
263
{
 
264
        t_UTF16 * utf16start = utf16str;
 
265
        t_UTF8  * utf8start  = utf8str;
 
266
        size_t  len;
 
267
        t_UTF32 ch32;
 
268
        size_t  ch32len;
 
269
 
 
270
        while(utf16len > 0 && *utf8len > 0)
 
271
        {
 
272
                // convert to utf-32
 
273
                ch32len     = 1;
 
274
                len                 = utf16_to_utf32(utf16str, utf16len, &ch32, &ch32len);
 
275
                utf16str   += len;
 
276
                utf16len   -= len;
 
277
 
 
278
                // convert to utf-8
 
279
                len                 = utf32_to_utf8(utf8str, *utf8len, ch32);
 
280
                utf8str    += len;
 
281
                (*utf8len) -= len;
 
282
        }
 
283
 
 
284
        *utf8len = utf8str - utf8start;
 
285
        return utf16str - utf16start;
 
286
}
 
287
 
 
288
//
 
289
//      ascii_to_utf16
 
290
//
 
291
//      Converts plain ASCII string to UTF-16
 
292
//
 
293
//      asciistr        - [in]     buffer containing ASCII characters
 
294
//      asciilen        - [in]     number of characters in buffer
 
295
//      utf16str        - [out]    receives the resulting UTF-16 text
 
296
//      utf16len        - [in/out] on input, specifies length of utf16 buffer,
 
297
//                                                 on output, holds number of chars stored in utf16str
 
298
//
 
299
//      Returns number of characters processed from asciistr
 
300
//
 
301
size_t ascii_to_utf16(t_UTF8 *asciistr, size_t asciilen, t_UTF16 *utf16str, size_t *utf16len)
 
302
{
 
303
    size_t len = Min(*utf16len, asciilen);
 
304
                
 
305
        MultiByteToWideChar(CP_ACP, 0, (CCHAR*)asciistr, len, (WCHAR *)utf16str, len);
 
306
        *utf16len = len;
 
307
        return len;
 
308
}
 
309
 
 
310
//
 
311
//      utf16_to_ascii
 
312
//
 
313
//      Converts UTF-16 to plain ASCII (lossy)
 
314
//
 
315
//      utf16str        - [in]     buffer containing t_UTF16 characters
 
316
//      utf16len        - [in]     number of WCHARs in buffer
 
317
//      asciistr        - [out]    receives the resulting UTF-16 text
 
318
//      asciilen        - [in/out] on input, specifies length of ascii buffer,
 
319
//                                                 on output, holds number of chars stored in asciistr
 
320
//
 
321
//      Returns number of characters processed from utf16str
 
322
//
 
323
size_t utf16_to_ascii(t_UTF16 *utf16str, size_t utf16len, t_UTF8 *asciistr, size_t *asciilen)
 
324
{
 
325
        size_t len = Min(utf16len, *asciilen);
 
326
        
 
327
        WideCharToMultiByte(CP_ACP, 0, INL_REINTERPRET_CAST(LPCWSTR, utf16str), len, (LPSTR)asciistr, *asciilen, 0, 0);
 
328
        *asciilen = len;
 
329
        return len;
 
330
}
 
331
 
 
332
//
 
333
//      copy_utf8
 
334
//
 
335
//      Copies UTF-8 string from src to dest
 
336
//
 
337
//      src                     - [in]          buffer containing utf-8 text
 
338
//      srclen          - [in]          number of code-units in src
 
339
//      dest            - [out]         receives resulting string
 
340
//      destlen         - [in/out]      on input, specifies length of dest buffer
 
341
//                                                      on output, holds number of UTF8s stored in dest
 
342
//
 
343
//      returns number of CHARs processed from src
 
344
//
 
345
size_t copy_utf8(t_UTF8 *src, size_t srclen, t_UTF8 *dest, size_t *destlen)
 
346
{
 
347
    size_t len = Min(*destlen, srclen);
 
348
    memcpy(dest, src, len * sizeof(t_UTF8));
 
349
 
 
350
    *destlen = len;
 
351
    return len;
 
352
}
 
353
 
 
354
//
 
355
//      copy_utf16
 
356
//
 
357
//      Copies UTF-16 string from src to dest
 
358
//
 
359
//      src                     - [in]          buffer containing utf-16 text
 
360
//      srclen          - [in]          number of code-units in src
 
361
//      dest            - [out]         receives resulting string
 
362
//      destlen         - [in/out]      on input, specifies length of dest buffer
 
363
//                                                      on output, holds number of UTF16s stored in dest
 
364
//
 
365
//      returns number of WCHARs processed from src
 
366
//
 
367
size_t copy_utf16(t_UTF16 *src, size_t srclen, t_UTF16 *dest, size_t *destlen)
 
368
{
 
369
        size_t len = Min(*destlen, srclen);
 
370
        memcpy(dest, src, len * sizeof(t_UTF16));
 
371
 
 
372
        *destlen = len;
 
373
        return len;
 
374
}
 
375
 
 
376
//
 
377
//      swap_utf16
 
378
//
 
379
//      Copies UTF-16 string from src to dest, performing endianess swap
 
380
//      for each code-unit
 
381
//
 
382
//      src                     - [in]          buffer containing utf-16 text
 
383
//      srclen          - [in]          number of code-units in src
 
384
//      dest            - [out]         receives resulting word-swapped string
 
385
//      destlen         - [in/out]      on input, specifies length of dest buffer
 
386
//                                                      on output, holds number of UTF16s stored in dest
 
387
//
 
388
//      Returns number of WCHARs processed from src
 
389
//
 
390
size_t swap_utf16(t_UTF16 *src, size_t srclen, t_UTF16 *dest, size_t *destlen)
 
391
{
 
392
        size_t len = Min(*destlen, srclen);
 
393
        size_t i;
 
394
        
 
395
        for(i = 0; i < len; i++)
 
396
                dest[i] = SWAPWORD(src[i]);
 
397
 
 
398
        *destlen = len;
 
399
        return len;
 
400
}
 
401
 
 
402
//
 
403
//      utf32_to_utf16
 
404
//
 
405
//      Converts the specified UTF-32 stream of text to UTF-16
 
406
//
 
407
//      utf32str        - [in]          buffer containing utf-32 text
 
408
//      utf32len        - [in]          number of characters (UTF32s) in utf32str
 
409
//      utf16str        - [out]         receives resulting utf-16 text
 
410
//      utf16len        - [in/out]      on input, specifies the size (in UTF16s) of utf16str
 
411
//                                                      on output, holds actual number of t_UTF16 values stored in utf16str
 
412
//
 
413
//      returns number of UTF32s processed from utf32str
 
414
//
 
415
size_t utf32_to_utf16(t_UTF32 *utf32str, size_t utf32len, t_UTF16 *utf16str, size_t *utf16len)
 
416
{
 
417
        t_UTF16 *utf16start = utf16str;
 
418
        t_UTF32 *utf32start = utf32str;
 
419
 
 
420
        while(utf32len > 0 && *utf16len > 0)
 
421
        {
 
422
                t_UTF32 ch32 = *utf32str++;
 
423
                utf32len--;
 
424
 
 
425
                // target is a character <= 0xffff
 
426
                if(ch32 < 0xfffe)
 
427
                {
 
428
                        // make sure we don't represent anything in t_UTF16 surrogate range
 
429
                        // (this helps protect against non-shortest forms)
 
430
                        if(ch32 >= UNI_SUR_HIGH_START && ch32 <= UNI_SUR_LOW_END)
 
431
                        {
 
432
                                *utf16str++ = UNI_REPLACEMENT_CHAR;
 
433
                                (*utf16len)--;
 
434
                        }
 
435
                        else
 
436
                        {
 
437
                                *utf16str++ = (WORD)ch32;
 
438
                                (*utf16len)--;
 
439
                        }
 
440
                }
 
441
                // FFFE and FFFF are illegal mid-stream
 
442
                else if(ch32 == 0xfffe || ch32 == 0xffff)
 
443
                {
 
444
                        *utf16str++ = UNI_REPLACEMENT_CHAR;
 
445
                        (*utf16len)--;
 
446
                }
 
447
                // target is illegal Unicode value
 
448
                else if(ch32 > UNI_MAX_UTF16)
 
449
                {
 
450
                        *utf16str++ = UNI_REPLACEMENT_CHAR;
 
451
                        (*utf16len)--;
 
452
                }
 
453
                // target is in range 0xffff - 0x10ffff
 
454
                else if(*utf16len >= 2)
 
455
                { 
 
456
                        ch32 -= 0x0010000;
 
457
 
 
458
                        *utf16str++ = (WORD)((ch32 >> 10)   + UNI_SUR_HIGH_START);
 
459
                        *utf16str++ = (WORD)((ch32 & 0x3ff) + UNI_SUR_LOW_START);
 
460
 
 
461
                        (*utf16len)-=2;
 
462
                }
 
463
                else
 
464
                {
 
465
                        // no room to store result
 
466
                        break;
 
467
                }
 
468
        }
 
469
 
 
470
        *utf16len = utf16str - utf16start;
 
471
        return utf32str - utf32start;
 
472
}
 
473
 
 
474
//
 
475
//      utf16_to_utf32
 
476
//
 
477
//      Converts the specified UTF-16 stream of text to UTF-32
 
478
//
 
479
//      utf16str        - [in]          buffer containing utf-16 text
 
480
//      utf16len        - [in]          number of code-units (UTF16s) in utf16str
 
481
//      utf32str        - [out]         receives resulting utf-32 text
 
482
//      utf32len        - [in/out]      on input, specifies the size (in UTF32s) of utf32str
 
483
//                                                      on output, holds actual number of t_UTF32 values stored in utf32str
 
484
//
 
485
//      returns number of UTF16s processed from utf16str
 
486
//
 
487
size_t utf16_to_utf32(t_UTF16 *utf16str, size_t utf16len, t_UTF32 *utf32str, size_t *utf32len)
 
488
{
 
489
        t_UTF16 *utf16start = utf16str;
 
490
        t_UTF32 *utf32start = utf32str;
 
491
 
 
492
        while(utf16len > 0 && *utf32len > 0)
 
493
        {
 
494
                t_UTF32 ch = *utf16str;
 
495
 
 
496
                // first of a surrogate pair?
 
497
                if(ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END && utf16len >= 2)
 
498
                {
 
499
                        // get the second half of the pair
 
500
                        t_UTF32 ch2 = *(utf16str + 1);
 
501
                        
 
502
                        // valid trailing surrogate unit?
 
503
                        if(ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
 
504
                        {
 
505
                                ch = ((ch  - UNI_SUR_HIGH_START) << 10) + 
 
506
                                         ((ch2 - UNI_SUR_LOW_START) + 0x00010000);
 
507
 
 
508
                                utf16str++;
 
509
                                utf16len--;
 
510
                        }
 
511
                        // illegal character
 
512
                        else
 
513
                        {
 
514
                                ch = UNI_REPLACEMENT_CHAR;
 
515
                        }
 
516
                }
 
517
 
 
518
                *utf32str++ = ch;
 
519
                (*utf32len)--;          
 
520
                
 
521
                utf16str++;
 
522
                utf16len--;
 
523
        }
 
524
 
 
525
        *utf32len = utf32str - utf32start;
 
526
        return utf16str - utf16start;
 
527
}
 
528
 
 
529
//
 
530
//      utf16be_to_utf32
 
531
//
 
532
//      Converts the specified big-endian UTF-16 stream of text to UTF-32
 
533
//
 
534
//      utf16str        - [in]          buffer containing utf-16 big-endian text
 
535
//      utf16len        - [in]          number of code-units (UTF16s) in utf16str
 
536
//      utf32str        - [out]         receives resulting utf-32 text
 
537
//      utf32len        - [in/out]      on input, specifies the size (in UTF32s) of utf32str
 
538
//                                                      on output, holds actual number of t_UTF32 values stored in utf32str
 
539
//
 
540
//      returns number of UTF16s processed from utf16str
 
541
//
 
542
size_t utf16be_to_utf32(t_UTF16 *utf16str, size_t utf16len, t_UTF32 *utf32str, size_t *utf32len)
 
543
{
 
544
        t_UTF16 *utf16start = utf16str;
 
545
        t_UTF32 *utf32start = utf32str;
 
546
 
 
547
        while(utf16len > 0 && *utf32len > 0)
 
548
        {
 
549
                t_UTF32 ch = SWAPWORD(*utf16str);
 
550
 
 
551
                // first of a surrogate pair?
 
552
                if(ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END && utf16len >= 2)
 
553
                {
 
554
                        t_UTF32 ch2 = SWAPWORD(*(utf16str + 1));
 
555
                        
 
556
                        // valid trailing surrogate unit?
 
557
                        if(ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
 
558
                        {
 
559
                                ch = ((ch  - UNI_SUR_HIGH_START) << 10) + 
 
560
                                         ((ch2 - UNI_SUR_LOW_START) + 0x00010000);
 
561
 
 
562
                                utf16str++;
 
563
                                utf16len--;
 
564
                        }
 
565
                        // illegal character
 
566
                        else
 
567
                        {
 
568
                                ch = UNI_REPLACEMENT_CHAR;
 
569
                        }
 
570
                }
 
571
 
 
572
                *utf32str++ = ch;
 
573
                (*utf32len)--;
 
574
                
 
575
                utf16str++;
 
576
                utf16len--;
 
577
        }
 
578
 
 
579
        *utf32len = utf32str - utf32start;
 
580
        return utf16str - utf16start;
 
581
}
 
582
 
 
583
NAMESPACE_END_GUI