208
208
/* Transform the code so that it fits in 16 bits. */
211
case 0x00: case 0x01: case 0x02: case 0x03: case 0x04:
211
case 0x00: case 0x01: case 0x02: case 0x03: case 0x04:
239
/* Binary search in unicode_code_to_name. */
241
unsigned int i2 = SIZEOF (unicode_code_to_name);
244
unsigned int i = (i1 + i2) >> 1;
245
if (unicode_code_to_name[i].code == c)
247
words = &unicode_names[unicode_code_to_name[i].name];
250
else if (unicode_code_to_name[i].code < c)
257
/* Note here: i1 < i < i2. */
260
else if (unicode_code_to_name[i].code > c)
267
/* Note here: i1 <= i < i2. */
242
/* Binary search in unicode_code_to_name. */
244
unsigned int i2 = SIZEOF (unicode_code_to_name);
247
unsigned int i = (i1 + i2) >> 1;
248
if (unicode_code_to_name[i].code == c)
250
words = &unicode_names[unicode_code_to_name[i].name];
253
else if (unicode_code_to_name[i].code < c)
260
/* Note here: i1 < i < i2. */
263
else if (unicode_code_to_name[i].code > c)
270
/* Note here: i1 <= i < i2. */
272
275
if (words != NULL)
274
/* Found it in unicode_code_to_name. Now concatenate the words. */
275
/* buf needs to have at least UNICODE_CHARNAME_MAX_LENGTH bytes. */
279
unsigned int wordlen;
280
const char *word = unicode_name_word (*words>>1, &wordlen);
283
while (--wordlen > 0);
284
if ((*words & 1) == 0)
277
/* Found it in unicode_code_to_name. Now concatenate the words. */
278
/* buf needs to have at least UNICODE_CHARNAME_MAX_LENGTH bytes. */
282
unsigned int wordlen;
283
const char *word = unicode_name_word (*words>>1, &wordlen);
286
while (--wordlen > 0);
287
if ((*words & 1) == 0)
305
308
char buf[UNICODE_CHARNAME_MAX_LENGTH];
310
if (!(c >= ' ' && c <= '~'))
312
*ptr++ = (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
313
if (!(c >= ' ' && c <= '~'))
315
*ptr++ = (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
319
/* Convert the constituents to uint16_t words. */
320
uint16_t words[UNICODE_CHARNAME_MAX_WORDS];
321
uint16_t *wordptr = words;
323
const char *p1 = buf;
329
while (p2 < ptr && *p2 != ' ')
331
word = unicode_name_word_lookup (p1, p2 - p1);
334
if (wordptr == &words[UNICODE_CHARNAME_MAX_WORDS])
341
/* Special case for Hangul syllables. Keeps the tables small. */
342
if (wordptr == &words[2]
343
&& words[0] == UNICODE_CHARNAME_WORD_HANGUL
344
&& words[1] == UNICODE_CHARNAME_WORD_SYLLABLE)
346
/* Split the last word [p1..ptr) into three parts:
357
&& (*p2 == 'B' || *p2 == 'C' || *p2 == 'D'
358
|| *p2 == 'G' || *p2 == 'H' || *p2 == 'J'
359
|| *p2 == 'K' || *p2 == 'M' || *p2 == 'N'
360
|| *p2 == 'P' || *p2 == 'R' || *p2 == 'S'
365
&& (*p3 == 'A' || *p3 == 'E' || *p3 == 'I'
366
|| *p3 == 'O' || *p3 == 'U' || *p3 == 'W'
371
&& (*p4 == 'B' || *p4 == 'C' || *p4 == 'D'
372
|| *p4 == 'G' || *p4 == 'H' || *p4 == 'I'
373
|| *p4 == 'J' || *p4 == 'K' || *p4 == 'L'
374
|| *p4 == 'M' || *p4 == 'N' || *p4 == 'P'
375
|| *p4 == 'S' || *p4 == 'T'))
379
unsigned int n1 = p2 - p1;
380
unsigned int n2 = p3 - p2;
381
unsigned int n3 = p4 - p3;
383
if (n1 <= 2 && (n2 >= 1 && n2 <= 3) && n3 <= 2)
387
for (index1 = 0; index1 < 19; index1++)
388
if (memcmp(jamo_initial_short_name[index1], p1, n1) == 0
389
&& jamo_initial_short_name[index1][n1] == '\0')
393
for (index2 = 0; index2 < 21; index2++)
394
if (memcmp(jamo_medial_short_name[index2], p2, n2) == 0
395
&& jamo_medial_short_name[index2][n2] == '\0')
399
for (index3 = 0; index3 < 28; index3++)
400
if (memcmp(jamo_final_short_name[index3], p3, n3) == 0
401
&& jamo_final_short_name[index3][n3] == '\0')
403
return 0xAC00 + (index1 * 21 + index2) * 28 + index3;
412
/* Special case for CJK compatibility ideographs. Keeps the
414
if (wordptr == &words[2]
415
&& words[0] == UNICODE_CHARNAME_WORD_CJK
416
&& words[1] == UNICODE_CHARNAME_WORD_COMPATIBILITY
419
&& memcmp (p1, "IDEOGRAPH-", 10) == 0)
421
const char *p2 = p1 + 10;
429
if (*p2 >= '0' && *p2 <= '9')
431
else if (*p2 >= 'A' && *p2 <= 'F')
432
c += (*p2 - 'A' + 10);
438
if ((c >= 0xF900 && c <= 0xFA2D)
439
|| (c >= 0xFA30 && c <= 0xFA6A)
440
|| (c >= 0xFA70 && c <= 0xFAD9)
441
|| (c >= 0x2F800 && c <= 0x2FA1D))
455
/* Multiply by 2, to simplify later comparisons. */
456
unsigned int words_length = wordptr - words;
458
int i = words_length - 1;
459
words[i] = 2 * words[i];
461
words[i] = 2 * words[i] + 1;
463
/* Binary search in unicode_name_to_code. */
466
unsigned int i2 = SIZEOF (unicode_name_to_code);
469
unsigned int i = (i1 + i2) >> 1;
470
const uint16_t *w = words;
471
const uint16_t *p = &unicode_names[unicode_name_to_code[i].name];
472
unsigned int n = words_length;
479
/* Note here: i1 < i < i2. */
487
/* Note here: i1 <= i < i2. */
494
unsigned int c = unicode_name_to_code[i].code;
496
/* Undo the transformation to 16-bit space. */
497
static const unsigned int offset[12] =
499
0x00000, 0x00000, 0x00000, 0x00000, 0x00000,
500
0x05000, 0x09000, 0x09000, 0x0A000, 0x14000,
503
return c + offset[c >> 12];
322
/* Convert the constituents to uint16_t words. */
323
uint16_t words[UNICODE_CHARNAME_MAX_WORDS];
324
uint16_t *wordptr = words;
326
const char *p1 = buf;
332
while (p2 < ptr && *p2 != ' ')
334
word = unicode_name_word_lookup (p1, p2 - p1);
337
if (wordptr == &words[UNICODE_CHARNAME_MAX_WORDS])
344
/* Special case for Hangul syllables. Keeps the tables small. */
345
if (wordptr == &words[2]
346
&& words[0] == UNICODE_CHARNAME_WORD_HANGUL
347
&& words[1] == UNICODE_CHARNAME_WORD_SYLLABLE)
349
/* Split the last word [p1..ptr) into three parts:
360
&& (*p2 == 'B' || *p2 == 'C' || *p2 == 'D'
361
|| *p2 == 'G' || *p2 == 'H' || *p2 == 'J'
362
|| *p2 == 'K' || *p2 == 'M' || *p2 == 'N'
363
|| *p2 == 'P' || *p2 == 'R' || *p2 == 'S'
368
&& (*p3 == 'A' || *p3 == 'E' || *p3 == 'I'
369
|| *p3 == 'O' || *p3 == 'U' || *p3 == 'W'
374
&& (*p4 == 'B' || *p4 == 'C' || *p4 == 'D'
375
|| *p4 == 'G' || *p4 == 'H' || *p4 == 'I'
376
|| *p4 == 'J' || *p4 == 'K' || *p4 == 'L'
377
|| *p4 == 'M' || *p4 == 'N' || *p4 == 'P'
378
|| *p4 == 'S' || *p4 == 'T'))
382
unsigned int n1 = p2 - p1;
383
unsigned int n2 = p3 - p2;
384
unsigned int n3 = p4 - p3;
386
if (n1 <= 2 && (n2 >= 1 && n2 <= 3) && n3 <= 2)
390
for (index1 = 0; index1 < 19; index1++)
391
if (memcmp (jamo_initial_short_name[index1], p1, n1) == 0
392
&& jamo_initial_short_name[index1][n1] == '\0')
396
for (index2 = 0; index2 < 21; index2++)
397
if (memcmp (jamo_medial_short_name[index2], p2, n2) == 0
398
&& jamo_medial_short_name[index2][n2] == '\0')
402
for (index3 = 0; index3 < 28; index3++)
403
if (memcmp (jamo_final_short_name[index3], p3, n3) == 0
404
&& jamo_final_short_name[index3][n3] == '\0')
406
return 0xAC00 + (index1 * 21 + index2) * 28 + index3;
415
/* Special case for CJK compatibility ideographs. Keeps the
417
if (wordptr == &words[2]
418
&& words[0] == UNICODE_CHARNAME_WORD_CJK
419
&& words[1] == UNICODE_CHARNAME_WORD_COMPATIBILITY
422
&& memcmp (p1, "IDEOGRAPH-", 10) == 0)
424
const char *p2 = p1 + 10;
432
if (*p2 >= '0' && *p2 <= '9')
434
else if (*p2 >= 'A' && *p2 <= 'F')
435
c += (*p2 - 'A' + 10);
441
if ((c >= 0xF900 && c <= 0xFA2D)
442
|| (c >= 0xFA30 && c <= 0xFA6A)
443
|| (c >= 0xFA70 && c <= 0xFAD9)
444
|| (c >= 0x2F800 && c <= 0x2FA1D))
458
/* Multiply by 2, to simplify later comparisons. */
459
unsigned int words_length = wordptr - words;
461
int i = words_length - 1;
462
words[i] = 2 * words[i];
464
words[i] = 2 * words[i] + 1;
466
/* Binary search in unicode_name_to_code. */
469
unsigned int i2 = SIZEOF (unicode_name_to_code);
472
unsigned int i = (i1 + i2) >> 1;
473
const uint16_t *w = words;
474
const uint16_t *p = &unicode_names[unicode_name_to_code[i].name];
475
unsigned int n = words_length;
482
/* Note here: i1 < i < i2. */
490
/* Note here: i1 <= i < i2. */
497
unsigned int c = unicode_name_to_code[i].code;
499
/* Undo the transformation to 16-bit space. */
500
static const unsigned int offset[13] =
502
0x00000, 0x00000, 0x00000, 0x00000, 0x00000,
503
0x05000, 0x09000, 0x09000, 0x0A000, 0x14000,
504
0x15000, 0x24000, 0xD4000
506
return c + offset[c >> 12];
512
515
return UNINAME_INVALID;