65
67
/* These are the breakpoints for different numbers of bytes in a UTF-8
70
const int _pcre_utf8_table1[] =
70
#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
71
|| (defined PCRE_INCLUDED && defined SUPPORT_PCRE16)
73
/* These tables are also required by pcretest in 16 bit mode. */
75
const int PRIV(utf8_table1)[] =
71
76
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
73
const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int);
78
const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);
75
80
/* These are the indicator bits and the mask for the data bits to set in the
76
81
first byte of a character, indexed by the number of additional bytes. */
78
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
79
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
83
const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
84
const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
81
86
/* Table of the number of extra bytes, indexed by the first byte masked with
82
87
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
84
const uschar _pcre_utf8_table4[] = {
89
const pcre_uint8 PRIV(utf8_table4)[] = {
85
90
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
86
91
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
87
92
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
88
93
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
95
#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16)*/
90
99
/* Table to translate from particular type value to the general value. */
92
const int _pcre_ucp_gentype[] = {
101
const int PRIV(ucp_gentype)[] = {
93
102
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
94
103
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
95
104
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
393
const ucp_type_table _pcre_utt[] = {
423
const ucp_type_table PRIV(utt)[] = {
394
424
{ 0, PT_ANY, 0 },
395
425
{ 4, PT_SC, ucp_Arabic },
396
426
{ 11, PT_SC, ucp_Armenian },
397
427
{ 20, PT_SC, ucp_Avestan },
398
428
{ 28, PT_SC, ucp_Balinese },
399
429
{ 37, PT_SC, ucp_Bamum },
400
{ 43, PT_SC, ucp_Bengali },
401
{ 51, PT_SC, ucp_Bopomofo },
402
{ 60, PT_SC, ucp_Braille },
403
{ 68, PT_SC, ucp_Buginese },
404
{ 77, PT_SC, ucp_Buhid },
405
{ 83, PT_GC, ucp_C },
406
{ 85, PT_SC, ucp_Canadian_Aboriginal },
407
{ 105, PT_SC, ucp_Carian },
408
{ 112, PT_PC, ucp_Cc },
409
{ 115, PT_PC, ucp_Cf },
410
{ 118, PT_SC, ucp_Cham },
411
{ 123, PT_SC, ucp_Cherokee },
412
{ 132, PT_PC, ucp_Cn },
413
{ 135, PT_PC, ucp_Co },
414
{ 138, PT_SC, ucp_Common },
415
{ 145, PT_SC, ucp_Coptic },
416
{ 152, PT_PC, ucp_Cs },
417
{ 155, PT_SC, ucp_Cuneiform },
418
{ 165, PT_SC, ucp_Cypriot },
419
{ 173, PT_SC, ucp_Cyrillic },
420
{ 182, PT_SC, ucp_Deseret },
421
{ 190, PT_SC, ucp_Devanagari },
422
{ 201, PT_SC, ucp_Egyptian_Hieroglyphs },
423
{ 222, PT_SC, ucp_Ethiopic },
424
{ 231, PT_SC, ucp_Georgian },
425
{ 240, PT_SC, ucp_Glagolitic },
426
{ 251, PT_SC, ucp_Gothic },
427
{ 258, PT_SC, ucp_Greek },
428
{ 264, PT_SC, ucp_Gujarati },
429
{ 273, PT_SC, ucp_Gurmukhi },
430
{ 282, PT_SC, ucp_Han },
431
{ 286, PT_SC, ucp_Hangul },
432
{ 293, PT_SC, ucp_Hanunoo },
433
{ 301, PT_SC, ucp_Hebrew },
434
{ 308, PT_SC, ucp_Hiragana },
435
{ 317, PT_SC, ucp_Imperial_Aramaic },
436
{ 334, PT_SC, ucp_Inherited },
437
{ 344, PT_SC, ucp_Inscriptional_Pahlavi },
438
{ 366, PT_SC, ucp_Inscriptional_Parthian },
439
{ 389, PT_SC, ucp_Javanese },
440
{ 398, PT_SC, ucp_Kaithi },
441
{ 405, PT_SC, ucp_Kannada },
442
{ 413, PT_SC, ucp_Katakana },
443
{ 422, PT_SC, ucp_Kayah_Li },
444
{ 431, PT_SC, ucp_Kharoshthi },
445
{ 442, PT_SC, ucp_Khmer },
446
{ 448, PT_GC, ucp_L },
448
{ 453, PT_SC, ucp_Lao },
449
{ 457, PT_SC, ucp_Latin },
450
{ 463, PT_SC, ucp_Lepcha },
451
{ 470, PT_SC, ucp_Limbu },
452
{ 476, PT_SC, ucp_Linear_B },
453
{ 485, PT_SC, ucp_Lisu },
454
{ 490, PT_PC, ucp_Ll },
455
{ 493, PT_PC, ucp_Lm },
456
{ 496, PT_PC, ucp_Lo },
457
{ 499, PT_PC, ucp_Lt },
458
{ 502, PT_PC, ucp_Lu },
459
{ 505, PT_SC, ucp_Lycian },
460
{ 512, PT_SC, ucp_Lydian },
461
{ 519, PT_GC, ucp_M },
462
{ 521, PT_SC, ucp_Malayalam },
463
{ 531, PT_PC, ucp_Mc },
464
{ 534, PT_PC, ucp_Me },
465
{ 537, PT_SC, ucp_Meetei_Mayek },
466
{ 550, PT_PC, ucp_Mn },
467
{ 553, PT_SC, ucp_Mongolian },
468
{ 563, PT_SC, ucp_Myanmar },
469
{ 571, PT_GC, ucp_N },
470
{ 573, PT_PC, ucp_Nd },
471
{ 576, PT_SC, ucp_New_Tai_Lue },
472
{ 588, PT_SC, ucp_Nko },
473
{ 592, PT_PC, ucp_Nl },
474
{ 595, PT_PC, ucp_No },
475
{ 598, PT_SC, ucp_Ogham },
476
{ 604, PT_SC, ucp_Ol_Chiki },
477
{ 613, PT_SC, ucp_Old_Italic },
478
{ 624, PT_SC, ucp_Old_Persian },
479
{ 636, PT_SC, ucp_Old_South_Arabian },
480
{ 654, PT_SC, ucp_Old_Turkic },
481
{ 665, PT_SC, ucp_Oriya },
482
{ 671, PT_SC, ucp_Osmanya },
483
{ 679, PT_GC, ucp_P },
484
{ 681, PT_PC, ucp_Pc },
485
{ 684, PT_PC, ucp_Pd },
486
{ 687, PT_PC, ucp_Pe },
487
{ 690, PT_PC, ucp_Pf },
488
{ 693, PT_SC, ucp_Phags_Pa },
489
{ 702, PT_SC, ucp_Phoenician },
490
{ 713, PT_PC, ucp_Pi },
491
{ 716, PT_PC, ucp_Po },
492
{ 719, PT_PC, ucp_Ps },
493
{ 722, PT_SC, ucp_Rejang },
494
{ 729, PT_SC, ucp_Runic },
495
{ 735, PT_GC, ucp_S },
496
{ 737, PT_SC, ucp_Samaritan },
497
{ 747, PT_SC, ucp_Saurashtra },
498
{ 758, PT_PC, ucp_Sc },
499
{ 761, PT_SC, ucp_Shavian },
500
{ 769, PT_SC, ucp_Sinhala },
501
{ 777, PT_PC, ucp_Sk },
502
{ 780, PT_PC, ucp_Sm },
503
{ 783, PT_PC, ucp_So },
504
{ 786, PT_SC, ucp_Sundanese },
505
{ 796, PT_SC, ucp_Syloti_Nagri },
506
{ 809, PT_SC, ucp_Syriac },
507
{ 816, PT_SC, ucp_Tagalog },
508
{ 824, PT_SC, ucp_Tagbanwa },
509
{ 833, PT_SC, ucp_Tai_Le },
510
{ 840, PT_SC, ucp_Tai_Tham },
511
{ 849, PT_SC, ucp_Tai_Viet },
512
{ 858, PT_SC, ucp_Tamil },
513
{ 864, PT_SC, ucp_Telugu },
514
{ 871, PT_SC, ucp_Thaana },
515
{ 878, PT_SC, ucp_Thai },
516
{ 883, PT_SC, ucp_Tibetan },
517
{ 891, PT_SC, ucp_Tifinagh },
518
{ 900, PT_SC, ucp_Ugaritic },
519
{ 909, PT_SC, ucp_Vai },
520
{ 913, PT_ALNUM, 0 },
521
{ 917, PT_PXSPACE, 0 },
522
{ 921, PT_SPACE, 0 },
524
{ 929, PT_SC, ucp_Yi },
525
{ 932, PT_GC, ucp_Z },
526
{ 934, PT_PC, ucp_Zl },
527
{ 937, PT_PC, ucp_Zp },
528
{ 940, PT_PC, ucp_Zs }
430
{ 43, PT_SC, ucp_Batak },
431
{ 49, PT_SC, ucp_Bengali },
432
{ 57, PT_SC, ucp_Bopomofo },
433
{ 66, PT_SC, ucp_Brahmi },
434
{ 73, PT_SC, ucp_Braille },
435
{ 81, PT_SC, ucp_Buginese },
436
{ 90, PT_SC, ucp_Buhid },
437
{ 96, PT_GC, ucp_C },
438
{ 98, PT_SC, ucp_Canadian_Aboriginal },
439
{ 118, PT_SC, ucp_Carian },
440
{ 125, PT_PC, ucp_Cc },
441
{ 128, PT_PC, ucp_Cf },
442
{ 131, PT_SC, ucp_Cham },
443
{ 136, PT_SC, ucp_Cherokee },
444
{ 145, PT_PC, ucp_Cn },
445
{ 148, PT_PC, ucp_Co },
446
{ 151, PT_SC, ucp_Common },
447
{ 158, PT_SC, ucp_Coptic },
448
{ 165, PT_PC, ucp_Cs },
449
{ 168, PT_SC, ucp_Cuneiform },
450
{ 178, PT_SC, ucp_Cypriot },
451
{ 186, PT_SC, ucp_Cyrillic },
452
{ 195, PT_SC, ucp_Deseret },
453
{ 203, PT_SC, ucp_Devanagari },
454
{ 214, PT_SC, ucp_Egyptian_Hieroglyphs },
455
{ 235, PT_SC, ucp_Ethiopic },
456
{ 244, PT_SC, ucp_Georgian },
457
{ 253, PT_SC, ucp_Glagolitic },
458
{ 264, PT_SC, ucp_Gothic },
459
{ 271, PT_SC, ucp_Greek },
460
{ 277, PT_SC, ucp_Gujarati },
461
{ 286, PT_SC, ucp_Gurmukhi },
462
{ 295, PT_SC, ucp_Han },
463
{ 299, PT_SC, ucp_Hangul },
464
{ 306, PT_SC, ucp_Hanunoo },
465
{ 314, PT_SC, ucp_Hebrew },
466
{ 321, PT_SC, ucp_Hiragana },
467
{ 330, PT_SC, ucp_Imperial_Aramaic },
468
{ 347, PT_SC, ucp_Inherited },
469
{ 357, PT_SC, ucp_Inscriptional_Pahlavi },
470
{ 379, PT_SC, ucp_Inscriptional_Parthian },
471
{ 402, PT_SC, ucp_Javanese },
472
{ 411, PT_SC, ucp_Kaithi },
473
{ 418, PT_SC, ucp_Kannada },
474
{ 426, PT_SC, ucp_Katakana },
475
{ 435, PT_SC, ucp_Kayah_Li },
476
{ 444, PT_SC, ucp_Kharoshthi },
477
{ 455, PT_SC, ucp_Khmer },
478
{ 461, PT_GC, ucp_L },
480
{ 466, PT_SC, ucp_Lao },
481
{ 470, PT_SC, ucp_Latin },
482
{ 476, PT_SC, ucp_Lepcha },
483
{ 483, PT_SC, ucp_Limbu },
484
{ 489, PT_SC, ucp_Linear_B },
485
{ 498, PT_SC, ucp_Lisu },
486
{ 503, PT_PC, ucp_Ll },
487
{ 506, PT_PC, ucp_Lm },
488
{ 509, PT_PC, ucp_Lo },
489
{ 512, PT_PC, ucp_Lt },
490
{ 515, PT_PC, ucp_Lu },
491
{ 518, PT_SC, ucp_Lycian },
492
{ 525, PT_SC, ucp_Lydian },
493
{ 532, PT_GC, ucp_M },
494
{ 534, PT_SC, ucp_Malayalam },
495
{ 544, PT_SC, ucp_Mandaic },
496
{ 552, PT_PC, ucp_Mc },
497
{ 555, PT_PC, ucp_Me },
498
{ 558, PT_SC, ucp_Meetei_Mayek },
499
{ 571, PT_PC, ucp_Mn },
500
{ 574, PT_SC, ucp_Mongolian },
501
{ 584, PT_SC, ucp_Myanmar },
502
{ 592, PT_GC, ucp_N },
503
{ 594, PT_PC, ucp_Nd },
504
{ 597, PT_SC, ucp_New_Tai_Lue },
505
{ 609, PT_SC, ucp_Nko },
506
{ 613, PT_PC, ucp_Nl },
507
{ 616, PT_PC, ucp_No },
508
{ 619, PT_SC, ucp_Ogham },
509
{ 625, PT_SC, ucp_Ol_Chiki },
510
{ 634, PT_SC, ucp_Old_Italic },
511
{ 645, PT_SC, ucp_Old_Persian },
512
{ 657, PT_SC, ucp_Old_South_Arabian },
513
{ 675, PT_SC, ucp_Old_Turkic },
514
{ 686, PT_SC, ucp_Oriya },
515
{ 692, PT_SC, ucp_Osmanya },
516
{ 700, PT_GC, ucp_P },
517
{ 702, PT_PC, ucp_Pc },
518
{ 705, PT_PC, ucp_Pd },
519
{ 708, PT_PC, ucp_Pe },
520
{ 711, PT_PC, ucp_Pf },
521
{ 714, PT_SC, ucp_Phags_Pa },
522
{ 723, PT_SC, ucp_Phoenician },
523
{ 734, PT_PC, ucp_Pi },
524
{ 737, PT_PC, ucp_Po },
525
{ 740, PT_PC, ucp_Ps },
526
{ 743, PT_SC, ucp_Rejang },
527
{ 750, PT_SC, ucp_Runic },
528
{ 756, PT_GC, ucp_S },
529
{ 758, PT_SC, ucp_Samaritan },
530
{ 768, PT_SC, ucp_Saurashtra },
531
{ 779, PT_PC, ucp_Sc },
532
{ 782, PT_SC, ucp_Shavian },
533
{ 790, PT_SC, ucp_Sinhala },
534
{ 798, PT_PC, ucp_Sk },
535
{ 801, PT_PC, ucp_Sm },
536
{ 804, PT_PC, ucp_So },
537
{ 807, PT_SC, ucp_Sundanese },
538
{ 817, PT_SC, ucp_Syloti_Nagri },
539
{ 830, PT_SC, ucp_Syriac },
540
{ 837, PT_SC, ucp_Tagalog },
541
{ 845, PT_SC, ucp_Tagbanwa },
542
{ 854, PT_SC, ucp_Tai_Le },
543
{ 861, PT_SC, ucp_Tai_Tham },
544
{ 870, PT_SC, ucp_Tai_Viet },
545
{ 879, PT_SC, ucp_Tamil },
546
{ 885, PT_SC, ucp_Telugu },
547
{ 892, PT_SC, ucp_Thaana },
548
{ 899, PT_SC, ucp_Thai },
549
{ 904, PT_SC, ucp_Tibetan },
550
{ 912, PT_SC, ucp_Tifinagh },
551
{ 921, PT_SC, ucp_Ugaritic },
552
{ 930, PT_SC, ucp_Vai },
553
{ 934, PT_ALNUM, 0 },
554
{ 938, PT_PXSPACE, 0 },
555
{ 942, PT_SPACE, 0 },
557
{ 950, PT_SC, ucp_Yi },
558
{ 953, PT_GC, ucp_Z },
559
{ 955, PT_PC, ucp_Zl },
560
{ 958, PT_PC, ucp_Zp },
561
{ 961, PT_PC, ucp_Zs }
531
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
564
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
533
#endif /* SUPPORT_UTF8 */
566
#endif /* SUPPORT_UTF */
535
568
/* End of pcre_tables.c */