40
40
#define L_grc 0x677263 // grc Ancient Greek
41
41
#define L_jbo 0x6a626f // jbo Lojban
42
42
#define L_pap 0x706170 // pap Papiamento
43
#define L_qvi 0x717669 // qvi Kichwa
43
44
#define L_shs 0x736873 // shs Shuswap / Secwepemctsin
44
45
#define L_zhy 0x7a6879 // zhy
46
47
// start of unicode pages for character sets
47
#define OFFSET_GREEK 0x380
48
#define OFFSET_GREEK 0x380
48
49
#define OFFSET_CYRILLIC 0x420
49
50
#define OFFSET_ARMENIAN 0x530
50
#define OFFSET_ARABIC 0x600
51
#define OFFSET_ARABIC 0x600
52
#define OFFSET_THAANA 0x780 // Divehi/Maldives
51
53
#define OFFSET_DEVANAGARI 0x900
52
#define OFFSET_BENGALI 0x980
54
#define OFFSET_BENGALI 0x980
53
55
#define OFFSET_GURMUKHI 0xa00
54
#define OFFSET_TAMIL 0xb80
55
#define OFFSET_KANNADA 0xc80
56
#define OFFSET_GUJARATI 0xa80
57
#define OFFSET_ORIYA 0xb00
58
#define OFFSET_TAMIL 0xb80
59
#define OFFSET_TELUGU 0xc00
60
#define OFFSET_KANNADA 0xc80
56
61
#define OFFSET_MALAYALAM 0xd00
57
#define OFFSET_KOREAN 0x1100
62
#define OFFSET_SINHALA 0x0d80
63
#define OFFSET_GEORGIAN 0x1080
65
#define OFFSET_KOREAN 0x1100
59
67
static void Translator_Russian(Translator *tr);
106
114
{//===================================
109
static const unsigned char stress_amps2[] = {17,17, 20,20, 20,22, 22,20 };
117
static const unsigned char stress_amps2[] = {18,18, 20,20, 20,22, 22,20 };
110
118
static const short stress_lengths2[8] = {182,140, 220,220, 220,240, 260,280};
111
119
static const wchar_t empty_wstring[1] = {0};
112
120
static const wchar_t punct_in_word[2] = {'\'', 0}; // allow apostrophe within words
300
308
memset(tr->letter_bits,0,sizeof(tr->letter_bits));
301
309
SetLetterBitsRange(tr,LETTERGP_A,0x04,0x14); // vowel letters
302
310
SetLetterBitsRange(tr,LETTERGP_A,0x3e,0x4d); // + vowel signs, and virama
311
SetLetterBitsRange(tr,LETTERGP_A,0x55,0x57); // + vowel signs
304
313
SetLetterBitsRange(tr,LETTERGP_B,0x3e,0x4d); // vowel signs, and virama
314
SetLetterBitsRange(tr,LETTERGP_B,0x55,0x57); // + vowel signs
306
316
SetLetterBitsRange(tr,LETTERGP_C,0x15,0x39); // the main consonant range
307
317
SetLetterBits(tr,LETTERGP_C,dev_consonants2); // + additional consonants
309
319
SetLetterBitsRange(tr,LETTERGP_Y,0x04,0x14); // vowel letters
310
320
SetLetterBitsRange(tr,LETTERGP_Y,0x3e,0x4c); // + vowel signs
321
SetLetterBitsRange(tr,LETTERGP_Y,0x55,0x57); // + vowel signs
312
323
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
313
324
tr->langopts.suffix_add_e = tr->letter_bits_offset + 0x4d; //virama
342
static const short stress_lengths_fr[8] = {190, 170, 190, 200, 0, 0, 190, 240};
343
static const unsigned char stress_amps_fr[8] = {18,16, 18,18, 18,18, 18,18 };
331
345
static const unsigned char stress_amps_sk[8] = {17,17, 20,20, 20,22, 22,21 };
332
346
static const short stress_lengths_sk[8] = {190,190, 210,210, 0,0, 210,210};
348
static const short stress_lengths_ta[8] = {200, 200, 210, 210, 0, 0, 230, 230};
349
static const unsigned char stress_amps_ta[8] = {18,18, 18,18, 20,20, 22,22 };
334
351
// convert name string into a word of up to 4 characters, for the switch()
335
352
while(*name != 0)
336
353
name2 = (name2 << 8) + *name++;
375
case L('a','m'): // Amharic, Ethiopia
377
SetupTranslator(tr,stress_lengths_fr,stress_amps_fr);
378
tr->langopts.stress_rule = STRESSPOSN_1L;
379
tr->langopts.stress_flags = 0x0024; // don't use secondary stress
380
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable
381
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
358
386
case L('a','r'): // Arabic
359
387
tr->letter_bits_offset = OFFSET_ARABIC;
388
tr->langopts.numbers = NUM_SWAP_TENS | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED | NUM_THOUSAND_AND | NUM_OMIT_1_THOUSAND;
360
389
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
365
394
SetCyrillicLetters(tr);
366
395
SetLetterVowel(tr,0x2a);
367
396
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v] don't count this character at start of word
368
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x10; // devoice at end of word
397
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x17; // devoice at end of word, and change voicing to match a following consonant (except v)
369
398
tr->langopts.param[LOPT_REDUCE] = 2;
370
399
tr->langopts.stress_rule = STRESSPOSN_2R;
371
400
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_OMIT_1_HUNDRED | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_SINGLE_AND | NUM_ROMAN | NUM_ROMAN_ORDINAL | NUM_ROMAN_CAPITALS ;
435
static const short stress_lengths_de[8] = {150,130, 200,200, 0, 0, 250,260};
464
static const short stress_lengths_de[8] = {150,130, 200,200, 0, 0, 270,270};
465
static const unsigned char stress_amps_de[] = {20,20, 20,20, 20,22, 22,20 };
466
SetupTranslator(tr, stress_lengths_de, stress_amps_de);
436
467
tr->langopts.stress_rule = STRESSPOSN_1L;
437
468
tr->langopts.word_gap = 0x8; // don't use linking phonemes
438
469
tr->langopts.vowel_pause = 0x30;
439
470
tr->langopts.param[LOPT_PREFIXES] = 1;
440
471
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x10; // devoice at end of word
441
472
tr->langopts.param[LOPT_LONG_VOWEL_THRESHOLD] = 175/2;
442
memcpy(tr->stress_lengths,stress_lengths_de,sizeof(tr->stress_lengths));
444
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_OMIT_1_THOUSAND | NUM_ALLOW_SPACE | NUM_ORDINAL_DOT | NUM_ROMAN;
474
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_ALLOW_SPACE | NUM_ORDINAL_DOT | NUM_ROMAN;
475
// tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_OMIT_1_THOUSAND | NUM_ALLOW_SPACE | NUM_ORDINAL_DOT | NUM_ROMAN;
445
476
SetLetterVowel(tr,'y');
446
477
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 2; // use de_rules for unpronouncable rules
481
case L('d','v'): // Divehi (Maldives)
483
SetupTranslator(tr,stress_lengths_ta,stress_amps_ta);
484
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
485
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable
486
tr->letter_bits_offset = OFFSET_THAANA;
487
tr->langopts.stress_rule = STRESSPOSN_1L;
488
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable
489
SetLetterBitsRange(tr,LETTERGP_B,0x26,0x30); // vowel signs, and virama
490
tr->langopts.break_numbers = 0x14a8; // 1000, 100,000 10,000,000
491
tr->langopts.numbers = 1;
452
497
static const short stress_lengths_en[8] = {182,140, 220,220, 0,0, 248,275};
511
static const short stress_lengths_eo[8] = {145, 145, 230, 170, 0, 0, 360, 370};
556
static const short stress_lengths_eo[8] = {150, 150, 230, 180, 0, 0, 300, 320};
512
557
static const unsigned char stress_amps_eo[] = {16,14, 20,20, 20,22, 22,21 };
513
558
static const wchar_t eo_char_apostrophe[2] = {'l',0};
517
562
tr->charset_a0 = charsets[3]; // ISO-8859-3
518
563
tr->char_plus_apostrophe = eo_char_apostrophe;
520
tr->langopts.word_gap = 1;
565
// tr->langopts.word_gap = 1;
521
566
tr->langopts.vowel_pause = 2;
522
567
tr->langopts.stress_rule = STRESSPOSN_2R;
523
568
tr->langopts.stress_flags = 0x6 | 0x10;
524
tr->langopts.unstressed_wd1 = 3;
569
// tr->langopts.unstressed_wd1 = 3;
525
570
tr->langopts.unstressed_wd2 = 2;
527
572
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_ROMAN;
637
case L('e','t'): // Estonian
638
tr->charset_a0 = charsets[4]; // ISO-8859-4
639
// drop through to Finnish
592
640
case L('f','i'): // Finnish
594
642
static const unsigned char stress_amps_fi[8] = {18,16, 22,22, 20,22, 22,22 };
612
660
case L('f','r'): // french
614
static const short stress_lengths_fr[8] = {190, 170, 190, 200, 0, 0, 190, 240};
615
static const unsigned char stress_amps_fr[8] = {18,16, 20,20, 20,22, 18,18 };
617
662
SetupTranslator(tr,stress_lengths_fr,stress_amps_fr);
618
663
tr->langopts.stress_rule = STRESSPOSN_1R; // stress on final syllable
619
664
tr->langopts.stress_flags = 0x0024; // don't use secondary stress
620
665
tr->langopts.param[LOPT_IT_LENGTHEN] = 1; // remove lengthen indicator from unstressed syllables
621
666
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable
622
667
tr->langopts.accents = 2; // Say "Capital" after the letter.
623
tr->langopts.vowel_pause = 0;
625
669
tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_OMIT_1_HUNDRED | NUM_NOPAUSE | NUM_ROMAN | NUM_ROMAN_CAPITALS | NUM_ROMAN_AFTER | NUM_VIGESIMAL | NUM_DFRACTION_4;
626
670
SetLetterVowel(tr,'y');
687
731
case L('h','t'): // Haitian Creole
688
// static const short stress_lengths_fr[8] = {190, 170, 190, 200, 0, 0, 235, 240};
689
732
// memcpy(tr->stress_lengths,stress_lengths_fr,sizeof(tr->stress_lengths));
690
733
tr->langopts.stress_rule = STRESSPOSN_1R; // stress on final syllable
691
734
tr->langopts.stress_flags = 0x0024; // don't use secondary stress
860
case L('k','a'): // Georgian
862
// character codes offset by 0x1080
863
static const char ka_vowels[] = {0x50,0x54,0x58,0x5d,0x63,0x75,0x77,0};
864
static const char ka_consonants[] = {0x51,0x52,0x53,0x55,0x56,0x57,0x59,0x5a,0x5b,0x5c,0x5e,0x5f,0x60,0x61,0x62,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,0x70,0x71,0x72,0x73,0x74,0x76,0};
865
SetupTranslator(tr,stress_lengths_ta,stress_amps_ta);
866
memset(tr->letter_bits,0,sizeof(tr->letter_bits));
867
SetLetterBits(tr,LETTERGP_A,ka_vowels);
868
SetLetterBits(tr,LETTERGP_C,ka_consonants);
869
SetLetterBits(tr,LETTERGP_VOWEL2,ka_vowels);
871
tr->langopts.stress_rule = STRESSPOSN_1L;
872
tr->langopts.stress_flags = S_FINAL_NO_2;
873
tr->letter_bits_offset = OFFSET_GEORGIAN;
874
// tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
875
tr->langopts.max_initial_consonants = 7;
876
tr->langopts.numbers = NUM_VIGESIMAL | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED |NUM_OMIT_1_THOUSAND | NUM_DFRACTION_5;
817
880
case L('k','o'): // Korean, TEST
819
882
static const char ko_ivowels[] = {0x63,0x64,0x67,0x68,0x6d,0x72,0x74,0x75,0}; // y and i vowels
903
966
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x10; // devoice at end of word
904
967
SetLetterVowel(tr,'y');
906
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_1900 | NUM_ORDINAL_DOT;
969
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_OMIT_1_THOUSAND | NUM_ALLOW_SPACE | NUM_1900 | NUM_ORDINAL_DOT;
907
970
tr->langopts.ordinal_indicator = "e";
971
tr->langopts.stress_flags = S_FIRST_PRIMARY;
908
972
memcpy(tr->stress_lengths,stress_lengths_nl,sizeof(tr->stress_lengths));
1098
case L('s','i'): // Sinhala
1100
SetupTranslator(tr,stress_lengths_ta,stress_amps_ta);
1101
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable
1103
tr->langopts.stress_rule = STRESSPOSN_1L;
1104
tr->langopts.stress_flags = S_FINAL_DIM | S_NO_AUTO_DIM | S_FINAL_NO_2;
1105
tr->langopts.spelling_stress = 1;
1107
tr->letter_bits_offset = OFFSET_SINHALA;
1108
memset(tr->letter_bits,0,sizeof(tr->letter_bits));
1109
SetLetterBitsRange(tr,LETTERGP_A,0x05,0x16); // vowel letters
1110
SetLetterBitsRange(tr,LETTERGP_A,0x4a,0x73); // + vowel signs, and virama
1112
SetLetterBitsRange(tr,LETTERGP_B,0x4a,0x73); // vowel signs, and virama
1114
SetLetterBitsRange(tr,LETTERGP_C,0x1a,0x46); // the main consonant range
1116
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
1117
tr->langopts.suffix_add_e = tr->letter_bits_offset + 0x4a; //virama
1118
tr->langopts.numbers = NUM_OMIT_1_THOUSAND ;
1034
1122
case L('s','l'): // Slovenian
1035
1123
tr->charset_a0 = charsets[2]; // ISO-8859-2
1036
1124
tr->langopts.stress_rule = STRESSPOSN_2R; // Temporary
1074
1162
case L('s','w'): // Swahili
1163
case L('t','n'): // Setswana
1076
1165
static const short stress_lengths_sw[8] = {160, 170, 200, 200, 0, 0, 320, 340};
1077
1166
static const unsigned char stress_amps_sw[] = {16,12, 19,19, 20,22, 22,21 };
1092
1181
case L('m','l'): // Malayalam
1093
1182
case L('k','n'): // Kannada
1094
1183
case L('m','r'): // Marathi
1184
case L('t','e'): // Telugu
1096
static const short stress_lengths_ta[8] = {200, 200, 210, 210, 0, 0, 230, 230};
1097
static const unsigned char stress_amps_ta[8] = {18,18, 18,18, 20,20, 22,22 };
1099
1186
SetupTranslator(tr,stress_lengths_ta,stress_amps_ta);
1100
1187
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable
1102
1189
tr->langopts.stress_rule = STRESSPOSN_1L;
1103
1190
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable
1191
tr->langopts.spelling_stress = 1;
1104
1192
tr->langopts.break_numbers = 0x14a8; // 1000, 100,000 10,000,000
1106
1194
if(name2 == L('t','a'))
1123
1212
tr->letter_bits_offset = OFFSET_KANNADA;
1124
1213
tr->langopts.numbers = 0x1;
1216
if(name2 == L('t','e'))
1218
tr->letter_bits_offset = OFFSET_TELUGU;
1219
tr->langopts.numbers = 0x1;
1126
1221
tr->langopts.param[LOPT_WORD_MERGE] = 1; // don't break vowels between words
1127
1222
SetIndicLetters(tr); // call this after setting OFFSET_
1223
SetLetterBitsRange(tr,LETTERGP_B,0x4e,0x4e); // chillu-virama (unofficial)
1157
1253
tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable
1158
1254
tr->langopts.stress_flags = 0x20; //no automatic secondary stress
1160
tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_OMIT_1_HUNDRED | NUM_OMIT_1_THOUSAND | NUM_DFRACTION_2;
1256
tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_OMIT_1_THOUSAND | NUM_DFRACTION_2;
1161
1257
tr->langopts.max_initial_consonants = 2;
1196
1292
tr->letter_groups[0] = tr->letter_groups[7] = vowels_vi;
1197
1293
tr->langopts.tone_language = 1; // Tone language, use CalcPitches_Tone() rather than CalcPitches()
1198
1294
tr->langopts.unstressed_wd1 = 2;
1199
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_DFRACTION_4;
1295
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND_DIGIT | NUM_DFRACTION_4 | NUM_ZERO_HUNDRED;
1301
tr->langopts.stress_rule = STRESSPOSN_1L;
1302
tr->langopts.numbers = NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_OMIT_1_THOUSAND | NUM_SINGLE_STRESS;
1204
1305
case L('z','h'):