1
/* This file contains code common to the translator and back-translator.
2
* It is included immediately after the header files. */
4
/* liblouis Braille Translation and Back-Translation
7
Based on the Linux screenreader BRLTTY, copyright (C) 1999-2006 by
10
Copyright (C) 2004, 2005, 2006
11
ViewPlus Technologies, Inc. www.viewplus.com
13
JJB Software, Inc. www.jjb-software.com
16
This file is free software; you can redistribute it and/or modify it
17
under the terms of the Lesser or Library GNU General Public License
19
Free Software Foundation; either version 3, or (at your option) any
22
This file is distributed in the hope that it will be useful, but
23
WITHOUT ANY WARRANTY; without even the implied warranty of
24
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25
Library GNU General Public License for more details.
27
You should have received a copy of the Library GNU General Public
28
License along with this program; see the file COPYING. If not, write to
29
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
30
Boston, MA 02110-1301, USA.
32
Maintained by John J. Boyer john.boyer@jjb-software.com
35
/* Additional flag bits and masks used with typebuf entries. typebuf is
 * declared further down in this file as a pointer to unsigned short. */
36
/* 0x8000: a single bit, the highest of the three flag bits defined here. */
#define capsemph 0x8000
38
/* 0x4000: a single bit, directly below capsemph. */
#define STARTWORD 0x4000
39
/* 0x2000: a single bit, directly below STARTWORD. */
#define FIRSTWORD 0x2000
40
/* 0x00c0: two-bit mask (0x0080 | 0x0040) in the low byte. */
#define SYLLABLEMARKS 0x00c0
41
/* 0xff00: mask covering the whole upper byte; numerically it includes
 * capsemph, STARTWORD and FIRSTWORD. */
#define INTERNALMARKS 0xff00
43
/* File-scope state shared by the translation routines in this file. */

/* Translation table in use. The routines in this file read its ruleArea,
 * characters, dots, forRules, corrections and attribOrSwapRules members. */
static const TranslationTableHeader *table;
44
/* src is the read index into currentInput; the main loops run while
 * src < srcmax. */
static int src, srcmax;
45
/* dest is the write index into currentOutput; writes are checked against
 * destmax before they happen. */
static int dest, destmax;
47
/* Index used for table->attribOrSwapRules[] and compared against 2, 3 and 4
 * during pass rule selection. */
static int currentPass = 1;
48
/* Buffer the current step reads from (indexed with src and similar). */
static const widechar *currentInput;
49
/* NOTE(review): passbuf1/passbuf2 are not referenced in the portion of the
 * file reviewed here; presumably intermediate buffers between passes --
 * confirm against the code that allocates them. */
static widechar *passbuf1 = NULL;
50
static widechar *passbuf2 = NULL;
51
/* Buffer the current step writes to (indexed with dest). */
static widechar *currentOutput;
52
/* Position map carried along while copying: entries are copied from
 * srcMapping[src] to srcMapping[dest], or set to startMatch when a pass
 * action emits new output. */
static int *srcMapping = NULL;
53
/* Per-character type flags; see the "additional bits in typebuf" macros. */
static unsigned short *typebuf = NULL;
54
/* NOTE(review): srcSpacing, destSpacing and haveEmphasis are not referenced
 * in the portion of the file reviewed here; meaning to be confirmed against
 * their users. */
static unsigned char *srcSpacing = NULL;
55
static unsigned char *destSpacing = NULL;
56
static int haveEmphasis = 0;
57
/* Opcode of the rule currently selected (copied from transRule->opcode, or
 * forced to CTO_Always when no rule is found). */
static TranslationTableOpcode transOpcode;
58
/* Reset to CTO_None at the start of translatePass. */
static TranslationTableOpcode prevTransOpcode;
59
/* Rule currently selected; points into table->ruleArea. */
static const TranslationTableRule *transRule;
60
/* Copied from transRule->charslen when a rule is selected. */
static int transCharslen;
61
/* Forward declarations of routines defined later in this file or in the
 * file that includes it. */
static int checkAttr (const widechar c,
62
const TranslationTableCharacterAttributes a, int nm);
63
static int putCharacter (widechar c);
64
static int makeCorrections (void);
65
static int passDoTest (void);
66
static int passDoAction (void);
67
/* Variables available to pass rules; zeroed at the start of makeCorrections
 * and translatePass, and indexed by an operand taken from passInstructions. */
static int passVariables[NUMVAR];
68
/* Passed as the mode argument to findCharOrDots and used to pick the
 * charsdots[2 * passCharDots] / [2 * passCharDots + 1] pair of a grouping
 * rule. */
static int passCharDots;
70
/* Instruction stream of the current rule; passDoTest sets it to
 * &transRule->charsdots[transCharslen]. */
static widechar const *passInstructions;
71
static int passIC; /*Instruction counter */
72
/* Input position at which the current test started (set from passSrc). */
static int startMatch;
74
/* Bounds of the input span to be replaced. Reset to -1 before a test, set by
 * the pass_startReplace / pass_endReplace instructions, and defaulted to the
 * match bounds when startReplace is still -1 afterwards. */
static int startReplace;
75
static int endReplace;
77
/* Tested during rule selection (together with CTO_Correct or the pass
 * number) to decide whether a candidate rule may be used. */
static int srcIncremented;
78
/* NOTE(review): outputPositions, inputPositions, cursorPosition and
 * cursorStatus are not referenced in the portion of the file reviewed here;
 * presumably caller-supplied position/cursor tracking -- confirm. */
static int *outputPositions;
79
static int *inputPositions;
80
static int cursorPosition;
81
static int cursorStatus;
83
static TranslationTableCharacter *
84
findCharOrDots (widechar c, int m)
86
/*Look up character or dot pattern in the appropriate
88
static TranslationTableCharacter noChar =
89
{ 0, 0, 0, CTC_Space, 32, 32, 32 };
90
static TranslationTableCharacter noDots =
91
{ 0, 0, 0, CTC_Space, B16, B16, B16 };
92
TranslationTableCharacter *notFound;
93
TranslationTableCharacter *character;
94
TranslationTableOffset bucket;
95
unsigned long int makeHash = (unsigned long int) c % HASHNUM;
98
bucket = table->characters[makeHash];
103
bucket = table->dots[makeHash];
108
character = (TranslationTableCharacter *) & table->ruleArea[bucket];
109
if (character->realchar == c)
111
bucket = character->next;
113
notFound->realchar = notFound->uppercase = notFound->lowercase = c;
118
checkAttr (const widechar c, const TranslationTableCharacterAttributes
121
static widechar prevc = 0;
122
static TranslationTableCharacterAttributes preva = 0;
125
preva = (findCharOrDots (c, m))->attributes;
128
return ((preva & a) ? 1 : 0);
132
findAttribOrSwapRules (void)
134
int save_transCharslen = transCharslen;
135
const TranslationTableRule *save_transRule = transRule;
136
TranslationTableOpcode save_transOpcode = transOpcode;
137
TranslationTableOffset ruleOffset;
138
ruleOffset = table->attribOrSwapRules[currentPass];
142
transRule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
143
transOpcode = transRule->opcode;
146
ruleOffset = transRule->charsnext;
148
transCharslen = save_transCharslen;
149
transRule = save_transRule;
150
transOpcode = save_transOpcode;
155
compareChars (const widechar * address1, const widechar * address2, int
161
for (k = 0; k < count; k++)
162
if ((findCharOrDots (address1[k], m))->lowercase !=
163
(findCharOrDots (address2[k], m))->lowercase)
169
makeCorrections (void)
172
if (!table->corrections)
177
for (k = 0; k < NUMVAR; k++)
178
passVariables[k] = 0;
181
int length = srcmax - src;
182
const TranslationTableCharacter *character = findCharOrDots
183
(currentInput[src], 0);
184
const TranslationTableCharacter *character2;
186
if (!findAttribOrSwapRules ())
189
TranslationTableOffset ruleOffset = 0;
190
unsigned long int makeHash = 0;
196
makeHash = (unsigned long int) character->lowercase << 8;
197
character2 = findCharOrDots (currentInput[src + 1], 0);
198
makeHash += (unsigned long int) character2->lowercase;
200
ruleOffset = table->forRules[makeHash];
206
ruleOffset = character->otherRules;
208
case 2: /*No rule found */
209
transOpcode = CTO_Always;
216
(TranslationTableRule *) & table->ruleArea[ruleOffset];
217
transOpcode = transRule->opcode;
218
transCharslen = transRule->charslen;
219
if (tryThis == 1 || (transCharslen <= length &&
220
compareChars (&transRule->
225
if (srcIncremented && transOpcode == CTO_Correct &&
232
ruleOffset = transRule->charsnext;
243
srcMapping[dest] = srcMapping[src];
244
currentOutput[dest++] = currentInput[src++];
247
if (!passDoAction ())
249
if (endReplace == src)
263
matchcurrentInput (void)
267
for (k = passIC + 2; k < passIC + 2 + passInstructions[passIC + 1]; k++)
268
if (passInstructions[k] != currentInput[kk++])
274
swapTest (int swapIC, int *callSrc)
278
int curSrc = *callSrc;
279
TranslationTableOffset swapRuleOffset;
280
TranslationTableRule *swapRule;
282
(passInstructions[swapIC + 1] << 16) | passInstructions[swapIC + 2];
283
swapRule = (TranslationTableRule *) & table->ruleArea[swapRuleOffset];
284
for (curLen = 0; curLen < passInstructions[swapIC + 3]; curLen++)
286
if (swapRule->opcode == CTO_SwapDd)
288
for (curTest = 1; curTest < swapRule->charslen; curTest += 2)
290
if (currentInput[curSrc] == swapRule->charsdots[curTest])
296
for (curTest = 0; curTest < swapRule->charslen; curTest++)
298
if (currentInput[curSrc] == swapRule->charsdots[curTest])
302
if (curTest >= swapRule->charslen)
306
if (passInstructions[swapIC + 3] == passInstructions[swapIC + 4])
311
while (curLen < passInstructions[swapIC + 4])
313
if (swapRule->opcode == CTO_SwapDd)
315
for (curTest = 1; curTest < swapRule->charslen; curTest += 2)
317
if (currentInput[curSrc] == swapRule->charsdots[curTest])
323
for (curTest = 0; curTest < swapRule->charslen; curTest++)
325
if (currentInput[curSrc] == swapRule->charsdots[curTest])
329
if (curTest >= swapRule->charslen)
342
swapReplace (int start, int end)
344
TranslationTableOffset swapRuleOffset;
345
TranslationTableRule *swapRule;
346
widechar *replacements;
352
(passInstructions[passIC + 1] << 16) | passInstructions[passIC + 2];
353
swapRule = (TranslationTableRule *) & table->ruleArea[swapRuleOffset];
354
replacements = &swapRule->charsdots[swapRule->charslen];
355
for (curSrc = start; curSrc < end; curSrc++)
357
for (curTest = 0; curTest < swapRule->charslen; curTest++)
358
if (currentInput[curSrc] == swapRule->charsdots[curTest])
360
if (curTest == swapRule->charslen)
363
for (curRep = 0; curRep < curTest; curRep++)
364
if (swapRule->opcode == CTO_SwapCc)
367
curPos += replacements[curPos];
368
if (swapRule->opcode == CTO_SwapCc)
370
if ((dest + 1) >= srcmax)
372
srcMapping[dest] = srcMapping[curSrc];
373
currentOutput[dest++] = replacements[curPos];
378
if ((dest + replacements[curPos] - 1) >= destmax)
380
for (k = dest + replacements[curPos] - 1; k >= dest; --k)
381
srcMapping[k] = srcMapping[curSrc];
382
memcpy (¤tOutput[dest], &replacements[curPos + 1],
383
(replacements[curPos]) * CHARSIZE);
384
dest += replacements[curPos] - 1;
390
/* Grouping rule currently being tracked by the pass routines. It is tested
 * for NULL before use, and its charsdots[2 * passCharDots] and
 * charsdots[2 * passCharDots + 1] entries are read as the start and end
 * markers of the group. */
static TranslationTableRule *groupingRule;
391
/* Pass instruction that established the grouping (assigned from
 * passInstructions[passIC]); compared against pass_groupstart to choose
 * between the forward (input) and backward (output) scan. */
static widechar groupingOp;
394
replaceGrouping (void)
396
widechar startCharDots = groupingRule->charsdots[2 * passCharDots];
397
widechar endCharDots = groupingRule->charsdots[2 * passCharDots + 1];
398
widechar *curin = (widechar *) currentInput;
401
TranslationTableOffset replaceOffset = passInstructions[passIC + 1] <<
402
16 | (passInstructions[passIC + 2] & 0xff);
403
TranslationTableRule *replaceRule = (TranslationTableRule *) &
404
table->ruleArea[replaceOffset];
405
widechar replaceStart = replaceRule->charsdots[2 * passCharDots];
406
widechar replaceEnd = replaceRule->charsdots[2 * passCharDots + 1];
407
if (groupingOp == pass_groupstart)
409
curin[startReplace] = replaceStart;
410
for (curPos = startReplace + 1; curPos < srcmax; curPos++)
412
if (currentInput[curPos] == startCharDots)
414
if (currentInput[curPos] == endCharDots)
419
if (curPos == srcmax)
421
curin[curPos] = replaceEnd;
425
if (transOpcode == CTO_Context)
427
startCharDots = groupingRule->charsdots[2];
428
endCharDots = groupingRule->charsdots[3];
429
replaceStart = replaceRule->charsdots[2];
430
replaceEnd = replaceRule->charsdots[3];
432
currentOutput[dest] = replaceEnd;
433
for (curPos = dest - 1; curPos >= 0; curPos--)
435
if (currentOutput[curPos] == endCharDots)
437
if (currentOutput[curPos] == startCharDots)
444
currentOutput[curPos] = replaceStart;
451
removeGrouping (void)
453
widechar startCharDots = groupingRule->charsdots[2 * passCharDots];
454
widechar endCharDots = groupingRule->charsdots[2 * passCharDots + 1];
455
widechar *curin = (widechar *) currentInput;
458
if (groupingOp == pass_groupstart)
460
for (curPos = startReplace + 1; curPos < srcmax; curPos++)
462
if (currentInput[curPos] == startCharDots)
464
if (currentInput[curPos] == endCharDots)
469
if (curPos == srcmax)
472
for (; curPos < srcmax; curPos++)
473
curin[curPos - 1] = curin[curPos];
478
for (curPos = dest - 1; curPos >= 0; curPos--)
480
if (currentOutput[curPos] == endCharDots)
482
if (currentOutput[curPos] == startCharDots)
490
for (; curPos < dest; curPos++)
491
currentOutput[curPos - 1] = currentOutput[curPos];
498
static int searchSrc;
506
TranslationTableOffset ruleOffset;
507
TranslationTableRule *rule;
508
TranslationTableCharacterAttributes attributes;
509
int stepper = passSrc;
510
while (stepper < srcmax)
512
searchIC = passIC + 1;
514
while (searchIC < transRule->dotslen)
517
if (searchSrc > srcmax)
519
switch (passInstructions[searchIC])
522
searchSrc -= passInstructions[searchIC + 1];
534
for (k = searchIC + 2;
535
k < searchIC + 2 + passInstructions[searchIC + 1]; k++)
536
if (passInstructions[k] != currentInput[kk++])
541
searchSrc += passInstructions[searchIC + 1];
542
searchIC += passInstructions[searchIC + 1] + 2;
544
case pass_startReplace:
547
case pass_endReplace:
550
case pass_attributes:
552
(passInstructions[searchIC + 1] << 16) |
553
passInstructions[searchIC + 2];
554
for (k = 0; k < passInstructions[searchIC + 3]; k++)
557
(((findCharOrDots (currentInput[searchSrc++],
559
attributes & attributes)) ? 1 : 0);
564
for (k = passInstructions[searchIC + 3]; k <
565
passInstructions[searchIC + 4]; k++)
568
(findCharOrDots (currentInput[searchSrc],
570
attributes & attributes))
576
case pass_groupstart:
578
ruleOffset = (passInstructions[searchIC + 1] << 16) |
579
passInstructions[searchIC + 2];
580
rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
581
if (passInstructions[searchIC] == pass_groupstart)
583
(currentInput[searchSrc] == rule->charsdots[2 *
588
(currentInput[searchSrc] == rule->charsdots[2 *
591
if (groupingRule != NULL && groupingOp == pass_groupstart
592
&& rule == groupingRule)
594
if (currentInput[searchSrc] == rule->charsdots[2 *
597
else if (currentInput[searchSrc] ==
598
rule->charsdots[2 * passCharDots + 1])
605
itsTrue = swapTest (searchIC, &searchSrc);
609
if (passVariables[passInstructions[searchIC + 1]] !=
610
passInstructions[searchIC + 2])
615
if (passVariables[passInstructions[searchIC + 1]] >=
616
passInstructions[searchIC + 2])
621
if (passVariables[passInstructions[searchIC + 1]] <=
622
passInstructions[searchIC + 2])
627
if (passVariables[passInstructions[searchIC + 1]] >
628
passInstructions[searchIC + 2])
633
if (passVariables[passInstructions[searchIC + 1]] <
634
passInstructions[searchIC + 2])
641
if ((groupingRule && level == 1) || !groupingRule)
644
searchIC = transRule->dotslen;
649
if ((!not && !itsTrue) || (not && itsTrue))
663
TranslationTableOffset ruleOffset;
664
TranslationTableRule *rule;
665
TranslationTableCharacterAttributes attributes;
668
passInstructions = &transRule->charsdots[transCharslen];
670
startMatch = endMatch = passSrc;
671
startReplace = endReplace = -1;
672
if (transOpcode == CTO_Context || transOpcode == CTO_Correct)
676
while (passIC < transRule->dotslen)
679
if (passSrc > srcmax)
681
switch (passInstructions[passIC])
689
if (passSrc != (srcmax - 1))
694
passSrc -= passInstructions[passIC + 1];
705
itsTrue = matchcurrentInput ();
706
passSrc += passInstructions[passIC + 1];
707
passIC += passInstructions[passIC + 1] + 2;
709
case pass_startReplace:
710
startReplace = passSrc;
713
case pass_endReplace:
714
endReplace = passSrc;
717
case pass_attributes:
719
(passInstructions[passIC + 1] << 16) | passInstructions[passIC +
721
for (k = 0; k < passInstructions[passIC + 3]; k++)
724
(((findCharOrDots (currentInput[passSrc++],
726
attributes & attributes)) ? 1 : 0);
731
for (k = passInstructions[passIC + 3]; k <
732
passInstructions[passIC + 4]; k++)
735
(findCharOrDots (currentInput[passSrc],
736
passCharDots)->attributes & attributes))
742
case pass_groupstart:
744
ruleOffset = (passInstructions[passIC + 1] << 16) |
745
passInstructions[passIC + 2];
746
rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
747
if (passIC == 0 || (passIC > 0 && passInstructions[passIC - 1] ==
751
groupingOp = passInstructions[passIC];
753
if (passInstructions[passIC] == pass_groupstart)
754
itsTrue = (currentInput[passSrc] == rule->charsdots[2 *
758
itsTrue = (currentInput[passSrc] == rule->charsdots[2 *
765
itsTrue = swapTest (passIC, &passSrc);
769
if (passVariables[passInstructions[passIC + 1]] !=
770
passInstructions[passIC + 2])
775
if (passVariables[passInstructions[passIC + 1]] >=
776
passInstructions[passIC + 2])
781
if (passVariables[passInstructions[passIC + 1]] <=
782
passInstructions[passIC + 2])
787
if (passVariables[passInstructions[passIC + 1]] >
788
passInstructions[passIC + 2])
793
if (passVariables[passInstructions[passIC + 1]] <
794
passInstructions[passIC + 2])
799
itsTrue = doPassSearch ();
800
if ((!not && !itsTrue) || (not && itsTrue))
807
if (startReplace == -1)
809
startReplace = startMatch;
810
endReplace = endMatch;
817
if ((!not && !itsTrue) || (not && itsTrue))
828
TranslationTableOffset ruleOffset;
829
TranslationTableRule *rule;
830
if ((dest + startReplace - startMatch) > destmax)
832
if (transOpcode != CTO_Context)
833
memmove (&srcMapping[dest], &srcMapping[startMatch],
834
(startReplace - startMatch) * sizeof (int));
835
for (k = startMatch; k < startReplace; k++)
836
if (transOpcode == CTO_Context)
838
if (!putCharacter (currentInput[k]))
842
currentOutput[dest++] = currentInput[k];
843
while (passIC < transRule->dotslen)
844
switch (passInstructions[passIC])
848
if ((dest + passInstructions[passIC + 1]) > destmax)
850
for (k = 0; k < passInstructions[passIC + 1]; ++k)
851
srcMapping[dest + k] = startMatch;
852
memcpy (¤tOutput[dest], &passInstructions[passIC + 2],
853
passInstructions[passIC + 1] * CHARSIZE);
854
dest += passInstructions[passIC + 1];
855
passIC += passInstructions[passIC + 1] + 2;
858
passVariables[passInstructions[passIC + 1]] =
859
passInstructions[passIC + 2];
863
passVariables[passInstructions[passIC + 1]]--;
864
if (passVariables[passInstructions[passIC + 1]] < 0)
865
passVariables[passInstructions[passIC + 1]] = 0;
869
passVariables[passInstructions[passIC + 1]]++;
872
case pass_groupstart:
873
ruleOffset = (passInstructions[passIC + 1] << 16) |
874
passInstructions[passIC + 2];
875
rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
876
srcMapping[dest] = startMatch;
877
currentOutput[dest++] = rule->charsdots[2 * passCharDots];
881
ruleOffset = (passInstructions[passIC + 1] << 16) |
882
passInstructions[passIC + 2];
883
rule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
884
srcMapping[dest] = startMatch;
885
currentOutput[dest++] = rule->charsdots[2 * passCharDots + 1];
889
if (!swapReplace (startReplace, endReplace))
893
case pass_groupreplace:
894
if (!groupingRule || !replaceGrouping ())
904
dest -= startReplace - startMatch;
905
k = endReplace - startReplace;
906
if ((dest + k) > destmax)
908
memmove (&srcMapping[dest], &srcMapping[startReplace],
910
memcpy (¤tOutput[dest], ¤tInput[startReplace],
914
endReplace = passSrc;
927
for (k = 0; k < transCharslen; k++)
928
if (transRule->charsdots[k] != currentInput[kk++])
934
passSelectRule (void)
936
int length = srcmax - src;
937
const TranslationTableCharacter *dots;
938
const TranslationTableCharacter *dots2;
940
TranslationTableOffset ruleOffset = 0;
941
unsigned long int makeHash = 0;
942
if (findAttribOrSwapRules ())
944
dots = findCharOrDots (currentInput[src], 1);
945
for (tryThis = 0; tryThis < 3; tryThis++)
952
/*Hash function optimized for forward translation */
953
makeHash = (unsigned long int) dots->lowercase << 8;
954
dots2 = findCharOrDots (currentInput[src + 1], 1);
955
makeHash += (unsigned long int) dots2->lowercase;
957
ruleOffset = table->forRules[makeHash];
963
ruleOffset = dots->otherRules;
965
case 2: /*No rule found */
966
transOpcode = CTO_Always;
972
transRule = (TranslationTableRule *) & table->ruleArea[ruleOffset];
973
transOpcode = transRule->opcode;
974
transCharslen = transRule->charslen;
975
if (tryThis == 1 || ((transCharslen <= length) && checkDots ()))
977
{ /*check validity of this Translation */
979
if (currentPass != 2 || !srcIncremented)
985
if (currentPass != 3 || !srcIncremented)
991
if (currentPass != 4 || !srcIncremented)
999
ruleOffset = transRule->charsnext;
1006
translatePass (void)
1009
prevTransOpcode = CTO_None;
1012
for (k = 0; k < NUMVAR; k++)
1013
passVariables[k] = 0;
1014
while (src < srcmax)
1015
{ /*the main multipass translation loop */
1018
switch (transOpcode)
1024
if (!passDoAction ())
1026
if (endReplace == src)
1031
if ((dest + 1) > destmax)
1033
srcMapping[dest] = srcMapping[src];
1034
currentOutput[dest++] = currentInput[src++];
1040
srcMapping[dest] = srcMapping[src];
1041
failure:if (src < srcmax)
1043
while (checkAttr (currentInput[src], CTC_Space, 1))
1044
if (++src == srcmax)