1
/* ------------------------------------------------------------------------- */
2
/* "text" : Text translation, the abbreviations optimiser, the dictionary */
4
/* Part of Inform 6.30 */
5
/* copyright (c) Graham Nelson 1993 - 2004 */
7
/* ------------------------------------------------------------------------- */
11
uchar *low_strings, *low_strings_top; /* Start and next free byte in the low
14
int32 static_strings_extent; /* Number of bytes of static strings
16
memory_block static_strings_area; /* Used if (!temporary_files_switch) to
17
hold the static strings area so far */
19
static uchar *strings_holding_area; /* Area holding translated strings
20
until they are moved into either
21
a temporary file, or the
22
static_strings_area below */
24
char *all_text, *all_text_top; /* Start and next byte free in (large)
25
text buffer holding the entire text
26
of the game, when it is being
28
int put_strings_in_low_memory, /* When TRUE, put static strings in
29
the low strings pool at 0x100 rather
30
than in the static strings area */
31
is_abbreviation, /* When TRUE, the string being trans
32
is itself an abbreviation string
33
so can't make use of abbreviations */
34
abbrevs_lookup_table_made, /* The abbreviations lookup table is
35
constructed when the first non-
36
abbreviation string is translated:
37
this flag is TRUE after that */
38
abbrevs_lookup[256]; /* Once this has been constructed,
39
abbrevs_lookup[n] = the smallest
40
number of any abbreviation beginning
41
with ASCII character n, or -1
42
if none of the abbreviations do */
43
int no_abbreviations; /* No of abbreviations defined so far */
44
uchar *abbreviations_at; /* Memory to hold the text of any
45
abbreviation strings declared */
46
/* ------------------------------------------------------------------------- */
47
/* Glulx string compression storage */
48
/* ------------------------------------------------------------------------- */
50
int no_strings; /* No of strings in static strings
52
int no_dynamic_strings; /* No. of @.. string escapes used
53
(actually, the highest value used
56
static int MAX_CHARACTER_SET; /* Number of possible entities */
57
huffentity_t *huff_entities; /* The list of entities (characters,
58
abbreviations, @.. escapes, and
60
static huffentity_t **hufflist; /* Copy of the list, for sorting */
62
int no_huff_entities; /* The number of entities in the list */
63
int huff_abbrev_start; /* Position in the list where string
64
abbreviations begin. */
65
int huff_dynam_start; /* Position in the list where @..
67
int huff_entity_root; /* The position in the list of the root
68
entry (when considering the table
71
int done_compression; /* Has the game text been compressed? */
72
int32 compression_table_size; /* Length of the Huffman table, in
74
int32 compression_string_size; /* Length of the compressed string
76
int32 *compressed_offsets; /* The beginning of every string in
77
the game, relative to the beginning
78
of the Huffman table. (So entry 0
79
is equal to compression_table_size)*/
81
/* ------------------------------------------------------------------------- */
82
/* Abbreviation arrays */
83
/* ------------------------------------------------------------------------- */
89
/* ------------------------------------------------------------------------- */
91
int32 total_chars_trans, /* Number of ASCII chars of text in */
92
total_bytes_trans, /* Number of bytes of Z-code text out */
93
zchars_trans_in_last_string; /* Number of Z-chars in last string:
94
needed only for abbrev efficiency
95
calculation in "directs.c" */
96
static int32 total_zchars_trans, /* Number of Z-chars of text out
97
(only used to calculate the above) */
98
no_chars_transcribed; /* Number of ASCII chars written to
99
the text transcription area (used
100
for the -r and -u switches) */
102
static int zchars_out_buffer[3], /* During text translation, a buffer of
103
3 Z-chars at a time: when it's full
104
these are written as a 2-byte word */
105
zob_index; /* Index (0 to 2) into it */
107
static unsigned char *text_out_pc; /* The "program counter" during text
108
translation: the next address to
109
write Z-coded text output to */
111
/* ------------------------------------------------------------------------- */
112
/* For variables/arrays used by the dictionary manager, see below */
113
/* ------------------------------------------------------------------------- */
115
/* ------------------------------------------------------------------------- */
116
/* Prepare the abbreviations lookup table (used to speed up abbreviation */
117
/* detection in text translation). We first bubble-sort the abbrevs into */
118
/* alphabetical order (this is necessary for the detection algorithm to */
119
/* to work). Since the table is only prepared once, and for a table */
120
/* of size at most 96, there's no point using an efficient sort algorithm. */
121
/* ------------------------------------------------------------------------- */
123
static void make_abbrevs_lookup(void)
124
{ int bubble_sort, j, k, l; char p[MAX_ABBREV_LENGTH]; char *p1, *p2;
126
{ bubble_sort = FALSE;
127
for (j=0; j<no_abbreviations; j++)
128
for (k=j+1; k<no_abbreviations; k++)
129
{ p1=(char *)abbreviations_at+j*MAX_ABBREV_LENGTH;
130
p2=(char *)abbreviations_at+k*MAX_ABBREV_LENGTH;
132
{ strcpy(p,p1); strcpy(p1,p2); strcpy(p2,p);
133
l=abbrev_values[j]; abbrev_values[j]=abbrev_values[k];
135
l=abbrev_quality[j]; abbrev_quality[j]=abbrev_quality[k];
140
} while (bubble_sort);
142
for (j=no_abbreviations-1; j>=0; j--)
143
{ p1=(char *)abbreviations_at+j*MAX_ABBREV_LENGTH;
144
abbrevs_lookup[p1[0]]=j;
147
abbrevs_lookup_table_made = TRUE;
150
/* ------------------------------------------------------------------------- */
151
/* Search the abbreviations lookup table (a routine which must be fast). */
152
/* The source text to compare is text[i], text[i+1], ... and this routine */
153
/* is only called if text[i] is indeed the first character of at least one */
154
/* abbreviation, "from" begin the least index into the abbreviations table */
155
/* of an abbreviation for which text[i] is the first character. Recall */
156
/* that the abbrevs table is in alphabetical order. */
158
/* The return value is -1 if there is no match. If there is a match, the */
159
/* text to be abbreviated out is over-written by a string of null chars */
160
/* with "ASCII" value 1, and the abbreviation number is returned. */
162
/* In Glulx, we *do not* do this overwriting with 1's. */
163
/* ------------------------------------------------------------------------- */
165
static int try_abbreviations_from(unsigned char *text, int i, int from)
166
{ int j, k; char *p, c;
168
for (j=from, p=(char *)abbreviations_at+from*MAX_ABBREV_LENGTH;
169
(j<no_abbreviations)&&(c==p[0]); j++, p+=MAX_ABBREV_LENGTH)
170
{ if (text[i+1]==p[1])
171
{ for (k=2; p[k]!=0; k++)
172
if (text[i+k]!=p[k]) goto NotMatched;
174
for (k=0; p[k]!=0; k++) text[i+k]=1;
184
extern void make_abbreviation(char *text)
186
strcpy((char *)abbreviations_at
187
+ no_abbreviations*MAX_ABBREV_LENGTH, text);
189
is_abbreviation = TRUE;
190
abbrev_values[no_abbreviations] = compile_string(text, TRUE, TRUE);
191
is_abbreviation = FALSE;
193
/* The quality is the number of Z-chars saved by using this */
194
/* abbreviation: note that it takes 2 Z-chars to print it. */
196
abbrev_quality[no_abbreviations++] = zchars_trans_in_last_string - 2;
199
/* ------------------------------------------------------------------------- */
200
/* The front end routine for text translation */
201
/* ------------------------------------------------------------------------- */
203
extern int32 compile_string(char *b, int in_low_memory, int is_abbrev)
204
{ int i, j; uchar *c;
206
is_abbreviation = is_abbrev;
208
/* Put into the low memory pool (at 0x100 in the Z-machine) of strings */
209
/* which may be wanted as possible entries in the abbreviations table */
211
if (!glulx_mode && in_low_memory)
212
{ j=subtract_pointers(low_strings_top,low_strings);
213
low_strings_top=translate_text(low_strings_top,b);
214
i= subtract_pointers(low_strings_top,low_strings);
215
is_abbreviation = FALSE;
216
if (i>MAX_LOW_STRINGS)
217
memoryerror("MAX_LOW_STRINGS", MAX_LOW_STRINGS);
221
if (glulx_mode && done_compression)
222
compiler_error("Tried to add a string after compression was done.");
224
c = translate_text(strings_holding_area, b);
225
i = subtract_pointers(c, strings_holding_area);
227
if (i>MAX_STATIC_STRINGS)
228
memoryerror("MAX_STATIC_STRINGS",MAX_STATIC_STRINGS);
230
/* Insert null bytes as needed to ensure that the next static string */
231
/* also occurs at an address expressible as a packed address */
234
if (oddeven_packing_switch)
235
while ((i%(scale_factor*2))!=0)
236
{ i+=2; *c++ = 0; *c++ = 0;
239
while ((i%scale_factor)!=0)
240
{ i+=2; *c++ = 0; *c++ = 0;
244
j = static_strings_extent;
246
if (temporary_files_switch)
247
for (c=strings_holding_area; c<strings_holding_area+i;
248
c++, static_strings_extent++)
251
for (c=strings_holding_area; c<strings_holding_area+i;
252
c++, static_strings_extent++)
253
write_byte_to_memory_block(&static_strings_area,
254
static_strings_extent, *c);
256
is_abbreviation = FALSE;
259
return(j/scale_factor);
262
/* The marker value is a one-based string number. (We reserve zero
263
to mean "not a string at all". */
264
return (++no_strings);
268
/* ------------------------------------------------------------------------- */
269
/* Output a single Z-character into the buffer, and flush it if full */
270
/* ------------------------------------------------------------------------- */
272
static void write_z_char_z(int i)
275
total_zchars_trans++;
276
zchars_out_buffer[zob_index++]=(i%32);
277
if (zob_index!=3) return;
279
j= zchars_out_buffer[0]*0x0400 + zchars_out_buffer[1]*0x0020
280
+ zchars_out_buffer[2];
281
text_out_pc[0] = j/256; text_out_pc[1] = j%256; text_out_pc+=2;
282
total_bytes_trans+=2;
285
static void write_zscii(int zsc)
287
int lookup_value, in_alphabet;
294
if (zsc < 0x100) lookup_value = zscii_to_alphabet_grid[zsc];
296
else lookup_value = -1;
298
if (lookup_value >= 0)
299
{ alphabet_used[lookup_value] = 'Y';
300
in_alphabet = lookup_value/26;
301
if (in_alphabet==1) write_z_char_z(4); /* SHIFT to A1 */
302
if (in_alphabet==2) write_z_char_z(5); /* SHIFT to A2 */
303
write_z_char_z(lookup_value%26 + 6);
306
{ write_z_char_z(5); write_z_char_z(6);
307
write_z_char_z(zsc/32); write_z_char_z(zsc%32);
311
/* ------------------------------------------------------------------------- */
312
/* Finish a Z-coded string, padding out with Z-char 5s if necessary and */
313
/* setting the "end" bit on the final 2-byte word */
314
/* ------------------------------------------------------------------------- */
316
static void end_z_chars(void)
318
zchars_trans_in_last_string=total_zchars_trans-zchars_trans_in_last_string;
319
while (zob_index!=0) write_z_char_z(5);
320
p=(unsigned char *) text_out_pc;
324
/* Glulx handles this much more simply -- compression is done elsewhere. */
325
static void write_z_char_g(int i)
328
total_zchars_trans++;
334
/* ------------------------------------------------------------------------- */
335
/* The main routine "text.c" provides to the rest of Inform: the text */
336
/* translator. p is the address to write output to, s_text the source text */
337
/* and the return value is the next free address to write output to. */
338
/* Note that the source text may be corrupted by this routine. */
339
/* ------------------------------------------------------------------------- */
341
extern uchar *translate_text(uchar *p, char *s_text)
342
{ int i, j, k, in_alphabet, lookup_value;
343
int32 unicode; int zscii;
344
unsigned char *text_in;
346
/* Cast the input and output streams to unsigned char: text_out_pc will
347
advance as bytes of Z-coded text are written, but text_in doesn't */
349
text_in = (unsigned char *) s_text;
350
text_out_pc = (unsigned char *) p;
352
/* Remember the Z-chars total so that later we can subtract to find the
353
number of Z-chars translated on this string */
355
zchars_trans_in_last_string = total_zchars_trans;
357
/* Start with the Z-characters output buffer empty */
361
/* If this is the first text translated since the abbreviations were
362
declared, and if some were declared, then it's time to make the
363
lookup table for abbreviations
365
(Except: we don't if the text being translated is itself
366
the text of an abbreviation currently being defined) */
368
if ((!abbrevs_lookup_table_made) && (no_abbreviations > 0)
369
&& (!is_abbreviation))
370
make_abbrevs_lookup();
372
/* If we're storing the whole game text to memory, then add this text */
374
if ((!is_abbreviation) && (store_the_text))
375
{ no_chars_transcribed += strlen(s_text)+2;
376
if (no_chars_transcribed >= MAX_TRANSCRIPT_SIZE)
377
memoryerror("MAX_TRANSCRIPT_SIZE", MAX_TRANSCRIPT_SIZE);
378
sprintf(all_text_top, "%s\n\n", s_text);
379
all_text_top += strlen(all_text_top);
382
if (transcript_switch && (!veneer_mode))
383
write_to_transcript_file(s_text);
387
/* The empty string of Z-text is illegal, since it can't carry an end
388
bit: so we translate an empty string of ASCII text to just the
389
pad character 5. Printing this causes nothing to appear on screen. */
391
if (text_in[0]==0) write_z_char_z(5);
393
/* Loop through the characters of the null-terminated input text: note
394
that if 1 is written over a character in the input text, it is
395
afterwards ignored */
397
for (i=0; text_in[i]!=0; i++)
398
{ total_chars_trans++;
400
/* Contract ". " into ". " if double-space-removing switch set:
401
likewise "? " and "! " if the setting is high enough */
403
if ((double_space_setting >= 1)
404
&& (text_in[i+1]==' ') && (text_in[i+2]==' '))
405
{ if (text_in[i]=='.') text_in[i+2]=1;
406
if (double_space_setting >= 2)
407
{ if (text_in[i]=='?') text_in[i+2]=1;
408
if (text_in[i]=='!') text_in[i+2]=1;
412
/* Try abbreviations if the economy switch set */
414
if ((economy_switch) && (!is_abbreviation)
415
&& ((k=abbrevs_lookup[text_in[i]])!=-1))
416
{ if ((j=try_abbreviations_from(text_in, i, k))!=-1)
417
{ if (j<32) { write_z_char_z(2); write_z_char_z(j); }
418
else { write_z_char_z(3); write_z_char_z(j-32); }
422
/* '@' is the escape character in Inform string notation: the various
426
@@decimalnumber : write this ZSCII char (0 to 1023)
427
@twodigits : write the abbreviation string with this
431
@accentcode : this accented character: e.g.,
432
for @'e write an E-acute
433
@{...} : this Unicode char (in hex) */
436
{ if (text_in[i+1]=='@')
440
i+=2; j=atoi((char *) (text_in+i));
442
{ /* Prevent ~ and ^ from being translated to double-quote
443
and new-line, as they ordinarily would be */
445
case 94: write_z_char_z(5); write_z_char_z(6);
446
write_z_char_z(94/32); write_z_char_z(94%32);
448
case 126: write_z_char_z(5); write_z_char_z(6);
449
write_z_char_z(126/32); write_z_char_z(126%32);
452
default: write_zscii(j); break;
454
while (isdigit(text_in[i])) i++; i--;
456
else if (isdigit(text_in[i+1])!=0)
461
d1 = character_digit_value[text_in[i+1]];
462
d2 = character_digit_value[text_in[i+2]];
463
if ((d1 == 127) || (d1 >= 10) || (d2 == 127) || (d2 >= 10))
464
error("'@..' must have two decimal digits");
467
write_z_char_z(1); write_z_char_z(d1*10 + d2);
472
/* A string escape specifying an unusual character */
474
unicode = text_to_unicode((char *) (text_in+i));
475
zscii = unicode_to_zscii(unicode);
476
if (zscii != 5) write_zscii(zscii);
478
{ unicode_char_error(
479
"Character can only be used if declared in \
480
advance as part of 'Zcharacter table':", unicode);
482
i += textual_form_length - 1;
486
{ /* Skip a character which has been over-written with the null
487
value 1 earlier on */
490
{ if (text_in[i]==' ') write_z_char_z(0);
492
{ j = (int) text_in[i];
493
lookup_value = iso_to_alphabet_grid[j];
494
if (lookup_value < 0)
495
{ /* The character isn't in the standard alphabets, so
496
we have to use the ZSCII 4-Z-char sequence */
498
if (lookup_value == -5)
499
{ /* Character isn't in the ZSCII set at all */
501
unicode = iso_to_unicode(j);
503
"Character can only be used if declared in \
504
advance as part of 'Zcharacter table':", unicode);
505
write_zscii(0x200 + unicode/0x100);
506
write_zscii(0x300 + unicode%0x100);
508
else write_zscii(-lookup_value);
511
{ /* The character is in one of the standard alphabets:
512
write a SHIFT to temporarily change alphabet if
513
it isn't in alphabet 0, then write the Z-char */
515
alphabet_used[lookup_value] = 'Y';
516
in_alphabet = lookup_value/26;
517
if (in_alphabet==1) write_z_char_z(4); /* SHIFT to A1 */
518
if (in_alphabet==2) write_z_char_z(5); /* SHIFT to A2 */
519
write_z_char_z(lookup_value%26 + 6);
526
/* Flush the Z-characters output buffer and set the "end" bit */
533
/* The text storage here is, of course, temporary. Compression
534
will occur when we're finished compiling, so that all the
535
clever Huffman stuff will work.
536
In the stored text, we use "@@" to indicate @,
537
"@0" to indicate a zero byte,
538
"@ANNNN" to indicate an abbreviation,
539
"@DNNNN" to indicate a dynamic string thing.
540
(NNNN is a four-digit hex number using the letters A-P... an
541
ugly representation but a convenient one.)
544
for (i=0; text_in[i]!=0; i++) {
546
/* Contract ". " into ". " if double-space-removing switch set:
547
likewise "? " and "! " if the setting is high enough. */
548
if ((double_space_setting >= 1)
549
&& (text_in[i+1]==' ') && (text_in[i+2]==' ')) {
551
|| (double_space_setting >= 2
552
&& (text_in[i]=='?' || text_in[i]=='!'))) {
553
text_in[i+1] = text_in[i];
560
/* Try abbreviations if the economy switch set. We have to be in
561
compression mode too, since the abbreviation mechanism is part
562
of string decompression. */
564
if ((economy_switch) && (compression_switch) && (!is_abbreviation)
565
&& ((k=abbrevs_lookup[text_in[i]])!=-1)
566
&& ((j=try_abbreviations_from(text_in, i, k)) != -1)) {
567
char *cx = (char *)abbreviations_at+j*MAX_ABBREV_LENGTH;
571
write_z_char_g('A' + ((j >>12) & 0x0F));
572
write_z_char_g('A' + ((j >> 8) & 0x0F));
573
write_z_char_g('A' + ((j >> 4) & 0x0F));
574
write_z_char_g('A' + ((j ) & 0x0F));
576
else if (text_in[i] == '@') {
577
if (text_in[i+1]=='@') {
579
i+=2; j=atoi((char *) (text_in+i));
580
if (j == '@' || j == '\0') {
584
if (!compression_switch)
585
warning("Ascii @@0 will prematurely terminate non-compressed \
590
while (isdigit(text_in[i])) i++; i--;
592
else if (isdigit(text_in[i+1])) {
594
d1 = character_digit_value[text_in[i+1]];
595
d2 = character_digit_value[text_in[i+2]];
596
if ((d1 == 127) || (d1 >= 10) || (d2 == 127) || (d2 >= 10)) {
597
error("'@..' must have two decimal digits");
600
if (!compression_switch)
601
warning("'@..' print variable will not work in non-compressed \
602
string; substituting ' '.");
605
if (j >= MAX_DYNAMIC_STRINGS) {
606
memoryerror("MAX_DYNAMIC_STRINGS", MAX_DYNAMIC_STRINGS);
609
if (j+1 >= no_dynamic_strings)
610
no_dynamic_strings = j+1;
613
write_z_char_g('A' + ((j >>12) & 0x0F));
614
write_z_char_g('A' + ((j >> 8) & 0x0F));
615
write_z_char_g('A' + ((j >> 4) & 0x0F));
616
write_z_char_g('A' + ((j ) & 0x0F));
620
unicode = text_to_unicode((char *) (text_in+i));
621
i += textual_form_length - 1;
622
if (unicode >= 0 && unicode < 256) {
623
write_z_char_g(unicode);
626
error("Unicode characters beyond Latin-1 are not yet supported in Glulx");
630
else if (text_in[i] == '^')
631
write_z_char_g(0x0A);
632
else if (text_in[i] == '~')
635
write_z_char_g(text_in[i]);
641
return((uchar *) text_out_pc);
644
/* ------------------------------------------------------------------------- */
645
/* Glulx compression code */
646
/* ------------------------------------------------------------------------- */
649
static void compress_makebits(int entnum, int depth, int prevbit,
650
huffbitlist_t *bits);
651
static void compress_dumptable(int entnum, int depth);
653
/* The compressor. This uses the usual Huffman compression algorithm. */
654
void compress_game_text()
656
int entities, branchstart, branches;
664
if (compression_switch) {
666
/* How many entities have we currently got? Well, 256 plus the
667
string-terminator plus abbrevations plus dynamic strings. */
669
huff_abbrev_start = entities;
671
entities += no_abbreviations;
672
huff_dynam_start = entities;
673
entities += no_dynamic_strings;
675
if (entities > MAX_CHARACTER_SET)
676
memoryerror("MAX_CHARACTER_SET",MAX_CHARACTER_SET);
679
for (jx=0; jx<256; jx++) {
680
huff_entities[jx].type = 2;
681
huff_entities[jx].count = 0;
682
huff_entities[jx].u.ch = jx;
685
huff_entities[256].type = 1;
686
huff_entities[256].count = 0;
687
if (economy_switch) {
688
for (jx=0; jx<no_abbreviations; jx++) {
689
huff_entities[huff_abbrev_start+jx].type = 3;
690
huff_entities[huff_abbrev_start+jx].count = 0;
691
huff_entities[huff_abbrev_start+jx].u.val = jx;
694
for (jx=0; jx<no_dynamic_strings; jx++) {
695
huff_entities[huff_dynam_start+jx].type = 9;
696
huff_entities[huff_dynam_start+jx].count = 0;
697
huff_entities[huff_dynam_start+jx].u.val = jx;
701
/* No compression; use defaults that will make it easy to check
703
no_huff_entities = 257;
704
huff_abbrev_start = 257;
705
huff_dynam_start = 257+MAX_ABBREVS;
706
compression_table_size = 0;
709
if (temporary_files_switch) {
711
Temp1_fp=fopen(Temp1_Name,"rb");
713
fatalerror("I/O failure: couldn't reopen temporary file 1");
716
if (compression_switch) {
718
for (lx=0, ix=0; lx<no_strings; lx++) {
719
int escapelen=0, escapetype=0;
723
if (temporary_files_switch)
724
ch = fgetc(Temp1_fp);
726
ch = read_byte_from_memory_block(&static_strings_area, ix);
728
if (ix > static_strings_extent || ch < 0)
729
compiler_error("Read too much not-yet-compressed text.");
730
if (escapelen == -1) {
735
else if (ch == '0') {
738
else if (ch == 'A' || ch == 'D') {
745
compiler_error("Strange @ escape in processed text.");
748
else if (escapelen) {
749
escapeval = (escapeval << 4) | ((ch-'A') & 0x0F);
751
if (escapelen == 0) {
752
if (escapetype == 'A') {
753
ch = huff_abbrev_start+escapeval;
755
else if (escapetype == 'D') {
756
ch = huff_dynam_start+escapeval;
759
compiler_error("Strange @ escape in processed text.");
775
huff_entities[ch].count++;
780
for (jx=0; jx<entities; jx++) {
781
if (huff_entities[jx].count) {
782
hufflist[numlive] = &(huff_entities[jx]);
787
branchstart = entities;
790
while (numlive > 1) {
792
int best1num, best2num;
795
if (hufflist[0]->count < hufflist[1]->count) {
804
best1num = hufflist[best1]->count;
805
best2num = hufflist[best2]->count;
807
for (jx=2; jx<numlive; jx++) {
808
if (hufflist[jx]->count < best1num) {
812
best1num = hufflist[best1]->count;
814
else if (hufflist[jx]->count < best2num) {
816
best2num = hufflist[best2]->count;
820
bran = &(huff_entities[branchstart+branches]);
823
bran->count = hufflist[best1]->count + hufflist[best2]->count;
824
bran->u.branch[0] = (hufflist[best1] - huff_entities);
825
bran->u.branch[1] = (hufflist[best2] - huff_entities);
826
hufflist[best1] = bran;
827
if (best2 < numlive-1) {
828
memmove(&(hufflist[best2]), &(hufflist[best2+1]),
829
((numlive-1) - best2) * sizeof(huffentity_t *));
834
huff_entity_root = (hufflist[0] - huff_entities);
835
no_huff_entities = branchstart+branches;
837
for (ix=0; ix<MAXHUFFBYTES; ix++)
839
compression_table_size = 12;
841
compress_makebits(huff_entity_root, 0, -1, &bits);
842
/* compress_dumptable(huff_entity_root, 0); */
846
/* Now, sadly, we have to compute the size of the string section,
847
without actually doing the compression. */
848
compression_string_size = 0;
850
if (temporary_files_switch) {
851
fseek(Temp1_fp, 0, SEEK_SET);
854
if (no_strings >= MAX_NUM_STATIC_STRINGS)
855
memoryerror("MAX_NUM_STATIC_STRINGS", MAX_NUM_STATIC_STRINGS);
857
for (lx=0, ix=0; lx<no_strings; lx++) {
858
int escapelen=0, escapetype=0;
862
compressed_offsets[lx] = compression_table_size + compression_string_size;
863
compression_string_size++; /* for the type byte */
865
if (temporary_files_switch)
866
ch = fgetc(Temp1_fp);
868
ch = read_byte_from_memory_block(&static_strings_area, ix);
870
if (ix > static_strings_extent || ch < 0)
871
compiler_error("Read too much not-yet-compressed text.");
872
if (escapelen == -1) {
877
else if (ch == '0') {
880
else if (ch == 'A' || ch == 'D') {
887
compiler_error("Strange @ escape in processed text.");
890
else if (escapelen) {
891
escapeval = (escapeval << 4) | ((ch-'A') & 0x0F);
893
if (escapelen == 0) {
894
if (escapetype == 'A') {
895
ch = huff_abbrev_start+escapeval;
897
else if (escapetype == 'D') {
898
ch = huff_dynam_start+escapeval;
901
compiler_error("Strange @ escape in processed text.");
918
if (compression_switch) {
919
jx += huff_entities[ch].depth;
920
compression_string_size += (jx/8);
924
if (ch >= huff_dynam_start) {
925
compression_string_size += 3;
927
else if (ch >= huff_abbrev_start) {
928
compiler_error("Abbreviation in non-compressed string should \
932
compression_string_size += 1;
935
if (compression_switch && jx)
936
compression_string_size++;
939
done_compression = TRUE;
942
static void compress_dumptable(int entnum, int depth)
944
huffentity_t *ent = &(huff_entities[entnum]);
949
printf("%6d: ", ent->count);
950
for (ix=0; ix<depth; ix++) {
951
int bt = ent->bits.b[ix / 8] & (1 << (ix % 8));
952
printf("%d", (bt != 0));
959
compress_dumptable(ent->u.branch[0], depth+1);
960
compress_dumptable(ent->u.branch[1], depth+1);
966
printf("0x%02X ", ent->u.ch);
967
if (ent->u.ch >= ' ')
968
printf("'%c'\n", ent->u.ch);
970
printf("'ctrl-%c'\n", ent->u.ch + '@');
973
cx = (char *)abbreviations_at + ent->u.val*MAX_ABBREV_LENGTH;
974
printf("abbrev %d, \"%s\"\n", ent->u.val, cx);
977
printf("print-var @%02d\n", ent->u.val);
982
static void compress_makebits(int entnum, int depth, int prevbit,
985
huffentity_t *ent = &(huff_entities[entnum]);
988
ent->addr = compression_table_size;
993
ent->bits.b[(depth-1) / 8] |= (1 << ((depth-1) % 8));
998
compression_table_size += 9;
999
compress_makebits(ent->u.branch[0], depth+1, 0, &ent->bits);
1000
compress_makebits(ent->u.branch[1], depth+1, 1, &ent->bits);
1003
compression_table_size += 1;
1006
compression_table_size += 2;
1009
cx = (char *)abbreviations_at + ent->u.val*MAX_ABBREV_LENGTH;
1010
compression_table_size += (1 + 1 + strlen(cx));
1013
compression_table_size += 5;
1018
/* ------------------------------------------------------------------------- */
1019
/* The abbreviations optimiser */
1021
/* This is a very complex, memory and time expensive algorithm to */
1022
/* approximately solve the problem of which abbreviation strings would */
1023
/* minimise the total number of Z-chars to which the game text translates. */
1024
/* It is in some ways a quite separate program but remains inside Inform */
1025
/* for compatibility with previous releases. */
1026
/* ------------------------------------------------------------------------- */
1028
typedef struct tlb_s
1030
int32 intab, occurrences;
1033
static int32 no_occs;
1035
static int32 *grandtable;
1036
static int32 *grandflags;
1037
typedef struct optab_s
1044
static optab *bestyet, *bestyet2;
1048
static char *sub_buffer;
1050
static void optimise_pass(void)
1051
{ int32 i; int t1, t2;
1052
int32 j, j2, k, nl, matches, noflags, score, min, minat, x, scrabble, c;
1053
for (i=0; i<256; i++) bestyet[i].length=0;
1054
for (i=0; i<no_occs; i++)
1055
{ if ((*(tlbtab[i].text)!=(int) '\n')&&(tlbtab[i].occurrences!=0))
1058
if (i%((**g_pm_hndl).linespercheck) == 0)
1059
{ ProcessEvents (&g_proc);
1063
my_free(&all_text,"transcription text");
1064
longjmp (g_fallback, 1);
1068
printf("Pass %d, %4ld/%ld '%s' (%ld occurrences) ",
1069
pass_no, (long int) i, (long int) no_occs, tlbtab[i].text,
1070
(long int) tlbtab[i].occurrences);
1072
for (j=0; j<tlbtab[i].occurrences; j++)
1073
{ for (j2=0; j2<tlbtab[i].occurrences; j2++) grandflags[j2]=1;
1074
nl=2; noflags=tlbtab[i].occurrences;
1075
while ((noflags>=2)&&(nl<=62))
1077
for (j2=0; j2<nl; j2++)
1078
if (all_text[grandtable[tlbtab[i].intab+j]+j2]=='\n')
1081
for (j2=j; j2<tlbtab[i].occurrences; j2++)
1082
{ if (grandflags[j2]==1)
1083
{ x=grandtable[tlbtab[i].intab+j2]
1084
- grandtable[tlbtab[i].intab+j];
1085
if (((x>-nl)&&(x<nl))
1086
|| (memcmp(all_text+grandtable[tlbtab[i].intab+j],
1087
all_text+grandtable[tlbtab[i].intab+j2],
1089
{ grandflags[j2]=0; noflags--; }
1094
for (k=0; k<nl; k++)
1096
c=all_text[grandtable[tlbtab[i].intab+j+k]];
1098
{ if (iso_to_alphabet_grid[c]<0)
1101
if (iso_to_alphabet_grid[c]>=26)
1105
score=(matches-1)*(scrabble-2);
1107
for (j2=0; j2<256; j2++)
1108
{ if ((nl==bestyet[j2].length)
1109
&& (memcmp(all_text+bestyet[j2].location,
1110
all_text+grandtable[tlbtab[i].intab+j],
1112
{ j2=256; min=score; }
1114
{ if (bestyet[j2].score<min)
1115
{ min=bestyet[j2].score; minat=j2;
1120
{ bestyet[minat].score=score;
1121
bestyet[minat].length=nl;
1122
bestyet[minat].location=grandtable[tlbtab[i].intab+j];
1123
bestyet[minat].popularity=matches;
1124
for (j2=0; j2<nl; j2++) sub_buffer[j2]=
1125
all_text[bestyet[minat].location+j2];
1131
t2=((int) time(0)) - t1;
1132
printf(" (%d seconds)\n",t2);
1137
static int any_overlap(char *s1, char *s2)
1138
{ int a, b, i, j, flag;
1139
a=strlen(s1); b=strlen(s2);
1140
for (i=1-b; i<a; i++)
1143
if ((0<=i+j)&&(i+j<=a-1))
1144
if (s1[i+j]!=s2[j]) flag=1;
1145
if (flag==0) return(1);
1150
#define MAX_TLBS 8000
1152
extern void optimise_abbreviations(void)
1153
{ int32 i, j, t, max=0, MAX_GTABLE;
1154
int32 j2, selected, available, maxat, nl;
1157
printf("Beginning calculation of optimal abbreviations...\n");
1160
tlbtab=my_calloc(sizeof(tlb), MAX_TLBS, "tlb table"); no_occs=0;
1161
sub_buffer=my_calloc(sizeof(char), 4000, "sub_buffer");
1162
for (i=0; i<MAX_TLBS; i++) tlbtab[i].occurrences=0;
1164
bestyet=my_calloc(sizeof(optab), 256, "bestyet");
1165
bestyet2=my_calloc(sizeof(optab), 64, "bestyet2");
1167
bestyet2[0].text[0]='.';
1168
bestyet2[0].text[1]=' ';
1169
bestyet2[0].text[2]=0;
1171
bestyet2[1].text[0]=',';
1172
bestyet2[1].text[1]=' ';
1173
bestyet2[1].text[2]=0;
1175
for (i=0; all_text+i<all_text_top; i++)
1177
if ((all_text[i]=='.') && (all_text[i+1]==' ') && (all_text[i+2]==' '))
1178
{ all_text[i]='\n'; all_text[i+1]='\n'; all_text[i+2]='\n';
1179
bestyet2[0].popularity++;
1182
if ((all_text[i]=='.') && (all_text[i+1]==' '))
1183
{ all_text[i]='\n'; all_text[i+1]='\n';
1184
bestyet2[0].popularity++;
1187
if ((all_text[i]==',') && (all_text[i+1]==' '))
1188
{ all_text[i]='\n'; all_text[i+1]='\n';
1189
bestyet2[1].popularity++;
1193
MAX_GTABLE=subtract_pointers(all_text_top,all_text)+1;
1194
grandtable=my_calloc(4*sizeof(int32), MAX_GTABLE/4, "grandtable");
1196
for (i=0, t=0; all_text+i<all_text_top; i++)
1197
{ test.text[0]=all_text[i];
1198
test.text[1]=all_text[i+1];
1199
test.text[2]=all_text[i+2];
1201
if ((test.text[0]=='\n')||(test.text[1]=='\n')||(test.text[2]=='\n'))
1203
for (j=0; j<no_occs; j++)
1204
if (strcmp(test.text,tlbtab[j].text)==0)
1207
for (j=i+3; all_text+j<all_text_top; j++)
1210
if (j%((**g_pm_hndl).linespercheck) == 0)
1211
{ ProcessEvents (&g_proc);
1215
my_free(&all_text,"transcription text");
1216
longjmp (g_fallback, 1);
1220
if ((all_text[i]==all_text[j])
1221
&& (all_text[i+1]==all_text[j+1])
1222
&& (all_text[i+2]==all_text[j+2]))
1223
{ grandtable[t+test.occurrences]=j;
1225
if (t+test.occurrences==MAX_GTABLE)
1226
{ printf("All %ld cross-references used\n",
1227
(long int) MAX_GTABLE);
1232
if (test.occurrences>=2)
1233
{ tlbtab[no_occs]=test;
1234
tlbtab[no_occs].intab=t; t+=tlbtab[no_occs].occurrences;
1235
if (max<tlbtab[no_occs].occurrences)
1236
max=tlbtab[no_occs].occurrences;
1238
if (no_occs==MAX_TLBS)
1239
{ printf("All %d three-letter-blocks used\n",
1248
grandflags=my_calloc(sizeof(int), max, "grandflags");
1251
printf("Cross-reference table (%ld entries) built...\n",
1252
(long int) no_occs);
1253
/* for (i=0; i<no_occs; i++)
1254
printf("%4d %4d '%s' %d\n",i,tlbtab[i].intab,tlbtab[i].text,
1255
tlbtab[i].occurrences);
1258
for (i=0; i<64; i++) bestyet2[i].length=0; selected=2;
1260
while ((available>0)&&(selected<64))
1261
{ printf("Pass %d\n", ++pass_no);
1265
for (i=0; i<256; i++)
1266
if (bestyet[i].score!=0)
1268
nl=bestyet[i].length;
1269
for (j2=0; j2<nl; j2++) bestyet[i].text[j2]=
1270
all_text[bestyet[i].location+j2];
1271
bestyet[i].text[nl]=0;
1274
/* printf("End of pass results:\n");
1275
printf("\nno score freq string\n");
1276
for (i=0; i<256; i++)
1277
if (bestyet[i].score>0)
1278
printf("%02d: %4d %4d '%s'\n", i, bestyet[i].score,
1279
bestyet[i].popularity, bestyet[i].text);
1284
for (i=0; i<256; i++)
1285
if (max<bestyet[i].score)
1286
{ max=bestyet[i].score;
1291
{ bestyet2[selected++]=bestyet[maxat];
1294
"Selection %2ld: '%s' (repeated %ld times, scoring %ld)\n",
1295
(long int) selected,bestyet[maxat].text,
1296
(long int) bestyet[maxat].popularity,
1297
(long int) bestyet[maxat].score);
1299
test.text[0]=bestyet[maxat].text[0];
1300
test.text[1]=bestyet[maxat].text[1];
1301
test.text[2]=bestyet[maxat].text[2];
1304
for (i=0; i<no_occs; i++)
1305
if (strcmp(test.text,tlbtab[i].text)==0)
1308
for (j=0; j<tlbtab[i].occurrences; j++)
1309
{ if (memcmp(bestyet[maxat].text,
1310
all_text+grandtable[tlbtab[i].intab+j],
1311
bestyet[maxat].length)==0)
1312
{ for (j2=0; j2<bestyet[maxat].length; j2++)
1313
all_text[grandtable[tlbtab[i].intab+j]+j2]='\n';
1317
for (i=0; i<256; i++)
1318
if ((bestyet[i].score>0)&&
1319
(any_overlap(bestyet[maxat].text,bestyet[i].text)==1))
1320
{ bestyet[i].score=0;
1321
/* printf("Discarding '%s' as overlapping\n",
1322
bestyet[i].text); */
1325
} while ((max>0)&&(available>0)&&(selected<64));
1328
printf("\nChosen abbreviations (in Inform syntax):\n\n");
1329
for (i=0; i<selected; i++)
1330
printf("Abbreviate \"%s\";\n", bestyet2[i].text);
1335
/* ------------------------------------------------------------------------- */
1336
/* The dictionary manager begins here. */
1338
/* Speed is extremely important in these algorithms. If a linear-time */
1339
/* routine were used to search the dictionary words so far built up, then */
1340
/* Inform would crawl. */
1342
/* Instead, the dictionary is stored as a binary tree, which is kept */
1343
/* balanced with the red-black algorithm. */
1344
/* ------------------------------------------------------------------------- */
1345
/* A dictionary table similar to the Z-machine format is kept: there is a */
1346
/* 7-byte header (left blank here to be filled in at the */
1347
/* construct_storyfile() stage in "tables.c") and then a sequence of */
1348
/* records, one per word, in the form */
1350
/* <Z-coded text> <flags> <adjectivenumber> <verbnumber> */
1351
/* 4 or 6 bytes byte byte byte */
1353
/* These records are stored in "accession order" (i.e. in order of their */
1354
/* first being received by these routines) and only alphabetically sorted */
1355
/* by construct_storyfile() (using the array below). */
1356
/* ------------------------------------------------------------------------- */
1358
uchar *dictionary, /* (These two pointers are externally
1359
used only in "tables.c" when
1360
building the story-file) */
1361
*dictionary_top; /* Pointer to next free record */
1363
int dict_entries; /* Total number of records entered */
1365
/* ------------------------------------------------------------------------- */
1366
/* dict_word is a typedef for a struct of 6 unsigned chars (defined in */
1367
/* "header.h"): it holds the (4 or) 6 bytes of Z-coded text of a word. */
1368
/* Usefully, because the PAD character 5 is < all alphabetic characters, */
1369
/* alphabetic order corresponds to numeric order. For this reason, the */
1370
/* dict_word is called the "sort code" of the original text word. */
1372
/* ###- In modifying the compiler, I've found it easier to discard the */
1373
/* typedef, and operate directly on uchar arrays of length DICT_WORD_SIZE. */
1374
/* In Z-code, DICT_WORD_SIZE will be 6, so the Z-code compiler will work */
1375
/* as before. In Glulx, it can be any value up to MAX_DICT_WORD_SIZE. */
1376
/* (That limit is defined as 40 in the header; it exists only for a few */
1377
/* static buffers, and can be increased without using significant memory.) */
1378
/* ------------------------------------------------------------------------- */
1380
extern int compare_sorts(uchar *d1, uchar *d2)
1382
for (i=0; i<DICT_WORD_SIZE; i++)
1383
if (d1[i]!=d2[i]) return((int)(d1[i]) - (int)(d2[i]));
1384
/* (since memcmp(d1, d2, DICT_WORD_SIZE); runs into a bug on some Unix
1389
extern void copy_sorts(uchar *d1, uchar *d2)
1391
for (i=0; i<DICT_WORD_SIZE; i++)
1395
static uchar prepared_sort[MAX_DICT_WORD_SIZE]; /* Holds the sort code
1398
static int number_and_case;
1400
/* Also used by verbs.c */
1401
static void dictionary_prepare_z(char *dword, uchar *optresult)
1402
{ int i, j, k, k2, wd[13]; int32 tot;
1404
/* A rapid text translation algorithm using only the simplified rules
1405
applying to the text of dictionary entries: first produce a sequence
1406
of 6 (v3) or 9 (v4+) Z-characters */
1408
number_and_case = 0;
1410
for (i=0, j=0; dword[j]!=0; i++, j++)
1411
{ if ((dword[j] == '/') && (dword[j+1] == '/'))
1412
{ for (j+=2; dword[j] != 0; j++)
1414
{ case 'p': number_and_case |= 4; break;
1416
error_named("Expected 'p' after '//' \
1417
to give number of dictionary word", dword);
1427
warning_named("Obsolete usage: use the ^ character for the \
1428
apostrophe in", dword);
1429
if (k==(int) '^') k=(int) '\'';
1433
{ int unicode = text_to_unicode(dword+j);
1434
if ((unicode < 128) && isupper(unicode)) unicode = tolower(unicode);
1435
k = unicode_to_zscii(unicode);
1436
j += textual_form_length - 1;
1437
if ((k == 5) || (k >= 0x100))
1438
{ unicode_char_error(
1439
"Character can be printed but not input:", unicode);
1442
k2 = zscii_to_alphabet_grid[(uchar) k];
1445
{ if (isupper(k)) k = tolower(k);
1446
k2 = iso_to_alphabet_grid[(uchar) k];
1450
{ if ((k2 == -5) || (k2 <= -0x100))
1451
char_error("Character can be printed but not input:", k);
1453
{ /* Use 4 more Z-chars to encode a ZSCII escape sequence */
1455
wd[i++] = 5; wd[i++] = 6;
1457
wd[i++] = k2/32; wd[i] = k2%32;
1461
{ alphabet_used[k2] = 'Y';
1463
wd[i++]=3+(k2/26); /* Change alphabet for symbols */
1464
wd[i]=6+(k2%26); /* Write the Z character */
1468
/* Fill up to the end of the dictionary block with PAD characters */
1470
for (; i<9; i++) wd[i]=5;
1472
/* The array of Z-chars is converted to three 2-byte blocks */
1474
tot = wd[2] + wd[1]*(1<<5) + wd[0]*(1<<10);
1475
prepared_sort[1]=tot%0x100;
1476
prepared_sort[0]=(tot/0x100)%0x100;
1477
tot = wd[5] + wd[4]*(1<<5) + wd[3]*(1<<10);
1478
prepared_sort[3]=tot%0x100;
1479
prepared_sort[2]=(tot/0x100)%0x100;
1480
tot = wd[8] + wd[7]*(1<<5) + wd[6]*(1<<10);
1481
prepared_sort[5]=tot%0x100;
1482
prepared_sort[4]=(tot/0x100)%0x100;
1484
/* Set the "end bit" on the 2nd (in v3) or the 3rd (v4+) 2-byte block */
1486
if (version_number==3) prepared_sort[2]+=0x80;
1487
else prepared_sort[4]+=0x80;
1489
if (optresult) copy_sorts(optresult, prepared_sort);
1492
/* Also used by verbs.c */
1493
static void dictionary_prepare_g(char *dword, uchar *optresult)
1497
number_and_case = 0;
1499
for (i=0, j=0; (dword[j]!=0); i++, j++) {
1500
if ((dword[j] == '/') && (dword[j+1] == '/')) {
1501
for (j+=2; dword[j] != 0; j++) {
1504
number_and_case |= 4;
1507
error_named("Expected 'p' after '//' \
1508
to give gender or number of dictionary word", dword);
1514
if (i>=DICT_WORD_SIZE) break;
1518
warning_named("Obsolete usage: use the ^ character for the \
1519
apostrophe in", dword);
1522
if (k=='~') /* as in iso_to_alphabet_grid */
1526
int32 unicode = text_to_unicode(dword+j);
1527
j += textual_form_length - 1;
1528
if (unicode >= 0 && unicode < 256) {
1532
error("Unicode characters beyond Latin-1 are not yet supported in Glulx");
1537
if (k >= 'A' && k <= 'Z')
1540
prepared_sort[i] = k;
1543
for (; i<DICT_WORD_SIZE; i++)
1544
prepared_sort[i] = 0;
1546
if (optresult) copy_sorts(optresult, prepared_sort);
1549
extern void dictionary_prepare(char *dword, uchar *optresult)
1552
dictionary_prepare_z(dword, optresult);
1554
dictionary_prepare_g(dword, optresult);
1557
/* ------------------------------------------------------------------------- */
1558
/* The arrays below are all concerned with the problem of alphabetically */
1559
/* sorting the dictionary during the compilation pass. */
1560
/* Note that it is not enough simply to apply qsort to the dictionary at */
1561
/* the end of the pass: we need to ensure that no duplicates are ever */
1564
/* dict_sort_codes[n] the sort code of record n: i.e., of the nth */
1565
/* word to be entered into the dictionary, where */
1566
/* n counts upward from 0 */
1567
/* (n is also called the "accession number") */
1569
/* The tree structure encodes an ordering. The special value VACANT means */
1570
/* "no node here": otherwise, node numbers are the same as accession */
1571
/* numbers. At all times, "root" holds the node number of the top of the */
1572
/* tree; each node has up to two branches, such that the subtree of the */
1573
/* left branch is always alphabetically before what's at the node, and */
1574
/* the subtree to the right is always after; and all branches are coloured */
1575
/* either "black" or "red". These colours are used to detect points where */
1576
/* the tree is growing asymmetrically (and therefore becoming inefficient */
1578
/* ------------------------------------------------------------------------- */
1585
typedef struct dict_tree_node_s
1586
{ int branch[2]; /* Branch 0 is "left", 1 is "right" */
1587
char colour; /* The colour of the branch to the parent */
1590
static dict_tree_node *dtree;
1592
int *final_dict_order;
1593
static uchar *dict_sort_codes;
1595
static void dictionary_begin_pass(void)
1597
/* Leave room for the 7-byte header (added in "tables.c" much later) */
1598
/* Glulx has a 4-byte header instead. */
1601
dictionary_top=dictionary+7;
1603
dictionary_top=dictionary+4;
1609
static int fdo_count;
1610
static void recursively_sort(int node)
1611
{ if (dtree[node].branch[0] != VACANT)
1612
recursively_sort(dtree[node].branch[0]);
1613
final_dict_order[node] = fdo_count++;
1614
if (dtree[node].branch[1] != VACANT)
1615
recursively_sort(dtree[node].branch[1]);
1618
extern void sort_dictionary(void)
1621
{ for (i=0; i<dict_entries; i++)
1622
final_dict_order[i] = i;
1627
{ fdo_count = 0; recursively_sort(root);
1631
/* ------------------------------------------------------------------------- */
1632
/* If "dword" is in the dictionary, return its accession number plus 1; */
1633
/* If not, return 0. */
1634
/* ------------------------------------------------------------------------- */
1636
static int dictionary_find(char *dword)
1639
dictionary_prepare(dword, NULL);
1641
while (at != VACANT)
1642
{ n = compare_sorts(prepared_sort, dict_sort_codes+at*DICT_WORD_SIZE);
1643
if (n==0) return at + 1;
1644
if (n>0) at = dtree[at].branch[1]; else at = dtree[at].branch[0];
1649
/* ------------------------------------------------------------------------- */
1650
/* Add "dword" to the dictionary with (x,y,z) as its data bytes; unless */
1651
/* it already exists, in which case OR the data with (x,y,z) */
1653
/* Returns: the accession number. */
1654
/* ------------------------------------------------------------------------- */
1656
extern int dictionary_add(char *dword, int x, int y, int z)
1658
int ggfr, gfr, fr, r;
1659
int ggf = VACANT, gf = VACANT, f = VACANT, at = root;
1661
int res=((version_number==3)?4:6);
1663
dictionary_prepare(dword, NULL);
1666
{ root = 0; goto CreateEntry;
1670
n = compare_sorts(prepared_sort, dict_sort_codes+at*DICT_WORD_SIZE);
1674
p = dictionary+7 + at*(3+res) + res;
1675
p[0]=(p[0])|x; p[1]=(p[1])|y; p[2]=(p[2])|z;
1676
if (x & 128) p[0] = (p[0])|number_and_case;
1679
p = dictionary+4 + at*(7+DICT_WORD_SIZE) + 1+DICT_WORD_SIZE;
1680
p[1]=(p[1])|x; p[3]=(p[3])|y; p[5]=(p[5])|z;
1681
if (x & 128) p[1] = (p[1]) | number_and_case;
1685
if (n>0) r=1; else r=0;
1687
a = dtree[at].branch[0]; b = dtree[at].branch[1];
1688
if ((a != VACANT) && (dtree[a].colour == RED) &&
1689
(b != VACANT) && (dtree[b].colour == RED))
1690
{ dtree[a].colour = BLACK;
1691
dtree[b].colour = BLACK;
1693
dtree[at].colour = RED;
1695
/* A tree rotation may be needed to avoid two red links in a row:
1697
ggf (or else gf is root) ggf (or f is root)
1700
/ \(red) / \ (both red)
1706
In effect we rehang the "gf" subtree from "f".
1707
See the Technical Manual for further details.
1710
if ((f != VACANT) && (gf != VACANT) && (dtree[f].colour == RED))
1713
{ if (ggf == VACANT) root = f; else dtree[ggf].branch[ggfr] = f;
1714
dtree[gf].branch[gfr] = dtree[f].branch[1-fr];
1715
dtree[f].branch[1-fr] = gf;
1716
dtree[f].colour = BLACK;
1717
dtree[gf].colour = RED;
1718
gf = ggf; gfr = ggfr;
1721
{ if (ggf == VACANT) root = at; else dtree[ggf].branch[ggfr] = at;
1722
dtree[at].colour = BLACK;
1723
dtree[gf].colour = RED;
1724
dtree[f].branch[fr] = dtree[at].branch[gfr];
1725
dtree[gf].branch[gfr] = dtree[at].branch[fr];
1726
dtree[at].branch[gfr] = f;
1727
dtree[at].branch[fr] = gf;
1729
r = 1-r; n = at; if (r==fr) at = f; else at = gf;
1730
f = n; gf = ggf; fr = 1-r; gfr = ggfr;
1735
if (dtree[at].branch[r] == VACANT)
1736
{ dtree[at].colour = RED;
1738
if ((f != VACANT) && (gf != VACANT) && (dtree[f].colour == RED))
1740
{ if (ggf == VACANT) root = f; else dtree[ggf].branch[ggfr] = f;
1741
dtree[gf].branch[gfr] = dtree[f].branch[1-fr];
1742
dtree[f].branch[1-fr] = gf;
1743
dtree[f].colour = BLACK;
1744
dtree[gf].colour = RED;
1747
{ if (ggf == VACANT) root = at; else dtree[ggf].branch[ggfr] = at;
1748
dtree[at].colour = BLACK;
1749
dtree[gf].colour = RED;
1750
dtree[f].branch[fr] = dtree[at].branch[gfr];
1751
dtree[gf].branch[gfr] = dtree[at].branch[fr];
1752
dtree[at].branch[gfr] = f;
1753
dtree[at].branch[fr] = gf;
1755
r = 1-r; n = at; if (r==fr) at = f; else at = gf;
1759
dtree[at].branch[r] = dict_entries;
1762
ggf = gf; gf = f; f = at; at = dtree[at].branch[r];
1763
ggfr = gfr; gfr = fr; fr = r;
1768
if (dict_entries==MAX_DICT_ENTRIES)
1769
memoryerror("MAX_DICT_ENTRIES",MAX_DICT_ENTRIES);
1771
dtree[dict_entries].branch[0] = VACANT;
1772
dtree[dict_entries].branch[1] = VACANT;
1773
dtree[dict_entries].colour = BLACK;
1775
/* Address in Inform's own dictionary table to write the record to */
1779
p = dictionary + (3+res)*dict_entries + 7;
1781
/* So copy in the 4 (or 6) bytes of Z-coded text and the 3 data
1784
p[0]=prepared_sort[0]; p[1]=prepared_sort[1];
1785
p[2]=prepared_sort[2]; p[3]=prepared_sort[3];
1786
if (version_number > 3)
1787
{ p[4]=prepared_sort[4]; p[5]=prepared_sort[5]; }
1788
p[res]=x; p[res+1]=y; p[res+2]=z;
1789
if (x & 128) p[res] = (p[res])|number_and_case;
1791
dictionary_top += res+3;
1796
p = dictionary + 4 + (7+DICT_WORD_SIZE)*dict_entries;
1797
p[0] = 0x60; /* type byte -- dict word */
1799
for (i=0; i<DICT_WORD_SIZE; i++)
1800
p[1+i] = prepared_sort[i];
1802
p[1+DICT_WORD_SIZE+0] = 0; p[1+DICT_WORD_SIZE+1] = x;
1803
p[1+DICT_WORD_SIZE+2] = 0; p[1+DICT_WORD_SIZE+3] = y;
1804
p[1+DICT_WORD_SIZE+4] = 0; p[1+DICT_WORD_SIZE+5] = z;
1806
p[1+DICT_WORD_SIZE+1] |= number_and_case;
1808
dictionary_top += (7+DICT_WORD_SIZE);
1812
copy_sorts(dict_sort_codes+dict_entries*DICT_WORD_SIZE, prepared_sort);
1814
return dict_entries++;
1817
/* ------------------------------------------------------------------------- */
1818
/* Used in "tables.c" for "Extend ... only", to renumber a verb-word to a */
1819
/* new verb syntax of its own. (Otherwise existing verb-words never */
1820
/* change their verb-numbers.) */
1821
/* ------------------------------------------------------------------------- */
1823
extern void dictionary_set_verb_number(char *dword, int to)
1825
int res=((version_number==3)?4:6);
1826
i=dictionary_find(dword);
1830
p=dictionary+7+(i-1)*(3+res)+res;
1834
p=dictionary+4 + (i-1)*(7+DICT_WORD_SIZE) + 1+DICT_WORD_SIZE;
1840
/* ------------------------------------------------------------------------- */
1841
/* Tracing code for the dictionary: used not only by "trace" and text */
1842
/* transcription, but also (in the case of "word_to_ascii") in a vital */
1843
/* by the linker. */
1844
/* ------------------------------------------------------------------------- */
1846
static char *d_show_to;
1847
static int d_show_total;
1849
static void show_char(char c)
1850
{ if (d_show_to == NULL) printf("%c", c);
1852
{ int i = strlen(d_show_to);
1853
d_show_to[i] = c; d_show_to[i+1] = 0;
1857
extern void word_to_ascii(uchar *p, char *results)
1858
{ int i, shift, cc, zchar; uchar encoded_word[9];
1859
encoded_word[0] = (((int) p[0])&0x7c)/4;
1860
encoded_word[1] = 8*(((int) p[0])&0x3) + (((int) p[1])&0xe0)/32;
1861
encoded_word[2] = ((int) p[1])&0x1f;
1862
encoded_word[3] = (((int) p[2])&0x7c)/4;
1863
encoded_word[4] = 8*(((int) p[2])&0x3) + (((int) p[3])&0xe0)/32;
1864
encoded_word[5] = ((int) p[3])&0x1f;
1865
if (version_number > 3)
1866
{ encoded_word[6] = (((int) p[4])&0x7c)/4;
1867
encoded_word[7] = 8*(((int) p[4])&0x3) + (((int) p[5])&0xe0)/32;
1868
encoded_word[8] = ((int) p[5])&0x1f;
1872
for (i=0; i< ((version_number==3)?6:9); i++)
1873
{ zchar = encoded_word[i];
1875
if (zchar == 4) shift = 1;
1877
if (zchar == 5) shift = 2;
1879
{ if ((shift == 2) && (zchar == 6))
1880
{ zchar = 32*encoded_word[i+1] + encoded_word[i+2];
1882
if ((zchar>=32) && (zchar<=126))
1883
results[cc++] = zchar;
1885
{ zscii_to_text(results+cc, zchar);
1886
cc = strlen(results);
1890
{ zscii_to_text(results+cc, (alphabet[shift])[zchar-6]);
1891
cc = strlen(results);
1899
static void recursively_show_z(int node)
1900
{ int i, cprinted, flags; uchar *p;
1901
char textual_form[32];
1902
int res = (version_number == 3)?4:6;
1904
if (dtree[node].branch[0] != VACANT)
1905
recursively_show_z(dtree[node].branch[0]);
1907
p = (uchar *)dictionary + 7 + (3+res)*node;
1909
word_to_ascii(p, textual_form);
1911
for (cprinted = 0; textual_form[cprinted]!=0; cprinted++)
1912
show_char(textual_form[cprinted]);
1913
for (; cprinted < 4 + ((version_number==3)?6:9); cprinted++)
1916
if (d_show_to == NULL)
1917
{ for (i=0; i<3+res; i++) printf("%02x ",p[i]);
1919
flags = (int) p[res];
1922
if (flags & 4) printf("p"); else printf(" ");
1927
{ if (grammar_version_number == 1)
1928
printf("preposition:%d ", (int) p[res+2]);
1930
printf("preposition ");
1932
if ((flags & 3) == 3) printf("metaverb:%d ", (int) p[res+1]);
1933
else if ((flags & 3) == 1) printf("verb:%d ", (int) p[res+1]);
1937
if (d_show_total++ == 5)
1939
if (d_show_to != NULL)
1940
{ write_to_transcript_file(d_show_to);
1945
if (dtree[node].branch[1] != VACANT)
1946
recursively_show_z(dtree[node].branch[1]);
1949
static void recursively_show_g(int node)
1951
warning("### Glulx dictionary-show not yet implemented.\n");
1954
static void show_alphabet(int i)
1955
{ int j, c; char chartext[8];
1957
for (j=0; j<26; j++)
1958
{ c = alphabet[i][j];
1960
if (alphabet_used[26*i+j] == 'N') printf("("); else printf(" ");
1962
zscii_to_text(chartext, c);
1963
printf("%s", chartext);
1965
if (alphabet_used[26*i+j] == 'N') printf(")"); else printf(" ");
1970
extern void show_dictionary(void)
1971
{ printf("Dictionary contains %d entries:\n",dict_entries);
1972
if (dict_entries != 0)
1973
{ d_show_total = 0; d_show_to = NULL;
1975
recursively_show_z(root);
1977
recursively_show_g(root);
1979
printf("\nZ-machine alphabet entries:\n");
1985
extern void write_dictionary_to_transcript(void)
1986
{ char d_buffer[81];
1988
sprintf(d_buffer, "\n[Dictionary contains %d entries:]\n", dict_entries);
1990
d_buffer[0] = 0; write_to_transcript_file(d_buffer);
1992
if (dict_entries != 0)
1993
{ d_show_total = 0; d_show_to = d_buffer;
1995
recursively_show_z(root);
1997
recursively_show_g(root);
1999
if (d_show_total != 0) write_to_transcript_file(d_buffer);
2002
/* ========================================================================= */
2003
/* Data structure management routines */
2004
/* ------------------------------------------------------------------------- */
2006
extern void init_text_vars(void)
2013
no_chars_transcribed = 0;
2014
is_abbreviation = FALSE;
2015
put_strings_in_low_memory = FALSE;
2017
for (j=0; j<256; j++) abbrevs_lookup[j] = -1;
2019
total_zchars_trans = 0;
2022
final_dict_order = NULL;
2023
dict_sort_codes = NULL;
2026
initialise_memory_block(&static_strings_area);
2029
extern void text_begin_pass(void)
2030
{ abbrevs_lookup_table_made = FALSE;
2032
total_chars_trans=0; total_bytes_trans=0;
2033
if (store_the_text) all_text_top=all_text;
2034
dictionary_begin_pass();
2035
low_strings_top = low_strings;
2037
static_strings_extent = 0;
2039
no_dynamic_strings = 0;
2042
/* Note: for allocation and deallocation of all_the_text, see inform.c */
2044
extern void text_allocate_arrays(void)
2045
{ abbreviations_at = my_malloc(MAX_ABBREVS*MAX_ABBREV_LENGTH,
2047
abbrev_values = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev values");
2048
abbrev_quality = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev quality");
2049
abbrev_freqs = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev freqs");
2051
dtree = my_calloc(sizeof(dict_tree_node), MAX_DICT_ENTRIES,
2052
"red-black tree for dictionary");
2053
final_dict_order = my_calloc(sizeof(int), MAX_DICT_ENTRIES,
2054
"final dictionary ordering table");
2055
dict_sort_codes = my_calloc(DICT_WORD_SIZE, MAX_DICT_ENTRIES,
2056
"dictionary sort codes");
2059
dictionary = my_malloc(9*MAX_DICT_ENTRIES+7,
2062
dictionary = my_malloc((7+DICT_WORD_SIZE)*MAX_DICT_ENTRIES+4,
2065
strings_holding_area
2066
= my_malloc(MAX_STATIC_STRINGS,"static strings holding area");
2067
low_strings = my_malloc(MAX_LOW_STRINGS,"low (abbreviation) strings");
2069
huff_entities = NULL;
2071
done_compression = FALSE;
2072
compression_table_size = 0;
2073
compressed_offsets = NULL;
2075
MAX_CHARACTER_SET = 0;
2078
if (compression_switch) {
2079
MAX_CHARACTER_SET = 257 + MAX_ABBREVS + MAX_DYNAMIC_STRINGS;
2080
huff_entities = my_calloc(sizeof(huffentity_t), MAX_CHARACTER_SET*2+1,
2081
"huffman entities");
2082
hufflist = my_calloc(sizeof(huffentity_t *), MAX_CHARACTER_SET,
2083
"huffman node list");
2085
compressed_offsets = my_calloc(sizeof(int32), MAX_NUM_STATIC_STRINGS,
2086
"static strings index table");
2090
extern void text_free_arrays(void)
2092
my_free(&strings_holding_area, "static strings holding area");
2093
my_free(&low_strings, "low (abbreviation) strings");
2094
my_free(&abbreviations_at, "abbreviations");
2095
my_free(&abbrev_values, "abbrev values");
2096
my_free(&abbrev_quality, "abbrev quality");
2097
my_free(&abbrev_freqs, "abbrev freqs");
2099
my_free(&dtree, "red-black tree for dictionary");
2100
my_free(&final_dict_order, "final dictionary ordering table");
2101
my_free(&dict_sort_codes, "dictionary sort codes");
2103
my_free(&dictionary,"dictionary");
2105
my_free(&compressed_offsets, "static strings index table");
2106
my_free(&hufflist, "huffman node list");
2107
my_free(&huff_entities, "huffman entities");
2109
deallocate_memory_block(&static_strings_area);
2112
extern void ao_free_arrays(void)
2113
{ my_free (&tlbtab,"tlb table");
2114
my_free (&sub_buffer,"sub_buffer");
2115
my_free (&bestyet,"bestyet");
2116
my_free (&bestyet2,"bestyet2");
2117
my_free (&grandtable,"grandtable");
2118
my_free (&grandflags,"grandflags");
2121
/* ========================================================================= */