~ubuntu-branches/ubuntu/vivid/inform/vivid

Viewing changes to src/text.c

Committer: Bazaar Package Importer
Author(s): Jan Christoph Nordholz
Date: 2008-05-26 22:09:44 UTC
mfrom: (2.1.1 lenny)
Revision ID: james.westby@ubuntu.com-20080526220944-ba7phz0d1k4vo7wx

Tags: 6.31.1+dfsg-1

* Remove a considerable number of files from the package
due to unacceptable licensing terms.
* Repair library symlinks.

files added:
debian/Makefile.upstream

debian/README.Debian-source

debian/compat

debian/inform-docs.doc-base.dm4

debian/inform-docs.doc-base.ibg

debian/inform-docs.install

debian/inform.install

debian/inform.links

debian/inform.postinst

debian/inform.prerm

inform-6.31.1

inform-6.31.1/AUTHORS

inform-6.31.1/COPYING

inform-6.31.1/INSTALL

inform-6.31.1/Makefile.am

inform-6.31.1/Makefile.in

inform-6.31.1/NEWS

inform-6.31.1/README

inform-6.31.1/VERSION

inform-6.31.1/aclocal.m4

inform-6.31.1/config

inform-6.31.1/config/commondefs

inform-6.31.1/config/config.guess

inform-6.31.1/config/config.sub

inform-6.31.1/config/depcomp

inform-6.31.1/config/install-sh

inform-6.31.1/config/missing

inform-6.31.1/config/mkinstalldirs

inform-6.31.1/configure

inform-6.31.1/configure.in

inform-6.31.1/contrib

inform-6.31.1/contrib/README

inform-6.31.1/contrib/inform-mode.el

inform-6.31.1/demos

inform-6.31.1/demos/Makefile.am

inform-6.31.1/demos/Makefile.in

inform-6.31.1/demos/README

inform-6.31.1/demos/advent.inf

inform-6.31.1/demos/adventureland.inf

inform-6.31.1/demos/balances.inf

inform-6.31.1/demos/museum.inf

inform-6.31.1/demos/ruins1.inf

inform-6.31.1/demos/ruins2.inf

inform-6.31.1/demos/ruins3.inf

inform-6.31.1/demos/toyshop.inf

inform-6.31.1/docs

inform-6.31.1/docs/README.glulx

inform-6.31.1/docs/README.include

inform-6.31.1/docs/README.library

inform-6.31.1/docs/README.paths

inform-6.31.1/docs/RelNotes.pdf

inform-6.31.1/include

inform-6.31.1/include/Makefile.am

inform-6.31.1/include/Makefile.in

inform-6.31.1/include/calyx_adjectives.h

inform-6.31.1/include/daemons.h

inform-6.31.1/include/easydoors.h

inform-6.31.1/include/hintsms.h

inform-6.31.1/include/links.h

inform-6.31.1/include/links.inf

inform-6.31.1/include/longint.h

inform-6.31.1/include/money.h

inform-6.31.1/include/nameable.h

inform-6.31.1/include/newbiegrammar.h

inform-6.31.1/include/printtime.h

inform-6.31.1/include/timepiece.h

inform-6.31.1/include/trinitystat.h

inform-6.31.1/include/whatis.h

inform-6.31.1/lib

inform-6.31.1/lib/Makefile.am

inform-6.31.1/lib/Makefile.in

inform-6.31.1/lib/english.h

inform-6.31.1/lib/grammar.h

inform-6.31.1/lib/infix.h

inform-6.31.1/lib/linklpa.h

inform-6.31.1/lib/linklv.h

inform-6.31.1/lib/parser.h

inform-6.31.1/lib/parserm.h

inform-6.31.1/lib/verblib.h

inform-6.31.1/lib/verblibm.h

inform-6.31.1/manual

inform-6.31.1/manual/Makefile.am

inform-6.31.1/manual/Makefile.in

inform-6.31.1/manual/app.html

inform-6.31.1/manual/backcover.html

inform-6.31.1/manual/ch1.html

inform-6.31.1/manual/ch2.html

inform-6.31.1/manual/ch3.html

inform-6.31.1/manual/ch4.html

inform-6.31.1/manual/ch5.html

inform-6.31.1/manual/ch6.html

inform-6.31.1/manual/ch7.html

inform-6.31.1/manual/ch8.html

inform-6.31.1/manual/cited.html

inform-6.31.1/manual/colophon.html

inform-6.31.1/manual/contents.html

inform-6.31.1/manual/dm4-011_1.jpg

inform-6.31.1/manual/dm4-015_1.jpg

inform-6.31.1/manual/dm4-048_1.jpg

inform-6.31.1/manual/dm4-057_1.jpg

inform-6.31.1/manual/dm4-084_1.jpg

inform-6.31.1/manual/dm4-092_1.jpg

inform-6.31.1/manual/dm4-097_1.jpg

inform-6.31.1/manual/dm4-106_1.jpg

inform-6.31.1/manual/dm4-116_1.jpg

inform-6.31.1/manual/dm4-122_1.jpg

inform-6.31.1/manual/dm4-125_1.jpg

inform-6.31.1/manual/dm4-126_1.jpg

inform-6.31.1/manual/dm4-128_1.jpg

inform-6.31.1/manual/dm4-132_1.jpg

inform-6.31.1/manual/dm4-135_1.jpg

inform-6.31.1/manual/dm4-137_1.jpg

inform-6.31.1/manual/dm4-141_1.jpg

inform-6.31.1/manual/dm4-144_1.jpg

inform-6.31.1/manual/dm4-148_1.jpg

inform-6.31.1/manual/dm4-154_1.jpg

inform-6.31.1/manual/dm4-157_1.jpg

inform-6.31.1/manual/dm4-162_1.jpg

inform-6.31.1/manual/dm4-170_1.jpg

inform-6.31.1/manual/dm4-176_1.jpg

inform-6.31.1/manual/dm4-178_1.jpg

inform-6.31.1/manual/dm4-186_1.jpg

inform-6.31.1/manual/dm4-198_1.jpg

inform-6.31.1/manual/dm4-204_1.jpg

inform-6.31.1/manual/dm4-211_1.jpg

inform-6.31.1/manual/dm4-216_1.jpg

inform-6.31.1/manual/dm4-222_1.jpg

inform-6.31.1/manual/dm4-225_1.jpg

inform-6.31.1/manual/dm4-232_1.jpg

inform-6.31.1/manual/dm4-241_1.jpg

inform-6.31.1/manual/dm4-247_1.jpg

inform-6.31.1/manual/dm4-253_1.jpg

inform-6.31.1/manual/dm4-260_1.jpg

inform-6.31.1/manual/dm4-262_1.jpg

inform-6.31.1/manual/dm4-267_1.jpg

inform-6.31.1/manual/dm4-279_1.jpg

inform-6.31.1/manual/dm4-288_1.jpg

inform-6.31.1/manual/dm4-295_1.jpg

inform-6.31.1/manual/dm4-300_1.jpg

inform-6.31.1/manual/dm4-314_1.jpg

inform-6.31.1/manual/dm4-318_1.jpg

inform-6.31.1/manual/dm4-331_1.jpg

inform-6.31.1/manual/dm4-336_1.jpg

inform-6.31.1/manual/dm4-344_1.jpg

inform-6.31.1/manual/dm4-349_1.jpg

inform-6.31.1/manual/dm4-352_1.jpg

inform-6.31.1/manual/dm4-374_1.jpg

inform-6.31.1/manual/dm4-378_1.jpg

inform-6.31.1/manual/dm4-391_1.jpg

inform-6.31.1/manual/dm4-392_1.jpg

inform-6.31.1/manual/dm4-406_1.jpg

inform-6.31.1/manual/dm4-412_1.jpg

inform-6.31.1/manual/dm4-538_1.jpg

inform-6.31.1/manual/dm4-546_1.jpg

inform-6.31.1/manual/dm4-572_1.jpg

inform-6.31.1/manual/dm4index.html

inform-6.31.1/manual/exdm4index.html

inform-6.31.1/manual/index.html

inform-6.31.1/manual/intro.html

inform-6.31.1/manual/roller2.jpg

inform-6.31.1/manual/s1.html

inform-6.31.1/manual/s10.html

inform-6.31.1/manual/s11.html

inform-6.31.1/manual/s12.html

inform-6.31.1/manual/s13.html

inform-6.31.1/manual/s14.html

inform-6.31.1/manual/s15.html

inform-6.31.1/manual/s16.html

inform-6.31.1/manual/s17.html

inform-6.31.1/manual/s18.html

inform-6.31.1/manual/s19.html

inform-6.31.1/manual/s2.html

inform-6.31.1/manual/s20.html

inform-6.31.1/manual/s21.html

inform-6.31.1/manual/s22.html

inform-6.31.1/manual/s23.html

inform-6.31.1/manual/s24.html

inform-6.31.1/manual/s25.html

inform-6.31.1/manual/s26.html

inform-6.31.1/manual/s27.html

inform-6.31.1/manual/s28.html

inform-6.31.1/manual/s29.html

inform-6.31.1/manual/s3.html

inform-6.31.1/manual/s30.html

inform-6.31.1/manual/s31.html

inform-6.31.1/manual/s32.html

inform-6.31.1/manual/s33.html

inform-6.31.1/manual/s34.html

inform-6.31.1/manual/s35.html

inform-6.31.1/manual/s36.html

inform-6.31.1/manual/s37.html

inform-6.31.1/manual/s38.html

inform-6.31.1/manual/s39.html

inform-6.31.1/manual/s4.html

inform-6.31.1/manual/s40.html

inform-6.31.1/manual/s41.html

inform-6.31.1/manual/s42.html

inform-6.31.1/manual/s43.html

inform-6.31.1/manual/s44.html

inform-6.31.1/manual/s45.html

inform-6.31.1/manual/s46.html

inform-6.31.1/manual/s47.html

inform-6.31.1/manual/s48.html

inform-6.31.1/manual/s49.html

inform-6.31.1/manual/s5.html

inform-6.31.1/manual/s50.html

inform-6.31.1/manual/s51.html

inform-6.31.1/manual/s52.html

inform-6.31.1/manual/s6.html

inform-6.31.1/manual/s7.html

inform-6.31.1/manual/s8.html

inform-6.31.1/manual/s9.html

inform-6.31.1/manual/sa1.html

inform-6.31.1/manual/sa2.html

inform-6.31.1/manual/sa3.html

inform-6.31.1/manual/sa4.html

inform-6.31.1/manual/sa5.html

inform-6.31.1/manual/sa6.html

inform-6.31.1/manual/tables.html

inform-6.31.1/src

inform-6.31.1/src/Makefile.am

inform-6.31.1/src/Makefile.in

inform-6.31.1/src/arrays.c

inform-6.31.1/src/asm.c

inform-6.31.1/src/bpatch.c

inform-6.31.1/src/chars.c

inform-6.31.1/src/directs.c

inform-6.31.1/src/errors.c

inform-6.31.1/src/expressc.c

inform-6.31.1/src/expressp.c

inform-6.31.1/src/files.c

inform-6.31.1/src/header.h

inform-6.31.1/src/inform.c

inform-6.31.1/src/lexer.c

inform-6.31.1/src/linker.c

inform-6.31.1/src/memory.c

inform-6.31.1/src/objects.c

inform-6.31.1/src/states.c

inform-6.31.1/src/symbols.c

inform-6.31.1/src/syntax.c

inform-6.31.1/src/tables.c

inform-6.31.1/src/text.c

inform-6.31.1/src/veneer.c

inform-6.31.1/src/verbs.c

inform_BG.pdf

inform_DM4.pdf

files removed:
AUTHORS

COPYING

Craft.Of.Adventure.log

INSTALL

Makefile

Makefile.am

Makefile.in

NEWS

README

README.glulx

README.paths

README.source

RelNotes.pdf

VERSION

aclocal.m4

config

config.guess

config.sub

config/commondefs

config/config.guess

config/config.sub

config/depcomp

config/install-sh

config/missing

config/mkinstalldirs

configure

configure.in

contrib

contrib/README

contrib/inform-hilite.el

contrib/inform-mode.el

debian/doc.postinst

debian/doc.postrm

debian/doc.prerm

debian/inform-menu

demos

demos/Makefile

demos/Makefile.am

demos/Makefile.in

demos/README

demos/advent.inf

demos/adventureland.inf

demos/balances.inf

demos/museum.inf

demos/ruins1.inf

demos/ruins2.inf

demos/ruins3.inf

demos/toyshop.inf

extra

extra/Craft.Of.Adventure.tex

html

html/Makefile

html/Makefile.am

html/Makefile.in

html/answers1

html/answers1/answer1.html

html/answers1/answer10.html

html/answers1/answer11.html

html/answers1/answer12.html

html/answers1/answer13.html

html/answers1/answer14.html

html/answers1/answer15.html

html/answers1/answer16.html

html/answers1/answer17.html

html/answers1/answer18.html

html/answers1/answer19.html

html/answers1/answer2.html

html/answers1/answer20.html

html/answers1/answer21.html

html/answers1/answer22.html

html/answers1/answer23.html

html/answers1/answer24.html

html/answers1/answer25.html

html/answers1/answer26.html

html/answers1/answer27.html

html/answers1/answer28.html

html/answers1/answer29.html

html/answers1/answer3.html

html/answers1/answer30.html

html/answers1/answer31.html

html/answers1/answer32.html

html/answers1/answer33.html

html/answers1/answer34.html

html/answers1/answer35.html

html/answers1/answer36.html

html/answers1/answer37.html

html/answers1/answer38.html

html/answers1/answer39.html

html/answers1/answer4.html

html/answers1/answer40.html

html/answers1/answer41.html

html/answers1/answer42.html

html/answers1/answer43.html

html/answers1/answer44.html

html/answers1/answer45.html

html/answers1/answer46.html

html/answers1/answer47.html

html/answers1/answer48.html

html/answers1/answer49.html

html/answers1/answer5.html

html/answers1/answer50.html

html/answers1/answer6.html

html/answers1/answer7.html

html/answers1/answer8.html

html/answers1/answer9.html

html/answers2

html/answers2/answer51.html

html/answers2/answer52.html

html/answers2/answer53.html

html/answers2/answer54.html

html/answers2/answer55.html

html/answers2/answer56.html

html/answers2/answer57.html

html/answers2/answer58.html

html/answers2/answer59.html

html/answers2/answer60.html

html/answers2/answer61.html

html/answers2/answer62.html

html/answers2/answer63.html

html/answers2/answer64.html

html/answers2/answer65.html

html/answers2/answer66.html

html/answers2/answer67.html

html/answers2/answer68.html

html/answers2/answer69.html

html/answers2/answer70.html

html/answers2/answer71.html

html/answers2/answer72.html

html/answers2/answer73.html

html/answers2/answer74.html

html/answers2/answer75.html

html/answers2/answer76.html

html/answers2/answer77.html

html/answers2/answer78.html

html/answers2/answer79.html

html/answers2/answer80.html

html/answers2/answer81.html

html/answers2/answer82.html

html/answers2/answer83.html

html/answers2/answer84.html

html/answers2/answer85.html

html/answers2/answer86.html

html/answers2/answer87.html

html/answers2/answer88.html

html/answers2/answer89.html

html/answers2/answer90.html

html/answers2/answer91.html

html/answers2/answer92.html

html/answers2/answer93.html

html/answers2/answer94.html

html/answers2/answer95.html

html/answers2/answer96.html

html/chapter1.html

html/chapter2.html

html/chapter3.html

html/chapter4.html

html/chapter5.html

html/chapter6.html

html/chapterA.html

html/contents.html

html/icons

html/icons/dbend.gif

html/icons/ddbend.gif

html/icons/dddbend.gif

html/icons/ddexercise.gif

html/icons/dexercise.gif

html/icons/exercise.gif

html/icons/refs.gif

html/icons/warning.gif

html/index.html

html/intro.html

html/misprints.html

html/section1.html

html/section10.html

html/section11.html

html/section12.html

html/section13.html

html/section14.html

html/section15.html

html/section16.html

html/section17.html

html/section18.html

html/section19.html

html/section2.html

html/section20.html

html/section21.html

html/section22.html

html/section23.html

html/section24.html

html/section25.html

html/section26.html

html/section27.html

html/section28.html

html/section29.html

html/section3.html

html/section30.html

html/section31.html

html/section32.html

html/section33.html

html/section4.html

html/section5.html

html/section6.html

html/section7.html

html/section8.html

html/section9.html

html/sectionA1.html

html/sectionA10.html

html/sectionA11.html

html/sectionA12.html

html/sectionA2.html

html/sectionA3.html

html/sectionA4.html

html/sectionA5.html

html/sectionA6.html

html/sectionA7.html

html/sectionA8.html

html/sectionA9.html

include

include/Makefile

include/Makefile.am

include/Makefile.in

include/Makefile.in~

include/adhints.h

include/adhints.txt

include/altmenu.h

include/calyx_adjectives.h

include/center.h

include/clothing.h

include/compass.h

include/dirs.h

include/domenu.h

include/doors.h

include/footnote.h

include/hints.h

include/info.h

include/istring.h

include/longint.h

include/manual.h

include/menus.h

include/money.h

include/moveclass.h

include/movie.h

include/newmenu.h

include/newmenus.h

include/outofrch.h

include/physics.h

include/pluralob.h

include/printtime.h

include/showobj.h

include/smartcantgo.h

include/timepiece.h

include/timesys.h

include/timewait.h

include/utility.h

include/waittime.h

include/whatis.h

include/whowhat.h

include/yesno.h

include/znsi.h

info

info/Makefile

info/Makefile.am

info/Makefile.in

info/README

info/inform

info/inform-1

info/inform-10

info/inform-11

info/inform-12

info/inform-13

info/inform-14

info/inform-2

info/inform-3

info/inform-4

info/inform-5

info/inform-6

info/inform-7

info/inform-8

info/inform-9

lib/Makefile

lib/Makefile.am

lib/Makefile.in

lib/Makefile.in~

lib/english.h

lib/grammar.h

lib/infix.h

lib/linklpa.h

lib/linklv.h

lib/parser.h

lib/parserm.h

lib/readme.txt

lib/verblib.h

lib/verblibm.h

src/Makefile

src/Makefile.am

src/Makefile.in

src/Makefile.in~

src/arrays.c

src/asm.c

src/bpatch.c

src/chars.c

src/directs.c

src/errors.c

src/expressc.c

src/expressp.c

src/files.c

src/header.h

src/inform.c

src/lexer.c

src/linker.c

src/memory.c

src/objects.c

src/states.c

src/symbols.c

src/syntax.c

src/tables.c

src/text.c

src/veneer.c

src/verbs.c

tutor

tutor/Makefile

tutor/README

tutor/alice.inf

tutor/alice.txt

tutor/aloneice.inf

tutor/house01.inf

tutor/house02.inf

tutor/house03.inf

tutor/house04.inf

tutor/house05.inf

tutor/house06.inf

tutor/house07.inf

files modified:
debian/changelog

debian/control

debian/copyright

debian/inform.1

debian/rules

Show diffs side-by-side

added added

removed removed

src/text.c

/* ------------------------------------------------------------------------- */

/* "text" : Text translation, the abbreviations optimiser, the dictionary */

/* */

/* Part of Inform 6.30 */

/* copyright (c) Graham Nelson 1993 - 2004 */

/* */

/* ------------------------------------------------------------------------- */

#include "header.h"

uchar *low_strings, *low_strings_top; /* Start and next free byte in the low

strings pool */

int32 static_strings_extent; /* Number of bytes of static strings

made so far */

memory_block static_strings_area; /* Used if (!temporary_files_switch) to

hold the static strings area so far */

static uchar *strings_holding_area; /* Area holding translated strings

until they are moved into either

a temporary file, or the

static_strings_area below */

char *all_text, *all_text_top; /* Start and next byte free in (large)

text buffer holding the entire text

of the game, when it is being

recorded */

int put_strings_in_low_memory, /* When TRUE, put static strings in

the low strings pool at 0x100 rather

than in the static strings area */

is_abbreviation, /* When TRUE, the string being trans

is itself an abbreviation string

so can't make use of abbreviations */

abbrevs_lookup_table_made, /* The abbreviations lookup table is

constructed when the first non-

abbreviation string is translated:

this flag is TRUE after that */

abbrevs_lookup[256]; /* Once this has been constructed,

abbrevs_lookup[n] = the smallest

number of any abbreviation beginning

with ASCII character n, or -1

if none of the abbreviations do */

int no_abbreviations; /* No of abbreviations defined so far */

uchar *abbreviations_at; /* Memory to hold the text of any

abbreviation strings declared */

/* ------------------------------------------------------------------------- */

/* Glulx string compression storage */

/* ------------------------------------------------------------------------- */

int no_strings; /* No of strings in static strings

area. */

int no_dynamic_strings; /* No. of @.. string escapes used

(actually, the highest value used

plus one) */

static int MAX_CHARACTER_SET; /* Number of possible entities */

huffentity_t *huff_entities; /* The list of entities (characters,

abbreviations, @.. escapes, and

the terminator) */

static huffentity_t **hufflist; /* Copy of the list, for sorting */

int no_huff_entities; /* The number of entities in the list */

int huff_abbrev_start; /* Position in the list where string

abbreviations begin. */

int huff_dynam_start; /* Position in the list where @..

entities begin. */

int huff_entity_root; /* The position in the list of the root

entry (when considering the table

as a tree). */

int done_compression; /* Has the game text been compressed? */

int32 compression_table_size; /* Length of the Huffman table, in

bytes */

int32 compression_string_size; /* Length of the compressed string

data, in bytes */

int32 *compressed_offsets; /* The beginning of every string in

the game, relative to the beginning

of the Huffman table. (So entry 0

is equal to compression_table_size)*/

/* ------------------------------------------------------------------------- */

/* Abbreviation arrays */

/* ------------------------------------------------------------------------- */

int *abbrev_values;

int *abbrev_quality;

int *abbrev_freqs;

/* ------------------------------------------------------------------------- */

int32 total_chars_trans, /* Number of ASCII chars of text in */

total_bytes_trans, /* Number of bytes of Z-code text out */

zchars_trans_in_last_string; /* Number of Z-chars in last string:

needed only for abbrev efficiency

calculation in "directs.c" */

static int32 total_zchars_trans, /* Number of Z-chars of text out

(only used to calculate the above) */

no_chars_transcribed; /* Number of ASCII chars written to

the text transcription area (used

100

for the -r and -u switches) */

101

102

static int zchars_out_buffer[3], /* During text translation, a buffer of

103

3 Z-chars at a time: when it's full

104

these are written as a 2-byte word */

105

zob_index; /* Index (0 to 2) into it */

106

107

static unsigned char *text_out_pc; /* The "program counter" during text

108

translation: the next address to

109

write Z-coded text output to */

110

111

/* ------------------------------------------------------------------------- */

112

/* For variables/arrays used by the dictionary manager, see below */

113

/* ------------------------------------------------------------------------- */

114

115

/* ------------------------------------------------------------------------- */

116

/* Prepare the abbreviations lookup table (used to speed up abbreviation */

117

/* detection in text translation). We first bubble-sort the abbrevs into */

118

/* alphabetical order (this is necessary for the detection algorithm to */

119

/* to work). Since the table is only prepared once, and for a table */

120

/* of size at most 96, there's no point using an efficient sort algorithm. */

121

/* ------------------------------------------------------------------------- */

122

123

static void make_abbrevs_lookup(void)

124

{ int bubble_sort, j, k, l; char p[MAX_ABBREV_LENGTH]; char *p1, *p2;

125

126

{ bubble_sort = FALSE;

127

for (j=0; j<no_abbreviations; j++)

128

for (k=j+1; k<no_abbreviations; k++)

129

{ p1=(char *)abbreviations_at+j*MAX_ABBREV_LENGTH;

130

p2=(char *)abbreviations_at+k*MAX_ABBREV_LENGTH;

131

if (strcmp(p1,p2)<0)

132

{ strcpy(p,p1); strcpy(p1,p2); strcpy(p2,p);

133

l=abbrev_values[j]; abbrev_values[j]=abbrev_values[k];

134

abbrev_values[k]=l;

135

l=abbrev_quality[j]; abbrev_quality[j]=abbrev_quality[k];

136

abbrev_quality[k]=l;

137

bubble_sort = TRUE;

138

}

139

}

140

} while (bubble_sort);

141

142

for (j=no_abbreviations-1; j>=0; j--)

143

{ p1=(char *)abbreviations_at+j*MAX_ABBREV_LENGTH;

144

abbrevs_lookup[p1[0]]=j;

145

abbrev_freqs[j]=0;

146

}

147

abbrevs_lookup_table_made = TRUE;

148

}

149

150

/* ------------------------------------------------------------------------- */

151

/* Search the abbreviations lookup table (a routine which must be fast). */

152

/* The source text to compare is text[i], text[i+1], ... and this routine */

153

/* is only called if text[i] is indeed the first character of at least one */

154

/* abbreviation, "from" begin the least index into the abbreviations table */

155

/* of an abbreviation for which text[i] is the first character. Recall */

156

/* that the abbrevs table is in alphabetical order. */

157

/* */

158

/* The return value is -1 if there is no match. If there is a match, the */

159

/* text to be abbreviated out is over-written by a string of null chars */

160

/* with "ASCII" value 1, and the abbreviation number is returned. */

161

/* */

162

/* In Glulx, we *do not* do this overwriting with 1's. */

163

/* ------------------------------------------------------------------------- */

164

165

static int try_abbreviations_from(unsigned char *text, int i, int from)

166

{ int j, k; char *p, c;

167

c=text[i];

168

for (j=from, p=(char *)abbreviations_at+from*MAX_ABBREV_LENGTH;

169

(j<no_abbreviations)&&(c==p[0]); j++, p+=MAX_ABBREV_LENGTH)

170

{ if (text[i+1]==p[1])

171

{ for (k=2; p[k]!=0; k++)

172

if (text[i+k]!=p[k]) goto NotMatched;

173

if (!glulx_mode) {

174

for (k=0; p[k]!=0; k++) text[i+k]=1;

175

}

176

abbrev_freqs[j]++;

177

return(j);

178

NotMatched: ;

179

}

180

}

181

return(-1);

182

}

183

184

extern void make_abbreviation(char *text)

185

{

186

strcpy((char *)abbreviations_at

187

+ no_abbreviations*MAX_ABBREV_LENGTH, text);

188

189

is_abbreviation = TRUE;

190

abbrev_values[no_abbreviations] = compile_string(text, TRUE, TRUE);

191

is_abbreviation = FALSE;

192

193

/* The quality is the number of Z-chars saved by using this */

194

/* abbreviation: note that it takes 2 Z-chars to print it. */

195

196

abbrev_quality[no_abbreviations++] = zchars_trans_in_last_string - 2;

197

}

198

199

/* ------------------------------------------------------------------------- */

200

/* The front end routine for text translation */

201

/* ------------------------------------------------------------------------- */

202

203

extern int32 compile_string(char *b, int in_low_memory, int is_abbrev)

204

{ int i, j; uchar *c;

205

206

is_abbreviation = is_abbrev;

207

208

/* Put into the low memory pool (at 0x100 in the Z-machine) of strings */

209

/* which may be wanted as possible entries in the abbreviations table */

210

211

if (!glulx_mode && in_low_memory)

212

{ j=subtract_pointers(low_strings_top,low_strings);

213

low_strings_top=translate_text(low_strings_top,b);

214

i= subtract_pointers(low_strings_top,low_strings);

215

is_abbreviation = FALSE;

216

if (i>MAX_LOW_STRINGS)

217

memoryerror("MAX_LOW_STRINGS", MAX_LOW_STRINGS);

218

return(0x21+(j/2));

219

}

220

221

if (glulx_mode && done_compression)

222

compiler_error("Tried to add a string after compression was done.");

223

224

c = translate_text(strings_holding_area, b);

225

i = subtract_pointers(c, strings_holding_area);

226

227

if (i>MAX_STATIC_STRINGS)

228

memoryerror("MAX_STATIC_STRINGS",MAX_STATIC_STRINGS);

229

230

/* Insert null bytes as needed to ensure that the next static string */

231

/* also occurs at an address expressible as a packed address */

232

233

if (!glulx_mode) {

234

if (oddeven_packing_switch)

235

while ((i%(scale_factor*2))!=0)

236

{ i+=2; *c++ = 0; *c++ = 0;

237

}

238

else

239

while ((i%scale_factor)!=0)

240

{ i+=2; *c++ = 0; *c++ = 0;

241

}

242

}

243

244

j = static_strings_extent;

245

246

if (temporary_files_switch)

247

for (c=strings_holding_area; c<strings_holding_area+i;

248

c++, static_strings_extent++)

249

fputc(*c,Temp1_fp);

250

else

251

for (c=strings_holding_area; c<strings_holding_area+i;

252

c++, static_strings_extent++)

253

write_byte_to_memory_block(&static_strings_area,

254

static_strings_extent, *c);

255

256

is_abbreviation = FALSE;

257

258

if (!glulx_mode) {

259

return(j/scale_factor);

260

}

261

else {

262

/* The marker value is a one-based string number. (We reserve zero

263

to mean "not a string at all". */

264

return (++no_strings);

265

}

266

}

267

268

/* ------------------------------------------------------------------------- */

269

/* Output a single Z-character into the buffer, and flush it if full */

270

/* ------------------------------------------------------------------------- */

271

272

static void write_z_char_z(int i)

273

{ uint32 j;

274

ASSERT_ZCODE();

275

total_zchars_trans++;

276

zchars_out_buffer[zob_index++]=(i%32);

277

if (zob_index!=3) return;

278

zob_index=0;

279

j= zchars_out_buffer[0]*0x0400 + zchars_out_buffer[1]*0x0020

280

+ zchars_out_buffer[2];

281

text_out_pc[0] = j/256; text_out_pc[1] = j%256; text_out_pc+=2;

282

total_bytes_trans+=2;

283

}

284

285

static void write_zscii(int zsc)

286

{

287

int lookup_value, in_alphabet;

288

289

if (zsc==' ')

290

{ write_z_char_z(0);

291

return;

292

}

293

294

if (zsc < 0x100) lookup_value = zscii_to_alphabet_grid[zsc];

295

296

else lookup_value = -1;

297

298

if (lookup_value >= 0)

299

{ alphabet_used[lookup_value] = 'Y';

300

in_alphabet = lookup_value/26;

301

if (in_alphabet==1) write_z_char_z(4); /* SHIFT to A1 */

302

if (in_alphabet==2) write_z_char_z(5); /* SHIFT to A2 */

303

write_z_char_z(lookup_value%26 + 6);

304

}

305

else

306

{ write_z_char_z(5); write_z_char_z(6);

307

write_z_char_z(zsc/32); write_z_char_z(zsc%32);

308

}

309

}

310

311

/* ------------------------------------------------------------------------- */

312

/* Finish a Z-coded string, padding out with Z-char 5s if necessary and */

313

/* setting the "end" bit on the final 2-byte word */

314

/* ------------------------------------------------------------------------- */

315

316

static void end_z_chars(void)

317

{ unsigned char *p;

318

zchars_trans_in_last_string=total_zchars_trans-zchars_trans_in_last_string;

319

while (zob_index!=0) write_z_char_z(5);

320

p=(unsigned char *) text_out_pc;

321

*(p-2)= *(p-2)+128;

322

}

323

324

/* Glulx handles this much more simply -- compression is done elsewhere. */

325

static void write_z_char_g(int i)

326

{

327

ASSERT_GLULX();

328

total_zchars_trans++;

329

text_out_pc[0] = i;

330

text_out_pc++;

331

total_bytes_trans++;

332

}

333

334

/* ------------------------------------------------------------------------- */

335

/* The main routine "text.c" provides to the rest of Inform: the text */

336

/* translator. p is the address to write output to, s_text the source text */

337

/* and the return value is the next free address to write output to. */

338

/* Note that the source text may be corrupted by this routine. */

339

/* ------------------------------------------------------------------------- */

340

341

extern uchar *translate_text(uchar *p, char *s_text)

342

{ int i, j, k, in_alphabet, lookup_value;

343

int32 unicode; int zscii;

344

unsigned char *text_in;

345

346

/* Cast the input and output streams to unsigned char: text_out_pc will

347

advance as bytes of Z-coded text are written, but text_in doesn't */

348

349

text_in = (unsigned char *) s_text;

350

text_out_pc = (unsigned char *) p;

351

352

/* Remember the Z-chars total so that later we can subtract to find the

353

number of Z-chars translated on this string */

354

355

zchars_trans_in_last_string = total_zchars_trans;

356

357

/* Start with the Z-characters output buffer empty */

358

359

zob_index=0;

360

361

/* If this is the first text translated since the abbreviations were

362

declared, and if some were declared, then it's time to make the

363

lookup table for abbreviations

364

365

(Except: we don't if the text being translated is itself

366

the text of an abbreviation currently being defined) */

367

368

if ((!abbrevs_lookup_table_made) && (no_abbreviations > 0)

369

&& (!is_abbreviation))

370

make_abbrevs_lookup();

371

372

/* If we're storing the whole game text to memory, then add this text */

373

374

if ((!is_abbreviation) && (store_the_text))

375

{ no_chars_transcribed += strlen(s_text)+2;

376

if (no_chars_transcribed >= MAX_TRANSCRIPT_SIZE)

377

memoryerror("MAX_TRANSCRIPT_SIZE", MAX_TRANSCRIPT_SIZE);

378

sprintf(all_text_top, "%s\n\n", s_text);

379

all_text_top += strlen(all_text_top);

380

}

381

382

if (transcript_switch && (!veneer_mode))

383

write_to_transcript_file(s_text);

384

385

if (!glulx_mode) {

386

387

/* The empty string of Z-text is illegal, since it can't carry an end

388

bit: so we translate an empty string of ASCII text to just the

389

pad character 5. Printing this causes nothing to appear on screen. */

390

391

if (text_in[0]==0) write_z_char_z(5);

392

393

/* Loop through the characters of the null-terminated input text: note

394

that if 1 is written over a character in the input text, it is

395

afterwards ignored */

396

397

for (i=0; text_in[i]!=0; i++)

398

{ total_chars_trans++;

399

400

/* Contract ". " into ". " if double-space-removing switch set:

401

likewise "? " and "! " if the setting is high enough */

402

403

if ((double_space_setting >= 1)

404

&& (text_in[i+1]==' ') && (text_in[i+2]==' '))

405

{ if (text_in[i]=='.') text_in[i+2]=1;

406

if (double_space_setting >= 2)

407

{ if (text_in[i]=='?') text_in[i+2]=1;

408

if (text_in[i]=='!') text_in[i+2]=1;

409

}

410

}

411

412

/* Try abbreviations if the economy switch set */

413

414

if ((economy_switch) && (!is_abbreviation)

415

&& ((k=abbrevs_lookup[text_in[i]])!=-1))

416

{ if ((j=try_abbreviations_from(text_in, i, k))!=-1)

417

{ if (j<32) { write_z_char_z(2); write_z_char_z(j); }

418

else { write_z_char_z(3); write_z_char_z(j-32); }

419

}

420

}

421

422

/* '@' is the escape character in Inform string notation: the various

423

possibilities are:

424

425

(printing only)

426

@@decimalnumber : write this ZSCII char (0 to 1023)

427

@twodigits : write the abbreviation string with this

428

decimal number

429

430

(any string context)

431

@accentcode : this accented character: e.g.,

432

for @'e write an E-acute

433

@{...} : this Unicode char (in hex) */

434

435

if (text_in[i]=='@')

436

{ if (text_in[i+1]=='@')

437

{

438

/* @@... */

439

440

i+=2; j=atoi((char *) (text_in+i));

441

switch(j)

442

{ /* Prevent ~ and ^ from being translated to double-quote

443

and new-line, as they ordinarily would be */

444

445

case 94: write_z_char_z(5); write_z_char_z(6);

446

write_z_char_z(94/32); write_z_char_z(94%32);

447

break;

448

case 126: write_z_char_z(5); write_z_char_z(6);

449

write_z_char_z(126/32); write_z_char_z(126%32);

450

break;

451

452

default: write_zscii(j); break;

453

}

454

while (isdigit(text_in[i])) i++; i--;

455

}

456

else if (isdigit(text_in[i+1])!=0)

457

{ int d1, d2;

458

459

/* @.. */

460

461

d1 = character_digit_value[text_in[i+1]];

462

d2 = character_digit_value[text_in[i+2]];

463

if ((d1 == 127) || (d1 >= 10) || (d2 == 127) || (d2 >= 10))

464

error("'@..' must have two decimal digits");

465

else

466

{ i+=2;

467

write_z_char_z(1); write_z_char_z(d1*10 + d2);

468

}

469

}

470

else

471

{

472

/* A string escape specifying an unusual character */

473

474

unicode = text_to_unicode((char *) (text_in+i));

475

zscii = unicode_to_zscii(unicode);

476

if (zscii != 5) write_zscii(zscii);

477

else

478

{ unicode_char_error(

479

"Character can only be used if declared in \

480

advance as part of 'Zcharacter table':", unicode);

481

}

482

i += textual_form_length - 1;

483

}

484

}

485

else

486

{ /* Skip a character which has been over-written with the null

487

value 1 earlier on */

488

489

if (text_in[i]!=1)

490

{ if (text_in[i]==' ') write_z_char_z(0);

491

else

492

{ j = (int) text_in[i];

493

lookup_value = iso_to_alphabet_grid[j];

494

if (lookup_value < 0)

495

{ /* The character isn't in the standard alphabets, so

496

we have to use the ZSCII 4-Z-char sequence */

497

498

if (lookup_value == -5)

499

{ /* Character isn't in the ZSCII set at all */

500

501

unicode = iso_to_unicode(j);

502

unicode_char_error(

503

"Character can only be used if declared in \

504

advance as part of 'Zcharacter table':", unicode);

505

write_zscii(0x200 + unicode/0x100);

506

write_zscii(0x300 + unicode%0x100);

507

}

508

else write_zscii(-lookup_value);

509

}

510

else

511

{ /* The character is in one of the standard alphabets:

512

write a SHIFT to temporarily change alphabet if

513

it isn't in alphabet 0, then write the Z-char */

514

515

alphabet_used[lookup_value] = 'Y';

516

in_alphabet = lookup_value/26;

517

if (in_alphabet==1) write_z_char_z(4); /* SHIFT to A1 */

518

if (in_alphabet==2) write_z_char_z(5); /* SHIFT to A2 */

519

write_z_char_z(lookup_value%26 + 6);

520

}

521

}

522

}

523

}

524

}

525

526

/* Flush the Z-characters output buffer and set the "end" bit */

527

528

end_z_chars();

529

530

}

531

else {

532

533

/* The text storage here is, of course, temporary. Compression

534

will occur when we're finished compiling, so that all the

535

clever Huffman stuff will work.

536

In the stored text, we use "@@" to indicate @,

537

"@0" to indicate a zero byte,

538

"@ANNNN" to indicate an abbreviation,

539

"@DNNNN" to indicate a dynamic string thing.

540

(NNNN is a four-digit hex number using the letters A-P... an

541

ugly representation but a convenient one.)

542

543

544

for (i=0; text_in[i]!=0; i++) {

545

546

/* Contract ". " into ". " if double-space-removing switch set:

547

likewise "? " and "! " if the setting is high enough. */

548

if ((double_space_setting >= 1)

549

&& (text_in[i+1]==' ') && (text_in[i+2]==' ')) {

550

if (text_in[i]=='.'

551

|| (double_space_setting >= 2

552

&& (text_in[i]=='?' || text_in[i]=='!'))) {

553

text_in[i+1] = text_in[i];

554

i++;

555

}

556

}

557

558

total_chars_trans++;

559

560

/* Try abbreviations if the economy switch set. We have to be in

561

compression mode too, since the abbreviation mechanism is part

562

of string decompression. */

563

564

if ((economy_switch) && (compression_switch) && (!is_abbreviation)

565

&& ((k=abbrevs_lookup[text_in[i]])!=-1)

566

&& ((j=try_abbreviations_from(text_in, i, k)) != -1)) {

567

char *cx = (char *)abbreviations_at+j*MAX_ABBREV_LENGTH;

568

i += (strlen(cx)-1);

569

write_z_char_g('@');

570

write_z_char_g('A');

571

write_z_char_g('A' + ((j >>12) & 0x0F));

572

write_z_char_g('A' + ((j >> 8) & 0x0F));

573

write_z_char_g('A' + ((j >> 4) & 0x0F));

574

write_z_char_g('A' + ((j ) & 0x0F));

575

}

576

else if (text_in[i] == '@') {

577

if (text_in[i+1]=='@') {

578

/* An ASCII code */

579

i+=2; j=atoi((char *) (text_in+i));

580

if (j == '@' || j == '\0') {

581

write_z_char_g('@');

582

if (j == 0) {

583

j = '0';

584

if (!compression_switch)

585

warning("Ascii @@0 will prematurely terminate non-compressed \

586

string.");

587

}

588

}

589

write_z_char_g(j);

590

while (isdigit(text_in[i])) i++; i--;

591

}

592

else if (isdigit(text_in[i+1])) {

593

int d1, d2;

594

d1 = character_digit_value[text_in[i+1]];

595

d2 = character_digit_value[text_in[i+2]];

596

if ((d1 == 127) || (d1 >= 10) || (d2 == 127) || (d2 >= 10)) {

597

error("'@..' must have two decimal digits");

598

}

599

else {

600

if (!compression_switch)

601

warning("'@..' print variable will not work in non-compressed \

602

string; substituting ' '.");

603

i += 2;

604

j = d1*10 + d2;

605

if (j >= MAX_DYNAMIC_STRINGS) {

606

memoryerror("MAX_DYNAMIC_STRINGS", MAX_DYNAMIC_STRINGS);

607

j = 0;

608

}

609

if (j+1 >= no_dynamic_strings)

610

no_dynamic_strings = j+1;

611

write_z_char_g('@');

612

write_z_char_g('D');

613

write_z_char_g('A' + ((j >>12) & 0x0F));

614

write_z_char_g('A' + ((j >> 8) & 0x0F));

615

write_z_char_g('A' + ((j >> 4) & 0x0F));

616

write_z_char_g('A' + ((j ) & 0x0F));

617

}

618

}

619

else {

620

unicode = text_to_unicode((char *) (text_in+i));

621

i += textual_form_length - 1;

622

if (unicode >= 0 && unicode < 256) {

623

write_z_char_g(unicode);

624

}

625

else {

626

error("Unicode characters beyond Latin-1 are not yet supported in Glulx");

627

}

628

}

629

}

630

else if (text_in[i] == '^')

631

write_z_char_g(0x0A);

632

else if (text_in[i] == '~')

633

write_z_char_g('"');

634

else

635

write_z_char_g(text_in[i]);

636

}

637

write_z_char_g(0);

638

639

}

640

641

return((uchar *) text_out_pc);

642

}

643

644

/* ------------------------------------------------------------------------- */

645

/* Glulx compression code */

646

/* ------------------------------------------------------------------------- */

647

648

649

static void compress_makebits(int entnum, int depth, int prevbit,

650

huffbitlist_t *bits);

651

static void compress_dumptable(int entnum, int depth);

652

653

/* The compressor. This uses the usual Huffman compression algorithm. */

654

void compress_game_text()

655

{

656

int entities, branchstart, branches;

657

int numlive;

658

int32 lx;

659

int jx;

660

int ch;

661

int32 ix;

662

huffbitlist_t bits;

663

664

if (compression_switch) {

665

666

/* How many entities have we currently got? Well, 256 plus the

667

string-terminator plus abbrevations plus dynamic strings. */

668

entities = 256+1;

669

huff_abbrev_start = entities;

670

if (economy_switch)

671

entities += no_abbreviations;

672

huff_dynam_start = entities;

673

entities += no_dynamic_strings;

674

675

if (entities > MAX_CHARACTER_SET)

676

memoryerror("MAX_CHARACTER_SET",MAX_CHARACTER_SET);

677

678

/* Characters */

679

for (jx=0; jx<256; jx++) {

680

huff_entities[jx].type = 2;

681

huff_entities[jx].count = 0;

682

huff_entities[jx].u.ch = jx;

683

}

684

/* Terminator */

685

huff_entities[256].type = 1;

686

huff_entities[256].count = 0;

687

if (economy_switch) {

688

for (jx=0; jx<no_abbreviations; jx++) {

689

huff_entities[huff_abbrev_start+jx].type = 3;

690

huff_entities[huff_abbrev_start+jx].count = 0;

691

huff_entities[huff_abbrev_start+jx].u.val = jx;

692

}

693

}

694

for (jx=0; jx<no_dynamic_strings; jx++) {

695

huff_entities[huff_dynam_start+jx].type = 9;

696

huff_entities[huff_dynam_start+jx].count = 0;

697

huff_entities[huff_dynam_start+jx].u.val = jx;

698

}

699

}

700

else {

701

/* No compression; use defaults that will make it easy to check

702

for errors. */

703

no_huff_entities = 257;

704

huff_abbrev_start = 257;

705

huff_dynam_start = 257+MAX_ABBREVS;

706

compression_table_size = 0;

707

}

708

709

if (temporary_files_switch) {

710

fclose(Temp1_fp);

711

Temp1_fp=fopen(Temp1_Name,"rb");

712

if (Temp1_fp==NULL)

713

fatalerror("I/O failure: couldn't reopen temporary file 1");

714

}

715

716

if (compression_switch) {

717

718

for (lx=0, ix=0; lx<no_strings; lx++) {

719

int escapelen=0, escapetype=0;

720

int done=FALSE;

721

int32 escapeval;

722

while (!done) {

723

if (temporary_files_switch)

724

ch = fgetc(Temp1_fp);

725

else

726

ch = read_byte_from_memory_block(&static_strings_area, ix);

727

ix++;

728

if (ix > static_strings_extent || ch < 0)

729

compiler_error("Read too much not-yet-compressed text.");

730

if (escapelen == -1) {

731

escapelen = 0;

732

if (ch == '@') {

733

ch = '@';

734

}

735

else if (ch == '0') {

736

ch = '\0';

737

}

738

else if (ch == 'A' || ch == 'D') {

739

escapelen = 4;

740

escapetype = ch;

741

escapeval = 0;

742

continue;

743

}

744

else {

745

compiler_error("Strange @ escape in processed text.");

746

}

747

}

748

else if (escapelen) {

749

escapeval = (escapeval << 4) | ((ch-'A') & 0x0F);

750

escapelen--;

751

if (escapelen == 0) {

752

if (escapetype == 'A') {

753

ch = huff_abbrev_start+escapeval;

754

}

755

else if (escapetype == 'D') {

756

ch = huff_dynam_start+escapeval;

757

}

758

else {

759

compiler_error("Strange @ escape in processed text.");

760

}

761

}

762

else

763

continue;

764

}

765

else {

766

if (ch == '@') {

767

escapelen = -1;

768

continue;

769

}

770

if (ch == 0) {

771

ch = 256;

772

done = TRUE;

773

}

774

}

775

huff_entities[ch].count++;

776

}

777

}

778

779

numlive = 0;

780

for (jx=0; jx<entities; jx++) {

781

if (huff_entities[jx].count) {

782

hufflist[numlive] = &(huff_entities[jx]);

783

numlive++;

784

}

785

}

786

787

branchstart = entities;

788

branches = 0;

789

790

while (numlive > 1) {

791

int best1, best2;

792

int best1num, best2num;

793

huffentity_t *bran;

794

795

if (hufflist[0]->count < hufflist[1]->count) {

796

best1 = 0;

797

best2 = 1;

798

}

799

else {

800

best2 = 0;

801

best1 = 1;

802

}

803

804

best1num = hufflist[best1]->count;

805

best2num = hufflist[best2]->count;

806

807

for (jx=2; jx<numlive; jx++) {

808

if (hufflist[jx]->count < best1num) {

809

best2 = best1;

810

best2num = best1num;

811

best1 = jx;

812

best1num = hufflist[best1]->count;

813

}

814

else if (hufflist[jx]->count < best2num) {

815

best2 = jx;

816

best2num = hufflist[best2]->count;

817

}

818

}

819

820

bran = &(huff_entities[branchstart+branches]);

821

branches++;

822

bran->type = 0;

823

bran->count = hufflist[best1]->count + hufflist[best2]->count;

824

bran->u.branch[0] = (hufflist[best1] - huff_entities);

825

bran->u.branch[1] = (hufflist[best2] - huff_entities);

826

hufflist[best1] = bran;

827

if (best2 < numlive-1) {

828

memmove(&(hufflist[best2]), &(hufflist[best2+1]),

829

((numlive-1) - best2) * sizeof(huffentity_t *));

830

}

831

numlive--;

832

}

833

834

huff_entity_root = (hufflist[0] - huff_entities);

835

no_huff_entities = branchstart+branches;

836

837

for (ix=0; ix<MAXHUFFBYTES; ix++)

838

bits.b[ix] = 0;

839

compression_table_size = 12;

840

841

compress_makebits(huff_entity_root, 0, -1, &bits);

842

/* compress_dumptable(huff_entity_root, 0); */

843

844

}

845

846

/* Now, sadly, we have to compute the size of the string section,

847

without actually doing the compression. */

848

compression_string_size = 0;

849

850

if (temporary_files_switch) {

851

fseek(Temp1_fp, 0, SEEK_SET);

852

}

853

854

if (no_strings >= MAX_NUM_STATIC_STRINGS)

855

memoryerror("MAX_NUM_STATIC_STRINGS", MAX_NUM_STATIC_STRINGS);

856

857

for (lx=0, ix=0; lx<no_strings; lx++) {

858

int escapelen=0, escapetype=0;

859

int done=FALSE;

860

int32 escapeval;

861

jx = 0;

862

compressed_offsets[lx] = compression_table_size + compression_string_size;

863

compression_string_size++; /* for the type byte */

864

while (!done) {

865

if (temporary_files_switch)

866

ch = fgetc(Temp1_fp);

867

else

868

ch = read_byte_from_memory_block(&static_strings_area, ix);

869

ix++;

870

if (ix > static_strings_extent || ch < 0)

871

compiler_error("Read too much not-yet-compressed text.");

872

if (escapelen == -1) {

873

escapelen = 0;

874

if (ch == '@') {

875

ch = '@';

876

}

877

else if (ch == '0') {

878

ch = '\0';

879

}

880

else if (ch == 'A' || ch == 'D') {

881

escapelen = 4;

882

escapetype = ch;

883

escapeval = 0;

884

continue;

885

}

886

else {

887

compiler_error("Strange @ escape in processed text.");

888

}

889

}

890

else if (escapelen) {

891

escapeval = (escapeval << 4) | ((ch-'A') & 0x0F);

892

escapelen--;

893

if (escapelen == 0) {

894

if (escapetype == 'A') {

895

ch = huff_abbrev_start+escapeval;

896

}

897

else if (escapetype == 'D') {

898

ch = huff_dynam_start+escapeval;

899

}

900

else {

901

compiler_error("Strange @ escape in processed text.");

902

}

903

}

904

else

905

continue;

906

}

907

else {

908

if (ch == '@') {

909

escapelen = -1;

910

continue;

911

}

912

if (ch == 0) {

913

ch = 256;

914

done = TRUE;

915

}

916

}

917

918

if (compression_switch) {

919

jx += huff_entities[ch].depth;

920

compression_string_size += (jx/8);

921

jx = (jx % 8);

922

}

923

else {

924

if (ch >= huff_dynam_start) {

925

compression_string_size += 3;

926

}

927

else if (ch >= huff_abbrev_start) {

928

compiler_error("Abbreviation in non-compressed string should \

929

be impossible.");

930

}

931

else

932

compression_string_size += 1;

933

}

934

}

935

if (compression_switch && jx)

936

compression_string_size++;

937

}

938

939

done_compression = TRUE;

940

}

941

942

static void compress_dumptable(int entnum, int depth)

943

{

944

huffentity_t *ent = &(huff_entities[entnum]);

945

int ix;

946

char *cx;

947

948

if (ent->type) {

949

printf("%6d: ", ent->count);

950

for (ix=0; ix<depth; ix++) {

951

int bt = ent->bits.b[ix / 8] & (1 << (ix % 8));

952

printf("%d", (bt != 0));

953

}

954

printf(": ");

955

}

956

957

switch (ent->type) {

958

case 0:

959

compress_dumptable(ent->u.branch[0], depth+1);

960

compress_dumptable(ent->u.branch[1], depth+1);

961

break;

962

case 1:

963

printf("<EOS>\n");

964

break;

965

case 2:

966

printf("0x%02X ", ent->u.ch);

967

if (ent->u.ch >= ' ')

968

printf("'%c'\n", ent->u.ch);

969

else

970

printf("'ctrl-%c'\n", ent->u.ch + '@');

971

break;

972

case 3:

973

cx = (char *)abbreviations_at + ent->u.val*MAX_ABBREV_LENGTH;

974

printf("abbrev %d, \"%s\"\n", ent->u.val, cx);

975

break;

976

case 9:

977

printf("print-var @%02d\n", ent->u.val);

978

break;

979

}

980

}

981

982

static void compress_makebits(int entnum, int depth, int prevbit,

983

huffbitlist_t *bits)

984

{

985

huffentity_t *ent = &(huff_entities[entnum]);

986

char *cx;

987

988

ent->addr = compression_table_size;

989

ent->depth = depth;

990

ent->bits = *bits;

991

if (depth > 0) {

992

if (prevbit)

993

ent->bits.b[(depth-1) / 8] |= (1 << ((depth-1) % 8));

994

}

995

996

switch (ent->type) {

997

case 0:

998

compression_table_size += 9;

999

compress_makebits(ent->u.branch[0], depth+1, 0, &ent->bits);

1000

compress_makebits(ent->u.branch[1], depth+1, 1, &ent->bits);

1001

break;

1002

case 1:

1003

compression_table_size += 1;

1004

break;

1005

case 2:

1006

compression_table_size += 2;

1007

break;

1008

case 3:

1009

cx = (char *)abbreviations_at + ent->u.val*MAX_ABBREV_LENGTH;

1010

compression_table_size += (1 + 1 + strlen(cx));

1011

break;

1012

case 9:

1013

compression_table_size += 5;

1014

break;

1015

}

1016

}

1017

1018

/* ------------------------------------------------------------------------- */

1019

/* The abbreviations optimiser */

1020

/* */

1021

/* This is a very complex, memory and time expensive algorithm to */

1022

/* approximately solve the problem of which abbreviation strings would */

1023

/* minimise the total number of Z-chars to which the game text translates. */

1024

/* It is in some ways a quite separate program but remains inside Inform */

1025

/* for compatibility with previous releases. */

1026

/* ------------------------------------------------------------------------- */

1027

1028

typedef struct tlb_s

1029

{ char text[4];

1030

int32 intab, occurrences;

1031

} tlb;

1032

static tlb *tlbtab;

1033

static int32 no_occs;

1034

1035

static int32 *grandtable;

1036

static int32 *grandflags;

1037

typedef struct optab_s

1038

{ int32 length;

1039

int32 popularity;

1040

int32 score;

1041

int32 location;

1042

char text[64];

1043

} optab;

1044

static optab *bestyet, *bestyet2;

1045

1046

static int pass_no;

1047

1048

static char *sub_buffer;

1049

1050

static void optimise_pass(void)

1051

{ int32 i; int t1, t2;

1052

int32 j, j2, k, nl, matches, noflags, score, min, minat, x, scrabble, c;

1053

for (i=0; i<256; i++) bestyet[i].length=0;

1054

for (i=0; i<no_occs; i++)

1055

{ if ((*(tlbtab[i].text)!=(int) '\n')&&(tlbtab[i].occurrences!=0))

1056

{

1057

#ifdef MAC_FACE

1058

if (i%((**g_pm_hndl).linespercheck) == 0)

1059

{ ProcessEvents (&g_proc);

1060

if (g_proc != true)

1061

{ free_arrays();

1062

if (store_the_text)

1063

my_free(&all_text,"transcription text");

1064

longjmp (g_fallback, 1);

1065

}

1066

}

1067

#endif

1068

printf("Pass %d, %4ld/%ld '%s' (%ld occurrences) ",

1069

pass_no, (long int) i, (long int) no_occs, tlbtab[i].text,

1070

(long int) tlbtab[i].occurrences);

1071

t1=(int) (time(0));

1072

for (j=0; j<tlbtab[i].occurrences; j++)

1073

{ for (j2=0; j2<tlbtab[i].occurrences; j2++) grandflags[j2]=1;

1074

nl=2; noflags=tlbtab[i].occurrences;

1075

while ((noflags>=2)&&(nl<=62))

1076

{ nl++;

1077

for (j2=0; j2<nl; j2++)

1078

if (all_text[grandtable[tlbtab[i].intab+j]+j2]=='\n')

1079

goto FinishEarly;

1080

matches=0;

1081

for (j2=j; j2<tlbtab[i].occurrences; j2++)

1082

{ if (grandflags[j2]==1)

1083

{ x=grandtable[tlbtab[i].intab+j2]

1084

- grandtable[tlbtab[i].intab+j];

1085

if (((x>-nl)&&(x<nl))

1086

|| (memcmp(all_text+grandtable[tlbtab[i].intab+j],

1087

all_text+grandtable[tlbtab[i].intab+j2],

1088

nl)!=0))

1089

{ grandflags[j2]=0; noflags--; }

1090

else matches++;

1091

}

1092

}

1093

scrabble=0;

1094

for (k=0; k<nl; k++)

1095

{ scrabble++;

1096

c=all_text[grandtable[tlbtab[i].intab+j+k]];

1097

if (c!=(int) ' ')

1098

{ if (iso_to_alphabet_grid[c]<0)

1099

scrabble+=2;

1100

else

1101

if (iso_to_alphabet_grid[c]>=26)

1102

scrabble++;

1103

}

1104

}

1105

score=(matches-1)*(scrabble-2);

1106

min=score;

1107

for (j2=0; j2<256; j2++)

1108

{ if ((nl==bestyet[j2].length)

1109

&& (memcmp(all_text+bestyet[j2].location,

1110

all_text+grandtable[tlbtab[i].intab+j],

1111

nl)==0))

1112

{ j2=256; min=score; }

1113

else

1114

{ if (bestyet[j2].score<min)

1115

{ min=bestyet[j2].score; minat=j2;

1116

}

1117

}

1118

}

1119

if (min!=score)

1120

{ bestyet[minat].score=score;

1121

bestyet[minat].length=nl;

1122

bestyet[minat].location=grandtable[tlbtab[i].intab+j];

1123

bestyet[minat].popularity=matches;

1124

for (j2=0; j2<nl; j2++) sub_buffer[j2]=

1125

all_text[bestyet[minat].location+j2];

1126

sub_buffer[nl]=0;

1127

}

1128

}

1129

FinishEarly: ;

1130

}

1131

t2=((int) time(0)) - t1;

1132

printf(" (%d seconds)\n",t2);

1133

}

1134

}

1135

}

1136

1137

static int any_overlap(char *s1, char *s2)

1138

{ int a, b, i, j, flag;

1139

a=strlen(s1); b=strlen(s2);

1140

for (i=1-b; i<a; i++)

1141

{ flag=0;

1142

for (j=0; j<b; j++)

1143

if ((0<=i+j)&&(i+j<=a-1))

1144

if (s1[i+j]!=s2[j]) flag=1;

1145

if (flag==0) return(1);

1146

}

1147

return(0);

1148

}

1149

1150

#define MAX_TLBS 8000

1151

1152

extern void optimise_abbreviations(void)

1153

{ int32 i, j, t, max=0, MAX_GTABLE;

1154

int32 j2, selected, available, maxat, nl;

1155

tlb test;

1156

1157

printf("Beginning calculation of optimal abbreviations...\n");

1158

1159

pass_no = 0;

1160

tlbtab=my_calloc(sizeof(tlb), MAX_TLBS, "tlb table"); no_occs=0;

1161

sub_buffer=my_calloc(sizeof(char), 4000, "sub_buffer");

1162

for (i=0; i<MAX_TLBS; i++) tlbtab[i].occurrences=0;

1163

1164

bestyet=my_calloc(sizeof(optab), 256, "bestyet");

1165

bestyet2=my_calloc(sizeof(optab), 64, "bestyet2");

1166

1167

bestyet2[0].text[0]='.';

1168

bestyet2[0].text[1]=' ';

1169

bestyet2[0].text[2]=0;

1170

1171

bestyet2[1].text[0]=',';

1172

bestyet2[1].text[1]=' ';

1173

bestyet2[1].text[2]=0;

1174

1175

for (i=0; all_text+i<all_text_top; i++)

1176

{

1177

if ((all_text[i]=='.') && (all_text[i+1]==' ') && (all_text[i+2]==' '))

1178

{ all_text[i]='\n'; all_text[i+1]='\n'; all_text[i+2]='\n';

1179

bestyet2[0].popularity++;

1180

}

1181

1182

if ((all_text[i]=='.') && (all_text[i+1]==' '))

1183

{ all_text[i]='\n'; all_text[i+1]='\n';

1184

bestyet2[0].popularity++;

1185

}

1186

1187

if ((all_text[i]==',') && (all_text[i+1]==' '))

1188

{ all_text[i]='\n'; all_text[i+1]='\n';

1189

bestyet2[1].popularity++;

1190

}

1191

}

1192

1193

MAX_GTABLE=subtract_pointers(all_text_top,all_text)+1;

1194

grandtable=my_calloc(4*sizeof(int32), MAX_GTABLE/4, "grandtable");

1195

1196

for (i=0, t=0; all_text+i<all_text_top; i++)

1197

{ test.text[0]=all_text[i];

1198

test.text[1]=all_text[i+1];

1199

test.text[2]=all_text[i+2];

1200

test.text[3]=0;

1201

if ((test.text[0]=='\n')||(test.text[1]=='\n')||(test.text[2]=='\n'))

1202

goto DontKeep;

1203

for (j=0; j<no_occs; j++)

1204

if (strcmp(test.text,tlbtab[j].text)==0)

1205

goto DontKeep;

1206

test.occurrences=0;

1207

for (j=i+3; all_text+j<all_text_top; j++)

1208

{

1209

#ifdef MAC_FACE

1210

if (j%((**g_pm_hndl).linespercheck) == 0)

1211

{ ProcessEvents (&g_proc);

1212

if (g_proc != true)

1213

{ free_arrays();

1214

if (store_the_text)

1215

my_free(&all_text,"transcription text");

1216

longjmp (g_fallback, 1);

1217

}

1218

}

1219

#endif

1220

if ((all_text[i]==all_text[j])

1221

&& (all_text[i+1]==all_text[j+1])

1222

&& (all_text[i+2]==all_text[j+2]))

1223

{ grandtable[t+test.occurrences]=j;

1224

test.occurrences++;

1225

if (t+test.occurrences==MAX_GTABLE)

1226

{ printf("All %ld cross-references used\n",

1227

(long int) MAX_GTABLE);

1228

goto Built;

1229

}

1230

}

1231

}

1232

if (test.occurrences>=2)

1233

{ tlbtab[no_occs]=test;

1234

tlbtab[no_occs].intab=t; t+=tlbtab[no_occs].occurrences;

1235

if (max<tlbtab[no_occs].occurrences)

1236

max=tlbtab[no_occs].occurrences;

1237

no_occs++;

1238

if (no_occs==MAX_TLBS)

1239

{ printf("All %d three-letter-blocks used\n",

1240

MAX_TLBS);

1241

goto Built;

1242

}

1243

}

1244

DontKeep: ;

1245

}

1246

1247

Built:

1248

grandflags=my_calloc(sizeof(int), max, "grandflags");

1249

1250

1251

printf("Cross-reference table (%ld entries) built...\n",

1252

(long int) no_occs);

1253

/* for (i=0; i<no_occs; i++)

1254

printf("%4d %4d '%s' %d\n",i,tlbtab[i].intab,tlbtab[i].text,

1255

tlbtab[i].occurrences);

1256

1257

1258

for (i=0; i<64; i++) bestyet2[i].length=0; selected=2;

1259

available=256;

1260

while ((available>0)&&(selected<64))

1261

{ printf("Pass %d\n", ++pass_no);

1262

1263

optimise_pass();

1264

available=0;

1265

for (i=0; i<256; i++)

1266

if (bestyet[i].score!=0)

1267

{ available++;

1268

nl=bestyet[i].length;

1269

for (j2=0; j2<nl; j2++) bestyet[i].text[j2]=

1270

all_text[bestyet[i].location+j2];

1271

bestyet[i].text[nl]=0;

1272

}

1273

1274

/* printf("End of pass results:\n");

1275

printf("\nno score freq string\n");

1276

for (i=0; i<256; i++)

1277

if (bestyet[i].score>0)

1278

printf("%02d: %4d %4d '%s'\n", i, bestyet[i].score,

1279

bestyet[i].popularity, bestyet[i].text);

1280

1281

1282

1283

{ max=0;

1284

for (i=0; i<256; i++)

1285

if (max<bestyet[i].score)

1286

{ max=bestyet[i].score;

1287

maxat=i;

1288

}

1289

1290

if (max>0)

1291

{ bestyet2[selected++]=bestyet[maxat];

1292

1293

printf(

1294

"Selection %2ld: '%s' (repeated %ld times, scoring %ld)\n",

1295

(long int) selected,bestyet[maxat].text,

1296

(long int) bestyet[maxat].popularity,

1297

(long int) bestyet[maxat].score);

1298

1299

test.text[0]=bestyet[maxat].text[0];

1300

test.text[1]=bestyet[maxat].text[1];

1301

test.text[2]=bestyet[maxat].text[2];

1302

test.text[3]=0;

1303

1304

for (i=0; i<no_occs; i++)

1305

if (strcmp(test.text,tlbtab[i].text)==0)

1306

break;

1307

1308

for (j=0; j<tlbtab[i].occurrences; j++)

1309

{ if (memcmp(bestyet[maxat].text,

1310

all_text+grandtable[tlbtab[i].intab+j],

1311

bestyet[maxat].length)==0)

1312

{ for (j2=0; j2<bestyet[maxat].length; j2++)

1313

all_text[grandtable[tlbtab[i].intab+j]+j2]='\n';

1314

}

1315

}

1316

1317

for (i=0; i<256; i++)

1318

if ((bestyet[i].score>0)&&

1319

(any_overlap(bestyet[maxat].text,bestyet[i].text)==1))

1320

{ bestyet[i].score=0;

1321

/* printf("Discarding '%s' as overlapping\n",

1322

bestyet[i].text); */

1323

}

1324

}

1325

} while ((max>0)&&(available>0)&&(selected<64));

1326

}

1327

1328

printf("\nChosen abbreviations (in Inform syntax):\n\n");

1329

for (i=0; i<selected; i++)

1330

printf("Abbreviate \"%s\";\n", bestyet2[i].text);

1331

1332

text_free_arrays();

1333

}

1334

1335

/* ------------------------------------------------------------------------- */

1336

/* The dictionary manager begins here. */

1337

/* */

1338

/* Speed is extremely important in these algorithms. If a linear-time */

1339

/* routine were used to search the dictionary words so far built up, then */

1340

/* Inform would crawl. */

1341

/* */

1342

/* Instead, the dictionary is stored as a binary tree, which is kept */

1343

/* balanced with the red-black algorithm. */

1344

/* ------------------------------------------------------------------------- */

1345

/* A dictionary table similar to the Z-machine format is kept: there is a */

1346

/* 7-byte header (left blank here to be filled in at the */

1347

/* construct_storyfile() stage in "tables.c") and then a sequence of */

1348

/* records, one per word, in the form */

1349

/* */

1350

/* <Z-coded text> <flags> <adjectivenumber> <verbnumber> */

1351

/* 4 or 6 bytes byte byte byte */

1352

/* */

1353

/* These records are stored in "accession order" (i.e. in order of their */

1354

/* first being received by these routines) and only alphabetically sorted */

1355

/* by construct_storyfile() (using the array below). */

1356

/* ------------------------------------------------------------------------- */

1357

1358

uchar *dictionary, /* (These two pointers are externally

1359

used only in "tables.c" when

1360

building the story-file) */

1361

*dictionary_top; /* Pointer to next free record */

1362

1363

int dict_entries; /* Total number of records entered */

1364

1365

/* ------------------------------------------------------------------------- */

1366

/* dict_word is a typedef for a struct of 6 unsigned chars (defined in */

1367

/* "header.h"): it holds the (4 or) 6 bytes of Z-coded text of a word. */

1368

/* Usefully, because the PAD character 5 is < all alphabetic characters, */

1369

/* alphabetic order corresponds to numeric order. For this reason, the */

1370

/* dict_word is called the "sort code" of the original text word. */

1371

/* */

1372

/* ###- In modifying the compiler, I've found it easier to discard the */

1373

/* typedef, and operate directly on uchar arrays of length DICT_WORD_SIZE. */

1374

/* In Z-code, DICT_WORD_SIZE will be 6, so the Z-code compiler will work */

1375

/* as before. In Glulx, it can be any value up to MAX_DICT_WORD_SIZE. */

1376

/* (That limit is defined as 40 in the header; it exists only for a few */

1377

/* static buffers, and can be increased without using significant memory.) */

1378

/* ------------------------------------------------------------------------- */

1379

1380

extern int compare_sorts(uchar *d1, uchar *d2)

1381

{ int i;

1382

for (i=0; i<DICT_WORD_SIZE; i++)

1383

if (d1[i]!=d2[i]) return((int)(d1[i]) - (int)(d2[i]));

1384

/* (since memcmp(d1, d2, DICT_WORD_SIZE); runs into a bug on some Unix

1385

libraries) */

1386

return(0);

1387

}

1388

1389

extern void copy_sorts(uchar *d1, uchar *d2)

1390

{ int i;

1391

for (i=0; i<DICT_WORD_SIZE; i++)

1392

d1[i] = d2[i];

1393

}

1394

1395

static uchar prepared_sort[MAX_DICT_WORD_SIZE]; /* Holds the sort code

1396

of current word */

1397

1398

static int number_and_case;

1399

1400

/* Also used by verbs.c */

1401

static void dictionary_prepare_z(char *dword, uchar *optresult)

1402

{ int i, j, k, k2, wd[13]; int32 tot;

1403

1404

/* A rapid text translation algorithm using only the simplified rules

1405

applying to the text of dictionary entries: first produce a sequence

1406

of 6 (v3) or 9 (v4+) Z-characters */

1407

1408

number_and_case = 0;

1409

1410

for (i=0, j=0; dword[j]!=0; i++, j++)

1411

{ if ((dword[j] == '/') && (dword[j+1] == '/'))

1412

{ for (j+=2; dword[j] != 0; j++)

1413

{ switch(dword[j])

1414

{ case 'p': number_and_case |= 4; break;

1415

default:

1416

error_named("Expected 'p' after '//' \

1417

to give number of dictionary word", dword);

1418

break;

1419

}

1420

}

1421

break;

1422

}

1423

if (i>=9) break;

1424

1425

k=(int) dword[j];

1426

if (k==(int) '\'')

1427

warning_named("Obsolete usage: use the ^ character for the \

1428

apostrophe in", dword);

1429

if (k==(int) '^') k=(int) '\'';

1430

if (k=='\"') k='~';

1431

1432

if (k==(int) '@')

1433

{ int unicode = text_to_unicode(dword+j);

1434

if ((unicode < 128) && isupper(unicode)) unicode = tolower(unicode);

1435

k = unicode_to_zscii(unicode);

1436

j += textual_form_length - 1;

1437

if ((k == 5) || (k >= 0x100))

1438

{ unicode_char_error(

1439

"Character can be printed but not input:", unicode);

1440

k = '?';

1441

}

1442

k2 = zscii_to_alphabet_grid[(uchar) k];

1443

}

1444

else

1445

{ if (isupper(k)) k = tolower(k);

1446

k2 = iso_to_alphabet_grid[(uchar) k];

1447

}

1448

1449

if (k2 < 0)

1450

{ if ((k2 == -5) || (k2 <= -0x100))

1451

char_error("Character can be printed but not input:", k);

1452

else

1453

{ /* Use 4 more Z-chars to encode a ZSCII escape sequence */

1454

1455

wd[i++] = 5; wd[i++] = 6;

1456

k2 = -k2;

1457

wd[i++] = k2/32; wd[i] = k2%32;

1458

}

1459

}

1460

else

1461

{ alphabet_used[k2] = 'Y';

1462

if ((k2/26)!=0)

1463

wd[i++]=3+(k2/26); /* Change alphabet for symbols */

1464

wd[i]=6+(k2%26); /* Write the Z character */

1465

}

1466

}

1467

1468

/* Fill up to the end of the dictionary block with PAD characters */

1469

1470

for (; i<9; i++) wd[i]=5;

1471

1472

/* The array of Z-chars is converted to three 2-byte blocks */

1473

1474

tot = wd[2] + wd[1]*(1<<5) + wd[0]*(1<<10);

1475

prepared_sort[1]=tot%0x100;

1476

prepared_sort[0]=(tot/0x100)%0x100;

1477

tot = wd[5] + wd[4]*(1<<5) + wd[3]*(1<<10);

1478

prepared_sort[3]=tot%0x100;

1479

prepared_sort[2]=(tot/0x100)%0x100;

1480

tot = wd[8] + wd[7]*(1<<5) + wd[6]*(1<<10);

1481

prepared_sort[5]=tot%0x100;

1482

prepared_sort[4]=(tot/0x100)%0x100;

1483

1484

/* Set the "end bit" on the 2nd (in v3) or the 3rd (v4+) 2-byte block */

1485

1486

if (version_number==3) prepared_sort[2]+=0x80;

1487

else prepared_sort[4]+=0x80;

1488

1489

if (optresult) copy_sorts(optresult, prepared_sort);

1490

}

1491

1492

/* Also used by verbs.c */

1493

static void dictionary_prepare_g(char *dword, uchar *optresult)

1494

{

1495

int i, j, k;

1496

1497

number_and_case = 0;

1498

1499

for (i=0, j=0; (dword[j]!=0); i++, j++) {

1500

if ((dword[j] == '/') && (dword[j+1] == '/')) {

1501

for (j+=2; dword[j] != 0; j++) {

1502

switch(dword[j]) {

1503

case 'p':

1504

number_and_case |= 4;

1505

break;

1506

default:

1507

error_named("Expected 'p' after '//' \

1508

to give gender or number of dictionary word", dword);

1509

break;

1510

}

1511

}

1512

break;

1513

}

1514

if (i>=DICT_WORD_SIZE) break;

1515

1516

k= (int)dword[j];

1517

if (k=='\'')

1518

warning_named("Obsolete usage: use the ^ character for the \

1519

apostrophe in", dword);

1520

if (k=='^')

1521

k='\'';

1522

if (k=='~') /* as in iso_to_alphabet_grid */

1523

k='\"';

1524

1525

if (k=='@') {

1526

int32 unicode = text_to_unicode(dword+j);

1527

j += textual_form_length - 1;

1528

if (unicode >= 0 && unicode < 256) {

1529

k = unicode;

1530

}

1531

else {

1532

error("Unicode characters beyond Latin-1 are not yet supported in Glulx");

1533

k = '?';

1534

}

1535

}

1536

1537

if (k >= 'A' && k <= 'Z')

1538

k += ('a' - 'A');

1539

1540

prepared_sort[i] = k;

1541

}

1542

1543

for (; i<DICT_WORD_SIZE; i++)

1544

prepared_sort[i] = 0;

1545

1546

if (optresult) copy_sorts(optresult, prepared_sort);

1547

}

1548

1549

extern void dictionary_prepare(char *dword, uchar *optresult)

1550

{

1551

if (!glulx_mode)

1552

dictionary_prepare_z(dword, optresult);

1553

else

1554

dictionary_prepare_g(dword, optresult);

1555

}

1556

1557

/* ------------------------------------------------------------------------- */

1558

/* The arrays below are all concerned with the problem of alphabetically */

1559

/* sorting the dictionary during the compilation pass. */

1560

/* Note that it is not enough simply to apply qsort to the dictionary at */

1561

/* the end of the pass: we need to ensure that no duplicates are ever */

1562

/* created. */

1563

/* */

1564

/* dict_sort_codes[n] the sort code of record n: i.e., of the nth */

1565

/* word to be entered into the dictionary, where */

1566

/* n counts upward from 0 */

1567

/* (n is also called the "accession number") */

1568

/* */

1569

/* The tree structure encodes an ordering. The special value VACANT means */

1570

/* "no node here": otherwise, node numbers are the same as accession */

1571

/* numbers. At all times, "root" holds the node number of the top of the */

1572

/* tree; each node has up to two branches, such that the subtree of the */

1573

/* left branch is always alphabetically before what's at the node, and */

1574

/* the subtree to the right is always after; and all branches are coloured */

1575

/* either "black" or "red". These colours are used to detect points where */

1576

/* the tree is growing asymmetrically (and therefore becoming inefficient */

1577

/* to search). */

1578

/* ------------------------------------------------------------------------- */

1579

1580

#define RED 'r'

1581

#define BLACK 'b'

1582

#define VACANT -1

1583

1584

static int root;

1585

typedef struct dict_tree_node_s

1586

{ int branch[2]; /* Branch 0 is "left", 1 is "right" */

1587

char colour; /* The colour of the branch to the parent */

1588

} dict_tree_node;

1589

1590

static dict_tree_node *dtree;

1591

1592

int *final_dict_order;

1593

static uchar *dict_sort_codes;

1594

1595

static void dictionary_begin_pass(void)

1596

{

1597

/* Leave room for the 7-byte header (added in "tables.c" much later) */

1598

/* Glulx has a 4-byte header instead. */

1599

1600

if (!glulx_mode)

1601

dictionary_top=dictionary+7;

1602

else

1603

dictionary_top=dictionary+4;

1604

1605

root = VACANT;

1606

dict_entries = 0;

1607

}

1608

1609

static int fdo_count;

1610

static void recursively_sort(int node)

1611

{ if (dtree[node].branch[0] != VACANT)

1612

recursively_sort(dtree[node].branch[0]);

1613

final_dict_order[node] = fdo_count++;

1614

if (dtree[node].branch[1] != VACANT)

1615

recursively_sort(dtree[node].branch[1]);

1616

}

1617

1618

extern void sort_dictionary(void)

1619

{ int i;

1620

if (module_switch)

1621

{ for (i=0; i<dict_entries; i++)

1622

final_dict_order[i] = i;

1623

return;

1624

}

1625

1626

if (root != VACANT)

1627

{ fdo_count = 0; recursively_sort(root);

1628

}

1629

}

1630

1631

/* ------------------------------------------------------------------------- */

1632

/* If "dword" is in the dictionary, return its accession number plus 1; */

1633

/* If not, return 0. */

1634

/* ------------------------------------------------------------------------- */

1635

1636

static int dictionary_find(char *dword)

1637

{ int at = root, n;

1638

1639

dictionary_prepare(dword, NULL);

1640

1641

while (at != VACANT)

1642

{ n = compare_sorts(prepared_sort, dict_sort_codes+at*DICT_WORD_SIZE);

1643

if (n==0) return at + 1;

1644

if (n>0) at = dtree[at].branch[1]; else at = dtree[at].branch[0];

1645

}

1646

return 0;

1647

}

1648

1649

/* ------------------------------------------------------------------------- */

1650

/* Add "dword" to the dictionary with (x,y,z) as its data bytes; unless */

1651

/* it already exists, in which case OR the data with (x,y,z) */

1652

/* */

1653

/* Returns: the accession number. */

1654

/* ------------------------------------------------------------------------- */

1655

1656

extern int dictionary_add(char *dword, int x, int y, int z)

1657

{ int n; uchar *p;

1658

int ggfr, gfr, fr, r;

1659

int ggf = VACANT, gf = VACANT, f = VACANT, at = root;

1660

int a, b;

1661

int res=((version_number==3)?4:6);

1662

1663

dictionary_prepare(dword, NULL);

1664

1665

if (root == VACANT)

1666

{ root = 0; goto CreateEntry;

1667

}

1668

while (TRUE)

1669

{

1670

n = compare_sorts(prepared_sort, dict_sort_codes+at*DICT_WORD_SIZE);

1671

if (n==0)

1672

{

1673

if (!glulx_mode) {

1674

p = dictionary+7 + at*(3+res) + res;

1675

p[0]=(p[0])|x; p[1]=(p[1])|y; p[2]=(p[2])|z;

1676

if (x & 128) p[0] = (p[0])|number_and_case;

1677

}

1678

else {

1679

p = dictionary+4 + at*(7+DICT_WORD_SIZE) + 1+DICT_WORD_SIZE;

1680

p[1]=(p[1])|x; p[3]=(p[3])|y; p[5]=(p[5])|z;

1681

if (x & 128) p[1] = (p[1]) | number_and_case;

1682

}

1683

return at;

1684

}

1685

if (n>0) r=1; else r=0;

1686

1687

a = dtree[at].branch[0]; b = dtree[at].branch[1];

1688

if ((a != VACANT) && (dtree[a].colour == RED) &&

1689

(b != VACANT) && (dtree[b].colour == RED))

1690

{ dtree[a].colour = BLACK;

1691

dtree[b].colour = BLACK;

1692

1693

dtree[at].colour = RED;

1694

1695

/* A tree rotation may be needed to avoid two red links in a row:

1696

e.g.

1697

ggf (or else gf is root) ggf (or f is root)

1698

| |

1699

gf f

1700

/ \(red) / \ (both red)

1701

f becomes gf at

1702

/ \(red) / \ / \

1703

1704

/ \

1705

1706

In effect we rehang the "gf" subtree from "f".

1707

See the Technical Manual for further details.

1708

1709

1710

if ((f != VACANT) && (gf != VACANT) && (dtree[f].colour == RED))

1711

{

1712

if (fr == gfr)

1713

{ if (ggf == VACANT) root = f; else dtree[ggf].branch[ggfr] = f;

1714

dtree[gf].branch[gfr] = dtree[f].branch[1-fr];

1715

dtree[f].branch[1-fr] = gf;

1716

dtree[f].colour = BLACK;

1717

dtree[gf].colour = RED;

1718

gf = ggf; gfr = ggfr;

1719

}

1720

else

1721

{ if (ggf == VACANT) root = at; else dtree[ggf].branch[ggfr] = at;

1722

dtree[at].colour = BLACK;

1723

dtree[gf].colour = RED;

1724

dtree[f].branch[fr] = dtree[at].branch[gfr];

1725

dtree[gf].branch[gfr] = dtree[at].branch[fr];

1726

dtree[at].branch[gfr] = f;

1727

dtree[at].branch[fr] = gf;

1728

1729

r = 1-r; n = at; if (r==fr) at = f; else at = gf;

1730

f = n; gf = ggf; fr = 1-r; gfr = ggfr;

1731

}

1732

}

1733

}

1734

1735

if (dtree[at].branch[r] == VACANT)

1736

{ dtree[at].colour = RED;

1737

1738

if ((f != VACANT) && (gf != VACANT) && (dtree[f].colour == RED))

1739

{ if (fr == gfr)

1740

{ if (ggf == VACANT) root = f; else dtree[ggf].branch[ggfr] = f;

1741

dtree[gf].branch[gfr] = dtree[f].branch[1-fr];

1742

dtree[f].branch[1-fr] = gf;

1743

dtree[f].colour = BLACK;

1744

dtree[gf].colour = RED;

1745

}

1746

else

1747

{ if (ggf == VACANT) root = at; else dtree[ggf].branch[ggfr] = at;

1748

dtree[at].colour = BLACK;

1749

dtree[gf].colour = RED;

1750

dtree[f].branch[fr] = dtree[at].branch[gfr];

1751

dtree[gf].branch[gfr] = dtree[at].branch[fr];

1752

dtree[at].branch[gfr] = f;

1753

dtree[at].branch[fr] = gf;

1754

1755

r = 1-r; n = at; if (r==fr) at = f; else at = gf;

1756

f = n; gf = ggf;

1757

}

1758

}

1759

dtree[at].branch[r] = dict_entries;

1760

goto CreateEntry;

1761

}

1762

ggf = gf; gf = f; f = at; at = dtree[at].branch[r];

1763

ggfr = gfr; gfr = fr; fr = r;

1764

}

1765

1766

CreateEntry:

1767

1768

if (dict_entries==MAX_DICT_ENTRIES)

1769

memoryerror("MAX_DICT_ENTRIES",MAX_DICT_ENTRIES);

1770

1771

dtree[dict_entries].branch[0] = VACANT;

1772

dtree[dict_entries].branch[1] = VACANT;

1773

dtree[dict_entries].colour = BLACK;

1774

1775

/* Address in Inform's own dictionary table to write the record to */

1776

1777

if (!glulx_mode) {

1778

1779

p = dictionary + (3+res)*dict_entries + 7;

1780

1781

/* So copy in the 4 (or 6) bytes of Z-coded text and the 3 data

1782

bytes */

1783

1784

p[0]=prepared_sort[0]; p[1]=prepared_sort[1];

1785

p[2]=prepared_sort[2]; p[3]=prepared_sort[3];

1786

if (version_number > 3)

1787

{ p[4]=prepared_sort[4]; p[5]=prepared_sort[5]; }

1788

p[res]=x; p[res+1]=y; p[res+2]=z;

1789

if (x & 128) p[res] = (p[res])|number_and_case;

1790

1791

dictionary_top += res+3;

1792

1793

}

1794

else {

1795

int i;

1796

p = dictionary + 4 + (7+DICT_WORD_SIZE)*dict_entries;

1797

p[0] = 0x60; /* type byte -- dict word */

1798

1799

for (i=0; i<DICT_WORD_SIZE; i++)

1800

p[1+i] = prepared_sort[i];

1801

1802

p[1+DICT_WORD_SIZE+0] = 0; p[1+DICT_WORD_SIZE+1] = x;

1803

p[1+DICT_WORD_SIZE+2] = 0; p[1+DICT_WORD_SIZE+3] = y;

1804

p[1+DICT_WORD_SIZE+4] = 0; p[1+DICT_WORD_SIZE+5] = z;

1805

if (x & 128)

1806

p[1+DICT_WORD_SIZE+1] |= number_and_case;

1807

1808

dictionary_top += (7+DICT_WORD_SIZE);

1809

1810

}

1811

1812

copy_sorts(dict_sort_codes+dict_entries*DICT_WORD_SIZE, prepared_sort);

1813

1814

return dict_entries++;

1815

}

1816

1817

/* ------------------------------------------------------------------------- */

1818

/* Used in "tables.c" for "Extend ... only", to renumber a verb-word to a */

1819

/* new verb syntax of its own. (Otherwise existing verb-words never */

1820

/* change their verb-numbers.) */

1821

/* ------------------------------------------------------------------------- */

1822

1823

extern void dictionary_set_verb_number(char *dword, int to)

1824

{ int i; uchar *p;

1825

int res=((version_number==3)?4:6);

1826

i=dictionary_find(dword);

1827

if (i!=0)

1828

{

1829

if (!glulx_mode) {

1830

p=dictionary+7+(i-1)*(3+res)+res;

1831

p[1]=to;

1832

}

1833

else {

1834

p=dictionary+4 + (i-1)*(7+DICT_WORD_SIZE) + 1+DICT_WORD_SIZE;

1835

p[3]=to;

1836

}

1837

}

1838

}

1839

1840

/* ------------------------------------------------------------------------- */

1841

/* Tracing code for the dictionary: used not only by "trace" and text */

1842

/* transcription, but also (in the case of "word_to_ascii") in a vital */

1843

/* by the linker. */

1844

/* ------------------------------------------------------------------------- */

1845

1846

static char *d_show_to;

1847

static int d_show_total;

1848

1849

static void show_char(char c)

1850

{ if (d_show_to == NULL) printf("%c", c);

1851

else

1852

{ int i = strlen(d_show_to);

1853

d_show_to[i] = c; d_show_to[i+1] = 0;

1854

}

1855

}

1856

1857

extern void word_to_ascii(uchar *p, char *results)

1858

{ int i, shift, cc, zchar; uchar encoded_word[9];

1859

encoded_word[0] = (((int) p[0])&0x7c)/4;

1860

encoded_word[1] = 8*(((int) p[0])&0x3) + (((int) p[1])&0xe0)/32;

1861

encoded_word[2] = ((int) p[1])&0x1f;

1862

encoded_word[3] = (((int) p[2])&0x7c)/4;

1863

encoded_word[4] = 8*(((int) p[2])&0x3) + (((int) p[3])&0xe0)/32;

1864

encoded_word[5] = ((int) p[3])&0x1f;

1865

if (version_number > 3)

1866

{ encoded_word[6] = (((int) p[4])&0x7c)/4;

1867

encoded_word[7] = 8*(((int) p[4])&0x3) + (((int) p[5])&0xe0)/32;

1868

encoded_word[8] = ((int) p[5])&0x1f;

1869

}

1870

1871

shift = 0; cc = 0;

1872

for (i=0; i< ((version_number==3)?6:9); i++)

1873

{ zchar = encoded_word[i];

1874

1875

if (zchar == 4) shift = 1;

1876

else

1877

if (zchar == 5) shift = 2;

1878

else

1879

{ if ((shift == 2) && (zchar == 6))

1880

{ zchar = 32*encoded_word[i+1] + encoded_word[i+2];

1881

i += 2;

1882

if ((zchar>=32) && (zchar<=126))

1883

results[cc++] = zchar;

1884

else

1885

{ zscii_to_text(results+cc, zchar);

1886

cc = strlen(results);

1887

}

1888

}

1889

else

1890

{ zscii_to_text(results+cc, (alphabet[shift])[zchar-6]);

1891

cc = strlen(results);

1892

}

1893

shift = 0;

1894

}

1895

}

1896

results[cc] = 0;

1897

}

1898

1899

static void recursively_show_z(int node)

1900

{ int i, cprinted, flags; uchar *p;

1901

char textual_form[32];

1902

int res = (version_number == 3)?4:6;

1903

1904

if (dtree[node].branch[0] != VACANT)

1905

recursively_show_z(dtree[node].branch[0]);

1906

1907

p = (uchar *)dictionary + 7 + (3+res)*node;

1908

1909

word_to_ascii(p, textual_form);

1910

1911

for (cprinted = 0; textual_form[cprinted]!=0; cprinted++)

1912

show_char(textual_form[cprinted]);

1913

for (; cprinted < 4 + ((version_number==3)?6:9); cprinted++)

1914

show_char(' ');

1915

1916

if (d_show_to == NULL)

1917

{ for (i=0; i<3+res; i++) printf("%02x ",p[i]);

1918

1919

flags = (int) p[res];

1920

if (flags & 128)

1921

{ printf("noun ");

1922

if (flags & 4) printf("p"); else printf(" ");

1923

printf(" ");

1924

}

1925

else printf(" ");

1926

if (flags & 8)

1927

{ if (grammar_version_number == 1)

1928

printf("preposition:%d ", (int) p[res+2]);

1929

else

1930

printf("preposition ");

1931

}

1932

if ((flags & 3) == 3) printf("metaverb:%d ", (int) p[res+1]);

1933

else if ((flags & 3) == 1) printf("verb:%d ", (int) p[res+1]);

1934

printf("\n");

1935

}

1936

1937

if (d_show_total++ == 5)

1938

{ d_show_total = 0;

1939

if (d_show_to != NULL)

1940

{ write_to_transcript_file(d_show_to);

1941

d_show_to[0] = 0;

1942

}

1943

}

1944

1945

if (dtree[node].branch[1] != VACANT)

1946

recursively_show_z(dtree[node].branch[1]);

1947

}

1948

1949

static void recursively_show_g(int node)

1950

{

1951

warning("### Glulx dictionary-show not yet implemented.\n");

1952

}

1953

1954

static void show_alphabet(int i)

1955

{ int j, c; char chartext[8];

1956

1957

for (j=0; j<26; j++)

1958

{ c = alphabet[i][j];

1959

1960

if (alphabet_used[26*i+j] == 'N') printf("("); else printf(" ");

1961

1962

zscii_to_text(chartext, c);

1963

printf("%s", chartext);

1964

1965

if (alphabet_used[26*i+j] == 'N') printf(")"); else printf(" ");

1966

}

1967

printf("\n");

1968

}

1969

1970

extern void show_dictionary(void)

1971

{ printf("Dictionary contains %d entries:\n",dict_entries);

1972

if (dict_entries != 0)

1973

{ d_show_total = 0; d_show_to = NULL;

1974

if (!glulx_mode)

1975

recursively_show_z(root);

1976

else

1977

recursively_show_g(root);

1978

}

1979

printf("\nZ-machine alphabet entries:\n");

1980

show_alphabet(0);

1981

show_alphabet(1);

1982

show_alphabet(2);

1983

}

1984

1985

extern void write_dictionary_to_transcript(void)

1986

{ char d_buffer[81];

1987

1988

sprintf(d_buffer, "\n[Dictionary contains %d entries:]\n", dict_entries);

1989

1990

d_buffer[0] = 0; write_to_transcript_file(d_buffer);

1991

1992

if (dict_entries != 0)

1993

{ d_show_total = 0; d_show_to = d_buffer;

1994

if (!glulx_mode)

1995

recursively_show_z(root);

1996

else

1997

recursively_show_g(root);

1998

}

1999

if (d_show_total != 0) write_to_transcript_file(d_buffer);

2000

}

2001

2002

/* ========================================================================= */

2003

/* Data structure management routines */

2004

/* ------------------------------------------------------------------------- */

2005

2006

extern void init_text_vars(void)

2007

{ int j;

2008

bestyet = NULL;

2009

bestyet2 = NULL;

2010

tlbtab = NULL;

2011

grandtable = NULL;

2012

grandflags = NULL;

2013

no_chars_transcribed = 0;

2014

is_abbreviation = FALSE;

2015

put_strings_in_low_memory = FALSE;

2016

2017

for (j=0; j<256; j++) abbrevs_lookup[j] = -1;

2018

2019

total_zchars_trans = 0;

2020

2021

dtree = NULL;

2022

final_dict_order = NULL;

2023

dict_sort_codes = NULL;

2024

dict_entries=0;

2025

2026

initialise_memory_block(&static_strings_area);

2027

}

2028

2029

extern void text_begin_pass(void)

2030

{ abbrevs_lookup_table_made = FALSE;

2031

no_abbreviations=0;

2032

total_chars_trans=0; total_bytes_trans=0;

2033

if (store_the_text) all_text_top=all_text;

2034

dictionary_begin_pass();

2035

low_strings_top = low_strings;

2036

2037

static_strings_extent = 0;

2038

no_strings = 0;

2039

no_dynamic_strings = 0;

2040

}

2041

2042

/* Note: for allocation and deallocation of all_the_text, see inform.c */

2043

2044

extern void text_allocate_arrays(void)

2045

{ abbreviations_at = my_malloc(MAX_ABBREVS*MAX_ABBREV_LENGTH,

2046

"abbreviations");

2047

abbrev_values = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev values");

2048

abbrev_quality = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev quality");

2049

abbrev_freqs = my_calloc(sizeof(int), MAX_ABBREVS, "abbrev freqs");

2050

2051

dtree = my_calloc(sizeof(dict_tree_node), MAX_DICT_ENTRIES,

2052

"red-black tree for dictionary");

2053

final_dict_order = my_calloc(sizeof(int), MAX_DICT_ENTRIES,

2054

"final dictionary ordering table");

2055

dict_sort_codes = my_calloc(DICT_WORD_SIZE, MAX_DICT_ENTRIES,

2056

"dictionary sort codes");

2057

2058

if (!glulx_mode)

2059

dictionary = my_malloc(9*MAX_DICT_ENTRIES+7,

2060

"dictionary");

2061

else

2062

dictionary = my_malloc((7+DICT_WORD_SIZE)*MAX_DICT_ENTRIES+4,

2063

"dictionary");

2064

2065

strings_holding_area

2066

= my_malloc(MAX_STATIC_STRINGS,"static strings holding area");

2067

low_strings = my_malloc(MAX_LOW_STRINGS,"low (abbreviation) strings");

2068

2069

huff_entities = NULL;

2070

hufflist = NULL;

2071

done_compression = FALSE;

2072

compression_table_size = 0;

2073

compressed_offsets = NULL;

2074

2075

MAX_CHARACTER_SET = 0;

2076

2077

if (glulx_mode) {

2078

if (compression_switch) {

2079

MAX_CHARACTER_SET = 257 + MAX_ABBREVS + MAX_DYNAMIC_STRINGS;

2080

huff_entities = my_calloc(sizeof(huffentity_t), MAX_CHARACTER_SET*2+1,

2081

"huffman entities");

2082

hufflist = my_calloc(sizeof(huffentity_t *), MAX_CHARACTER_SET,

2083

"huffman node list");

2084

}

2085

compressed_offsets = my_calloc(sizeof(int32), MAX_NUM_STATIC_STRINGS,

2086

"static strings index table");

2087

}

2088

}

2089

2090

extern void text_free_arrays(void)

2091

{

2092

my_free(&strings_holding_area, "static strings holding area");

2093

my_free(&low_strings, "low (abbreviation) strings");

2094

my_free(&abbreviations_at, "abbreviations");

2095

my_free(&abbrev_values, "abbrev values");

2096

my_free(&abbrev_quality, "abbrev quality");

2097

my_free(&abbrev_freqs, "abbrev freqs");

2098

2099

my_free(&dtree, "red-black tree for dictionary");

2100

my_free(&final_dict_order, "final dictionary ordering table");

2101

my_free(&dict_sort_codes, "dictionary sort codes");

2102

2103

my_free(&dictionary,"dictionary");

2104

2105

my_free(&compressed_offsets, "static strings index table");

2106

my_free(&hufflist, "huffman node list");

2107

my_free(&huff_entities, "huffman entities");

2108

2109

deallocate_memory_block(&static_strings_area);

2110

}

2111

2112

extern void ao_free_arrays(void)

2113

{ my_free (&tlbtab,"tlb table");

2114

my_free (&sub_buffer,"sub_buffer");

2115

my_free (&bestyet,"bestyet");

2116

my_free (&bestyet2,"bestyet2");

2117

my_free (&grandtable,"grandtable");

2118

my_free (&grandflags,"grandflags");

2119

}

2120

2121

/* ========================================================================= */

Older »