"$Header: d:/cvsroot/tads/tads3/tctok.cpp,v 1.5 1999/07/11 00:46:58 MJRoberts Exp $";

/* 
 *   Copyright (c) 1999, 2002 Michael J. Roberts.  All Rights Reserved.
 *   
 *   Please see the accompanying license file, LICENSE.TXT, for information
 *   on using and copying this software.
 */
/*
 *   tctok.cpp - TADS3 compiler tokenizer
 *   
 *   The tokenizer features an integrated C-style preprocessor.  The
 *   preprocessor is integrated into the tokenizer for efficiency; since
 *   the preprocessor uses the same lexical structure as the TADS
 *   language, we need only tokenize the input stream once, and the result
 *   can be used both for preprocessing and for parsing.
 *   
 *   04/12/99 MJRoberts - Creation
 */
48
/* ------------------------------------------------------------------------ */
50
* Initialize the tokenizer
52
CTcTokenizer::CTcTokenizer(CResLoader *res_loader,
53
const char *default_charset)
63
tc_toktyp_t kw_tok_id;
65
static const kwdef kwlist[] =
67
{ "self", TOKT_SELF },
68
{ "targetprop", TOKT_TARGETPROP },
69
{ "targetobj", TOKT_TARGETOBJ },
70
{ "definingobj", TOKT_DEFININGOBJ },
71
{ "inherited", TOKT_INHERITED },
72
{ "delegated", TOKT_DELEGATED },
73
{ "argcount", TOKT_ARGCOUNT },
75
{ "else", TOKT_ELSE },
77
{ "while", TOKT_WHILE },
79
{ "switch", TOKT_SWITCH },
80
{ "case", TOKT_CASE },
81
{ "default", TOKT_DEFAULT },
82
{ "goto", TOKT_GOTO },
83
{ "break", TOKT_BREAK },
84
{ "continue", TOKT_CONTINUE },
85
// { "and", TOKT_AND },
87
// { "not", TOKT_NOT },
88
{ "function", TOKT_FUNCTION },
89
{ "return", TOKT_RETURN },
90
{ "local", TOKT_LOCAL },
91
{ "object", TOKT_OBJECT },
93
{ "true", TOKT_TRUE },
94
{ "pass", TOKT_PASS },
95
{ "external", TOKT_EXTERNAL },
96
{ "extern", TOKT_EXTERN },
97
{ "formatstring", TOKT_FORMATSTRING },
98
{ "class", TOKT_CLASS },
99
{ "replace", TOKT_REPLACE },
100
{ "modify", TOKT_MODIFY },
102
// { "delete", TOKT_DELETE },
103
{ "throw", TOKT_THROW },
105
{ "catch", TOKT_CATCH },
106
{ "finally", TOKT_FINALLY },
107
{ "intrinsic", TOKT_INTRINSIC },
108
{ "dictionary", TOKT_DICTIONARY },
109
{ "grammar", TOKT_GRAMMAR },
110
{ "enum", TOKT_ENUM },
111
{ "template", TOKT_TEMPLATE },
112
{ "static", TOKT_STATIC },
113
{ "foreach", TOKT_FOREACH },
114
{ "export", TOKT_EXPORT },
115
{ "propertyset", TOKT_PROPERTYSET },
116
{ "transient", TOKT_TRANSIENT },
117
{ "replaced", TOKT_REPLACED },
118
{ "property", TOKT_PROPERTY },
120
// { "void", TOKT_VOID },
121
// { "int", TOKT_INT },
122
// { "string", TOKT_STRING },
123
// { "list", TOKT_LIST },
124
// { "boolean", TOKT_BOOLEAN },
125
// { "any", TOKT_ANY },
127
/* end-of-table marker */
132
/* remember my resource loader */
133
res_loader_ = res_loader;
135
/* there's no stream yet */
138
/* no external source yet */
141
/* start numbering the file descriptors at zero */
142
next_filedesc_id_ = 0;
144
/* there are no file descriptors yet */
148
desc_list_cnt_ = desc_list_alo_ = 0;
150
/* empty out the input line buffer */
153
/* start out with a minimal line buffer size */
154
linebuf_.ensure_space(4096);
155
expbuf_.ensure_space(4096);
157
/* set up at the beginning of the input line buffer */
158
start_new_line(&linebuf_, 0);
160
/* remember the default character set */
161
default_charset_ = lib_copy_str(default_charset);
163
/* we don't have a default character mapper yet */
166
/* create an input mapper for the default character set, if specified */
167
if (default_charset != 0)
168
default_mapper_ = CCharmapToUni::load(res_loader, default_charset);
171
* if the default character set wasn't specified, or we failed to
172
* load a mapper for the specified character set, use a plain ASCII
175
if (default_mapper_ == 0)
176
default_mapper_ = new CCharmapToUniASCII();
178
/* presume we're not in preprocessor-only mode */
179
pp_only_mode_ = FALSE;
181
/* presume we're not in list-includes mode */
182
list_includes_mode_ = FALSE;
184
/* presume we're not in test report mode */
185
test_report_mode_ = FALSE;
187
/* allow preprocessing directives */
190
/* there are no previously-included files yet */
193
/* presume we'll convert newlines in strings to whitespace */
194
string_newline_spacing_ = TRUE;
196
/* start out with ALL_ONCE mode off */
199
/* by default, ignore redundant includes without warning */
200
warn_on_ignore_incl_ = FALSE;
202
/* there are no include path entries yet */
203
incpath_head_ = incpath_tail_ = 0;
205
/* not in a quoted string yet */
208
/* not in an embedded expression yet */
209
comment_in_embedding_ = FALSE;
210
macro_in_embedding_ = FALSE;
211
main_in_embedding_ = FALSE;
213
/* not in a #if block yet */
217
/* not processing a preprocessor constant expression */
220
/* we don't have a current or appended line yet */
224
appended_linenum_ = 0;
226
/* allocate the first token-list block */
227
init_src_block_list();
229
/* create the #define and #undef symbol tables */
230
defines_ = new CVmHashTable(512, new CVmHashFuncCS(), TRUE);
231
undefs_ = new CVmHashTable(64, new CVmHashFuncCS(), TRUE);
233
/* create the special __LINE__ and __FILE__ macros */
234
defines_->add(new CTcHashEntryPpLINE(this));
235
defines_->add(new CTcHashEntryPpFILE(this));
237
/* get the current time and date */
239
tblk = localtime(&timer);
240
tstr = asctime(tblk);
243
* add the __DATE__ macro - the format is "Mmm dd yyyy", where "Mmm"
244
* is the three-letter month name generated by asctime(), "dd" is
245
* the day of the month, with a leading space for numbers less than
246
* ten, and "yyyy" is the year.
248
sprintf(timebuf, "'%.3s %2d %4d'",
249
tstr + 4, tblk->tm_mday, tblk->tm_year + 1900);
250
add_define("__DATE__", timebuf);
252
/* add the __TIME__ macro - 24-hour "hh:mm:ss" format */
253
sprintf(timebuf, "'%.8s'", tstr + 11);
254
add_define("__TIME__", timebuf);
257
* Allocate a pool of macro resources. The number we start with is
258
* arbitrary, since we'll add more as needed, but we want to try to
259
* allocate enough up front that we avoid time-consuming memory
260
* allocations later. On the other hand, we don't want to
261
* pre-allocate a huge number of objects that we'll never use.
263
for (macro_res_avail_ = 0, macro_res_head_ = 0, i = 0 ; i < 7 ; ++i)
267
/* allocate a new object */
268
rsc = new CTcMacroRsc();
270
/* add it onto the master list */
271
rsc->next_ = macro_res_head_;
272
macro_res_head_ = rsc;
274
/* add it onto the available list */
275
rsc->next_avail_ = macro_res_avail_;
276
macro_res_avail_ = rsc;
279
/* create the keyword hash table */
280
kw_ = new CVmHashTable(64, new CVmHashFuncCS(), TRUE);
282
/* populate the keyword table */
283
for (kwp = kwlist ; kwp->kw_text != 0 ; ++kwp)
284
kw_->add(new CTcHashEntryKw(kwp->kw_text, kwp->kw_tok_id));
286
/* no ungot token yet */
287
nxttok_valid_ = FALSE;
289
/* no string capture file */
295
* Initialize the source save block list
297
void CTcTokenizer::init_src_block_list()
299
/* allocate the first source block */
300
src_cur_ = src_head_ = new CTcTokSrcBlock();
302
/* set up to write into the first block */
303
src_ptr_ = src_head_->get_buf();
304
src_rem_ = TCTOK_SRC_BLOCK_SIZE;
308
/* ------------------------------------------------------------------------ */
310
* Delete the tokenizer
312
CTcTokenizer::~CTcTokenizer()
314
/* delete all streams */
317
/* delete all file descriptors */
318
while (desc_head_ != 0)
322
/* remember the next descriptor */
323
nxt = desc_head_->get_next();
325
/* delete this one */
328
/* move on to the next one */
332
/* delete the file descriptor index array */
336
/* delete our default character set string copy */
337
lib_free_str(default_charset_);
339
/* release our reference on our default character mapper */
340
default_mapper_->release_ref();
342
/* forget about all of our previous include files */
343
while (prev_includes_ != 0)
345
tctok_incfile_t *nxt;
347
/* remember the next file */
348
nxt = prev_includes_->nxt;
350
/* delete this one */
351
t3free(prev_includes_);
353
/* move on to the next one */
354
prev_includes_ = nxt;
357
/* delete the include path list */
358
while (incpath_head_ != 0)
360
tctok_incpath_t *nxt;
362
/* remember the next entry in the path */
363
nxt = incpath_head_->nxt;
365
/* delete this entry */
366
t3free(incpath_head_);
368
/* move on to the next one */
372
/* delete the macro resources */
373
while (macro_res_head_ != 0)
377
/* remember the next one */
378
nxt = macro_res_head_->next_;
380
/* delete this one */
381
delete macro_res_head_;
383
/* move on to the next one */
384
macro_res_head_ = nxt;
387
/* delete the token list */
390
/* delete the #define and #undef symbol tables */
394
/* delete the keyword hash table */
397
/* if we created a mapping for the string capture file, release it */
398
if (string_fp_map_ != 0)
399
string_fp_map_->release_ref();
402
/* ------------------------------------------------------------------------ */
404
* Clear the line buffer
406
void CTcTokenizer::clear_linebuf()
408
/* clear the buffer */
409
linebuf_.clear_text();
411
/* reset our read point to the start of the line buffer */
412
p_.set(linebuf_.get_buf());
415
/* ------------------------------------------------------------------------ */
417
* Get a textual representation of an operator token
419
const char *CTcTokenizer::get_op_text(tc_toktyp_t op)
426
static const tokname_t toknames[] =
428
{ TOKT_EOF, "<end of file>" },
429
{ TOKT_SYM, "<symbol>" },
430
{ TOKT_INT, "<integer>" },
431
{ TOKT_SSTR, "<single-quoted string>" },
432
{ TOKT_DSTR, "<double-quoted string>" },
433
{ TOKT_DSTR_START, "<double-quoted string>" },
434
{ TOKT_DSTR_MID, "<double-quoted string>" },
435
{ TOKT_DSTR_END, "<double-quoted string>" },
440
{ TOKT_LBRACE, "{" },
441
{ TOKT_RBRACE, "}", },
442
{ TOKT_LBRACK, "[", },
443
{ TOKT_RBRACK, "]", },
445
{ TOKT_EQEQ, "==", },
457
{ TOKT_ARROW, "->" },
461
{ TOKT_ANDAND, "&&" },
469
{ TOKT_PLUSEQ, "+=" },
470
{ TOKT_MINEQ, "-=" },
471
{ TOKT_TIMESEQ, "*=" },
472
{ TOKT_DIVEQ, "/=" },
473
{ TOKT_MODEQ, "%=" },
474
{ TOKT_ANDEQ, "&=" },
476
{ TOKT_XOREQ, "^=" },
477
{ TOKT_SHLEQ, "<<=" },
478
{ TOKT_SHREQ, ">>=" },
479
{ TOKT_NOT, "! (not)" },
482
{ TOKT_POUNDPOUND, "##" },
483
{ TOKT_POUNDAT, "#@" },
484
{ TOKT_ELLIPSIS, "..." },
485
{ TOKT_QUESTION, "?" },
486
{ TOKT_COLONCOLON, "::" },
487
{ TOKT_FLOAT, "<float>" },
489
{ TOKT_SELF, "self" },
490
{ TOKT_TARGETPROP, "targetprop" },
491
{ TOKT_TARGETOBJ, "targetobj" },
492
{ TOKT_DEFININGOBJ, "definingobj" },
493
{ TOKT_INHERITED, "inherited" },
494
{ TOKT_DELEGATED, "delegated" },
496
{ TOKT_ELSE, "else" },
498
{ TOKT_WHILE, "while" },
500
{ TOKT_SWITCH, "switch" },
501
{ TOKT_CASE, "case" },
502
{ TOKT_DEFAULT, "default" },
503
{ TOKT_GOTO, "goto" },
504
{ TOKT_BREAK, "break" },
505
{ TOKT_CONTINUE, "continue" },
506
{ TOKT_FUNCTION, "function" },
507
{ TOKT_RETURN, "return" },
508
{ TOKT_LOCAL, "local" },
509
{ TOKT_OBJECT, "object" },
511
{ TOKT_TRUE, "true" },
512
{ TOKT_PASS, "pass" },
513
{ TOKT_EXTERNAL, "external" },
514
{ TOKT_EXTERN, "extern" },
515
{ TOKT_FORMATSTRING, "formatstring" },
516
{ TOKT_CLASS, "class" },
517
{ TOKT_REPLACE, "replace" },
518
{ TOKT_MODIFY, "modify" },
520
// { TOKT_DELETE, "delete" },
521
{ TOKT_THROW, "throw" },
523
{ TOKT_CATCH, "catch" },
524
{ TOKT_FINALLY, "finally" },
525
{ TOKT_INTRINSIC, "intrinsic" },
526
{ TOKT_DICTIONARY, "dictionary" },
527
{ TOKT_GRAMMAR, "grammar" },
528
{ TOKT_ENUM, "enum" },
529
{ TOKT_TEMPLATE, "template" },
530
{ TOKT_STATIC, "static" },
531
{ TOKT_FOREACH, "foreach" },
532
{ TOKT_EXPORT, "export" },
533
{ TOKT_PROPERTYSET, "propertyset" },
534
{ TOKT_TRANSIENT, "transient" },
535
{ TOKT_REPLACED, "replaced" },
536
{ TOKT_PROPERTY, "property" },
538
// { TOKT_VOID, "void" },
539
// { TOKT_INTKW, "int" },
540
// { TOKT_STRING, "string" },
541
// { TOKT_LIST, "list" },
542
// { TOKT_BOOLEAN, "boolean" },
543
// { TOKT_ANY, "any"},
549
/* search for the token */
550
for (p = toknames ; p->nm != 0 ; ++p)
552
/* if this is our token, return the associated name string */
557
/* we didn't find it */
561
/* ------------------------------------------------------------------------ */
563
* Reset the tokenizer. Delete the current source object and all of the
564
* saved source text. This can be used after compilation of a unit
565
* (such as a debugger expression) is completed and the intermediate
566
* parser state is no longer needed.
568
void CTcTokenizer::reset()
570
/* delete the source object */
573
/* delete saved token text */
576
/* delete the list */
579
/* re-initialize the source block list */
580
init_src_block_list();
584
/* ------------------------------------------------------------------------ */
586
* Delete the source file, if any, including any parent include files.
588
void CTcTokenizer::delete_source()
590
/* delete the current stream and all enclosing parents */
595
/* remember the next stream in the list */
596
nxt = str_->get_parent();
598
/* delete this stream */
601
/* move up to the next one */
605
/* there are no more streams */
610
/* ------------------------------------------------------------------------ */
612
* Set up to read a source file. Returns zero on success, or a non-zero
613
* error code on failure.
615
int CTcTokenizer::set_source(const char *src_filename, const char *orig_name)
617
CTcTokFileDesc *desc;
620
int default_charset_error;
622
/* empty out the input line buffer */
625
/* set up at the beginning of the input line buffer */
626
start_new_line(&linebuf_, 0);
628
/* create a reader for the source file */
629
src = CTcSrcFile::open_source(src_filename, res_loader_,
630
default_charset_, &charset_error,
631
&default_charset_error);
634
/* if we had a problem loading the default character set, log it */
635
if (default_charset_error)
636
log_error(TCERR_CANT_LOAD_DEFAULT_CHARSET, default_charset_);
639
return TCERR_CANT_OPEN_SRC;
642
/* find or create a file descriptor for this filename */
643
desc = get_file_desc(src_filename, strlen(src_filename), FALSE,
644
orig_name, strlen(orig_name));
647
* Create a stream to read the source file. The new stream has no
648
* parent, because this is the top-level source file, and was not
649
* included from any other file.
651
str_ = new CTcTokStream(desc, src, 0, charset_error, if_sp_);
658
* Set up to read source code from a memory buffer
660
void CTcTokenizer::set_source_buf(const char *buf)
664
/* empty out the input line buffer */
667
/* reset the scanning state to the start of a brand new stream */
670
unsplicebuf_.clear_text();
672
comment_in_embedding_ = FALSE;
673
macro_in_embedding_ = FALSE;
674
main_in_embedding_ = FALSE;
677
nxttok_valid_ = FALSE;
679
/* set up at the beginning of the input line buffer */
680
start_new_line(&linebuf_, 0);
682
/* create a reader for the memory buffer */
683
src = new CTcSrcMemory(buf, default_mapper_);
686
* Create a stream to read the source file. The new stream has no
687
* parent, because this is the top-level source file, and was not
688
* included from any other file.
690
str_ = new CTcTokStream(0, src, 0, 0, if_sp_);
693
/* ------------------------------------------------------------------------ */
695
* Stuff text into the source stream.
697
void CTcTokenizer::stuff_text(const char *txt, size_t len, int expand)
702
/* if desired, expand macros */
705
/* expand macros in the text, storing the result in 'expbuf' */
706
expand_macros(&expbuf, txt, len);
708
/* use the expanded version as the stuffed text now */
709
txt = expbuf.get_text();
710
len = expbuf.get_text_len();
713
/* get the current p_ offset */
714
p_ofs = p_.getptr() - curbuf_->get_text();
716
/* insert the text into the buffer */
717
curbuf_->insert(p_ofs, txt, len);
719
/* reset p_ in case the curbuf_ buffer was reallocated for expansion */
720
start_new_line(curbuf_, p_ofs);
723
/* ------------------------------------------------------------------------ */
725
* Find or create a file descriptor for a given filename
727
CTcTokFileDesc *CTcTokenizer::get_file_desc(const char *fname,
730
const char *orig_fname,
731
size_t orig_fname_len)
733
CTcTokFileDesc *orig_desc;
734
CTcTokFileDesc *desc;
736
/* presume we won't find an original descriptor in the list */
740
* Search the list of existing descriptors to find one that matches.
741
* Do this regardless of whether we're allowed to re-use an existing
742
* one or not - even if we're creating a new one unconditionaly, we
743
* need to know if there's an earlier copy that already exists so we
744
* can associate the new one with the original.
746
for (desc = desc_head_ ; desc != 0 ; desc = desc->get_next())
748
/* check for a name match */
749
if (strlen(desc->get_fname()) == fname_len
750
&& memcmp(desc->get_fname(), fname, fname_len) == 0)
753
* if we're allowed to return an existing descriptor, return
754
* this one, since it's for the same filename
760
* we have to create a new descriptor even though we have an
761
* existing one - remember the original so we can point the
762
* new one back to the original
767
* no need to look any further - we've found the first
768
* instance of this filename in our list
774
/* we didn't find a match - create a new descriptor */
775
desc = new CTcTokFileDesc(fname, fname_len, next_filedesc_id_++,
776
orig_desc, orig_fname, orig_fname_len);
778
/* link it in at the end of the master list */
783
desc_tail_->set_next(desc);
786
/* expand our array index if necessary */
787
if (desc_list_cnt_ >= desc_list_alo_)
791
/* allocate or expand the array */
792
desc_list_alo_ += 10;
793
siz = desc_list_alo_ * sizeof(desc_list_[0]);
795
desc_list_ = (CTcTokFileDesc **)t3malloc(siz);
797
desc_list_ = (CTcTokFileDesc **)t3realloc(desc_list_, siz);
800
/* add the new array entry */
801
desc_list_[desc_list_cnt_++] = desc;
808
/* ------------------------------------------------------------------------ */
810
* Add an include path entry. Each new entry goes at the end of the
811
* list, after all previous entries.
813
void CTcTokenizer::add_inc_path(const char *path)
815
tctok_incpath_t *entry;
817
/* create a new path list entry */
818
entry = (tctok_incpath_t *)t3malloc(sizeof(tctok_incpath_t)
821
/* store the path in the entry */
822
strcpy(entry->path, path);
824
/* link this entry at the end of our list */
825
if (incpath_tail_ != 0)
826
incpath_tail_->nxt = entry;
828
incpath_head_ = entry;
829
incpath_tail_ = entry;
834
/* ------------------------------------------------------------------------ */
836
* Set the string capture file.
838
void CTcTokenizer::set_string_capture(osfildef *fp)
840
/* remember the capture file */
844
* if we don't already have a character mapping to translate from
845
* our internal unicode characters back into the source file
846
* character set, create one now
848
if (string_fp_map_ == 0)
850
/* try creating a mapping for the default character set */
851
if (default_charset_ != 0)
853
CCharmapToLocal::load(res_loader_, default_charset_);
855
/* if we couldn't create the mapping, use a default ASCII mapping */
856
if (string_fp_map_ == 0)
857
string_fp_map_ = CCharmapToLocal::load(res_loader_, "us-ascii");
862
/* ------------------------------------------------------------------------ */
864
* Get the next token in the input stream, reading additional lines from
865
* the source file as needed.
867
tc_toktyp_t CTcTokenizer::next()
869
/* the current token is about to become the previous token */
872
/* if there's an un-got token, return it */
875
/* get the previously-saved token */
878
/* we've now consumed nxttok_ */
879
nxttok_valid_ = FALSE;
881
/* return the new token's type */
882
return curtok_.gettyp();
885
/* if there's an external source, get its next token */
888
const CTcToken *ext_tok;
890
/* get the next token from the external source */
891
ext_tok = ext_src_->get_next_token();
893
/* check to see if we got a token */
897
* restore the current token in effect before this source was
900
curtok_ = *ext_src_->get_enclosing_curtok();
903
* this source has no more tokens - restore the enclosing
904
* source, and keep going so we try getting a token from it
906
ext_src_ = ext_src_->get_enclosing_source();
908
/* return the token type */
909
return curtok_.gettyp();
913
/* we got a token - copy it to our internal token buffer */
916
/* return its type */
917
return curtok_.gettyp();
921
/* keep going until we get a valid token */
927
* read the next token from the current line, applying
928
* appropriate string translations and storing strings and
929
* symbols in the source block list
931
typ = next_on_line_xlat_keep();
933
/* if it's the "null" token, skip it and read another token */
934
if (typ == TOKT_NULLTOK)
937
/* if we found a valid token, we're done - return the token */
942
* if we're at the end of a preprocess line, don't read another
943
* line - just return end of file
945
if (p_.getch() == TOK_END_PP_LINE)
949
* we've reached the end of the line - read another line,
950
* applying preprocessing directives and expanding macros as
955
/* no more lines are available - return end of file */
961
/* ------------------------------------------------------------------------ */
963
* clear external token sources, returning to the true input stream
965
void CTcTokenizer::clear_external_sources()
968
* restore the current token as it was before the outermost external
969
* source was first established
973
CTcTokenSource *outer;
975
/* find the outermost source */
976
for (outer = ext_src_ ; outer->get_enclosing_source() != 0 ;
977
outer = ext_src_->get_enclosing_source()) ;
979
/* restore its original next token */
980
curtok_ = *ext_src_->get_enclosing_curtok();
983
/* there's no external source now */
987
/* ------------------------------------------------------------------------ */
989
* Make a safely storable copy of the current token.
991
const CTcToken *CTcTokenizer::copycur()
993
/* if the current token is a symbol, it already has a safe copy */
994
if (curtok_.gettyp() == TOKT_SYM)
997
/* save the current token's text in permanent tokenizer memory */
998
curtok_.set_text(store_source(curtok_.get_text(), curtok_.get_text_len()),
999
curtok_.get_text_len());
1001
/* return the current token, now that we've made it safe */
1006
* Make a safely storable copy of a given token.
1008
void CTcTokenizer::copytok(CTcToken *dst, const CTcToken *src)
1010
/* start with an exact copy of the token */
1013
/* if the token is a symbol, it already has a safe copy */
1014
if (src->gettyp() == TOKT_SYM)
1017
/* save the token's text in permanent tokenizer memory */
1018
dst->set_text(store_source(dst->get_text(), dst->get_text_len()),
1019
dst->get_text_len());
1023
/* ------------------------------------------------------------------------ */
1025
* Check to see if the current token matches the given text
1027
int CTcTokenizer::cur_tok_matches(const char *txt, size_t len)
1029
/* if the length matches, and the text matches exactly, it matches */
1030
return (getcur()->get_text_len() == len
1031
&& memcmp(getcur()->get_text(), txt, len) == 0);
1034
/* ------------------------------------------------------------------------ */
1036
* Un-get the current token
1038
void CTcTokenizer::unget()
1041
* remember the current token as the next one to fetch, and flag
1042
* that this is valid
1045
nxttok_valid_ = TRUE;
1047
/* go back to the previous token */
1051
/* ------------------------------------------------------------------------ */
1053
* Assume that we should have just found a '>>' terminating an embedded
1054
* expression in a double-quoted string. If possible, back out the
1055
* previous token and re-scan it as though it had started with '>>'.
1057
* This is to be called by a higher-level parser when it determines
1058
* that, syntactically, we should have found the '>>' leaving an
1059
* embedded expression.
1061
void CTcTokenizer::assume_missing_dstr_cont()
1063
/* act as though we had just seen '>>' */
1064
xlat_string_to_src(&main_in_embedding_, TRUE);
1068
/* ------------------------------------------------------------------------ */
1070
* Skip whitespace and macro expansion markers
1072
void CTcTokenizer::skip_ws_and_markers(utf8_ptr *p)
1074
/* keep going until we find something interesting */
1079
/* get the current character */
1083
* if it's a macro expansion end marker, skip it as though it
1084
* were whitespace; otherwise, if it's whitespace, skip it;
1085
* otherwise, we're done skipping leading whitespace
1087
if (cur == TOK_MACRO_EXP_END)
1089
/* skip the embedded pointer value that follows */
1090
p->set(p->getptr() + 1 + sizeof(CTcHashEntryPp *));
1092
else if (is_space(cur))
1094
/* skip the space */
1099
/* it's not whitespace or equivalent - we're done */
1105
/* ------------------------------------------------------------------------ */
1107
* Get the next token from the input stream, operating on the current
1110
tc_toktyp_t CTcTokenizer::next_on_line(utf8_ptr *p, CTcToken *tok,
1111
int *in_embedding, int expanding)
1118
/* skip whitespace */
1119
skip_ws_and_markers(p);
1121
/* remember where the token starts */
1124
/* if there's nothing left in the current line, return EOF */
1125
if (p->getch() == '\0')
1127
/* indicate end of file */
1132
/* get the initial character, and skip it */
1136
/* presume the token will not be marked as fully macro-expanded */
1137
tok->set_fully_expanded(FALSE);
1139
/* presume it's not a number with a minus sign */
1142
/* see what we have */
1145
case TOK_MACRO_FORMAL_FLAG:
1147
* this is a two-byte formal parameter sequence in a macro
1148
* expansion - skip the second byte of the two-byte sequence,
1149
* and return the special token type for this sequence
1151
typ = TOKT_MACRO_FORMAL;
1154
* skip the second byte - note that we want to skip exactly one
1155
* byte, regardless of what the byte looks like as a utf-8
1156
* partial character, since it's not a utf-8 character at all
1158
p->set(p->getptr() + 1);
1161
case TOK_MACRO_FOREACH_FLAG:
1163
* this is the special macro '#foreach' flag - return it as a
1164
* special pseudo-token
1166
typ = TOKT_MACRO_FOREACH;
1169
case TOK_MACRO_IFEMPTY_FLAG:
1170
/* #ifempty macro flag */
1171
typ = TOKT_MACRO_IFEMPTY;
1174
case TOK_MACRO_IFNEMPTY_FLAG:
1175
/* #ifnempty macro flag */
1176
typ = TOKT_MACRO_IFNEMPTY;
1179
case TOK_MACRO_ARGCOUNT_FLAG:
1180
/* it's the special macro '#argcount' flag */
1181
typ = TOKT_MACRO_ARGCOUNT;
1184
case TOK_FULLY_EXPANDED_FLAG:
1185
/* set the token flag indicating that it has been fully expanded */
1186
tok->set_fully_expanded(TRUE);
1188
/* the token symbol starts at the byte after the flag byte */
1189
start = p->getptr();
1191
/* read the first character of the symbol */
1195
/* tokenize the symbol that follows */
1196
goto tokenize_symbol;
1198
case TOK_END_PP_LINE:
1200
* Preprocess line-ending marker - when we reach the end of a
1201
* preprocessor line, we can't read another source line, because
1202
* a preprocessor directive consists of only a single logical
1203
* source line. Once we see this, return end-of-file until the
1204
* caller explicitly reads a new source line.
1206
* Keep the read pointer stuck on this flag byte, so that we
1207
* return end-of-file on a subsequent attempt to get the next
1228
* Start out with the leading digit in the accumulator. Note
1229
* that the character set internally is always UTF-8.
1231
acc = value_of_digit(cur);
1234
* If it's a leading zero, treat as octal or hex. '0x' means
1235
* hex; otherwise, '0' means octal.
1239
/* check for hex - if it's not hex, it's octal */
1240
if (p->getch() == 'x' || p->getch() == 'X')
1246
* scan the hex number - keep going until we find
1247
* something that's not a hex digit
1251
/* get this character */
1254
/* if it's not a hex digit, stop scanning */
1255
if (!is_xdigit(cur))
1259
* Shift the accumulator and add this digit's value.
1260
* Note that we can save a test - if the character is
1261
* >= lower-case 'a', we know it's not an upper-case
1262
* letter because the lower-case letters all have
1263
* values above the upper-case letters in UTF-8
1264
* encoding (which we always use as the internal
1265
* character set). Since we already know it's a
1266
* valid hex digit (we wouldn't be here if it
1267
* weren't), we can just check to see if it's at
1268
* least lower-case 'a', and we automatically know
1269
* then whether it's in the 'a'-'f' range or the
1273
acc += value_of_xdigit(cur);
1281
/* scan octal digits */
1282
for ( ; is_odigit(p->getch()) ; p->inc())
1283
acc = 8*acc + value_of_odigit(p->getch());
1286
* If we stopped on a digit outside of the octal range,
1287
* consume any remaining digits, and flag it as an
1288
* error. Leaving subsequent decimal digits as a
1289
* separate token tends to be confusing, since in most
1290
* cases the inclusion of decimal digits means that the
1291
* user didn't really intend this to be an octal number
1292
* after all. For instance, the leading zero might be
1293
* there for formatting reasons, and the user simply
1294
* forgot to take into account that it triggers octal
1297
if (is_digit(p->getch()))
1299
/* skip subsequent digits */
1300
for (p->inc() ; is_digit(p->getch()) ; p->inc()) ;
1302
/* flag the error */
1304
log_error(TCERR_DECIMAL_IN_OCTAL,
1305
p->getptr() - start.getptr(),
1312
/* scan decimal digits */
1313
for ( ; is_digit(p->getch()) ; p->inc())
1314
acc = 10*acc + value_of_digit(p->getch());
1317
/* negate the value if we had a minus sign */
1322
* if we stopped at a decimal point or an exponent, it's a
1323
* floating point number
1325
if (p->getch() == '.' || p->getch() == 'e' || p->getch() == 'E')
1328
/* it's an integer value */
1331
/* set the integer value */
1332
tok->set_int_val(acc);
1340
/* start over and parse the float */
1341
for (*p = start, found_decpt = FALSE ; ; p->inc())
1343
/* get this character and move on */
1346
/* see what we have */
1349
/* we have another digit; just keep going */
1351
else if (!found_decpt && cur == '.')
1353
/* it's the decimal point - note it and keep going */
1356
else if (cur == 'e' || cur == 'E')
1360
/* it might not be an exponent - look ahead to find out */
1364
/* if we have a sign, skip it */
1365
if ((cur = p2.getch()) == '-' || cur == '+')
1368
/* we need at least one digit to make an exponent */
1369
if (!is_digit(p2.getch()))
1373
while (is_digit(p2.getch()))
1376
/* advance to the end of the exponent */
1379
/* the end of the exponent is the end of the number */
1384
/* everything else ends the number */
1397
return tokenize_string(p, tok, in_embedding);
1412
/* check for '...' and floating-point numbers */
1413
if (p->getch() == '.' && p->getch_at(1) == '.')
1417
typ = TOKT_ELLIPSIS;
1419
else if (is_digit(p->getch()))
1442
/* check for '==' */
1443
if (p->getch() == '=')
1453
/* check for '::' */
1454
if (p->getch() == ':')
1457
typ = TOKT_COLONCOLON;
1464
typ = TOKT_QUESTION;
1468
/* check for '++' and '+=' */
1469
if (p->getch() == '+')
1474
else if (p->getch() == '=')
1484
/* check for '--', '->' and '-=' */
1485
if (p->getch() == '-')
1490
else if (p->getch() == '=')
1495
else if (p->getch() == '>')
1505
/* check for '*=' */
1506
if (p->getch() == '=')
1516
/* check for '/=' */
1517
if (p->getch() == '=')
1527
/* check for '%=' */
1528
if (p->getch() == '=')
1538
/* check for '>>=', '>>' and '>=' */
1539
if (p->getch() == '=')
1544
else if (p->getch() == '>')
1546
/* check for the end of an embedded expression */
1547
if (in_embedding != 0 && *in_embedding)
1550
return tokenize_string(p, tok, in_embedding);
1553
/* check for '>>=' */
1555
if (p->getch() == '=')
1568
/* check for '<<=', '<<', '<>', and '<=' */
1569
if (p->getch() == '=')
1574
else if (p->getch() == '<')
1576
/* check for '<<=' */
1578
if (p->getch() == '=')
1587
else if (p->getch() == '>')
1589
/* '<>' is obsolete */
1591
log_error(TCERR_LTGT_OBSOLETE);
1593
/* ... but for now proceed as though it's != */
1607
/* check for '&&' and '&=' */
1608
if (p->getch() == '&')
1613
else if (p->getch() == '=')
1623
/* check for '||' and '|=' */
1624
if (p->getch() == '|')
1629
else if (p->getch() == '=')
1639
/* check for '^=' */
1640
if (p->getch() == '=')
1650
/* check for '!=' */
1651
if (p->getch() == '=')
1669
/* check for '##' and '#@' */
1670
if (p->getch() == '#')
1673
typ = TOKT_POUNDPOUND;
1675
else if (p->getch() == '@')
1685
/* check to see if it's a symbol */
1686
if (is_syminit(cur))
1688
size_t len, full_len;
1691
* scan the identifier (note that we've already skipped the
1692
* first character, so we start out at length = 1)
1695
for (len = full_len = 1 ; is_sym(p->getch()) ; p->inc())
1697
/* count the full length */
1701
* count this character if we're not over the maximum
1704
if (len < TOK_SYM_MAX_LEN)
1708
/* if we truncated the symbol, issue a warning */
1709
if (full_len != len && !expanding)
1710
log_warning(TCERR_SYMBOL_TRUNCATED,
1711
(int)full_len, start.getptr(),
1712
(int)len, start.getptr());
1730
tok->set_text(start.getptr(), p->getptr() - start.getptr());
1732
/* return the type */
1737
* get the next token, limiting to the length of the source buffer
1739
tc_toktyp_t CTcTokenizer::next_on_line(const CTcTokString *srcbuf,
1740
utf8_ptr *p, CTcToken *tok,
1741
int *in_embedding, int expanding)
1743
/* get the next token */
1744
next_on_line(p, tok, in_embedding, expanding);
1746
/* if the token is past the end of the line, return EOF */
1747
if (tok->get_text() >= srcbuf->get_text_end())
1749
/* set the token to indicate end of line */
1750
tok->settyp(TOKT_EOF);
1752
/* set the token to point to the end of the buffer */
1753
tok->set_text(srcbuf->get_text_end(), 0);
1756
/* return the token type */
1757
return tok->gettyp();
1761
* Get the next token on the line, translating escapes in strings. This
1762
* updates the line buffer in-place to incorporate the translated string
1765
tc_toktyp_t CTcTokenizer::next_on_line_xlat(utf8_ptr *p, CTcToken *tok,
1768
/* skip whitespace */
1769
skip_ws_and_markers(p);
1771
/* if this is a string, translate escapes */
1776
/* translate the string */
1777
return xlat_string(p, tok, in_embedding);
1780
/* if we're in an embedding, check for '>>' */
1781
if (in_embedding != 0 && *in_embedding && p->getch_at(1) == '>')
1782
return tokenize_string(p, tok, in_embedding);
1784
/* use the default case */
1789
/* for anything else, use the default tokenizer */
1790
return next_on_line(p, tok, in_embedding, FALSE);
1797
int CTcTokenizer::look_up_keyword(const CTcToken *tok, tc_toktyp_t *kwtok)
1801
/* look it up in the keyword table */
1802
kw = (CTcHashEntryKw *)kw_->find(tok->get_text(), tok->get_text_len());
1805
/* we found the keyword - set 'kw' to the keyword token id */
1806
*kwtok = kw->get_tok_id();
1808
/* tell the caller we found it */
1813
/* tell the caller it's not a keyword */
1819
* Get the next token on the line, translating escape sequences in
1820
* strings, and storing strings and symbols in the source block list.
1821
* This routine also translates keywords for token types.
1823
tc_toktyp_t CTcTokenizer::next_on_line_xlat_keep()
1827
/* keep going until we find a valid symbol */
1830
/* skip whitespace and macro expansion flags */
1831
skip_ws_and_markers(&p_);
1833
/* see what we have */
1838
/* it's a string - translate and save it */
1839
return xlat_string_to_src(&main_in_embedding_, FALSE);
1842
/* if we're in an embedding, this is the end of it */
1843
if (main_in_embedding_ && p_.getch_at(1) == '>')
1844
return xlat_string_to_src(&main_in_embedding_, FALSE);
1846
/* use the normal parsing */
1851
/* for anything else, use the default tokenizer */
1852
typ = next_on_line(&p_, &curtok_, &main_in_embedding_, FALSE);
1854
/* check the token type */
1863
/* look it up in the keyword table */
1864
kw = (CTcHashEntryKw *)kw_->find(curtok_.get_text(),
1865
curtok_.get_text_len());
1868
/* replace the token with the keyword token type */
1869
typ = kw->get_tok_id();
1870
curtok_.settyp(typ);
1874
/* ordinary symbol - save the text */
1875
p = store_source(curtok_.get_text(),
1876
curtok_.get_text_len());
1879
* change the token's text to point to the
1880
* source block, so that this token's text
1881
* pointer will remain permanently valid (the
1882
* original copy, in the source line buffer,
1883
* will be overwritten as soon as we read
1884
* another source line; we don't want the caller
1885
* to have to worry about this, so we return the
1888
curtok_.set_text(p, curtok_.get_text_len());
1894
/* floating-point number */
1899
* save the text so that it remains permanently
1900
* valid - we keep track of floats by the original
1901
* text, and let the code generator produce the
1902
* appropriate object file representation
1904
p = store_source(curtok_.get_text(),
1905
curtok_.get_text_len());
1906
curtok_.set_text(p, curtok_.get_text_len());
1912
* check for unmappable characters - these will show up as
1913
* Unicode U+FFFD, the "replacement character"; log it as
1914
* 'unmappable' if applicable, otherwise as an invalid
1917
if (utf8_ptr::s_getch(curtok_.get_text()) == 0xfffd)
1918
log_error_curtok(TCERR_UNMAPPABLE_CHAR);
1920
log_error_curtok(TCERR_INVALID_CHAR);
1922
/* skip this character */
1933
/* return the type */
1940
* Translate the string at the current token position in the input
1941
* stream to the source block list.
1943
tc_toktyp_t CTcTokenizer::xlat_string_to_src(int *in_embedding,
1944
int force_embed_end)
1949
* Reserve space for the entire rest of the line. This is
1950
* conservative, in that we will definitely need less space than
1951
* this. This might cause us to waste a little space here and
1952
* there, since we will over-allocate when we have a short string
1953
* early in a long line, but this will save us the time of scanning
1954
* the string twice just to see how long it is.
1956
reserve_source(curbuf_->get_text_len() -
1957
(p_.getptr() - curbuf_->get_text()));
1959
/* translate into the source block */
1960
typ = xlat_string_to(src_ptr_, &p_, &curtok_,
1961
in_embedding, force_embed_end);
1963
/* commit the space in the source block */
1964
commit_source(curtok_.get_text_len() + 1);
1966
/* return the string token */
1971
* Translate a string, setting up the token structure for the string,
1972
* and writing the translated version of the string directly over the
1973
* original source buffer of the string.
1975
* Since a translated string can only shrink (because a translated
1976
* escape sequence is always shorter than the original source version),
1977
* we don't need a separate buffer, but can simply translate into the
1978
* source buffer, overwriting the original string as we go.
1980
tc_toktyp_t CTcTokenizer::xlat_string(utf8_ptr *p, CTcToken *tok,
1986
* write the translated string over the original string's text,
1987
* starting at the character after the quote
1989
dst = p->getptr() + 1;
1991
/* translate the string into our destination buffer */
1992
return xlat_string_to(dst, p, tok, in_embedding, FALSE);
1996
* Translate a string, setting up the token structure for the string.
1997
* We will update the line buffer in-place to incorporate the translated
2000
tc_toktyp_t CTcTokenizer::xlat_string_to(char *dstp, utf8_ptr *p,
2001
CTcToken *tok, int *in_embedding,
2002
int force_embed_end)
2006
utf8_ptr start, end;
2009
/* set up our output utf8 pointer */
2012
/* note the open quote character */
2015
/* set the appropriate string token type */
2016
tok->settyp(qu == '"'
2018
: (qu == '>' ? TOKT_DSTR_END : TOKT_SSTR));
2020
/* skip the open quote */
2023
/* skip the second '>' if it's a '>>' */
2024
if (force_embed_end)
2027
* they want us to assume the embedding ends here, regardless of
2028
* what we're looking at - act the same as though we had
2029
* actually seen '>>', but don't skip any input (in fact, back
2030
* up one, since we already skipped one character for what we
2031
* had thought was the open quote
2035
/* clear the caller's in-embedding status */
2036
*in_embedding = FALSE;
2038
/* close with a double quote */
2041
/* it's a double-quoted string continuation */
2042
tok->settyp(TOKT_DSTR_END);
2046
/* skip the second '>' */
2049
/* clear the caller's in-embedding status */
2050
*in_embedding = FALSE;
2052
/* close with a double quote */
2056
/* remember where the string's contents start */
2059
/* scan the string and translate quotes */
2064
/* get this character */
2067
/* if this is the matching quote, we're done */
2072
* if we find an end-of-line within the string, it's an error -
2073
* we should always splice strings together onto a single line
2074
* before starting to tokenize the line
2081
/* note where the string ends */
2084
/* set the token's text pointer */
2085
tok->set_text(dstp, end.getptr() - dstp);
2087
/* null-terminate the result string */
2091
* get the length of the unterminated string so far, but for
2092
* error logging, limit the length to twenty characters --
2093
* we just want to give the user enough information to find
2094
* the string in error, without making the error message
2098
len = p.len(end.getptr() - dstp);
2100
len = p.bytelen(20);
2103
* Check for a special heuristic case. If the string was of
2104
* zero length, and we have something sitting in our
2105
* unsplice buffer, here's what probably happened: the input
2106
* was missing a ">>" sequence at the end of an embedded
2107
* expression, and the parser told us to put it back in. We
2108
* had earlier decided we needed to splice up to a quote to
2109
* end what looked to us like an unterminated string. If
2110
* this is the case, we and the parser are working at cross
2111
* purposes; the parser is smarter than we are, so we should
2112
* synchronize with it.
2114
if (tok->get_text_len() == 0
2116
&& unsplicebuf_.get_text_len() != 0)
2121
* we must have spliced a line to finish a string -
2122
* insert the quote into the splice buffer, and ignore
2127
* make sure there's room for one more character (plus a
2130
unsplicebuf_.ensure_space(unsplicebuf_.get_text_len() + 2);
2132
/* get the buffer pointer */
2133
buf = unsplicebuf_.get_buf();
2135
/* make room for the '"' */
2136
memmove(buf + 1, buf, unsplicebuf_.get_text_len());
2137
unsplicebuf_.set_text_len(unsplicebuf_.get_text_len() + 1);
2143
* return the 'null token' to tell the caller to try
2144
* again - do not log an error at this point
2146
return TOKT_NULLTOK;
2150
log_error(TCERR_UNTERM_STRING,
2151
(char)qu, (int)len, dstp, (char)qu);
2153
/* return the string type */
2154
return tok->gettyp();
2157
/* if this is an escape, translate it */
2162
/* get the character after the escape */
2166
/* see what we have */
2175
/* miniscules - 0x000E */
2180
/* blank line - 0x000B */
2185
/* quoted space - 0x0015 */
2190
/* newline - explicitly use Unicode 10 character */
2195
/* tab - explicitly use Unicode 9 character */
2201
* Hex unicode character number. Read up to 4 hex
2202
* digits that follow the 'u', and use that as a Unicode
2205
for (i = 0, acc = 0, p->inc() ; i < 4 ; ++i, p->inc())
2207
/* get the next character */
2211
* if it's another hex digit, add it into the
2212
* accumulator; otherwise, we're done
2215
acc = 16*acc + value_of_xdigit(cur);
2220
/* use the accumulated value as the character number */
2221
dst.setch((wchar_t)acc);
2224
* continue with the current character, since we've
2225
* already skipped ahead to the next one
2238
* Octal ASCII character number. Accumulate up to three
2239
* octal numbers, and use the result as a character ID.
2241
for (i = 0, acc = 0 ; i < 3 ; ++i, p->inc())
2243
/* get the next character */
2247
* if it's another digit, and it would leave our
2248
* result in the 0-255 range, count it; if not,
2255
/* compute the new value */
2256
new_acc = 8*acc + value_of_odigit(cur);
2258
/* if this would be too high, don't count it */
2268
/* use the accumulated value as the character number */
2269
dst.setch((wchar_t)acc);
2272
* continue with the current character, since we've
2273
* already skipped ahead to the next one
2279
* Hex ASCII character number. Read up to two hex
2280
* digits as a character number.
2282
for (i = 0, acc = 0, p->inc() ; i < 2 ; ++i, p->inc())
2284
/* get the next character */
2288
* if it's another hex digit, add it into the
2289
* accumulator; otherwise, we're done
2292
acc = 16*acc + value_of_xdigit(cur);
2297
/* use the accumulated value as the character number */
2298
dst.setch((wchar_t)acc);
2301
* continue with the current character, since we've
2302
* already skipped ahead to the next one
2307
/* copy anything else as-is */
2311
else if (in_embedding != 0 && !*in_embedding
2312
&& cur == '<' && p->getch_at(1) == '<')
2315
* it's the start of an embedded expression - change the
2316
* type to so indicate
2318
tok->settyp(tok->gettyp() == TOKT_DSTR
2319
? TOKT_DSTR_START : TOKT_DSTR_MID);
2321
/* tell the caller we're in an embedding */
2322
*in_embedding = TRUE;
2328
/* copy this character to the output position */
2331
/* get the next character */
2335
/* note where the string ends */
2338
/* set the token's text pointer */
2339
tok->set_text(dstp, end.getptr() - dstp);
2341
/* null-terminate the result string */
2344
/* skip an extra character if this is the start of an embedding */
2345
if (p->getch() == '<')
2348
/* skip the closing quote */
2351
/* return the string type */
2352
return tok->gettyp();
2357
* Skip a string, setting up the token structure for the string. This
2358
* routine only parses to the end of the line; if the line ends with the
2359
* string unterminated, we'll flag an error
2361
tc_toktyp_t CTcTokenizer::tokenize_string(utf8_ptr *p, CTcToken *tok,
2365
const char *contents_start;
2366
const char *contents_end;
2369
int allow_embedding;
2371
/* remember where the text starts */
2372
start = p->getptr();
2374
/* note the quote type */
2377
/* skip the quote in the input */
2380
/* determine the token type based on the quote type */
2384
/* single-quoted string */
2386
allow_embedding = FALSE;
2391
* this must be the next part of a string with embeddings; for now,
2392
* assume it's the end of the string, although it may just turn out
2395
typ = TOKT_DSTR_END;
2396
allow_embedding = (in_embedding != 0);
2398
/* skip the extra '>' character */
2401
/* clear the embedding flag */
2402
if (in_embedding != 0)
2403
*in_embedding = FALSE;
2405
/* look for a closing double quote */
2410
/* regular double-quoted string */
2412
allow_embedding = (in_embedding != 0);
2416
/* anything else is invalid */
2418
allow_embedding = FALSE;
2422
/* this is where the string's contents start */
2423
contents_start = p->getptr();
2425
/* scan the string */
2430
/* get the current character */
2433
/* see what we have */
2436
/* escape sequence - skip an extra character */
2439
else if (cur == '<' && allow_embedding && p->getch_at(1) == '<')
2442
* it's the start of an embedded expression - return the
2443
* appropriate embedded string part type
2445
if (typ == TOKT_DSTR)
2446
typ = TOKT_DSTR_START;
2448
typ = TOKT_DSTR_MID;
2450
/* remember that we're in an embedding in the token stream */
2451
*in_embedding = TRUE;
2453
/* this is where the contents end */
2454
contents_end = p->getptr();
2456
/* skip the two embedding characters */
2460
/* we're done - set the text in the token */
2461
tok->set_text(start, p->getptr() - start);
2468
/* this is where the contents end */
2469
contents_end = p->getptr();
2471
/* skip the closing quote */
2474
/* we're done - set the text in the token */
2475
tok->set_text(start, p->getptr() - start);
2480
else if (cur == '\0')
2482
/* this is where the contents end */
2483
contents_end = p->getptr();
2486
* We have an unterminated string. If we're evaluating a
2487
* preprocessor constant expression, log an error; otherwise
2488
* let it go for now, since we'll catch the error during the
2489
* normal tokenizing pass for parsing.
2491
if (G_tok->in_pp_expr_)
2492
log_error(TCERR_PP_UNTERM_STRING);
2494
/* set the partial text */
2495
tok->set_text(start, p->getptr() - start);
2497
/* end of line - return with the string unfinished */
2501
/* skip this charater of input */
2506
* if we're not in preprocessor mode, and we're saving string text,
2507
* write the string to the string text output file
2509
if (!G_tok->in_pp_expr_ && G_tok->string_fp_ != 0
2510
&& contents_start != contents_end)
2512
/* write the line, translating back to the source character set */
2513
G_tok->string_fp_map_
2514
->write_file(G_tok->string_fp_, contents_start,
2515
(size_t)(contents_end - contents_start));
2518
osfwb(G_tok->string_fp_, "\n", 1);
2521
/* set the type in the token */
2524
/* return the token type */
2525
return tok->gettyp();
2529
/* ------------------------------------------------------------------------ */
2531
* Read a source line and handle preprocessor directives. This routine
2532
* will transparently handle #include, #define, and other directives;
2533
* when this routine returns, the input buffer will have a line of text
2534
* that contains no # directive.
2536
* Returns zero on success, non-zero upon reaching the end of the input.
2538
int CTcTokenizer::read_line_pp()
2540
int started_in_string;
2544
* Read the next line from the input. If that fails, return an end
2545
* of file indication.
2547
ofs = read_line(FALSE);
2552
* before we process comments, note whether or not the line started
2553
* out within a character string
2555
started_in_string = (in_quote_ != '\0');
2557
/* set up our source pointer to the start of the new line */
2558
start_new_line(&linebuf_, ofs);
2560
/* skip leading whitespace */
2561
while (is_space(p_.getch()))
2565
* If this line begins with a '#', process the directive. Ignore
2566
* any initial '#' if the line started off in a string.
2568
if (!started_in_string && p_.getch() == '#' && allow_pp_)
2573
int process_in_false_if;
2574
void (CTcTokenizer::*func)();
2576
static pp_kw_def kwlist[] =
2578
{ "charset", FALSE, &CTcTokenizer::pp_charset },
2579
{ "pragma", FALSE, &CTcTokenizer::pp_pragma },
2580
{ "include", FALSE, &CTcTokenizer::pp_include },
2581
{ "define", FALSE, &CTcTokenizer::pp_define },
2582
{ "if", TRUE, &CTcTokenizer::pp_if },
2583
{ "ifdef", TRUE, &CTcTokenizer::pp_ifdef },
2584
{ "ifndef", TRUE, &CTcTokenizer::pp_ifndef },
2585
{ "else", TRUE, &CTcTokenizer::pp_else },
2586
{ "elif", TRUE, &CTcTokenizer::pp_elif },
2587
{ "endif", TRUE, &CTcTokenizer::pp_endif },
2588
{ "error", FALSE, &CTcTokenizer::pp_error },
2589
{ "undef", FALSE, &CTcTokenizer::pp_undef },
2590
{ "line", FALSE, &CTcTokenizer::pp_line },
2601
* If the line ended inside a comment, read the next line until
2602
* we're no longer in a comment. The ANSI C preprocessor rules
2603
* say that a newline in a comment should not be treated as a
2604
* lexical newline, so pretend that the next line is part of the
2605
* preprocessor line in such a case.
2607
while (str_->is_in_comment())
2611
/* remember the current offset in the line buffer */
2612
p_ofs = p_.getptr() - linebuf_.get_buf();
2614
/* append another line - stop at the end of the stream */
2615
if (read_line(TRUE) == -1)
2618
/* restore the line pointer, in case the buffer moved */
2619
start_new_line(&linebuf_, p_ofs);
2622
/* read the directive */
2626
* if we've reached the end of the line, it's a null directive;
2627
* simply return an empty line
2629
if (curtok_.gettyp() == TOKT_EOF)
2635
/* get the text and length of the keyword */
2636
kwtxt = curtok_.get_text();
2637
kwlen = curtok_.get_text_len();
2639
/* if it's not a symbol, it's not a valid directive */
2640
if (curtok_.gettyp() != TOKT_SYM)
2642
/* log the error and return an empty line */
2643
log_error(TCERR_INV_PP_DIR, (int)kwlen, kwtxt);
2648
/* determine which keyword we have, and process it */
2649
for (kwp = kwlist ; kwp->kw != 0 ; ++kwp)
2651
/* is this our keyword? */
2652
if (strlen(kwp->kw) == kwlen
2653
&& memcmp(kwtxt, kwp->kw, kwlen) == 0)
2656
* This is our directive.
2658
* If we're in the false branch of a #if block, only
2659
* process the directive if it's a kind of directive
2660
* that we should process in false #if branches. The
2661
* only directives that we process in #if branches are
2662
* those that would affect the #if branching, such as a
2663
* #endif or a nested #if.
2665
if (!in_false_if() || kwp->process_in_false_if)
2667
/* invoke the handler to process the directive */
2668
(this->*(kwp->func))();
2673
* we're in a #if branch not taken - simply clear
2679
/* we don't need to look any further */
2685
* if we didn't find the keyword, log an error and otherwise
2686
* ignore the entire line
2689
log_error(TCERR_INV_PP_DIR, (int)kwlen, kwtxt);
2692
* Preprocessor lines must always be entirely self-contained.
2693
* Therefore, it's not valid for a string to start on a
2694
* preprocessor line and continue onto subsequent lines. If
2695
* we're marked as being inside a string, there must have been
2696
* an error on the preprocessor line. Simply clear the
2697
* in-string flag; we don't need to issue an error at this
2698
* point, since the preprocessor line handler should have
2699
* already caught the problem and reported an error.
2706
* There's no preprocessor directive.
2708
* If we're in a false #if branch, return an empty line. We
2709
* return an empty line rather than skipping to the next line so
2710
* that the caller sees the same number of lines as are in the
2716
* it's a #if not taken - we don't want to compile the line
2717
* at all, so just clear it out
2720
expbuf_.clear_text();
2725
* If we ended the line in a string, splice additional lines
2726
* onto the end of this line until we find the end of the
2727
* string, then unsplice the part after the end of the
2730
if (in_quote_ != '\0')
2732
/* splice additional lines to finish the quote */
2737
* Expand macros in the line, splicing additional source
2738
* lines if necessary to fill out any incomplete actual
2741
start_new_line(&linebuf_, 0);
2742
expand_macros_curline(TRUE, FALSE, FALSE);
2745
/* store the line in the appropriate place */
2749
* we're only preprocessing - store the macro-expanded line
2750
* back in the line buffer so that the caller can read out
2751
* the final preprocessed text
2753
linebuf_.copy(expbuf_.get_text(), expbuf_.get_text_len());
2758
* We're compiling - simply read subsequent tokens out of
2759
* the expansion buffer.
2761
start_new_line(&expbuf_, 0);
2765
/* return success */
2769
/* ------------------------------------------------------------------------ */
2771
* Read the next line from the input file. Returns a pointer to the
2772
* start of the newly-read data on success, or null if we reach the end
2775
* If 'append' is true, we'll add the line on to the end of the existing
2776
* buffer; otherwise, we'll overwrite what's in the buffer.
2778
* The only preprocessing performed in this routine is line-splicing.
2779
* Any line that ends with a backslash character will be spliced with
2780
* the following line, with the backslash and newline removed.
2782
* The new line will be stored in our internal buffer, and will be
2783
* null-terminated with the trailing newline removed.
2785
* If we reach the end of the current file, and there's an enclosing
2786
* file, we'll resume reading from the enclosing file. Hence, when this
2787
* routine returns non-zero, it indicates that we've reached the end of
2788
* the entire source, not just of the current file.
2790
int CTcTokenizer::read_line(int append)
2795
/* if there's no input stream, indicate end-of-file */
2799
/* if we're not appending, clear out the line buffer */
2802
/* start with an empty line */
2805
/* note the current input position */
2806
last_desc_ = str_->get_desc();
2807
last_linenum_ = str_->get_next_linenum();
2810
/* note where the new data starts */
2811
len = linebuf_.get_text_len();
2815
* if there's anything in the unsplice buffer, use it as the new
2818
if (unsplicebuf_.get_text_len() != 0)
2821
* Copy the unsplice buffer as the current line. Note that we
2822
* don't have to worry about any of the complicated cases, such
2823
* as whether or not it ends with a newline or a backslash,
2824
* because the unspliced line was already processed as an input
2825
* line when we read it in the first place.
2827
linebuf_.append(unsplicebuf_.get_text(), unsplicebuf_.get_text_len());
2829
/* clear the unsplice buffer, since it's been consumed now */
2830
unsplicebuf_.clear_text();
2833
* make the current line the appended line - if we're
2834
* unsplicing, it means that we appended, so the current line is
2835
* now the line from which the last appended text came
2837
last_desc_ = appended_desc_;
2838
last_linenum_ = appended_linenum_;
2840
/* return the offset of the new text */
2844
/* if we're appending, note where the appendage is coming from */
2847
/* remember the last source line appended */
2848
appended_desc_ = str_->get_desc();
2849
appended_linenum_ = str_->get_next_linenum();
2852
/* keep going until we finish reading the input line */
2857
/* read a line of text from the input file */
2858
curlen = str_->get_src()->
2859
read_line(linebuf_.get_buf() + len,
2860
linebuf_.get_buf_size() - len);
2862
/* check for end of file */
2865
CTcTokStream *old_str;
2868
* We've reached the end of the current input stream. If
2869
* we've already read anything into the current line, it
2870
* means that the file ended in mid-line, without a final
2871
* newline character; ignore this and proceed with the line
2872
* as it now stands in this case.
2874
if (len > start_len)
2878
* We've finished with this stream. If there's a parent
2879
* stream, return to it; otherwise, we're at the end of the
2884
* if we didn't close all of the #if/#ifdef levels opened
2885
* within this file, flag one or more errors
2887
while (if_sp_ > str_->get_init_if_level())
2891
/* get the filename from the #if stack */
2892
fname = if_stack_[if_sp_ - 1].desc->get_fname();
2894
/* if we're in test reporting mode, use the root name only */
2895
if (test_report_mode_)
2896
fname = os_get_root_name((char *)fname);
2899
log_error(TCERR_IF_WITHOUT_ENDIF,
2900
if_stack_[if_sp_ - 1].linenum,
2901
(int)strlen(fname), fname);
2903
/* discard the #if level */
2907
/* remember the old stream */
2910
/* return to the parent stream, if there is one */
2911
str_ = str_->get_parent();
2913
/* delete the old stream now that we're done with it */
2916
/* note the new file the line will be coming from */
2917
if (!append && str_ != 0)
2919
last_desc_ = str_->get_desc();
2920
last_linenum_ = str_->get_next_linenum();
2923
/* if there's no stream, return end of file */
2928
* restore the #pragma newline_spacing mode that was in effect
2929
* when we interrupted the parent stream
2931
string_newline_spacing_ = str_->get_newline_spacing();
2933
/* if there's a parser, notify it of the new pragma C mode */
2934
#if 0 // #pragma C is not currently used
2936
G_prs->set_pragma_c(str_->is_pragma_c());
2939
/* go back to read the next line from the parent */
2943
/* set the new length of the buffer contents */
2945
linebuf_.set_text_len(len);
2948
* Check the result to see if it ends in a newline. If not, it
2949
* means either that we don't have room in the buffer for the
2950
* full source line, or we've reached the last line in the file,
2951
* and it doesn't end with a newline.
2953
* Note that the file reader will always supply us with '\n'
2954
* newlines, regardless of the local operating system
2957
* Also, check to see if the line ends with '\\'. If so, remove
2958
* the '\\' character and read the next line, since this
2959
* indicates that the logical line continues onto the next
2960
* newline-deliminted line.
2962
if (len != 0 && linebuf_.get_text()[len - 1] != '\n')
2965
* There's no newline, hence the file reader wasn't able to
2966
* fit the entire line into our buffer, or else we've read
2967
* the last line in the file and there's no newline at the
2968
* end. If we haven't reached the end of the file, expand
2969
* our line buffer to make room to read more from this same
2972
if (!str_->get_src()->at_eof())
2975
else if (len > 1 && linebuf_.get_text()[len - 2] == '\\')
2978
* There's a backslash at the end of the line, so they want
2979
* to continue this logical line. Remove the backslash, and
2980
* read the next line onto the end of the current line.
2982
* Note that we must remove two characters from the end of
2983
* the line (and tested for buf_[len-2] above) because we
2984
* have both a backslash and a newline at the end of the
2988
linebuf_.set_text_len(len);
2990
/* count reading the physical line */
2995
/* remove the newline from the buffer */
2999
linebuf_.set_text_len(len);
3002
/* count reading the line */
3011
* remove comments from the newly-read material - this replaces each
3012
* comment by a single whitespace character
3014
process_comments(start_len);
3017
* we've successfully read a line -- return the offset of the start of
3018
* the newly-read text
3024
* Un-splice a line at the given point. This breaks the current source
3025
* line in two, keeping the part before the given point as the current
3026
* line, but making the part from the given point to the end of the line
3027
* a new source line. We'll put the new source line into a special
3028
* holding buffer, and then fetch this part as a new line the next time
3029
* we read a line in read_line().
3031
void CTcTokenizer::unsplice_line(const char *new_line_start)
3035
/* make sure the starting point is within the current line */
3036
if (!(new_line_start >= linebuf_.get_text()
3037
&& new_line_start <= linebuf_.get_text() + linebuf_.get_text_len()))
3039
/* note the error - this is an internal problem */
3040
throw_internal_error(TCERR_UNSPLICE_NOT_CUR);
3044
/* calculate the length of the part we're keeping */
3045
keep_len = new_line_start - linebuf_.get_text();
3048
* prepend the remainder of the current line into the unsplice buffer
3049
* (we prepend it because the unsplice line is text that comes after
3050
* the current line - so anything in the current line comes before
3051
* anything already in the unsplice buffer)
3053
unsplicebuf_.prepend(new_line_start, linebuf_.get_text_len() - keep_len);
3055
/* cut off the current line at the given point */
3056
linebuf_.set_text_len(keep_len);
3060
/* ------------------------------------------------------------------------ */
3062
* Store text in the source array
3064
const char *CTcTokenizer::store_source(const char *txt, size_t len)
3066
/* reserve space for the text */
3067
reserve_source(len);
3070
const char *p = store_source_partial(txt, len);
3072
/* add a null terminator */
3073
static const char nt[1] = { '\0' };
3074
store_source_partial(nt, 1);
3076
/* return the pointer to the stored space */
3081
* Store partial source; use this AFTER reserving the necessary space. If
3082
* you want null-termination, be sure to reserve the extra byte for that
3083
* and include it in the string. This can be used to build a string piece
3084
* by piece; we simply add the text without null-terminating it.
3086
const char *CTcTokenizer::store_source_partial(const char *txt, size_t len)
3088
/* remember where the string starts */
3089
const char *p = src_ptr_;
3091
/* store the text */
3092
memcpy(src_ptr_, txt, len);
3094
/* advance the source block write position and length */
3098
/* return the storage pointer */
3103
* Reserve space for text in the source array. This always reserves the
3104
* requested amount of space, plus an extra byte for null termination.
3106
void CTcTokenizer::reserve_source(size_t len)
3109
* if we don't have enough space for this line in the current source
3110
* block, start a new block
3112
if (len + 1 > src_rem_)
3114
CTcTokSrcBlock *blk;
3117
* if the line is too long for a source block, throw a fatal
3120
if (len + 1 > TCTOK_SRC_BLOCK_SIZE)
3121
throw_fatal_error(TCERR_SRCLINE_TOO_LONG,
3122
(long)TCTOK_SRC_BLOCK_SIZE);
3124
/* allocate a new block */
3125
blk = new CTcTokSrcBlock();
3127
/* link it into our list */
3128
src_cur_->set_next(blk);
3130
/* it's now the current block */
3133
/* start writing at the start of this block */
3134
src_rem_ = TCTOK_SRC_BLOCK_SIZE;
3135
src_ptr_ = blk->get_buf();
3140
* Commit space previously reserved and now used in the source block
3143
void CTcTokenizer::commit_source(size_t len)
3145
/* advance the write position past the committed text */
3151
/* ------------------------------------------------------------------------ */
3153
* Expand macros in the current line from the current source pointer,
3154
* filling in expbuf_ with the expanded result.
3156
int CTcTokenizer::expand_macros_curline(int read_more, int allow_defined,
3157
int append_to_expbuf)
3161
/* expand macros in the current line */
3162
err = expand_macros(&linebuf_, &p_, &expbuf_, read_more, allow_defined,
3165
/* if that failed, return an error */
3170
* if we're in preprocessor mode, clean up the text for human
3171
* consumption by removing our various expansion flags
3174
remove_expansion_flags(&expbuf_);
3176
/* return the result */
3180
/* ------------------------------------------------------------------------ */
3182
* Remove the special internal macro expansion flags from an expanded macro
3185
void CTcTokenizer::remove_expansion_flags(CTcTokString *buf)
3192
* Scan the expansion buffer and remove all of the no-more-expansion
3193
* flag bytes - we're done expanding the macro now, so we don't need
3194
* this information any longer. When we're writing out the
3195
* preprocessed source for human viewing, we don't want to leave these
3196
* internal markers in the expanded source.
3198
for (src = dst = buf->get_buf(), p.set(src) ; p.getch() != '\0' ; )
3200
/* if this isn't a macro flag, copy it */
3201
if (p.getch() == TOK_MACRO_EXP_END)
3203
/* skip the flag byte and the following embedded pointer */
3204
src += 1 + sizeof(CTcHashEntryPp *);
3207
else if (p.getch() == TOK_FULLY_EXPANDED_FLAG)
3209
/* skip the flag byte */
3215
/* skip this character */
3218
/* copy the bytes of this character as-is */
3219
while (src < p.getptr())
3224
/* set the new buffer length */
3225
buf->set_text_len(dst - buf->get_buf());
3228
/* ------------------------------------------------------------------------ */
3230
* Expand macros in the current line, reading additional source lines if
3233
* 'src' is a pointer to the start of the text to expand; it must point
3234
* into the 'srcbuf' buffer. If 'src' is null, we'll simply start at
3235
* the beginning of the source buffer.
3237
int CTcTokenizer::expand_macros(CTcTokString *srcbuf, utf8_ptr *src,
3238
CTcTokString *expbuf, int read_more,
3239
int allow_defined, int append)
3243
CTcTokString *subexp;
3246
CTcTokStringRef local_srcbuf;
3250
/* presume success */
3253
/* get a macro expansion resource object */
3254
res = alloc_macro_rsc();
3258
/* get our subexpression buffer from the resource object */
3259
subexp = &res->line_exp_;
3261
/* if there's no source buffer or source pointer, provide one */
3265
* there's no source buffer - provide our own non-allocated
3266
* buffer tied to the caller's buffer
3268
local_srcbuf.set_buffer(src->getptr(), strlen(src->getptr()));
3269
srcbuf = &local_srcbuf;
3274
* there's no source pointer - start at the beginning of the
3277
local_src.set((char *)srcbuf->get_text());
3281
/* clear the expansion buffer, unless we're appending to the buffer */
3283
expbuf->clear_text();
3286
* Make sure we have room for a copy of the source line. This is an
3287
* optimization for the simple case where we'll just copy the source
3288
* line unchanged, so that we don't have to repeatedly expand the
3289
* buffer; we will, however, expand the buffer dynamically later, if
3290
* this pre-allocation should prove to be insufficient.
3292
expbuf->ensure_space(expbuf->get_text_len() + srcbuf->get_text_len());
3294
/* note the starting offset, if we have an underlying string buffer */
3295
startofs = src->getptr() - srcbuf->get_text();
3297
/* read the first token */
3298
typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, TRUE);
3300
/* scan through the tokens on the line, looking for macros to expand */
3301
while (typ != TOKT_EOF)
3304
* if it's a symbol, and it hasn't already been marked as fully
3305
* expanded, look it up in the #define table
3307
if (typ == TOKT_SYM && !tok.get_fully_expanded())
3309
CTcHashEntryPp *entry;
3312
* Look up the symbol in the #define symbol table. If we
3313
* find it, expand the macro. Otherwise, if the "defined"
3314
* operator is active, check for that.
3316
* Do not expand the macro if we find that it has already
3317
* been expanded on a prior scan through the current text.
3319
entry = find_define(tok.get_text(), tok.get_text_len());
3321
&& !scan_for_prior_expansion(*src, srcbuf->get_text_end(),
3324
&& tok.get_text_len() == 7
3325
&& memcmp(tok.get_text(), "defined", 7) == 0))
3331
/* get the offset of the macro token in the source buffer */
3332
macro_ofs = tok.get_text() - srcbuf->get_text();
3334
/* expand it into our sub-expansion buffer */
3337
/* expand the macro */
3338
err = expand_macro(res, subexp, srcbuf, src,
3340
read_more, allow_defined, &expanded);
3344
/* parse and expand the defined() operator */
3345
err = expand_defined(subexp, srcbuf, src);
3347
/* "defined" always expands if there's not an error */
3351
/* if an error occurred, return failure */
3356
* if we expanded something, append everything we
3357
* skipped preceding the macro, then rescan; otherwise,
3358
* just keep going without a rescan
3362
/* copy the preceding text to the output */
3363
expbuf->append(srcbuf->get_text() + startofs,
3364
macro_ofs - startofs);
3369
* we didn't expand - get the next token after the
3372
typ = next_on_line(srcbuf, src, &tok,
3373
¯o_in_embedding_, TRUE);
3375
/* continue processing from this token */
3380
* We must now insert the expansion into the source
3381
* buffer at the current point, and re-scan the
3382
* expansion, *along with* the rest of the original
3383
* source line (this is how ANSI C specifies the
3386
* If we can read more, we must be reading out of the
3387
* main input line buffer, so insert the expansion text
3388
* directly into the original source stream, and
3389
* continue reading out of the source stream; this will
3390
* simplify the case where we must read more data from
3391
* the file in the course of the expansion. If we can't
3392
* read more, simply copy the remainder of the current
3393
* input line onto the expanded macro and use it as the
3397
/* get the current offset in the source line */
3398
startofs = src->getptr() - srcbuf->get_text();
3400
/* figure out how much is left on the current line */
3401
rem_len = srcbuf->get_text_len() - startofs;
3403
/* check to see if we can read more */
3407
* we're reading from the original line input buffer
3408
* -- insert the expansion into the source buffer at
3409
* the current point, replacing the original macro
3413
/* make sure we have room for adding the expansion text */
3414
srcbuf->ensure_space(macro_ofs + rem_len
3415
+ subexp->get_text_len());
3417
/* make sure src is still pointing to the right place */
3418
src->set(srcbuf->get_buf() + macro_ofs);
3420
/* move the remainder of the current line to make room */
3421
memmove(srcbuf->get_buf() + macro_ofs
3422
+ subexp->get_text_len(),
3423
srcbuf->get_buf() + startofs,
3426
/* insert the expansion text */
3427
memcpy(srcbuf->get_buf() + macro_ofs, subexp->get_buf(),
3428
subexp->get_text_len());
3430
/* set the new source length */
3431
srcbuf->set_text_len(macro_ofs + rem_len
3432
+ subexp->get_text_len());
3434
/* the new starting offset is the current position */
3435
startofs = macro_ofs;
3437
/* get the next token */
3438
typ = next_on_line(srcbuf, src, &tok,
3439
¯o_in_embedding_, TRUE);
3441
/* continue processing from this token */
3447
* we're reading from a read-only buffer -- add the
3448
* remainder of the source to the expansion buffer,
3449
* and recursively parse the remainder
3451
subexp->append(srcbuf->get_text() + startofs, rem_len);
3454
* evaluate the remainder recursively and append it
3455
* to the expansion already in progress
3457
err = expand_macros(subexp, 0, expbuf, FALSE,
3458
allow_defined, TRUE);
3466
/* get the next token */
3467
typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, TRUE);
3470
/* add the remainder of the source to the output */
3471
expbuf->append(srcbuf->get_text() + startofs,
3472
tok.get_text() - startofs - srcbuf->get_text());
3475
/* release our macro resource object */
3476
release_macro_rsc(res);
3478
/* return the result */
3483
* Allocate a macro resource object. If we're out of resource objects
3484
* in the pool, we'll add another object to the pool.
3486
CTcMacroRsc *CTcTokenizer::alloc_macro_rsc()
3491
* if there's anything in the available list, take the first item
3492
* off the list and return it
3494
if (macro_res_avail_ != 0)
3496
/* remember the item to return */
3497
rsc = macro_res_avail_;
3499
/* remove it from the list */
3500
macro_res_avail_ = macro_res_avail_->next_avail_;
3506
/* there's nothing on the available list - allocate a new item */
3507
rsc = new CTcMacroRsc();
3509
/* if that failed, return failure */
3512
log_error(TCERR_OUT_OF_MEM_MAC_EXP);
3516
/* add it onto the master list */
3517
rsc->next_ = macro_res_head_;
3518
macro_res_head_ = rsc;
3525
* Release a macro resource, returning it to the pool
3527
void CTcTokenizer::release_macro_rsc(CTcMacroRsc *rsc)
3529
/* put it back at the head of the available list */
3530
rsc->next_avail_ = macro_res_avail_;
3531
macro_res_avail_ = rsc;
3535
* Scan a buffer for a prior-expansion flag for a given macro. We'll
3536
* look through the buffer for a TOK_MACRO_EXP_END byte that mentions
3537
* the given symbol table entry; we'll return true if found, false if
3538
* not. True means that the symbol has already been expanded on a prior
3539
* scan of the text, so it should not be re-expanded now.
3541
int CTcTokenizer::scan_for_prior_expansion(utf8_ptr src, const char *src_end,
3542
const CTcHashEntryPp *entry)
3544
/* scan the buffer for the expansion flag byte */
3545
while (src.getptr() < src_end)
3547
/* if this is the flag, check what follows */
3548
if (src.getch() == TOK_MACRO_EXP_END)
3550
CTcHashEntryPp *flag_entry;
3552
/* read the entry from the buffer */
3553
memcpy(&flag_entry, src.getptr() + 1, sizeof(flag_entry));
3555
/* if it matches, indicate that we found it */
3556
if (entry == flag_entry)
3559
/* it's not a match - keep scanning after this flag sequence */
3560
src.set(src.getptr() + 1 + sizeof(flag_entry));
3564
/* it's not the flag - skip this character */
3569
/* we didn't find it */
3574
* Go through a macro expansion and translate from end-of-expansion
3575
* markers to individual token full-expansion markers. This is used
3576
* after we leave a recursion level to convert expanded text into text
3577
* suitable for use in further expansion at an enclosing recursion
3580
void CTcTokenizer::mark_full_exp_tokens(CTcTokString *dstbuf,
3581
const CTcTokString *srcbuf,
3589
/* clear the output buffer if we're not appending to existing text */
3591
dstbuf->clear_text();
3593
/* remember the starting point */
3594
start = srcbuf->get_text();
3596
/* not in an embedded expression within the expansion text yet */
3597
in_embedding = FALSE;
3599
/* scan the source buffer */
3600
p.set((char *)start);
3603
CTcHashEntryPp *cur_entry;
3607
/* get the next token; stop at the end of the line */
3608
typ = next_on_line(srcbuf, &p, &tok, &in_embedding, TRUE);
3609
if (typ == TOKT_EOF)
3613
* if this macro token is being expanded, and it's not already
3614
* marked for no more expansion, mark it
3617
&& !tok.get_fully_expanded()
3618
&& (cur_entry = find_define(tok.get_text(),
3619
tok.get_text_len())) != 0
3620
&& scan_for_prior_expansion(p, srcbuf->get_text_end(), cur_entry))
3623
* This token has been fully expanded in the substitution
3624
* buffer but hasn't yet been marked as such - we must
3625
* insert the fully-expanded marker. First, add up to the
3626
* current point to the output buffer.
3628
if (tok.get_text() > start)
3629
dstbuf->append(start, tok.get_text() - start);
3631
/* add the fully-expanded marker */
3632
ch = TOK_FULLY_EXPANDED_FLAG;
3633
dstbuf->append(&ch, 1);
3635
/* the new starting point is the start of the symbol token */
3636
start = tok.get_text();
3640
/* copy any remaining text to the output */
3641
if (tok.get_text() > start)
3642
dstbuf->append(start, tok.get_text() - start);
3645
* Remove any macro expansion end markers from the output buffer.
3646
* We don't want to leave these around, because they don't apply to
3647
* the enclosing buffer into which we'll substitute this result.
3648
* Note that we've already ensured that these markers will be
3649
* respected for the substitution text by inserting "fully expanded"
3650
* markers in front of each token to which any of the markers we're
3651
* removing should apply.
3653
remove_end_markers(dstbuf);
3658
* Remove end markers from a buffer
3660
void CTcTokenizer::remove_end_markers(CTcTokString *buf)
3666
/* scan the buffer */
3667
for (src = dst = buf->get_buf(), p.set(src) ;
3668
p.getptr() < buf->get_text_end() ; )
3670
/* check for our flag */
3671
if (p.getch() == TOK_MACRO_EXP_END)
3673
/* skip the flag byte and the following embedded pointer */
3674
src += 1 + sizeof(CTcHashEntryPp *);
3679
/* skip this character */
3682
/* copy the bytes of this character as-is */
3683
while (src < p.getptr())
3688
/* set the new buffer size */
3689
buf->set_text_len(dst - buf->get_buf());
3694
* Expand the macro at the current token in the current line.
3696
* 'src' is a pointer to the current position in 'srcbuf'. We'll update
3697
* 'src' to point to the next token after macro or its actual parameters
3698
* list, if it has one.
3700
int CTcTokenizer::expand_macro(CTcMacroRsc *rsc, CTcTokString *expbuf,
3701
const CTcTokString *srcbuf, utf8_ptr *src,
3702
size_t macro_srcbuf_ofs,
3703
CTcHashEntryPp *entry, int read_more,
3704
int allow_defined, int *expanded)
3706
CTcTokString *subexp;
3707
size_t argofs[TOK_MAX_MACRO_ARGS];
3708
size_t arglen[TOK_MAX_MACRO_ARGS];
3713
char flagbuf[1 + sizeof(entry)];
3715
/* presume we won't do any expansion */
3718
/* get our resources */
3719
subexp = &rsc->macro_exp_;
3721
/* remember our parsing starting offset */
3722
startofs = src->getptr() - srcbuf->get_text();
3724
/* clear the expansion output buffer */
3725
expbuf->clear_text();
3727
/* if the macro has arguments, scan the actuals */
3728
if (entry->has_args())
3732
/* read the macro arguments */
3733
if (parse_macro_actuals(srcbuf, src, entry, argofs, arglen,
3734
read_more, &found_actuals))
3741
* If we found no actuals, then this wasn't really an invocation
3742
* of the macro after all - a function-like macro invoked with
3743
* no arguments is simply not replaced. Store the original text
3744
* in the output buffer and return success.
3748
/* copy the original text */
3749
expbuf->copy(srcbuf->get_text() + macro_srcbuf_ofs,
3750
startofs - macro_srcbuf_ofs);
3753
* restore the source read pointer to where it was when we
3756
src->set((char *)srcbuf->get_text() + startofs);
3758
/* return success */
3765
* if there are arguments, replace the macro and substitute actuals
3766
* for the formals; otherwise, just copy the replacement text
3769
if (entry->get_argc() != 0)
3771
/* substitute the actuals */
3772
if (substitute_macro_actuals(rsc, subexp, entry, srcbuf,
3773
argofs, arglen, allow_defined))
3779
/* set up to parse from the expansion buffer */
3780
start = subexp->get_text();
3781
end = start + subexp->get_text_len();
3786
* use our local source buffer that simply references the
3787
* original expansion text, rather than making a copy of the
3790
start = entry->get_expansion();
3791
end = start + entry->get_expan_len();
3794
/* copy the expansion into the output buffer */
3795
expbuf->copy(start, end - start);
3798
* After the end of the expansion sequence, insert the
3799
* fully-expanded flag plus a pointer to the symbol table entry that
3800
* we just expanded. This will allow us to detect during the
3801
* re-scan of the expansion text that this symbol has already been
3802
* expanded, in which case we must suppress further expansion of the
3803
* symbol. This allows us to follow the ANSI C rules for recursive
3806
flagbuf[0] = TOK_MACRO_EXP_END;
3807
memcpy(&flagbuf[1], &entry, sizeof(entry));
3808
expbuf->append(flagbuf, sizeof(flagbuf));
3810
/* indicate that we expanded the macro */
3817
/* return the result */
3822
* Parse a macro's actual parameter list, filling in the given hash
3823
* table with the arguments. Returns zero on success, non-zero on
3824
* error. 'entry' is the macro's defining symbol table entry.
3826
int CTcTokenizer::parse_macro_actuals(const CTcTokString *srcbuf,
3828
const CTcHashEntryPp *entry,
3829
size_t argofs[TOK_MAX_MACRO_ARGS],
3830
size_t arglen[TOK_MAX_MACRO_ARGS],
3831
int read_more, int *found_actuals)
3839
/* presume we're not going to do any line splicing */
3842
/* no arguments parsed yet */
3845
/* get the next token after the macro symbol */
3846
typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, TRUE);
3848
/* splice another line if necessary */
3849
if (typ == TOKT_EOF && read_more)
3852
typ = actual_splice_next_line(srcbuf, src, &tok);
3854
/* note the splice */
3858
/* if we didn't find an open paren, there's no actual list after all */
3859
if (typ != TOKT_LPAR)
3861
/* tell the caller we didn't find any actuals */
3862
*found_actuals = FALSE;
3864
/* if we spliced a line, unsplice it at the current token */
3866
unsplice_line(tok.get_text());
3868
/* return success */
3872
/* remember the offset of the start of the first argument */
3873
argofs[argc] = tok.get_text() + tok.get_text_len() - srcbuf->get_text();
3875
/* skip the open paren */
3876
typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, TRUE);
3878
/* read the arguments */
3879
while (typ != TOKT_RPAR)
3882
int paren_depth, bracket_depth, brace_depth;
3885
/* if we have too many arguments, it's an error */
3886
if ((argc >= entry->get_argc() && !entry->has_varargs())
3887
|| argc >= TOK_MAX_MACRO_ARGS)
3890
log_error(TCERR_PP_MANY_MACRO_ARGS,
3891
(int)entry->getlen(), entry->getstr());
3893
/* scan ahead to to close paren or end of line */
3894
while (typ != TOKT_RPAR && typ != TOKT_EOF)
3895
typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_,
3898
/* done scanning arguments */
3903
* skip tokens until we find a comma outside of nested parens,
3904
* square brackets, or curly braces
3906
paren_depth = bracket_depth = brace_depth = 0;
3907
while (paren_depth != 0
3908
|| bracket_depth != 0
3910
|| (typ != TOKT_COMMA && typ != TOKT_RPAR))
3913
* if it's an open or close paren, brace, or bracket, adjust
3914
* the depth accordingly
3946
/* get the next token */
3947
typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_,
3951
* if we're at the end of the line, and we're allowed to
3952
* read more, splice the next line onto the current line
3954
if (typ == TOKT_EOF && read_more)
3957
typ = actual_splice_next_line(srcbuf, src, &tok);
3959
/* note that we've done some line splicing */
3963
/* if we've reached the end of the file, stop */
3964
if (typ == TOKT_EOF)
3968
/* if we've reached the end of the file, stop */
3969
if (typ == TOKT_EOF)
3972
/* remove any trailing whitespace from the actual's text */
3974
p.set((char *)tok.get_text());
3975
while (p.getptr() > srcbuf->get_text() + argofs[argc])
3979
/* move to the prior character */
3982
/* if it's not a space, stop looking */
3987
* advance past this character so that we keep it in the
3993
* if this last character was a backslash, and we removed
3994
* at least one space following it, keep the one space
3995
* that immediately follows the backslash, since that
3996
* space is part of the backslash's two-character escape
3999
if (ch == '\\' && sp_cnt != 0)
4006
/* that's one more trailing space we've removed - count it */
4010
/* note the argument length */
4011
arglen[argc] = (p.getptr() - srcbuf->get_text()) - argofs[argc];
4013
/* count the argument */
4016
/* check for another argument */
4017
if (typ == TOKT_COMMA)
4019
/* remember the offset of the start of this argument */
4020
argofs[argc] = tok.get_text() + tok.get_text_len()
4021
- srcbuf->get_text();
4023
/* skip the comma and go back for another argument */
4024
typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_,
4027
else if (typ == TOKT_RPAR)
4030
* No need to look any further. Note that we don't want to
4031
* get another token, since we're done parsing the input
4032
* now, and we want to leave the token stream positioned for
4033
* the caller just after the extent of the macro, which, in
4034
* the case of this function-like macro, ends with the
4041
/* if we didn't find the right paren, flag the error */
4042
if (typ != TOKT_RPAR)
4045
? TCERR_PP_MACRO_ARG_RPAR : TCERR_PP_MACRO_ARG_RPAR_1LINE,
4046
(int)entry->getlen(), entry->getstr());
4050
/* remove leading and trailing whitespace from each argument */
4051
for (i = 0 ; i < argc ; ++i)
4059
/* figure the limits of the argument text */
4060
start = srcbuf->get_text() + argofs[i];
4061
end = start + arglen[i];
4063
/* remove leading whitespace */
4064
for (p.set((char *)start) ; p.getptr() < end && is_space(p.getch()) ;
4067
/* set the new offset and length */
4068
del_len = p.getptr() - start;
4069
argofs[i] += del_len;
4070
arglen[i] -= del_len;
4073
/* remove trailing whitespace */
4076
while (p.getptr() > start)
4080
/* go to the prior character */
4083
/* if it's not whitespace, keep it */
4087
/* put the character back */
4091
* if this is a backslash, and a space follows, keep the
4092
* immediately following space, since it's part of the
4093
* backslash sequence
4095
if (ch == '\\' && sp_cnt != 0)
4098
/* we're done scanning */
4102
/* count another removed trailing space */
4106
/* adjust the length */
4107
arglen[i] -= (end - p.getptr());
4111
* if we did any line splicing, cut off the rest of the line and
4112
* push it back into the logical input stream as a new line - this
4113
* will allow better error message positioning if errors occur in
4114
* the remainder of the line, since this means we'll only
4115
* artificially join onto one line the part of the new line that
4116
* contained the macro parameters
4119
unsplice_line(tok.get_text() + tok.get_text_len());
4121
/* make sure we found enough arguments */
4122
if (argc < entry->get_min_argc())
4124
/* fill in the remaining arguments with empty strings */
4125
for ( ; argc < entry->get_argc() ; ++argc)
4131
/* note the error, but proceed with empty arguments */
4132
log_warning(TCERR_PP_FEW_MACRO_ARGS,
4133
(int)entry->getlen(), entry->getstr());
4137
* if we have varargs, always supply an empty marker for the last
4140
if (entry->has_varargs() && argc < TOK_MAX_MACRO_ARGS)
4146
/* success - we found an actual parameter list */
4147
*found_actuals = TRUE;
4152
* Splice a line for macro actual parameters. Sets the source pointer
4153
* to the start of the new line. Reads the first token on the spliced
4154
* line and returns it.
4156
* We will splice new lines until we find a non-empty line or reach the
4157
* end of the input. If this returns EOF, it indicates that we've
4158
* reached the end of the entire input.
4160
tc_toktyp_t CTcTokenizer::
4161
actual_splice_next_line(const CTcTokString *srcbuf,
4162
utf8_ptr *src, CTcToken *tok)
4164
/* add a space onto the end of the current line */
4165
linebuf_.append(" ", 1);
4167
/* keep going until we find a non-empty line */
4173
/* splice the next line onto the current line */
4174
new_line_ofs = read_line(TRUE);
4177
* make sure we read additional lines as needed to complete any
4178
* strings left open at the end of the line
4180
if (in_quote_ != '\0')
4183
/* if there was no more, return end of file */
4184
if (new_line_ofs == -1)
4187
/* set the source to the start of the additional line */
4188
src->set((char *)linebuf_.get_text() + new_line_ofs);
4190
/* get the next token */
4191
typ = next_on_line(srcbuf, src, tok, ¯o_in_embedding_, TRUE);
4193
/* if we didn't get EOF, it means we found a non-empty line */
4194
if (typ != TOKT_EOF)
4200
* Substitute the actual parameters in a macro's expansion
4202
int CTcTokenizer::substitute_macro_actuals(CTcMacroRsc *rsc,
4203
CTcTokString *subexp,
4204
CTcHashEntryPp *entry,
4205
const CTcTokString *srcbuf,
4206
const size_t *argofs,
4207
const size_t *arglen,
4216
const CVmHashTable *actuals;
4217
CTcTokString *actual_exp_buf;
4218
const size_t expand_max = 10;
4219
static struct expand_info_t
4221
/* type of expansion (#foreach, #ifempty, #ifnempty) */
4225
* flag: this is an iterator type (if this is true, the varargs
4226
* formal should be expanded to the current argument given by our
4227
* 'arg' member; if this is false, the varargs formal should be
4228
* expanded as the full varargs list)
4232
/* the marker character that delimits the foreach arguments */
4235
/* location of start of expansion region for foreach */
4238
/* current argument index */
4241
/* the current expansion part (0 = first part, etc) */
4244
expand_stack[expand_max], *expand_sp;
4246
/* get the actuals table */
4247
actuals = entry->get_params_table();
4249
/* get the actual expansion buffer from the resource object */
4250
actual_exp_buf = &rsc->actual_exp_buf_;
4253
* Scan the replacement text for formals, and replace each formal
4254
* with the actual. Set up a pointer at the start of the expansion
4257
start = entry->get_expansion();
4258
expsrc.set((char *)start);
4260
/* we don't yet have a previous token */
4261
prvtok.settyp(TOKT_EOF);
4262
prvprvtok.settyp(TOKT_EOF);
4264
/* clear the expansion buffer */
4265
subexp->clear_text();
4267
/* we have no #foreach/#ifempty/#ifnempty stack yet */
4268
expand_sp = expand_stack;
4270
/* scan the tokens in the expansion text */
4271
for (typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE) ;
4275
* check to see if we've reached the end of a
4276
* #foreach/#ifempty/#ifnempty
4278
if (expand_sp != expand_stack)
4280
/* check to see if we're at the delimiter */
4281
if (utf8_ptr::s_getch(tok.get_text()) == (expand_sp-1)->delim)
4283
/* copy the prior expansion so far */
4284
if (tok.get_text() > start)
4285
subexp->append(start, tok.get_text() - start);
4287
/* go back to the start of the token */
4288
expsrc.set((char *)tok.get_text());
4290
/* see what kind of token we're expanding */
4291
switch((expand_sp-1)->typ)
4293
case TOKT_MACRO_FOREACH:
4294
/* it's a #foreach - process the appropriate part */
4295
switch ((expand_sp-1)->part)
4299
* We've been doing the first part, which is the
4300
* main expansion per actual. This delimiter thus
4301
* introduces the 'between' portion, which we copy
4302
* between each iteration, but not after the last
4303
* iteration. So, if we've just done the last
4304
* actual, skip this part entirely; otherwise,
4305
* keep going, using this part.
4307
if (argofs[(expand_sp-1)->arg + 1] == 0)
4309
/* skip this one remaining part */
4310
skip_delimited_group(&expsrc, 1);
4312
/* we're finished with the iteration */
4318
* we have more arguments, so we want to
4319
* expand this part - skip the deliter and
4324
/* we're now in the next part of the iterator */
4325
(expand_sp-1)->part++;
4331
* We've reached the end of the entire #foreach
4332
* string, so we're done with this iteration.
4333
* Skip the delimiter.
4339
* if we have more arguments, start over with the
4340
* next iteration; otherwise, pop the #foreach
4343
if (argofs[(expand_sp-1)->arg + 1] == 0)
4345
/* no more arguments - pop the #foreach level */
4350
/* we have more arguments - move to the next */
4351
(expand_sp-1)->arg++;
4353
/* go back to the start of the expansion */
4354
expsrc = (expand_sp-1)->start;
4356
/* we have no previous token for pasting ops */
4357
prvtok.settyp(TOKT_EOF);
4358
prvprvtok.settyp(TOKT_EOF);
4360
/* we're back in the first part of the iterator */
4361
(expand_sp-1)->part = 0;
4367
case TOKT_MACRO_IFEMPTY:
4368
case TOKT_MACRO_IFNEMPTY:
4370
* #ifempty or #ifnempty - we've reached the end of
4371
* the conditional text, so simply pop a level and
4372
* keep going after the delimiter
4375
/* skip the delimiter */
4388
/* the next chunk starts here */
4389
start = expsrc.getptr();
4391
/* get the next token */
4392
typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE);
4394
/* we have the next token, so back and process it */
4399
/* if it's a #foreach marker, start a #foreach iteration */
4400
if (typ == TOKT_MACRO_FOREACH && entry->has_varargs())
4402
/* copy the prior expansion so far */
4403
if (tok.get_text() > start)
4404
subexp->append(start, tok.get_text() - start);
4406
/* push a #foreach level, if possible */
4407
if (expand_sp - expand_stack >= expand_max)
4410
* we can't create another level - log an error and ignore
4413
log_error(TCERR_PP_FOREACH_TOO_DEEP);
4415
else if (argofs[entry->get_argc() - 1] == 0)
4418
* we have no actuals for the variable part of the
4419
* formals, so we must iterate zero times through the
4420
* #foreach part - in other words, simply skip ahead to
4421
* the end of the #foreach
4423
skip_delimited_group(&expsrc, 2);
4427
/* remember and skip the marker character */
4428
expand_sp->delim = expsrc.getch();
4431
/* set the expansion type */
4432
expand_sp->typ = typ;
4435
* remember the position where the #foreach started, since
4436
* we need to come back here for each use of the variable
4438
expand_sp->start = expsrc;
4440
/* we're an iterator type */
4441
expand_sp->is_iterator = TRUE;
4444
* Start at the first argument in the variable part of the
4445
* argument list. The last formal corresponds to the
4446
* first variable argument.
4448
expand_sp->arg = entry->get_argc() - 1;
4450
/* we're in the main expansion part of the expression */
4451
expand_sp->part = 0;
4453
/* push the new level */
4457
/* the next chunk starts here */
4458
start = expsrc.getptr();
4460
/* get the next token */
4461
typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE);
4463
/* we have the next token, so back and process it */
4467
/* if it's a varargs #ifempty or #ifnempty flag, expand it */
4468
if ((typ == TOKT_MACRO_IFEMPTY || typ == TOKT_MACRO_IFNEMPTY)
4469
&& entry->has_varargs())
4474
/* copy the prior expansion so far */
4475
if (tok.get_text() > start)
4476
subexp->append(start, tok.get_text() - start);
4478
/* determine if the varargs list is empty or not */
4479
is_empty = (argofs[entry->get_argc() - 1] == 0);
4482
* decide whether or not expand it, according to the empty
4483
* state and the flag type
4485
expand = ((is_empty && typ == TOKT_MACRO_IFEMPTY)
4486
|| (!is_empty && typ == TOKT_MACRO_IFNEMPTY));
4489
* if we're going to expand it, push a level; otherwise, just
4490
* skip the entire expansion
4494
/* make sure we have room for another level */
4495
if (expand_sp - expand_stack >= expand_max)
4497
/* no room - log an error and ignore the new level */
4498
log_error(TCERR_PP_FOREACH_TOO_DEEP);
4502
/* remember and skip the delimiter */
4503
expand_sp->delim = expsrc.getch();
4507
* we're not an iterator type, so inherit the
4508
* enclosing level's meaning of the varargs formal
4510
if (expand_sp - expand_stack == 0)
4512
/* outermost level - use the whole varargs list */
4513
expand_sp->is_iterator = FALSE;
4517
/* use the enclosing level's meaning */
4518
expand_sp->is_iterator = (expand_sp-1)->is_iterator;
4519
expand_sp->arg = (expand_sp-1)->arg;
4522
/* set the expansion type */
4523
expand_sp->typ = typ;
4525
/* push the new level */
4531
/* not expanding - just skip the entire expansion */
4532
skip_delimited_group(&expsrc, 1);
4535
/* the next chunk starts here */
4536
start = expsrc.getptr();
4538
/* get the next token */
4539
typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE);
4541
/* we have the next token, so back and process it */
4545
/* if it's a varargs #argcount indicator, expand it */
4546
if (typ == TOKT_MACRO_ARGCOUNT && entry->has_varargs())
4551
/* copy the prior expansion so far */
4552
if (tok.get_text() > start)
4553
subexp->append(start, tok.get_text() - start);
4556
* count the number of arguments after and including the
4557
* variable argument placeholder
4559
for (i = entry->get_argc() - 1 ; argofs[i] != 0 ; ++i) ;
4561
/* make a string out of the variable argument count */
4562
sprintf(buf, "%d", i - (entry->get_argc() - 1));
4564
/* add the argument count to the output buffer */
4565
subexp->append(buf, strlen(buf));
4567
/* the next chunk starts after the #argcount */
4568
start = expsrc.getptr();
4570
/* get the next token */
4571
typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE);
4573
/* we have the next token, so back and process it */
4577
/* if it's a symbol, check for an actual */
4578
if (typ == TOKT_MACRO_FORMAL)
4584
int pasting_at_left, pasting_at_right;
4587
tc_toktyp_t stringize_type;
4588
CTcToken paste_at_right_tok;
4590
/* assume we'll copy up to the start of this token */
4594
* get the index of the actual in the argument vector --
4595
* this is given by the second byte of the special macro
4596
* parameter flag token
4598
argnum = (int)(uchar)tok.get_text()[1] - 1;
4601
* If we have varargs, and this is the varargs argument, and
4602
* the current #foreach stack level indicates that we're
4603
* iterating through the varargs list, treat this as a
4604
* reference to the current argument in the iteration.
4606
if (expand_sp != expand_stack
4607
&& argnum == entry->get_argc() - 1
4608
&& (expand_sp-1)->is_iterator)
4611
* we're on a #foreach iterator, and this is the varargs
4612
* formal - use the current #foreach iteration element
4615
argnum = (expand_sp-1)->arg;
4619
* Get the length of this argument. If we have varargs, and
4620
* this is the last formal, which is the placeholder for the
4621
* variable argument list, and we're not in a #foreach
4622
* iterator, the value is the value of the entire string of
4623
* variable arguments, including the commas.
4625
if (expand_sp == expand_stack
4626
&& entry->has_varargs()
4627
&& argnum == entry->get_argc() - 1)
4632
* It's the full varargs list - use the length from the
4633
* first varargs argument to the last. Find the last
4637
i < TOK_MAX_MACRO_ARGS && argofs[i] != 0 ; ++i) ;
4640
* The full list length is the distance from the offset of
4641
* the first to the end of the last. If there are no
4642
* varargs arguments at all, the length is zero.
4647
argnum_len = argofs[i-1] + arglen[i-1] - argofs[argnum];
4652
* it's not the full varargs list, so just use the length
4653
* of this single actual
4655
argnum_len = arglen[argnum];
4658
/* assume we won't do any token pasting or stringizing */
4659
pasting = pasting_at_left = pasting_at_right = FALSE;
4663
* if the previous token was a token-pasting operator,
4664
* remove it and any preceding whitespace from the source
4665
* material, since we want to append the actual parameter
4666
* text directly after the preceding token
4669
if (prvtok.gettyp() == TOKT_POUNDPOUND)
4674
* note that we have token pasting - we're pasting
4675
* something to the left of this token (since we had a
4676
* "##" before this token
4679
pasting_at_left = TRUE;
4681
/* go back to the ## token */
4682
p = prvtok.get_text();
4684
/* remove any preceding whitespace */
4685
for (prv_ch = 0 ; p > start ; )
4689
/* get the previous character */
4690
prvp = utf8_ptr::s_dec((char *)p);
4691
prv_ch = utf8_ptr::s_getch((char *)prvp);
4693
/* if it's not a space, we're done */
4694
if (!is_space(prv_ch))
4697
/* move back over this character */
4702
* Weird special case: if the previous character was a
4703
* comma, and the formal we're pasting is a variable
4704
* argument formal (i.e., the last formal in a varargs
4705
* macro), and the varargs list is empty, then remove the
4706
* comma. This is a handy shorthand notation that allows
4707
* the varargs list to be added to a comma-delimited list,
4708
* such as a function call's actuals or the contents of a
4712
&& entry->has_varargs()
4713
&& argnum == entry->get_argc() - 1
4714
&& argofs[argnum] == 0)
4717
* it's the special case - move back one more
4718
* character to delete the comma
4720
p = utf8_ptr::s_dec((char *)p);
4723
else if (prvtok.gettyp() == TOKT_POUND
4724
|| prvtok.gettyp() == TOKT_POUNDAT)
4726
/* go back to the # token */
4727
p = prvtok.get_text();
4729
/* note that we have stringizing */
4731
stringize_type = prvtok.gettyp();
4732
stringize_qu = (prvtok.gettyp() == TOKT_POUND
4735
/* go back one more token */
4737
prvprvtok.settyp(TOKT_EOF);
4740
* go back and check for pasting again, since we could
4741
* be pasting to a stringized token
4743
goto check_paste_left;
4746
/* copy the prior expansion so far */
4748
subexp->append(start, p - start);
4750
/* remember the symbol as the previous token */
4754
/* get the next token after the formal */
4755
typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE);
4758
* If it's followed by a token-pasting operator, we need to
4759
* paste the next token directly onto the end of the text we
4760
* just added to the buffer, skipping any intervening
4761
* whitespace; otherwise, we want to start adding again at
4762
* the next character after the original token.
4764
if (typ == TOKT_POUNDPOUND)
4766
utf8_ptr old_expsrc;
4769
/* note that we have pasting to the right of this token */
4771
pasting_at_right = TRUE;
4773
/* remember where we started */
4774
old_expsrc = expsrc;
4776
/* remember the current token for a moment */
4779
/* skip to the next token after the ## */
4780
typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE);
4782
/* remember the token we're pasting to the right */
4783
paste_at_right_tok = tok;
4785
/* check for pasting to a stringizer */
4786
if (stringize && typ == stringize_type)
4789
* leave the ## in the stream for now - we'll fix it
4790
* up when we stringize the next token, rather than
4793
expsrc = old_expsrc;
4799
* remember that we have a token-pasting operator,
4800
* so that we can tell that we're pasting when we
4801
* look at the next token
4807
/* start next text from here */
4808
start = tok.get_text();
4812
/* Start at the end of the symbol token */
4813
start = prvtok.get_text() + prvtok.get_text_len();
4817
* If we're not doing any pasting, recursively expand macros
4818
* in the actual expansion text. If we're pasting, do not
4819
* expand any macros in the expansion, since we want to do
4820
* the pasting before we do any expanding.
4822
if (pasting && stringize)
4827
/* presume we'll include the open and close quotes */
4832
* If we're pasting to the left, and the buffer so far
4833
* ends in the same quote we're adding to this token,
4834
* combine the strings by removing the preceding quote
4835
* and not adding the open quote on the new string
4837
if (subexp->get_text_len() > 0
4838
&& *(subexp->get_text_end() - 1) == stringize_qu)
4840
/* remove the close quote from the expansion so far */
4841
subexp->set_text_len(subexp->get_text_len() - 1);
4843
/* don't add the open quote to the new string */
4848
* If we're pasting to the right, and we have a string
4849
* of the same type following, or we will be pasting a
4850
* stringizing pair, paste the two strings together to
4851
* form one string by removing the close quote from this
4852
* string and the open quote from the next string
4854
if (pasting_at_right && *tok.get_text() == stringize_qu)
4858
* We're both stringizing this argument and pasting
4859
* another token - first stringize the actual.
4861
stringize_macro_actual(subexp,
4863
+ argofs[argnum], argnum_len,
4864
stringize_qu, add_open, add_close);
4867
* if we decided to remove the closing quote, we want to
4868
* remove the open quote from the following string as
4869
* well - copy in the following string without its open
4875
* append the following token without its first
4876
* character (its open quote)
4878
subexp->append(tok.get_text() + 1,
4879
tok.get_text_len() - 1);
4881
/* move on to the next token */
4884
typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_,
4887
/* start from the new token */
4888
start = tok.get_text();
4898
/* get the actual argument information */
4899
argp = srcbuf->get_text() + argofs[argnum];
4903
* if we're pasting to the left of this token, and the
4904
* token starts with a fully-expanded flag, remove the
4905
* flag - we're making up a new token out of this and
4906
* what comes before, so the token that we fully
4907
* expanded is disappearing, so the fully-expanded
4908
* status no longer applies
4910
if (pasting_at_left && *argp == TOK_FULLY_EXPANDED_FLAG)
4917
/* presume we won't find any quoted strings */
4921
* check for string concatenation to the left - if we're
4922
* concatenating two strings of the same type, remove
4923
* the adjacent quotes to make it a single string
4926
&& subexp->get_text_len() > 0
4927
&& (*argp == '\'' || *argp == '"')
4928
&& *(subexp->get_text_end() - 1) == *argp)
4930
/* remove the close quote from the expansion so far */
4931
subexp->set_text_len(subexp->get_text_len() - 1);
4933
/* remember the quote character */
4936
/* don't add the open quote to the new string */
4941
/* presume we won't have to do anything special */
4945
* If we're pasting at the right, also remove any
4946
* fully-expanded flag just before the last token in the
4949
if (pasting_at_right)
4955
/* scan for the final token in the expansion string */
4956
p.set((char *)argp);
4957
old_tok.settyp(TOKT_INVALID);
4958
while (p.getptr() < argp + len)
4961
* get another token - stop at EOF or if we go
4962
* past the bounds of the expansion text
4964
if (next_on_line(&p, &tok, ¯o_in_embedding_,
4967
|| tok.get_text() >= argp + len)
4970
/* remember the previous token */
4975
* if the final token is a symbol, and it has the
4976
* fully-expanded flag, we must omit the flag from
4979
if (old_tok.gettyp() == TOKT_SYM
4980
&& old_tok.get_fully_expanded())
4983
* append up to but not including the flag byte
4984
* preceding the final token
4986
subexp->append(argp, tok.get_text() - 1 - argp);
4989
* append from the last token to the end of the
4990
* expansion, skipping the flag byte
4992
subexp->append(tok.get_text(),
4993
len - (tok.get_text() - argp));
4995
/* we've done the appending */
4998
else if (quote_char != 0
4999
&& paste_at_right_tok.get_text_len() != 0
5000
&& *paste_at_right_tok.get_text() == quote_char)
5003
* we're pasting two strings together - append
5004
* up to but not including the close quote
5006
subexp->append(argp, len - 1);
5009
* append the next token, but do not include the
5012
subexp->append(paste_at_right_tok.get_text() + 1,
5013
paste_at_right_tok.get_text_len() - 1);
5016
* restart after the right token, since we've
5017
* now fully processed that token
5019
start = paste_at_right_tok.get_text()
5020
+ paste_at_right_tok.get_text_len();
5028
* append the actual without expansion, if we haven't
5029
* already handled it specially
5032
subexp->append(argp, len);
5036
/* stringize the actual */
5037
stringize_macro_actual(subexp,
5039
+ argofs[argnum], argnum_len,
5040
stringize_qu, TRUE, TRUE);
5044
CTcTokStringRef actual_src_buf;
5046
/* recursively expand macros in the actual text */
5048
set_buffer(srcbuf->get_text() + argofs[argnum],
5050
if (expand_macros(&actual_src_buf, 0, actual_exp_buf,
5051
FALSE, allow_defined, FALSE))
5055
* Append the expanded actual, marking any
5056
* fully-expanded tokens as such and removing
5057
* end-of-expansion markers.
5059
* We can't leave end-of-expansion markers in the
5060
* expanded actual text, because end-of-expansion
5061
* markers apply only to the current recursion level,
5062
* and we've now exited the actual's recursion level.
5063
* However, we must not expand further anything in the
5064
* actual's expansion that has already been fully
5065
* expanded. To achieve both of these goals, we switch
5066
* here from marking the run of text (with the end
5067
* marker) to marking individual tokens.
5069
mark_full_exp_tokens(subexp, actual_exp_buf, TRUE);
5072
/* we've already read the next token, so proceed */
5076
/* remember the current token as the previous token */
5080
/* get the next token of the expansion */
5081
typ = next_on_line(&expsrc, &tok, ¯o_in_embedding_, TRUE);
5084
/* copy the remaining replacement text */
5085
subexp->append(start, tok.get_text() - start);
5092
* Skip the source of a delimited macro expansion area (#foreach,
5093
* #ifempty, #ifnempty).
5095
void CTcTokenizer::skip_delimited_group(utf8_ptr *p, int parts_to_skip)
5099
/* get the delimiter character */
5103
* if the delimiter put us at the end of the line, there's nothing to
5106
if (delim == 0 || delim == TOK_END_PP_LINE)
5109
/* skip the delimiter */
5112
/* keep going until we've skipped the desired number of parts */
5113
while (parts_to_skip != 0)
5117
/* read the next character */
5120
/* if it's the end of the line, give up */
5121
if (ch == 0 || ch == TOK_END_PP_LINE)
5124
* we ran out of input before reaching the delimiter, so this
5125
* is implicitly the end of it
5130
/* check what we have */
5133
/* that's one less part to skip */
5139
else if (ch == TOK_MACRO_FOREACH_FLAG)
5141
/* it's a nested #foreach - skip all of its parts */
5142
skip_delimited_group(p, 2);
5144
else if (ch == TOK_MACRO_IFEMPTY_FLAG
5145
|| ch == TOK_MACRO_IFNEMPTY_FLAG)
5147
/* nested #ifempty or #ifnempty - skip its expansion */
5148
skip_delimited_group(p, 1);
5152
/* it's nothing special to us - skip it */
5159
* Stringize a macro actual parameter value into a macro expansion
5162
void CTcTokenizer::stringize_macro_actual(CTcTokString *expbuf,
5163
const char *actual_val,
5164
size_t actual_len, char quote_char,
5166
int add_close_quote)
5171
wchar_t inner_quote_char;
5174
/* add the open quote if desired */
5176
expbuf->append("e_char, 1);
5178
/* remember the start of the current segment */
5182
* add the characters of the actual parameter value, quoting any
5183
* quotes or backslashes
5185
for (src.set((char *)actual_val),
5186
in_inner_quote = FALSE, inner_quote_char = '\0', prvch = '\0' ;
5187
src.getptr() < actual_val + actual_len ; )
5191
/* get this character */
5194
/* compress runs of whitespace to single spaces */
5195
if (is_space(cur) && prvch != '\\')
5197
/* append up to this character */
5198
if (src.getptr() > start)
5199
expbuf->append(start, src.getptr() - start);
5201
/* find the next non-space character */
5202
for ( ; src.getptr() < actual_val + actual_len ; src.inc())
5204
if (!is_space(src.getch()))
5209
* if we're not at the start or end of the string, add a
5210
* single space to replace the entire run of whitespace --
5211
* don't do this at the start or end of the string, since
5212
* we must remove leading and trailing whitespace
5214
if (prvch != '\0' && src.getptr() < actual_val + actual_len)
5215
expbuf->append(" ", 1);
5217
/* note that the previous character is a space */
5220
/* this is the new starting point */
5221
start = src.getptr();
5223
/* proceed - we're already at the next character */
5228
* Check to see if we need to quote this character. Quote any
5229
* quote mark matching the enclosing quotes; also quote any
5230
* backslash that occurs within nested quotes within the source
5231
* material, but not backslashes that occur originally outside
5234
if (cur == quote_char
5235
|| (cur == '\\' && in_inner_quote))
5237
/* append the segment up to (but not including) this character */
5238
if (src.getptr() > start)
5239
expbuf->append(start, src.getptr() - start);
5241
/* add an extra backslash */
5242
expbuf->append("\\", 1);
5244
/* remember the start of the next segment */
5245
start = src.getptr();
5249
* if this is a quote character, and it's not itself escaped,
5250
* reverse our in-quote flag
5255
* If we're in an inner quote, and it's a match for the open
5256
* inner quote, we're no longer in a quote. Otherwise, if
5257
* we're not in quotes and this is some kind of quote, enter
5260
if (in_inner_quote && cur == inner_quote_char)
5262
/* we're leaving the inner quoted string */
5263
in_inner_quote = FALSE;
5265
else if (!in_inner_quote && (cur == '"' || cur == '\''))
5267
/* we're entering a new inner quoted string */
5268
in_inner_quote = TRUE;
5269
inner_quote_char = cur;
5273
/* remember this as the previous character */
5276
/* move on to the next character */
5280
/* if there's anything in the final segment, append it */
5281
if (src.getptr() > start)
5282
expbuf->append(start, src.getptr() - start);
5284
/* add the close quote if desired */
5285
if (add_close_quote)
5286
expbuf->append("e_char, 1);
5290
* Expand a "defined" preprocessor operator
5292
int CTcTokenizer::expand_defined(CTcTokString *subexp,
5293
const CTcTokString *srcbuf, utf8_ptr *src)
5300
/* get the next token */
5301
typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, FALSE);
5303
/* note whether we have an open paren; if we do, skip it */
5304
paren = (typ == TOKT_LPAR);
5306
typ = next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, FALSE);
5308
/* get the symbol */
5309
if (typ != TOKT_SYM)
5311
log_error(TCERR_PP_DEFINED_NO_SYM,
5312
(int)tok.get_text_len(), tok.get_text());
5316
/* look to see if the symbol is defined */
5317
found = (find_define(tok.get_text(), tok.get_text_len()) != 0);
5319
/* expand the macro to "1" if found, "0" if not */
5320
subexp->copy(found ? "1" : "0", 1);
5322
/* check for and skip the matching close paren */
5325
/* require the closing paren */
5326
if (next_on_line(srcbuf, src, &tok, ¯o_in_embedding_, FALSE)
5329
/* generate an error if we don't find it */
5330
log_error(TCERR_PP_DEFINED_RPAR);
5340
/* ------------------------------------------------------------------------ */
5342
* Process comments. Replaces each character of a comment with a space.
5344
void CTcTokenizer::process_comments(size_t start_ofs)
5348
int trailing_sp_after_bs;
5350
/* we haven't found a backslash followed by trailing space yet */
5351
trailing_sp_after_bs = FALSE;
5354
* Scan the line. When inside a comment, replace each character of
5355
* the comment with a space. When outside comments, simply copy
5356
* characters intact.
5358
* Note that we need a separate src and dst pointer, because the
5359
* character length of the original and replaced characters may
5360
* change. Fortunately, the length will never do anything but
5361
* shrink or stay the same, since the only change we make is to
5362
* insert spaces, which are always one byte apiece in UTF-8; we can
5363
* therefore update the buffer in place.
5365
for (src.set(linebuf_.get_buf() + start_ofs),
5366
dst.set(linebuf_.get_buf() + start_ofs) ;
5367
src.getch() != '\0' ; src.inc())
5371
/* get the current character */
5374
/* check to see if we're in a comment */
5375
if (str_->is_in_comment())
5378
* check to see if the comment is ending, or if we have an
5379
* apparent nested comment (which isn't allowed)
5381
if (cur == '*' && src.getch_at(1) == '/')
5384
* skip an extra character of the source - we'll skip
5385
* one in the main loop, so we only need to skip one
5390
/* we're no longer in a comment */
5391
str_->set_in_comment(FALSE);
5393
else if (cur == '/' && src.getch_at(1) == '*')
5395
/* looks like a nested comment - warn about it */
5396
if (!G_prs->get_syntax_only())
5397
log_warning(TCERR_NESTED_COMMENT);
5400
/* continue without copying anything from inside the comment */
5403
else if (in_quote_ != '\0')
5405
/* see what we have */
5409
* It's a backslash sequence -- copy the backslash to
5410
* the output, and skip it. Note that we don't have to
5411
* worry about the line ending with a backslash, since
5412
* the line reader will already have considered that to
5418
/* get the next character, so we copy it directly */
5421
else if (cur == in_quote_)
5424
* this is the closing quote character - simply note
5425
* that we're no longer in a quoted string
5429
else if (in_quote_ == '"' && !comment_in_embedding_
5430
&& cur == '<' && src.getch_at(1) == '<')
5433
* it's an embedded expression starting point - skip the
5434
* first of the '<' characters (the enclosing loop will
5435
* skip the second one)
5439
/* the string is done */
5442
/* we're in an embedding now */
5443
comment_in_embedding_ = TRUE;
5445
/* copy the extra '<' to the output */
5452
* Monitor the stream for a backslash followed by trailing
5453
* spaces. If this is a backslash, note that we might have a
5454
* backslash with trailing spaces; if it's a space, we might
5455
* still have this, so leave the flag alone; if it's anything
5456
* else, clear the flag, since we've found something other
5457
* than backslashes and spaces.
5460
trailing_sp_after_bs = TRUE;
5461
else if (!is_space(cur))
5462
trailing_sp_after_bs = FALSE;
5464
/* check to see if we're starting a comment */
5467
switch(src.getch_at(1))
5470
/* note that we're starting a comment */
5471
str_->set_in_comment(TRUE);
5474
* replace the starting slash with a space - this
5475
* will effectively replace the entire comment with
5476
* a single space, since we won't copy anything else
5477
* from inside the comment
5484
* comment to end of line - we can terminate the
5485
* line at the opening slash and return immediately,
5486
* because the entire rest of the line is to be
5493
/* not a comment - copy it as-is */
5497
else if (cur == '"' || cur == '\'')
5499
/* it's the start of a new string */
5502
else if (cur < 0x09)
5505
* it's a special flag character - we need to guarantee
5506
* that this character never occurs in input (it
5507
* shouldn't anyway, since it's a control character), so
5508
* translate it to a space
5512
else if (comment_in_embedding_
5513
&& cur == '>' && src.getch_at(1) == '>')
5516
* it's the end of an embedded expression - we're back
5517
* in a double-quoted string (only double-quoted strings
5518
* can have embedded expressions)
5521
comment_in_embedding_ = FALSE;
5523
/* skip the extra '>' and copy it to the output */
5529
/* set the current character in the output */
5533
/* set the updated line buffer length */
5534
linebuf_.set_text_len(dst.getptr() - linebuf_.get_buf());
5537
* if we found a backslash with nothing following but whitespace, flag
5538
* a warning, since they might have meant the backslash as a line
5539
* continuation signal, but we're not interpreting it that way because
5540
* of the trailing whitespace
5542
if (trailing_sp_after_bs)
5543
log_warning(TCERR_TRAILING_SP_AFTER_BS);
5547
* Splice strings. Splice additional lines onto the current line until
5548
* we find the end of the string.
5550
void CTcTokenizer::splice_string()
5557
/* presume we'll find proper termination */
5561
* remember the current in-quote and in-embedding status, as of the
5562
* end of the current line - when we splice, the line reader will
5563
* update these to the status at the end of the newly-read material,
5564
* but we want to scan from the beginning of the newly-read material
5566
in_quote = in_quote_;
5567
in_embedding = comment_in_embedding_;
5569
/* keep going until we find the end of the string */
5577
* append a space at the end of the line, to replace the newline
5578
* that we've eliminated
5580
if (string_newline_spacing_)
5581
linebuf_.append(" ", 1);
5583
/* splice another line */
5584
new_line_ofs = read_line(TRUE);
5586
/* if we reached end of file, there's no more splicing we can do */
5587
if (new_line_ofs == -1)
5590
/* get a pointer to the new text */
5591
new_line_p = (char *)linebuf_.get_text() + new_line_ofs;
5593
/* skip leading spaces in the new line */
5594
for (p.set(new_line_p) ; is_space(p.getch()) ; p.inc()) ;
5596
/* if we skipped any spaces, remove them from the text */
5597
if (p.getptr() > new_line_p)
5602
/* calculate the length of the rest of the line */
5603
rem = linebuf_.get_text_len() - (p.getptr() - linebuf_.get_buf());
5605
/* calculate the new length of the line */
5606
new_len = (new_line_p - linebuf_.get_buf()) + rem;
5608
/* move the rest of the line down over the spaces */
5609
memmove(new_line_p, p.getptr(), rem);
5611
/* set the new length */
5612
linebuf_.set_text_len(new_len);
5616
* If the new line contains only "}" or ";", presume that the
5617
* string is unterminated and terminate it here. (This
5618
* heuristic could flag well-formed strings as erroneous, but
5619
* users can always work around this by moving these characters
5620
* onto lines that contain at least one other non-whitespace
5624
if (p.getch() == '}' || p.getch() == ';')
5626
/* skip trailing whitespace */
5627
for (p.inc() ; is_space(p.getch()) ; p.inc()) ;
5630
* if there's nothing else on the line, presume it's an
5631
* unterminated string
5633
if (p.getch() == '\0')
5636
log_error(TCERR_POSSIBLE_UNTERM_STR,
5639
/* remember that it's unterminated */
5640
unterm = (char)in_quote;
5643
* since we're adding a presumed close quote that never
5644
* appears in the text, we need to figure the new
5645
* in-string status for the line; clear the in-quote
5646
* flag, and re-scan comments from the current point on
5650
process_comments(new_line_p - linebuf_.get_buf());
5652
/* we're done - unsplice from the start of the new line */
5658
/* scan for the end of the string */
5659
for (p.set(new_line_p) ;; p.inc())
5661
/* get this character */
5664
/* see what we have */
5667
/* it's a backslash sequence - skip the extra character */
5670
else if (cur == in_quote)
5672
/* it's our quote character - skip it, and we're done */
5676
else if (in_quote == '"' && !in_embedding
5677
&& cur == '<' && p.getch_at(1) == '<')
5680
* it's an embedded expression starter - skip the '<<'
5681
* sequence and stop scanning
5687
else if (cur == '\0')
5689
/* end of line - go back and splice another line */
5696
/* unsplice the line at the current point */
5697
unsplice_line(p.getptr());
5699
/* if we found an unterminated string, supply implicit termination */
5701
linebuf_.append(&unterm, 1);
5705
/* ------------------------------------------------------------------------ */
5707
* Process a #pragma directive
5709
void CTcTokenizer::pp_pragma()
5714
void (CTcTokenizer::*func)();
5716
static pp_kw_def kwlist[] =
5718
// { "c", &CTcTokenizer::pragma_c }, -- obsolete
5719
{ "once", &CTcTokenizer::pragma_once },
5720
{ "all_once", &CTcTokenizer::pragma_all_once },
5721
{ "message", &CTcTokenizer::pragma_message },
5722
{ "newline_spacing", &CTcTokenizer::pragma_newline_spacing },
5723
{ "sourceTextGroup", &CTcTokenizer::pragma_source_text_group },
5729
/* get the pragma keyword */
5730
if (next_on_line() != TOKT_SYM)
5732
log_warning(TCERR_UNKNOWN_PRAGMA,
5733
(int)curtok_.get_text_len(), curtok_.get_text());
5737
/* get the keyword length */
5738
kwlen = curtok_.get_text_len();
5740
/* scan the pragma list */
5741
for (kwp = kwlist ; kwp->kw != 0 ; ++kwp)
5743
/* is this our keyword? */
5744
if (strlen(kwp->kw) == kwlen
5745
&& memicmp(curtok_.get_text(), kwp->kw, kwlen) == 0)
5747
/* this is our keyword - invoke the handler */
5748
(this->*(kwp->func))();
5755
/* we didn't find it - generate a warning */
5756
log_warning(TCERR_UNKNOWN_PRAGMA, kwlen, curtok_.get_text());
5759
#if 0 // #pragma C is not currently used
5761
* Process a #pragma C directive
5763
void CTcTokenizer::pragma_c()
5768
/* get the next token */
5769
tok = next_on_line();
5772
* "+" or empty (end of line or whitespace) indicates C mode; "-"
5773
* indicates standard mode
5775
if (tok == TOKT_PLUS || tok == TOKT_EOF)
5776
new_pragma_c = TRUE;
5777
else if (tok == TOKT_MINUS)
5778
new_pragma_c = FALSE;
5781
log_warning(TCERR_BAD_PRAGMA_SYNTAX);
5782
new_pragma_c = str_->is_pragma_c();
5786
* retain the pragma in the result if we're in preprocess-only mode,
5787
* otherwise remove it
5792
/* set the mode in the stream */
5793
str_->set_pragma_c(new_pragma_c);
5795
/* if there's a parser, notify it of the change */
5797
G_prs->set_pragma_c(new_pragma_c);
5802
* Process a #pragma once directive
5804
void CTcTokenizer::pragma_once()
5806
/* add this file to the ONCE list */
5807
add_include_once(str_->get_desc()->get_fname());
5809
/* don't retain this pragma in the result */
5814
* Process a #pragma all_once directive
5816
void CTcTokenizer::pragma_all_once()
5820
/* get the next token */
5821
tok = next_on_line();
5824
* "+" or empty (end of line or whitespace) indicates ALL_ONCE mode;
5825
* '-' indicates standard mode
5827
if (tok == TOKT_PLUS || tok == TOKT_EOF)
5829
else if (tok == TOKT_MINUS)
5832
log_warning(TCERR_BAD_PRAGMA_SYNTAX);
5834
/* don't retain this pragma in the result */
5839
* Process a #pragma message directive
5841
void CTcTokenizer::pragma_message()
5846
* copy the source line through the "message" token to the macro
5847
* expansion buffer - we don't want to expand that part, but we want
5848
* it to appear in the expansion, so just copy the original
5850
startofs = (curtok_.get_text() + curtok_.get_text_len()
5851
- linebuf_.get_text());
5852
expbuf_.copy(linebuf_.get_text(), startofs);
5854
/* expand macros; don't allow reading additional lines */
5855
if (expand_macros_curline(FALSE, FALSE, TRUE))
5862
* If we're in normal compilation mode, display the message. If we're
5863
* in preprocess-only mode, simply retain the message in the
5864
* preprocessed result, so that it shows up when the result is
5867
* Ignore messages in list-includes mode.
5869
if (!pp_only_mode_ && !list_includes_mode_)
5871
/* set up at the first post-processed token */
5872
start_new_line(&expbuf_, startofs);
5874
/* if there's an open paren, skip it */
5875
if (next_on_line_xlat(0) == TOKT_LPAR)
5876
next_on_line_xlat(0);
5878
log_warning(TCERR_BAD_PRAGMA_SYNTAX);
5880
/* keep going until we reach the closing paren */
5881
while (curtok_.gettyp() != TOKT_RPAR
5882
&& curtok_.gettyp() != TOKT_EOF)
5884
/* display this token */
5885
switch(curtok_.gettyp())
5890
/* display the text of the token */
5891
msg_str(curtok_.get_text(), curtok_.get_text_len());
5895
/* display the integer */
5896
msg_long(curtok_.get_int_val());
5900
/* ignore anything else */
5904
/* get the next token */
5905
next_on_line_xlat(0);
5911
/* remove the message from the result text */
5916
/* preprocessing - copy expanded text to line buffer */
5917
linebuf_.copy(expbuf_.get_text(), expbuf_.get_text_len());
5922
* Process a #pragma newline_spacing(on/off) directive
5924
void CTcTokenizer::pragma_newline_spacing()
5928
/* if we're in preprocess-only mode, just pass the pragma through */
5932
/* get the '(' token and the on/off token */
5933
if (next_on_line() != TOKT_LPAR || next_on_line() != TOKT_SYM)
5935
log_warning(TCERR_BAD_PRAGMA_SYNTAX);
5939
/* note the new mode flag */
5940
if (curtok_.get_text_len() == 2
5941
&& memcmp(curtok_.get_text(), "on", 2) == 0)
5946
else if (curtok_.get_text_len() == 3
5947
&& memcmp(curtok_.get_text(), "off", 3) == 0)
5954
log_warning(TCERR_BAD_PRAGMA_SYNTAX);
5958
/* make sure we have the ')' token */
5959
if (next_on_line() != TOKT_RPAR)
5961
log_warning(TCERR_BAD_PRAGMA_SYNTAX);
5965
/* set the new mode */
5966
string_newline_spacing_ = f;
5969
/* done - discard this line buffer */
5975
* Process a #pragma sourceTextGroup(on/off) directive
5977
void CTcTokenizer::pragma_source_text_group()
5982
/* if we're in preprocess-only mode, just pass the pragma through */
5986
/* get the '(' token and the on/off token, if present */
5987
if ((tok = next_on_line()) == TOKT_EOF)
5989
/* no on/off - by default it's on */
5992
else if (tok == TOKT_LPAR && next_on_line() == TOKT_SYM)
5994
/* get the on/off mode */
5995
if (curtok_.get_text_len() == 2
5996
&& memcmp(curtok_.get_text(), "on", 2) == 0)
6001
else if (curtok_.get_text_len() == 3
6002
&& memcmp(curtok_.get_text(), "off", 3) == 0)
6009
log_warning(TCERR_BAD_PRAGMA_SYNTAX);
6013
/* make sure we have the ')' token */
6014
if (next_on_line() != TOKT_RPAR)
6016
log_warning(TCERR_BAD_PRAGMA_SYNTAX);
6022
/* anything else is invalid syntax */
6023
log_warning(TCERR_BAD_PRAGMA_SYNTAX);
6027
/* set the new mode in the parser */
6028
G_prs->set_source_text_group_mode(f);
6031
/* done - discard this line buffer */
6036
/* ------------------------------------------------------------------------ */
6038
* Process a #charset directive
6040
void CTcTokenizer::pp_charset()
6043
* Encountering a #charset directive within the tokenizer is always
6044
* an error. If the file opener managed to use a #charset, we'll
6045
* never see it, because the file opener will have skipped it before
6046
* giving us the file.
6048
* If we flagged a #charset error when opening the file, indicate
6049
* that the problem is that the character set given was unloadable;
6050
* otherwise, the problem is that #charset is in the wrong place.
6052
log_error(str_ != 0 && str_->get_charset_error()
6053
? TCERR_CANT_LOAD_CHARSET : TCERR_UNEXPECTED_CHARSET);
6055
/* don't retain this pragma in the result */
6059
/* ------------------------------------------------------------------------ */
6061
* Process a #include directive
6063
void CTcTokenizer::pp_include()
6069
CTcSrcFile *new_src;
6071
int default_charset_error;
6072
char full_name[OSFNMAX];
6073
char lcl_name[OSFNMAX];
6075
CTcTokFileDesc *desc;
6079
/* presume we'll expand macros */
6083
* Check to see if expansion is needed. Macro expansion is needed
6084
* only if the source line is not of one of the following forms:
6086
*. #include "filename"
6087
*. #include <filename>
6089
for (start = p_ ; is_space(p_.getch()) ; p_.inc()) ;
6093
/* look for a matching '>' */
6098
/* look for a matching '"' */
6103
/* find the matching character */
6104
for (p_.inc() ; p_.getch() != '\0' && p_.getch() != match ;
6107
/* if we found it, check for other characters on the line */
6108
if (p_.getch() == match)
6110
/* skip the matching character */
6113
/* skip whitespace */
6114
while (is_space(p_.getch()))
6118
* make sure there's nothing else on the line - if not, it's
6119
* one of the approved formats, so there's no need to do
6122
if (p_.getch() == 0)
6128
/* go back to read from the original starting point */
6131
/* expand macros if necessary */
6134
/* do the expansion */
6135
if (expand_macros_curline(FALSE, FALSE, FALSE))
6137
/* clear the buffer and abort */
6143
* remove any expansion flags, so that we don't have to worry about
6144
* parsing or skipping them
6146
remove_expansion_flags(&expbuf_);
6148
/* read from the expansion buffer */
6149
start_new_line(&expbuf_, 0);
6152
/* skip leading whitespace */
6153
for ( ; is_space(p_.getch()) ; p_.inc()) ;
6155
/* we have to be looking at at '"' or '<' character */
6156
if (p_.getch() == '"')
6158
/* look for a matching quote, and look for a local file */
6162
else if (p_.getch() == '<')
6164
/* look for a matching angle bracket, and look for a system file */
6170
/* invalid syntax - log an error and ignore the line */
6171
log_error(TCERR_BAD_INC_SYNTAX);
6176
/* skip the open quote, and remember where the filename starts */
6180
/* find the matching quote */
6181
for ( ; p_.getch() != '\0' && p_.getch() != match ; p_.inc()) ;
6183
/* if we didn't find the match, log an error and ignore the line */
6184
if (p_.getch() == '\0')
6186
log_error(TCERR_BAD_INC_SYNTAX);
6193
* We found the close quote. Before we parse the filename, make
6194
* one last check: if there's anything further on the line apart
6195
* from whitespace, it's extraneous, so issue a warning.
6198
/* remember where the close quote is */
6199
utf8_ptr closep = p_;
6201
/* skip it, and then skip any trailing whitespace */
6202
for (p_.inc() ; is_space(p_.getch()) ; p_.inc()) ;
6204
/* if we're not at the end of the line, issue a warning */
6205
if (p_.getch() != '\0')
6206
log_warning(TCERR_EXTRA_INC_SYNTAX);
6209
* Null-terminate the filename. (We know there's nothing else
6210
* interesting in the buffer after the filename at this point, so
6211
* we don't care about overwriting the quote or anything that might
6217
/* check to see if the filename is absolute */
6218
is_absolute = os_is_file_absolute(fname.getptr());
6220
/* we have yet to find the file */
6224
* in case the name is in portable URL notation, convert from URL
6225
* notation to local notation; we'll consider this form of the name
6226
* first, and only if we can't find it in this form will we try
6227
* treating the name as using local filename conventions
6229
os_cvt_url_dir(lcl_name, sizeof(lcl_name), fname.getptr(), FALSE);
6232
* Search for the included file.
6234
* First, if it's a local file (in quotes rather than angle
6235
* brackets), start the search in the directory containing the
6236
* current file, then look in the directory containing the parent
6237
* file, and so on. If we fail to find it, proceed as for a
6240
if (is_local && last_desc_ != 0)
6242
CTcTokStream *cur_str;
6243
char pathbuf[OSFNMAX];
6245
/* start with the current file, and search parents */
6246
for (cur_str = str_ ; cur_str != 0 ; cur_str = cur_str->get_parent())
6248
/* get the path to the current file */
6249
os_get_path_name(pathbuf, sizeof(pathbuf),
6250
last_desc_->get_fname());
6253
* try the URL-converted name first - this takes precedence
6254
* over a local interpretation of the name
6256
os_build_full_path(full_name, sizeof(full_name),
6258
if (!osfacc(full_name))
6264
/* if it's a relative local name, try again with local naming */
6268
* build the full filename, treating the name as using
6269
* local system conventions
6271
os_build_full_path(full_name, sizeof(full_name),
6272
pathbuf, fname.getptr());
6274
/* if we found it, so note and stop searching */
6275
if (!osfacc(full_name))
6285
* If we still haven't found the file (or if it's a non-local file,
6286
* in angle brackets), search the include path.
6290
tctok_incpath_t *inc_path;
6292
/* scan the include path */
6293
for (inc_path = incpath_head_ ; inc_path != 0 ;
6294
inc_path = inc_path->nxt)
6296
/* try the URL-converted local name first */
6297
os_build_full_path(full_name, sizeof(full_name),
6298
inc_path->path, lcl_name);
6299
if (!osfacc(full_name))
6305
/* try with the local name, if it's a relative local name */
6308
/* build the full name for the file in this directory */
6309
os_build_full_path(full_name, sizeof(full_name),
6310
inc_path->path, fname.getptr());
6312
/* if we found it, stop searching */
6313
if (!osfacc(full_name))
6323
* If the filename specified an absolute path, and we didn't find a
6324
* file with any of the local interpretations, look at the absolute
6325
* path. Note that our portable URL-style notation doesn't allow
6326
* absolute notation, so we use only the exact name as specified in
6327
* the #include directive as the absolute form.
6329
if (is_absolute && !found)
6331
/* use the original filename as the full name */
6332
strcpy(full_name, fname.getptr());
6334
/* try finding the file */
6335
found = !osfacc(full_name);
6339
* we have our copy of the filename now; we don't want to retain
6340
* this directive in the preprocessed source, so clear out the line
6346
* if we didn't find the file anywhere, show an error and ignore the
6347
* #include directive
6351
log_error(TCERR_INC_NOT_FOUND,
6352
(int)strlen(fname.getptr()), fname.getptr());
6357
* Check the list of included files that are marked for inclusion
6358
* only once. If we've already included this file, ignore this
6359
* redundant inclusion. Check based on the full filename that we
6360
* resolved from the search path.
6362
if (find_include_once(full_name))
6364
/* log an error if appropriate */
6365
if (warn_on_ignore_incl_)
6366
log_warning(TCERR_REDUNDANT_INCLUDE,
6367
(int)strlen(full_name), full_name);
6369
/* ignore this #include directive */
6373
/* open a file source to read the file */
6374
new_src = CTcSrcFile::open_source(full_name, res_loader_,
6375
default_charset_, &charset_error,
6376
&default_charset_error);
6378
/* if we couldn't open the file, log an error and ignore the line */
6382
* if the error was due to the default character set, log that
6383
* problem; otherwise, log the general file-open problem
6385
if (default_charset_error)
6386
log_error(TCERR_CANT_LOAD_DEFAULT_CHARSET, default_charset_);
6388
log_error(TCERR_INC_NOT_FOUND,
6389
(int)strlen(full_name), full_name);
6391
/* we can go no further */
6395
/* get the descriptor for the source file */
6396
desc = get_file_desc(full_name, strlen(full_name), FALSE,
6398
fname.getptr() != 0 ? strlen(fname.getptr()) : 0);
6401
* remember the current #pragma newline_spacing mode, so we can restore
6402
* it when we reinstate the current stream
6404
str_->set_newline_spacing(string_newline_spacing_);
6407
* Create and install the new file reader stream object. By
6408
* installing it as the current reader, we'll activate it so that
6409
* the next line read will come from the new stream. Note that the
6410
* current stream becomes the parent of the new stream, so that we
6411
* revert to the current stream when the new stream is exhausted;
6412
* this will allow us to pick up reading from the current stream at
6413
* the next line after the #include directive when we've finished
6414
* including the new file.
6416
str_ = new CTcTokStream(desc, new_src, str_, charset_error, if_sp_);
6419
* If we're in ALL_ONCE mode, it means that every single file we
6420
* include should be included only once.
6423
add_include_once(full_name);
6426
* if we're in list-includes mode, write the name of the include file
6427
* to the standard output
6429
if (list_includes_mode_)
6430
G_hostifc->print_msg("#include %s\n", full_name);
6433
/* ------------------------------------------------------------------------ */
6435
* Add a file to the include-once list. Once a file is in this list, we
6436
* won't include it again.
6438
void CTcTokenizer::add_include_once(const char *fname)
6440
tctok_incfile_t *prvinc;
6442
/* if the file is already in the list, don't add it again */
6443
if (find_include_once(fname))
6446
/* create a new entry for the filename */
6447
prvinc = (tctok_incfile_t *)t3malloc(sizeof(tctok_incfile_t)
6450
/* save the filename */
6451
strcpy(prvinc->fname, fname);
6453
/* link the new entry into our list */
6454
prvinc->nxt = prev_includes_;
6455
prev_includes_ = prvinc;
6459
* Find a file in the list of files to be included only once. Returns
6460
* true if the file is in the list, false if not.
6462
int CTcTokenizer::find_include_once(const char *fname)
6464
tctok_incfile_t *prvinc;
6466
/* search the list */
6467
for (prvinc = prev_includes_ ; prvinc != 0 ; prvinc = prvinc->nxt)
6469
/* if this one matches, we found it, so return true */
6470
if (strcmp(fname, prvinc->fname) == 0)
6474
/* we didn't find the file */
6478
/* ------------------------------------------------------------------------ */
6480
* Process a #define directive
6482
void CTcTokenizer::pp_define()
6484
const char *macro_name;
6486
const char *argv[TOK_MAX_MACRO_ARGS];
6487
size_t argvlen[TOK_MAX_MACRO_ARGS];
6492
CTcHashEntryPp *entry;
6495
/* get the macro name */
6496
if (next_on_line() != TOKT_SYM)
6498
log_error(TCERR_BAD_DEFINE_SYM,
6499
(int)curtok_.get_text_len(), curtok_.get_text());
6504
/* make a copy of the macro name */
6505
macro_name = curtok_.get_text();
6506
macro_len = curtok_.get_text_len();
6508
/* no arguments yet */
6511
/* presume we won't find a varargs marker */
6512
has_varargs = FALSE;
6515
* If there's a '(' immediately after the macro name, without any
6516
* intervening whitespace, it has arguments; otherwise, it has no
6517
* arguments. Note which case we have.
6519
if (p_.getch() == '(')
6524
/* note that we have an argument list */
6527
/* assume we're not done yet */
6530
/* skip the paren and get the next token */
6532
tok = next_on_line();
6534
/* check for an empty argument list */
6535
if (tok == TOKT_RPAR)
6537
/* note that we're done with the arguments */
6541
/* scan the argument list */
6544
/* if we have too many arguments, it's an error */
6545
if (argc >= TOK_MAX_MACRO_ARGS)
6547
log_error(TCERR_TOO_MANY_MAC_PARMS,
6548
macro_name, macro_len, TOK_MAX_MACRO_ARGS);
6553
/* if we're at the end of the macro, it's an error */
6554
if (tok == TOKT_EOF)
6556
/* log the error and ignore the line */
6557
log_error(TCERR_MACRO_NO_RPAR);
6562
/* check for a valid initial symbol character */
6563
if (tok != TOKT_SYM)
6565
log_error_curtok(TCERR_BAD_MACRO_ARG_NAME);
6570
/* remember the argument name */
6571
argvlen[argc] = curtok_.get_text_len();
6572
argv[argc++] = curtok_.get_text();
6574
/* get the next token */
6575
tok = next_on_line();
6577
/* make sure we have a comma or paren following */
6578
if (tok == TOKT_COMMA)
6580
/* we have more arguments - skip the comma */
6581
tok = next_on_line();
6583
else if (tok == TOKT_ELLIPSIS)
6585
/* skip the ellipsis */
6586
tok = next_on_line();
6588
/* note the varargs marker */
6591
/* this must be the last argument */
6592
if (tok != TOKT_RPAR)
6595
log_error_curtok(TCERR_MACRO_ELLIPSIS_REQ_RPAR);
6597
/* discard the line and give up */
6602
/* that's the last argument - we can stop now */
6605
else if (tok == TOKT_RPAR)
6607
/* no more arguments - note that we can stop now */
6612
/* invalid argument - log an error and discard the line */
6613
log_error_curtok(TCERR_MACRO_EXP_COMMA);
6622
* there are no arguments - the macro's expansion starts
6623
* immediately after the end of the name and any subsequent
6629
/* skip whitespace leading up to the expansion */
6630
while (is_space(p_.getch()))
6633
/* the rest of the line is the expansion */
6634
expan = p_.getptr();
6636
/* don't allow defining "defined" */
6637
if (macro_len == 7 && memcmp(macro_name, "defined", 7) == 0)
6640
log_error(TCERR_REDEF_OP_DEFINED);
6642
/* don't retain the directive in the preprocessed result */
6645
/* ignore the definition */
6649
/* get the length of the expansion text */
6650
expan_len = strlen(expan);
6653
* remove any trailing whitespace from the expansion text; however,
6654
* leave a trailing space if it's preceded by a backslash
6656
while (expan_len > 0
6657
&& is_space(expan[expan_len-1])
6658
&& !(expan_len > 1 && expan[expan_len-2] == '\\'))
6662
* If there are arguments, scan the expansion for formal parameter
6663
* names. For each one we find, replace it with the special
6664
* TOK_MACRO_FORMAL_FLAG character followed by a one-byte value
6665
* giving the argument index. This special sequence is less costly
6666
* to find when we're expanding the macros - by doing the search
6667
* here, we only need to do it once, rather than each time we expand
6677
int in_embedding = FALSE;
6680
* Generate our modified expansion text in the macro expansion
6681
* buffer. Initially, make sure we have room for a copy of the
6682
* text; we'll resize the buffer later if we find we need even
6685
expbuf_.ensure_space(expan_len);
6687
/* scan for argument names, and replace them */
6688
for (start = expan, dstofs = 0, src.set((char *)expan) ;; )
6690
/* get the next token */
6691
typ = next_on_line(&src, &tok, &in_embedding, FALSE);
6693
/* if we've reached the end of the expansion, we're done */
6694
if (typ == TOKT_EOF)
6698
* If this is a formal parameter name, we'll replace it with
6699
* a special two-byte sequence; otherwise, we'll keep it
6702
if (typ == TOKT_SYM)
6706
/* find it in the table */
6707
for (i = 0 ; i < argc ; ++i)
6709
/* does it match this argument name? */
6710
if (argvlen[i] == tok.get_text_len()
6711
&& memcmp(argv[i], tok.get_text(),
6712
tok.get_text_len()) == 0)
6719
/* get the length of the formal name */
6720
arg_len = argvlen[i];
6723
* the normal replacement length for a formal
6724
* parameter is two bytes - one byte for the flag,
6725
* and one for the formal parameter index
6729
/* by default, the flag byte is the formal flag */
6730
flag_byte = TOK_MACRO_FORMAL_FLAG;
6733
* Check for special varargs control suffixes. If
6734
* we matched the last argument name, and this is
6735
* a varargs macro, we might have a suffix.
6739
&& src.getch() == '#')
6741
/* check for the various suffixes */
6742
if (memcmp(src.getptr() + 1, "foreach", 7) == 0
6743
&& !is_sym(src.getch_at(8)))
6746
* include the suffix length in the token
6752
* the flag byte is the #foreach flag,
6753
* which is a one-byte sequence
6755
flag_byte = TOK_MACRO_FOREACH_FLAG;
6758
else if (memcmp(src.getptr() + 1,
6760
&& !is_sym(src.getch_at(9)))
6763
* include the suffix length in the token
6769
* the flag byte is the #argcount flag,
6770
* which is a one-byte sequence
6772
flag_byte = TOK_MACRO_ARGCOUNT_FLAG;
6775
else if (memcmp(src.getptr() + 1,
6777
&& !is_sym(src.getch_at(8)))
6779
/* include the length */
6782
/* set the one-byte flag */
6783
flag_byte = TOK_MACRO_IFEMPTY_FLAG;
6786
else if (memcmp(src.getptr() + 1,
6788
&& !is_sym(src.getch_at(9)))
6790
/* include the length */
6793
/* set the one-byte flag */
6794
flag_byte = TOK_MACRO_IFNEMPTY_FLAG;
6800
* calculate the new length - we're removing the
6801
* argument name and adding the replacement string
6804
new_len = expan_len + repl_len - arg_len;
6807
* we need two bytes for the replacement - if
6808
* this is more than we're replacing, make sure
6809
* we have room for the extra
6811
if (new_len > expan_len)
6812
expbuf_.ensure_space(new_len);
6815
* copy everything up to but not including the
6818
if (tok.get_text() > start)
6820
/* store the text */
6821
memcpy(expbuf_.get_buf() + dstofs,
6822
start, tok.get_text() - start);
6824
/* move past the stored text in the output */
6825
dstofs += tok.get_text() - start;
6828
/* the next segment starts after this token */
6829
start = tok.get_text() + arg_len;
6831
/* store the flag byte */
6832
expbuf_.get_buf()[dstofs++] = flag_byte;
6835
* If appropriate, store the argument index - this
6836
* always fits in one byte because our hard limit
6837
* on formal parameters is less than 128 per
6838
* macro. Note that we add one to the index so
6839
* that we never store a zero byte, to avoid any
6840
* potential confusion with a null terminator
6844
expbuf_.get_buf()[dstofs++] = (char)(i + 1);
6846
/* remember the new length */
6847
expan_len = new_len;
6849
/* no need to search further for it */
6856
/* copy the last segment */
6857
if (tok.get_text() > start)
6859
/* store the text */
6860
memcpy(expbuf_.get_buf() + dstofs, start,
6861
tok.get_text() - start);
6864
/* set the new length */
6865
expbuf_.set_text_len(expan_len);
6867
/* use the modified expansion text instead of the original */
6868
expan = expbuf_.get_text();
6872
* check the symbol table to see if this symbol is already defined -
6873
* if so, show a warning, but honor the new definition
6875
entry = find_define(macro_name, macro_len);
6879
* Check for a trivial redefinition - if the number of arguments
6880
* is the same, and the type (object-like or function-like) is
6881
* the same, and the expansion string is identical, there's no
6882
* need to warn, because the redefinition has no effect and can
6883
* thus be safely ignored. Note that we must ignore any
6884
* differences in the whitespace in the expansions for this
6887
if ((entry->has_args() != 0) == (has_args != 0)
6888
&& entry->get_argc() == argc
6889
&& lib_strequal_collapse_spaces(expan, expan_len,
6890
entry->get_expansion(),
6891
entry->get_expan_len()))
6893
/* it's a non-trivial redefinition - ignore it */
6897
/* log a warning about the redefinition */
6898
log_warning(TCERR_MACRO_REDEF, (int)macro_len, macro_name);
6900
/* remove and delete the old entry */
6901
defines_->remove(entry);
6903
/* if the item isn't already in the #undef table, add it */
6904
if (find_undef(macro_name, macro_len) == 0)
6907
* move the entry to the #undef table so that we can keep track
6908
* of the fact that this macro's definition has changed in the
6909
* course of the compilation
6911
undefs_->add(entry);
6916
* the name is already in the #undef table, so we don't need
6917
* another copy - just forget about the old entry entirely
6923
/* create an entry for the new macro */
6924
entry = new CTcHashEntryPpDefine(macro_name, macro_len, TRUE,
6925
has_args, argc, has_varargs,
6926
argv, argvlen, expan, expan_len);
6928
/* add it to the hash table */
6929
defines_->add(entry);
6932
/* don't retain the directive in the preprocessed source */
6936
/* ------------------------------------------------------------------------ */
6938
* Process a #ifdef directive
6940
void CTcTokenizer::pp_ifdef()
6942
/* process the ifdef/ifndef with a positive sense */
6943
pp_ifdef_or_ifndef(TRUE);
6947
* Process a #ifndef directive
6949
void CTcTokenizer::pp_ifndef()
6951
/* process the ifdef/ifndef with a negative sense */
6952
pp_ifdef_or_ifndef(FALSE);
6956
* Process a #ifdef or #ifndef. If 'sense' is true, we'll take the
6957
* branch if the symbol is defined (hence #ifdef), otherwise we'll take
6958
* it if the symbol isn't defined (hence #ifndef).
6960
void CTcTokenizer::pp_ifdef_or_ifndef(int sense)
6962
char macro_name[TOK_SYM_MAX_BUFFER];
6966
/* make sure we have a valid symbol */
6967
if (pp_get_lone_ident(macro_name, sizeof(macro_name)))
6969
/* clear the line buffer */
6973
* push a true if to avoid cascading errors for matching #endif
6976
push_if(TOKIF_IF_YES);
6982
/* check to see if it's defined */
6983
found = (find_define(macro_name, strlen(macro_name)) != 0);
6986
* if we found it and they wanted it found, or we didn't find it and
6987
* they didn't want it found, take a true branch; otherwise, take a
6990
if ((sense != 0) == (found != 0))
6991
state = TOKIF_IF_YES;
6993
state = TOKIF_IF_NO;
6995
/* push the new #if state */
6998
/* don't retain the directive in the preprocessed source */
7002
/* ------------------------------------------------------------------------ */
7004
* Process a #if directive
7006
void CTcTokenizer::pp_if()
7010
/* expand macros; don't allow reading additional lines */
7011
if (expand_macros_curline(FALSE, TRUE, FALSE))
7015
* we don't need the original source line any more, and we don't
7016
* want to copy it to the preprocessed output, so clear it
7020
/* parse out of the expansion buffer */
7021
start_new_line(&expbuf_, 0);
7023
/* parse the preprocessor expression */
7024
if (pp_parse_expr(&val, TRUE, TRUE, TRUE))
7027
* we can't get a value; treat the expression as true and
7028
* continue parsing, so that we don't throw off the #if nesting
7034
/* push the new state according to the value of the expression */
7035
push_if(val.get_val_bool() ? TOKIF_IF_YES : TOKIF_IF_NO);
7041
/* clear the line buffer */
7045
* push a true if - even though we can't evaluate the condition, we
7046
* can at least avoid a cascade of errors for the matching #endif
7049
push_if(TOKIF_IF_YES);
7052
/* ------------------------------------------------------------------------ */
7054
* Process a #elif directive
7056
void CTcTokenizer::pp_elif()
7060
/* expand macros; don't allow reading additional lines */
7061
if (expand_macros_curline(FALSE, TRUE, FALSE))
7067
/* parse out of the expansion buffer */
7068
start_new_line(&expbuf_, 0);
7070
/* parse the preprocessor expression */
7071
if (pp_parse_expr(&val, TRUE, TRUE, TRUE))
7078
* make sure that the #elif occurs in the same file as the
7081
if (if_sp_ <= str_->get_init_if_level())
7084
log_error(TCERR_PP_ELIF_NOT_IN_SAME_FILE);
7086
/* clear the text and abort */
7091
/* check the current #if state */
7092
switch(get_if_state())
7096
* we just took the #if branch, so don't take this or any
7097
* subsequent #elif or #else branch, regardless of the value of
7098
* the condition - set the state to DONE to indicate that we're
7099
* skipping everything through the endif
7101
change_if_state(TOKIF_IF_DONE);
7106
* We haven't yet taken a #if or #elif branch, so we can take
7107
* this branch if its condition is true. If this branch's
7108
* condition is false, stay with NO so that we will consider
7109
* future #elif and #else branches.
7111
if (val.get_val_bool())
7112
change_if_state(TOKIF_IF_YES);
7117
* we've already taken a #if or #elif branch, so we must ignore
7118
* this and subsequent #elif and #else branches until we get to
7119
* our #endif - just stay in state DONE
7124
case TOKIF_ELSE_YES:
7127
* we're not in a #if branch at all, or we're inside a #else; a
7128
* #elif is not legal here
7130
log_error(TCERR_PP_ELIF_WITHOUT_IF);
7134
/* don't retain the directive in the preprocessed source */
7138
/* ------------------------------------------------------------------------ */
7140
* Process a #else directive
7142
void CTcTokenizer::pp_else()
7144
/* make sure there's nothing but whitespace on the line */
7145
if (next_on_line() != TOKT_EOF)
7146
log_error(TCERR_PP_EXTRA);
7149
* make sure that the #else occurs in the same file as the
7152
if (if_sp_ <= str_->get_init_if_level())
7155
log_error(TCERR_PP_ELSE_NOT_IN_SAME_FILE);
7157
/* clear the text and abort */
7162
/* check our current #if state */
7163
switch(get_if_state())
7168
* we've already taken a true #if branch, so we don't want to
7169
* process the #else part - switch to a false #else branch
7171
change_if_state(TOKIF_ELSE_NO);
7176
* we haven't yet found a true #if branch, so take the #else
7177
* branch -- switch to a true #else branch
7179
change_if_state(TOKIF_ELSE_YES);
7183
case TOKIF_ELSE_YES:
7186
* we're not in a #if at all, or we're in a #else - log an error
7189
log_error(TCERR_PP_ELSE_WITHOUT_IF);
7193
/* don't retain the directive in the preprocessed source */
7197
/* ------------------------------------------------------------------------ */
7199
* Process a #endif directive
7201
void CTcTokenizer::pp_endif()
7203
/* make sure the rest of the line is blank */
7204
if (next_on_line() != TOKT_EOF)
7205
log_error(TCERR_PP_EXTRA);
7207
/* ignore the rest of the line */
7210
/* if we're not in a #if in the same file it's an error */
7213
log_error(TCERR_PP_ENDIF_WITHOUT_IF);
7216
else if (if_sp_ <= str_->get_init_if_level())
7218
log_error(TCERR_PP_ENDIF_NOT_IN_SAME_FILE);
7222
/* pop a #if level */
7225
/* don't retain the directive in the preprocessed source */
7229
/* ------------------------------------------------------------------------ */
7231
* Process a #error directive
7233
void CTcTokenizer::pp_error()
7238
* copy the source line through the "error" token to the macro
7239
* expansion buffer - we don't want to expand that part, but we want
7240
* it to appear in the expansion, so just copy the original
7242
startofs = (curtok_.get_text() + curtok_.get_text_len()
7243
- linebuf_.get_text());
7244
expbuf_.copy(linebuf_.get_text(), startofs);
7246
/* expand macros; don't allow reading additional lines */
7247
if (expand_macros_curline(FALSE, FALSE, TRUE))
7253
/* clean up any expansion flags embedded in the buffer */
7254
remove_expansion_flags(&expbuf_);
7257
* If we're in preprocess-only mode, simply retain the text in the
7258
* processed result, so that the error is processed on a subsequent
7259
* compilation of the result; otherwise, display the error.
7261
* Ignore #error directives in list-includes mode as well.
7263
if (!pp_only_mode_ && !list_includes_mode_)
7265
/* display the error */
7266
log_error(TCERR_ERROR_DIRECTIVE,
7267
(int)expbuf_.get_text_len() - startofs,
7268
expbuf_.get_text() + startofs);
7270
/* clear the directive from the result */
7275
/* preprocessing - copy expanded text to line buffer */
7276
linebuf_.copy(expbuf_.get_text(), expbuf_.get_text_len());
7280
/* ------------------------------------------------------------------------ */
7282
* Process a #undef directive
7284
void CTcTokenizer::pp_undef()
7286
char macro_name[TOK_SYM_MAX_BUFFER];
7288
/* get the macro name */
7289
if (pp_get_lone_ident(macro_name, sizeof(macro_name)))
7296
undefine(macro_name);
7298
/* don't retain the directive in the preprocessed source */
7303
* Programmatically delete a preprocesor symbol
7305
void CTcTokenizer::undefine(const char *sym, size_t len)
7307
CTcHashEntryPp *entry;
7310
* find the macro - if it wasn't defined, silently ignore it, since
7311
* it's legal to #undef a symbol that wasn't previously defined
7313
entry = find_define(sym, len);
7314
if (entry != 0 && entry->is_undefable())
7317
defines_->remove(entry);
7319
/* if it's not already in the #undef table, move it there */
7320
if (find_undef(sym, len) == 0)
7322
/* move it to the #undef table */
7323
undefs_->add(entry);
7328
* the name is already in the #undef table, so we don't need to
7329
* add it again - we can forget about this entry entirely
7336
/* ------------------------------------------------------------------------ */
7338
* Process a #line directive
7340
void CTcTokenizer::pp_line()
7342
CTcConstVal val_line;
7343
CTcConstVal val_fname;
7344
CTcTokFileDesc *desc;
7346
/* expand macros; don't allow reading additional lines */
7347
if (expand_macros_curline(FALSE, TRUE, FALSE))
7354
* we don't need the original source line any more, and we don't
7355
* want to copy it to the preprocessed output, so clear it
7359
/* set up to parse from the expansion */
7360
start_new_line(&expbuf_, 0);
7362
/* evaluate the line number expression */
7363
if (pp_parse_expr(&val_line, TRUE, FALSE, TRUE))
7366
/* if it's not an integer constant, it's an error */
7367
if (val_line.get_type() != TC_CVT_INT)
7369
log_error(TCERR_LINE_REQ_INT);
7373
/* evaluate the filename expression */
7374
if (pp_parse_expr(&val_fname, FALSE, TRUE, TRUE))
7377
/* the filename must be a string expression */
7378
if (val_fname.get_type() != TC_CVT_SSTR)
7380
log_error(TCERR_LINE_FILE_REQ_STR);
7384
/* find or create a descriptor for the filename */
7385
desc = get_file_desc(val_fname.get_val_str(),
7386
val_fname.get_val_str_len(), FALSE, 0, 0);
7388
/* set the new line number and descriptor in the current stream */
7391
str_->set_next_linenum(val_line.get_val_int());
7392
str_->set_desc(desc);
7396
* retain the pragma in the result if we're in preprocess-only mode,
7397
* otherwise remove it
7403
/* ------------------------------------------------------------------------ */
7405
* Look up a symbol in the #define symbol table
7407
CTcHashEntryPp *CTcTokenizer::find_define(const char *sym, size_t len) const
7409
/* look it up in the #define symbol table and return the result */
7410
return (CTcHashEntryPp *)defines_->find(sym, len);
7414
* Look up a symbol in the #undef table
7416
CTcHashEntryPp *CTcTokenizer::find_undef(const char *sym, size_t len) const
7418
/* look it up in the #define symbol table and return the result */
7419
return (CTcHashEntryPp *)undefs_->find(sym, len);
7423
* Add a preprocessor macro definition
7425
void CTcTokenizer::add_define(const char *sym, size_t len,
7426
const char *expansion, size_t expan_len)
7428
CTcHashEntryPp *entry;
7430
/* create an entry for the macro, with no argument list */
7431
entry = new CTcHashEntryPpDefine(sym, len, TRUE, FALSE, 0, FALSE, 0, 0,
7432
expansion, expan_len);
7434
/* add the new entry to the table */
7435
defines_->add(entry);
7439
* Add a preprocessor macro definition
7441
void CTcTokenizer::add_define(CTcHashEntryPp *entry)
7443
/* add the entry to our symbol table */
7444
defines_->add(entry);
7448
* parse an expression
7450
int CTcTokenizer::pp_parse_expr(CTcConstVal *val, int read_first,
7451
int last_on_line, int add_line_ending)
7453
CTcPrsNode *expr_tree;
7456
/* add the line ending marker if required */
7457
if (add_line_ending)
7460
* append the special end-of-preprocess-line to the macro
7463
ch = TOK_END_PP_LINE;
7464
expbuf_.append(&ch, 1);
7468
* note that we're pasing a preprocessor expression; this affects
7469
* error logging in certain cases
7474
* parse the expression in preprocessor mode, so that double-quoted
7475
* strings can be concatenated and compared
7477
G_prs->set_pp_expr_mode(TRUE);
7479
/* get the first token on the line if desired */
7483
/* parse the expression */
7484
expr_tree = G_prs->parse_expr();
7486
/* make sure we're at the end of the line if desired */
7487
if (last_on_line && next() != TOKT_EOF)
7488
log_error(TCERR_PP_EXPR_EXTRA);
7490
/* if we added the special pp-line-ending marker, remove it */
7491
if (add_line_ending)
7494
* the marker is always the last character - remove it simply by
7495
* shortening the buffer by a character
7497
expbuf_.set_text_len(expbuf_.get_text_len() - 1);
7500
/* return to normal expression mode */
7501
G_prs->set_pp_expr_mode(FALSE);
7503
/* return to normal tokenizing mode */
7504
in_pp_expr_ = FALSE;
7506
/* if we didn't get a valid expression, return failure */
7510
/* make sure we got a constant */
7511
if (!expr_tree->is_const())
7513
log_error(TCERR_PP_EXPR_NOT_CONST);
7517
/* fill in the caller's value */
7518
*val = *expr_tree->get_const_val();
7524
/* ------------------------------------------------------------------------ */
7526
* #define enumeration callback context
7528
struct def_enum_cb_t
7530
/* original callback function */
7531
void (*cb)(void *, CTcHashEntryPp *);
7533
/* original callback context */
7538
* #define enumeration callback. This is a simple impedence matcher on the
7539
* way to the real callbac; we cast the generic hash entry type to the
7540
* CTcHashEntryPp subclass for the benefit of the real callback.
7542
static void enum_defines_cb(void *ctx0, CVmHashEntry *entry)
7546
/* get our real context */
7547
ctx = (def_enum_cb_t *)ctx0;
7549
/* invoke the real callback, casting the entry reference appropriately */
7550
(*ctx->cb)(ctx->ctx, (CTcHashEntryPp *)entry);
7554
* Enumerate the entries in the #define table through a callback
7556
void CTcTokenizer::enum_defines(void (*cb)(void *, CTcHashEntryPp *),
7559
def_enum_cb_t myctx;
7561
/* set up our impedence-matcher context with the real callback info */
7565
/* enumerate through our impedence-matcher callback */
7566
defines_->enum_entries(&enum_defines_cb, &myctx);
7569
/* ------------------------------------------------------------------------ */
7571
* Get a lone identifier for a preprocessor directive. The identifier
7572
* must be the only thing left on the line; we'll generate an error if
7573
* extra characters follow on the line.
7575
* If there's no identifier on the line, or there's more information
7576
* after the identifier, logs an error and returns non-zero; returns
7579
int CTcTokenizer::pp_get_lone_ident(char *buf, size_t bufl)
7581
/* get the next token, and make sure it's a symbol */
7582
if (next_on_line() != TOKT_SYM)
7584
log_error_curtok(TCERR_BAD_DEFINE_SYM);
7588
/* return an error if it doesn't fit */
7589
if (curtok_.get_text_len() > bufl)
7593
memcpy(buf, curtok_.get_text(), curtok_.get_text_len());
7594
buf[curtok_.get_text_len()] = '\0';
7596
/* make sure there's nothing else on the line but whitespace */
7597
if (next_on_line() != TOKT_EOF)
7599
log_error(TCERR_PP_EXTRA);
7607
/* ------------------------------------------------------------------------ */
7609
* Push a new #if level
7611
void CTcTokenizer::push_if(tok_if_t state)
7613
/* if we're out of space in the stack, throw a fatal error */
7614
if (if_sp_ == TOK_MAX_IF_NESTING)
7615
throw_fatal_error(TCERR_IF_NESTING_OVERFLOW);
7618
* if we're in a nested #if in a false #if, increase the nested
7624
/* push the state, remembering where the #if was defined */
7625
if_stack_[if_sp_].desc = last_desc_;
7626
if_stack_[if_sp_].linenum = last_linenum_;
7627
if_stack_[if_sp_++].state = state;
7633
void CTcTokenizer::pop_if()
7635
/* if we're in a nested #if in a false #if, pop the nesting level */
7636
if (if_false_level_ != 0)
7639
/* pop the main if level */
7645
/* ------------------------------------------------------------------------ */
7649
void CTcTokenizer::log_error(int errnum, ...)
7653
/* display the message */
7654
va_start(marker, errnum);
7655
G_tcmain->v_log_error(G_tok->get_last_desc(), G_tok->get_last_linenum(),
7656
TC_SEV_ERROR, errnum, marker);
7661
* Log an error with the current token's text as the parameter data,
7662
* suitable for use with a "%.*s" display format entry
7664
void CTcTokenizer::log_error_curtok(int errnum)
7667
* display the message, passing "%.*s" parameter data for the
7668
* current token text: an integer giving the length of the token
7669
* text, and a pointer to the token text
7671
log_error_or_warning_curtok(TC_SEV_ERROR, errnum);
7675
* Log an error or warning for the current token
7677
void CTcTokenizer::log_error_or_warning_curtok(tc_severity_t sev, int errnum)
7679
/* log the error with our current token */
7680
log_error_or_warning_with_tok(sev, errnum, getcur());
7684
* Log an error or warning with the given token
7686
void CTcTokenizer::log_error_or_warning_with_tok(
7687
tc_severity_t sev, int errnum, const CTcToken *tok)
7689
const char *tok_txt;
7699
/* see what we have */
7700
switch(tok->gettyp())
7703
/* show the string in quotes, but limit the length */
7713
case TOKT_DSTR_START:
7729
/* set the prefix */
7730
strcpy(buf, prefix);
7733
* show the string, but limit the length, and convert control
7734
* characters to escaped representation
7736
src.set((char *)tok->get_text());
7737
rem = tok->get_text_len();
7738
for (dst.set(buf + strlen(buf)), outchars = 0 ;
7739
rem != 0 && outchars < 20 ; src.inc(&rem), ++outchars)
7741
/* if this is a control character, escape it */
7742
if (src.getch() < 32)
7774
dst.setch('0' + (src.getch() >> 12) & 0xf);
7775
dst.setch('0' + (src.getch() >> 8) & 0xf);
7776
dst.setch('0' + (src.getch() >> 4) & 0xf);
7777
dst.setch('0' + (src.getch()) & 0xf);
7783
/* put this character as-is */
7784
dst.setch(src.getch());
7788
/* if there's more string left, add "..." */
7796
/* add the suffix */
7797
strcpy(dst.getptr(), suffix);
7799
/* use this buffer as the token string to display */
7801
tok_len = strlen(tok_txt);
7805
/* show a special "<End Of File>" marker */
7806
tok_txt = "<End Of File>";
7807
tok_len = strlen(tok_txt);
7811
/* just show the current token text */
7812
tok_txt = tok->get_text();
7813
tok_len = tok->get_text_len();
7818
G_tcmain->log_error(get_last_desc(), get_last_linenum(),
7819
sev, errnum, tok_len, tok_txt);
7825
void CTcTokenizer::log_warning(int errnum, ...)
7829
/* display the message */
7830
va_start(marker, errnum);
7831
G_tcmain->v_log_error(G_tok->get_last_desc(), G_tok->get_last_linenum(),
7832
TC_SEV_WARNING, errnum, marker);
7837
* Log a warning with the current token's text as the parameter data,
7838
* suitable for use with a "%.*s" display format entry
7840
void CTcTokenizer::log_warning_curtok(int errnum)
7843
* display the warning message, passing "%.*s" parameter data for
7844
* the current token text: an integer giving the length of the token
7845
* text, and a pointer to the token text
7847
log_error_or_warning_curtok(TC_SEV_WARNING, errnum);
7851
* Log and throw an internal error
7853
void CTcTokenizer::throw_internal_error(int errnum, ...)
7857
/* display the message */
7858
va_start(marker, errnum);
7859
G_tcmain->v_log_error(G_tok->get_last_desc(), G_tok->get_last_linenum(),
7860
TC_SEV_INTERNAL, errnum, marker);
7863
/* throw the generic internal error, since we've logged this */
7864
err_throw(TCERR_INTERNAL_ERROR);
7868
* Log and throw a fatal error
7870
void CTcTokenizer::throw_fatal_error(int errnum, ...)
7874
/* display the message */
7875
va_start(marker, errnum);
7876
G_tcmain->v_log_error(G_tok->get_last_desc(), G_tok->get_last_linenum(),
7877
TC_SEV_FATAL, errnum, marker);
7880
/* throw the generic fatal error, since we've logged this */
7881
err_throw(TCERR_FATAL_ERROR);
7885
* display a string value
7887
void CTcTokenizer::msg_str(const char *str, size_t len) const
7889
/* display the string through the host interface */
7890
G_hostifc->print_msg("%.*s", (int)len, str);
7894
* display a numeric value
7896
void CTcTokenizer::msg_long(long val) const
7898
/* display the number through the host interface */
7899
G_hostifc->print_msg("%ld", val);
7902
/* ------------------------------------------------------------------------ */
7904
* Tokenizer Input Stream implementation
7908
* create a token input stream
7910
CTcTokStream::CTcTokStream(CTcTokFileDesc *desc, CTcSrcObject *src,
7911
CTcTokStream *parent, int charset_error,
7914
/* remember the underlying source file */
7917
/* remember the file descriptor */
7920
/* remember the containing stream */
7923
/* the next line to read is line number 1 */
7926
/* remember if there was a #charset error */
7927
charset_error_ = charset_error;
7929
/* we're not in a comment yet */
7930
in_comment_ = FALSE;
7932
/* remember the starting #if level */
7933
init_if_level_ = init_if_level;
7935
#if 0 // #pragma C is not currently used
7937
* start out in parent's pragma C mode, or in non-C mode if we have
7941
pragma_c_ = parent->is_pragma_c();
7948
* delete a token input stream
7950
CTcTokStream::~CTcTokStream()
7952
/* we own the underlying file, so delete it */
7957
/* ------------------------------------------------------------------------ */
7963
* Get the length of a string with each instance of the given quote
7964
* character escaped with a backslash. We'll also count the escapes we
7965
* need for each backslash.
7967
static size_t get_quoted_len(const char *str, wchar_t qu)
7973
* scan the string for instances of the quote mark; each one adds an
7974
* extra byte to the length needed, since each one requires a
7975
* backslash character to escape the quote mark
7977
for (p.set((char *)str), len = strlen(str) ; p.getch() != '\0' ; p.inc())
7982
* check to see if this character is quotable - it is quotable if
7983
* it's a backslash or it's the quote character we're escaping
7986
if (ch == qu || ch == '\\')
7989
* we need to escape this character, so add a byte for the
7990
* backslash we'll need to insert
7996
/* return the length we calculated */
8001
* Build a quoted string. Fills in dst with the source string with each
8002
* of the given quote marks and each backslash escaped with a backslash.
8003
* Use get_quoted_len() to determine how much space to allocate for the
8004
* destination buffer.
8006
static void build_quoted_str(char *dstbuf, const char *src, wchar_t qu)
8011
/* scan the source string for escapable characters */
8012
for (p.set((char *)src), dst.set(dstbuf), dst.setch(qu) ;
8013
p.getch() != '\0' ; p.inc())
8017
/* get this source character */
8020
/* add a quote if we have a backslash or the quote character */
8021
if (ch == '\\' || ch == qu)
8023
/* add a backslash to escape the character */
8027
/* add the character */
8031
/* add the close quote and trailing null */
8037
* create a file descriptor
8039
CTcTokFileDesc::CTcTokFileDesc(const char *fname, size_t fname_len,
8040
int index, CTcTokFileDesc *orig_desc,
8041
const char *orig_fname, size_t orig_fname_len)
8043
const char *rootname;
8045
/* no source pages are allocated yet */
8049
/* remember the first instance of this filename in the list */
8052
/* there's nothing else in our chain yet */
8055
/* remember my index in the master list */
8058
/* if there's a filename, save a copy of the name */
8059
fname_ = lib_copy_str(fname, fname_len);
8061
/* if there's an original filename save it as well */
8062
orig_fname_ = lib_copy_str(orig_fname, orig_fname_len);
8065
* get the root filename, since we need to build a quoted version of
8066
* that as well as of the basic filename
8068
rootname = os_get_root_name(fname_);
8071
* Allocate space for the quoted versions of the filename - make room
8072
* for the filename plus the quotes (one on each end) and a null
8075
dquoted_fname_ = (char *)t3malloc(get_quoted_len(fname_, '"') + 3);
8076
squoted_fname_ = (char *)t3malloc(get_quoted_len(fname_, '\'') + 3);
8077
dquoted_rootname_ = (char *)t3malloc(get_quoted_len(rootname, '"') + 3);
8078
squoted_rootname_ = (char *)t3malloc(get_quoted_len(rootname, '\'') + 3);
8080
/* build the quoted version of the name */
8081
build_quoted_str(dquoted_fname_, fname_, '"');
8082
build_quoted_str(squoted_fname_, fname_, '\'');
8083
build_quoted_str(dquoted_rootname_, rootname, '"');
8084
build_quoted_str(squoted_rootname_, rootname, '\'');
8088
* delete the descriptor
8090
CTcTokFileDesc::~CTcTokFileDesc()
8092
/* delete the filename and original filename strings */
8093
lib_free_str(fname_);
8094
lib_free_str(orig_fname_);
8096
/* delete the quotable filename strings */
8097
t3free(dquoted_fname_);
8098
t3free(squoted_fname_);
8099
t3free(dquoted_rootname_);
8100
t3free(squoted_rootname_);
8102
/* delete each source page we've allocated */
8103
if (src_pages_ != 0)
8107
/* go through the index array and delete each allocated page */
8108
for (i = 0 ; i < src_pages_alo_ ; ++i)
8110
/* if this page was allocated, delete it */
8111
if (src_pages_[i] != 0)
8112
t3free(src_pages_[i]);
8115
/* delete the source page index array */
8121
* Source page structure. Each page tracks a block of source lines.
8123
const size_t TCTOK_SRC_PAGE_CNT = 1024;
8124
struct CTcTokSrcPage
8127
* Array of line entries on this page. Each entry is zero if it
8128
* hasn't been assigned yet, and contains the absolute image file
8129
* address of the generated code for the source line if it has been
8132
ulong ofs[TCTOK_SRC_PAGE_CNT];
8139
void CTcTokFileDesc::add_source_line(ulong linenum, ulong line_addr)
8144
/* get the index of the page containing this source line */
8145
page_idx = linenum / TCTOK_SRC_PAGE_CNT;
8147
/* get the index of the entry within the page */
8148
idx = linenum % TCTOK_SRC_PAGE_CNT;
8151
* determine if our page index table is large enough, and expand it
8154
if (page_idx >= src_pages_alo_)
8159
/* allocate or expand the source pages array */
8160
new_alo = page_idx + 16;
8161
siz = new_alo * sizeof(src_pages_[0]);
8162
if (src_pages_ == 0)
8163
src_pages_ = (CTcTokSrcPage **)t3malloc(siz);
8165
src_pages_ = (CTcTokSrcPage **)t3realloc(src_pages_, siz);
8167
/* clear the new part */
8168
memset(src_pages_ + src_pages_alo_, 0,
8169
(new_alo - src_pages_alo_) * sizeof(src_pages_[0]));
8171
/* remember the new allocation size */
8172
src_pages_alo_ = new_alo;
8175
/* if this page isn't allocated, do so now */
8176
if (src_pages_[page_idx] == 0)
8178
/* allocate the new page */
8179
src_pages_[page_idx] = (CTcTokSrcPage *)
8180
t3malloc(sizeof(CTcTokSrcPage));
8183
memset(src_pages_[page_idx], 0, sizeof(CTcTokSrcPage));
8187
* if this source line entry has been previously set, don't change
8188
* it; otherwise, store the new setting
8190
if (src_pages_[page_idx]->ofs[idx] == 0)
8191
src_pages_[page_idx]->ofs[idx] = line_addr;
8195
* Enumerate source lines
8197
void CTcTokFileDesc::enum_source_lines(void (*cbfunc)(void *, ulong, ulong),
8203
/* loop over all of the pages */
8204
for (page_idx = 0, pg = src_pages_ ; page_idx < src_pages_alo_ ;
8211
/* if this page is not populated, skip it */
8215
/* calculate the starting line number for this page */
8216
linenum = page_idx * TCTOK_SRC_PAGE_CNT;
8218
/* loop over the entries on this page */
8219
for (i = 0, p = (*pg)->ofs ; i < TCTOK_SRC_PAGE_CNT ;
8220
++i, ++p, ++linenum)
8222
/* if this entry has been set, call the callback */
8224
(*cbfunc)(cbctx, linenum, *p);
8229
/* ------------------------------------------------------------------------ */
8231
* #define symbol table hash entry
8237
CTcHashEntryPpDefine::CTcHashEntryPpDefine(const textchar_t *str, size_t len,
8238
int copy, int has_args, int argc,
8241
const size_t *argvlen,
8242
const char *expansion,
8244
: CTcHashEntryPp(str, len, copy)
8246
/* copy the argument list if necessary */
8247
has_args_ = has_args;
8248
has_varargs_ = has_varargs;
8254
/* allocate the argument list */
8255
argv_ = (char **)t3malloc(argc * sizeof(*argv_));
8257
/* allocate the parameters hash table */
8258
params_table_ = new CVmHashTable(16, new CVmHashFuncCS(), TRUE);
8260
/* allocate the entry list */
8261
arg_entry_ = (CTcHashEntryPpArg **)
8262
t3malloc(argc * sizeof(arg_entry_[0]));
8264
/* copy the arguments */
8265
for (i = 0 ; i < argc ; ++i)
8267
CTcHashEntryPpArg *entry;
8269
/* copy the argument name */
8270
argv_[i] = lib_copy_str(argv[i], argvlen[i]);
8273
* Create the hash entries for this parameters. We'll use
8274
* this entry to look up tokens in the expansion text for
8275
* matches to the formal names when expanding the macro.
8277
* Note that we'll refer directly to our local copy of the
8278
* argument name, so we don't need to make another copy in
8281
entry = new CTcHashEntryPpArg(argv_[i], argvlen[i], FALSE, i);
8282
params_table_->add(entry);
8284
/* add it to our by-index list */
8285
arg_entry_[i] = entry;
8296
/* save the expansion */
8297
expan_ = lib_copy_str(expansion, expan_len);
8298
expan_len_ = expan_len;
8304
CTcHashEntryPpDefine::~CTcHashEntryPpDefine()
8308
/* delete the argument list */
8311
/* delete each argument string */
8312
for (i = 0 ; i < argc_ ; ++i)
8313
lib_free_str(argv_[i]);
8315
/* delete the argument vector */
8318
/* delete the argument entry list */
8321
/* delete the hash table */
8322
delete params_table_;
8325
/* delete the expansion */
8326
lib_free_str(expan_);
8330
* __LINE__ static buffer
8332
char CTcHashEntryPpLINE::buf_[20];
8335
/* ------------------------------------------------------------------------ */
8337
* Load macro definitions from a file.
8339
int CTcTokenizer::load_macros_from_file(CVmStream *fp,
8340
CTcTokLoadMacErr *err_handler)
8345
char *argv[TOK_MAX_MACRO_ARGS];
8346
size_t argvlen[TOK_MAX_MACRO_ARGS];
8352
/* we haven't allocated any argument buffers yet */
8355
/* allocate an initial expansion buffer */
8357
expan = (char *)t3malloc(expmaxlen);
8359
/* presume success */
8362
/* read the number of macros */
8363
cnt = fp->read_uint4();
8365
/* read each macro */
8366
for (i = 0 ; i < cnt ; ++i)
8368
char namebuf[TOK_SYM_MAX_LEN];
8373
CTcHashEntryPp *entry;
8377
/* read the name's length */
8378
namelen = fp->read_uint2();
8379
if (namelen > sizeof(namebuf))
8381
/* log an error through the handler */
8382
err_handler->log_error(1);
8384
/* give up - we can't read any more of the file */
8390
fp->read_bytes(namebuf, namelen);
8392
/* read and decode the flags */
8393
flags = fp->read_uint2();
8394
has_args = ((flags & 1) != 0);
8395
has_varargs = ((flags & 2) != 0);
8397
/* read the number of arguments, and read each argument */
8398
argc = fp->read_uint2();
8399
for (curarg = 0 ; curarg < argc ; ++curarg)
8401
/* read the length, and make sure it's valid */
8402
argvlen[curarg] = fp->read_uint2();
8403
if (argvlen[curarg] > TOK_SYM_MAX_LEN)
8406
err_handler->log_error(2);
8408
/* give up - we can't read any more of the file */
8414
* if we haven't allocated a buffer for this argument slot yet,
8415
* allocate it now; allocate the buffer at the maximum symbol
8416
* size, so we can reuse the same buffer for an argument of
8417
* other macros we read later
8419
while (curarg >= maxarg)
8420
argv[maxarg++] = (char *)t3malloc(TOK_SYM_MAX_LEN);
8422
/* read the argument text */
8423
fp->read_bytes(argv[curarg], argvlen[curarg]);
8426
/* read the expansion size */
8427
explen = (size_t)fp->read_uint4();
8429
/* expand the expansion buffer if necessary */
8430
if (explen > expmaxlen)
8433
* overshoot a bit, so that we won't have to reallocate again
8434
* if we find a slightly larger expansion for a future macro
8436
expmaxlen = explen + 512;
8438
/* allocate the new buffer */
8439
expan = (char *)t3realloc(expan, expmaxlen);
8442
/* read the expansion */
8443
fp->read_bytes(expan, explen);
8446
* Before we create the entry, check to see if there's an existing
8447
* entry with the same name.
8449
entry = find_define(namebuf, namelen);
8453
* We have another entry. If the entry is exactly the same,
8454
* then we can simply skip the current entry, because we simply
8455
* want to keep one copy of each macro that's defined
8456
* identically in mutiple compilation macros. If the entry is
8457
* different from the new one, delete both - a macro which
8458
* appears in two or more compilation units with different
8459
* meanings is NOT a global macro, and thus we can't include it
8460
* in the debugging records.
8462
if (entry->is_pseudo()
8463
|| entry->has_args() != has_args
8464
|| entry->has_varargs() != has_varargs
8465
|| entry->get_argc() != (int)argc
8466
|| entry->get_expan_len() != explen
8467
|| memcmp(entry->get_expansion(), expan, explen) != 0)
8470
* The existing entry is different from the new entry, so
8471
* the macro has different meanings in different
8472
* compilation units, hence we cannot keep *either*
8473
* definition in the debug records. Delete the existing
8474
* macro, and do not create the new macro. If the existing
8475
* macro is a pseudo-macro, keep the old one (since it's
8476
* provided by the compiler itself), but still discard the
8479
if (!entry->is_pseudo())
8480
undefine(namebuf, namelen);
8485
* The new entry is identical to the old one, so keep it.
8486
* We only need one copy of the entry, though, so simply
8487
* keep the old one - there's no need to create a new entry
8488
* for the object file data.
8495
* There's no existing macro with the same name, so create a
8496
* new entry based on the object file data.
8498
entry = new CTcHashEntryPpDefine(namebuf, namelen, TRUE,
8499
has_args, argc, has_varargs,
8500
(const char **)argv, argvlen,
8503
/* add it to the preprocessor's macro symbol table */
8509
/* free the argument buffers we allocated */
8510
for (curarg = 0 ; curarg < maxarg ; ++curarg)
8511
t3free(argv[curarg]);
8513
/* free the expansion buffer */
8520
/* ------------------------------------------------------------------------ */
8522
* Callback context for writing enumerated #define symbols to a file
8524
struct write_macro_ctx_t
8526
/* object file we're writing to */
8529
/* number of symbols written so far */
8534
* Enumeration callback for writing the #define symbols to a file
8536
static void write_macros_cb(void *ctx0, CTcHashEntryPp *entry)
8538
write_macro_ctx_t *ctx = (write_macro_ctx_t *)ctx0;
8541
CVmFile *fp = ctx->fp;
8544
* if this is a pseudo-macro (such as __LINE__ or __FILE__), ignore it
8545
* - these macros do not have permanent global definitions, so they're
8546
* not usable in the debugger
8548
if (entry->is_pseudo())
8552
* If the macro was ever redefined or undefined, ignore it - the
8553
* debugger can only use truly global macros, which are macros that
8554
* have stable meanings throughout the compilation units where they
8555
* appear (and which do not have different meanings in different
8556
* compilation units, but that's not our concern at the moment). The
8557
* preprocessor keeps an "undef" table of everything undefined
8558
* (explicitly, or implicitly via redefinition), so look up this macro
8559
* in the undef table, and ignore the macro if it we find it.
8561
if (G_tok->find_undef(entry->getstr(), entry->getlen()) != 0)
8564
/* count this macro */
8567
/* write the macro's name */
8568
fp->write_int2(entry->getlen());
8569
fp->write_bytes(entry->getstr(), entry->getlen());
8571
/* write the flag bits */
8573
if (entry->has_args()) flags |= 1;
8574
if (entry->has_varargs()) flags |= 2;
8575
fp->write_int2(flags);
8577
/* write the number of arguments, and write each argument */
8578
fp->write_int2(entry->get_argc());
8579
for (i = 0 ; i < entry->get_argc() ; ++i)
8581
CTcHashEntryPpArg *arg;
8583
/* get the argument */
8584
arg = entry->get_arg_entry(i);
8586
/* write the parameter name */
8587
fp->write_int2(arg->getlen());
8588
fp->write_bytes(arg->getstr(), arg->getlen());
8591
/* write the expansion */
8592
fp->write_int4(entry->get_expan_len());
8593
fp->write_bytes(entry->get_expansion(), entry->get_expan_len());
8597
* Write all #define symbols to a file, for debugging purposes. Writes
8598
* only symbols that have never been undefined or redefined, since the
8599
* debugger can only make use of global symbols (i.e., symbols with
8600
* consistent meanings through all compilation units in which they
8603
void CTcTokenizer::write_macros_to_file_for_debug(CVmFile *fp)
8607
write_macro_ctx_t ctx;
8609
/* write a placeholder for the symbol count */
8610
pos = fp->get_pos();
8613
/* write the symbols */
8616
enum_defines(&write_macros_cb, &ctx);
8618
/* go back and fix up the symbol count */
8619
endpos = fp->get_pos();
8621
fp->write_int4(ctx.cnt);
8623
/* seek back to where we left off */
8624
fp->set_pos(endpos);