3
static char rcsid[] = "$Header: /p/src/local/bin/detex/RCS/detex.l,v 2.19 1997/09/10 18:12:37 trinkle Exp $";
7
* detex [-e environment-list] [-c] [-l] [-n] [-s] [-t] [-w] [file[.tex] ]
9
* This program is used to remove TeX or LaTeX constructs from a text
14
* Department of Computer Science
31
#define rindex strrchr
36
#include <sys/param.h>
38
#define PATH_MAX MAXPATHLEN
42
#ifndef NO_MALLOC_DECL
49
#include "kpathsea/c-auto.h"
50
#include "kpathsea/config.h"
51
#include "kpathsea/c-memstr.h"
52
#include "kpathsea/c-pathmx.h"
53
#include "kpathsea/c-std.h"
54
#include "kpathsea/tex-file.h"
56
#ifdef HAVE_SYS_PARAM_H
57
#include <sys/param.h>
62
/*
 * Forward declarations of the helper routines defined later in this file,
 * so that the lex actions above their definitions can call them.
 */
extern void SetEnvIgnore(char *sbEnvList); /* split sbEnvList into rgsbEnvIgnore */
63
extern int BeginEnv(char *sbEnv); /* check sbEnv against rgsbEnvIgnore; a match is copied to sbCurrentEnv */
64
extern int EndEnv(char *sbEnv); /* check sbEnv against sbCurrentEnv */
65
extern void InputFile(char *sbFile); /* open sbFile as the new yyin; warns if it cannot be opened */
66
extern void IncludeFile(char *sbFile); /* like InputFile, for \include files */
67
extern void AddInclude(char *sbFile); /* store a copy of sbFile in rgsbIncList */
68
extern int InList(char *sbFile); /* check whether sbFile is in rgsbIncList */
69
extern void SetInputPaths(); /* build rgsbInputPaths for use with TexOpen() */
70
extern int SeparateList(char *sbList,char **rgsbList ,char chSep,int csbMax); /* split sbList on chSep; word count, or ERROR if over csbMax */
71
extern FILE * TexOpen(char *sbFile); /* open sbFile for reading; NULL if it cannot be opened */
72
extern char * SafeMalloc(int cch,char *sbMessage); /* malloc() wrapper; calls ErrorExit(sbMessage) on failure */
73
extern void Warning(char *sb1,char *sb2); /* print "<prog>: warning: sb1 sb2" on stderr */
74
extern int ErrorExit(char *sb1); /* flush stdout, print "<prog>: error: sb1" on stderr */
76
/*
 * Helpers used inside the lex actions.
 *
 * LaBEGIN and CITEBEGIN end in a bare BEGIN: the action supplies the start
 * condition name and the semicolon after the macro (e.g. "LaBEGIN LaMacro;"),
 * so the state switch happens only when the macro's test is true.
 *   LaBEGIN   - switch only when fLatex is set
 *   CITEBEGIN - switch only when fLatex is set and fCite is clear
 *
 * IGNORE, SPACE and NEWLINE expand to an unterminated "if" statement; the
 * action supplies the semicolon.
 *   IGNORE  - print a space when fSpace is set and fWord is clear
 *   SPACE   - print a space when fWord is clear
 *   NEWLINE - print a newline when fWord is clear
 */
#define LaBEGIN if (fLatex) BEGIN
77
#define CITEBEGIN if (fLatex && !fCite) BEGIN
78
#define IGNORE if (fSpace && !fWord) putchar(' ')
79
#define SPACE if (!fWord) putchar(' ')
80
#define NEWLINE if (!fWord) putchar('\n')
87
/* Scanner-wide state shared by the lex actions and the helper routines. */
char *rgsbEnvIgnore[MAXENVS]; /* list of environments ignored */
88
char *rgsbIncList[MAXINCLIST]; /* list of includeonly files */
89
char *rgsbInputPaths[MAXINPUTPATHS]; /* list of input paths in order */
90
char sbCurrentEnv[CCHMAXENV]; /* current environment being ignored */
91
char *sbProgName; /* name we were invoked with */
92
FILE *rgfp[NOFILE+1]; /* stack of input/include files */
93
int cfp = 0; /* count of files in stack */
94
int cOpenBrace = 0; /* count of `{' in <LaMacro2> */
95
int csbEnvIgnore; /* count of environments ignored */
96
int csbIncList = 0; /* count of includeonly files */
97
int csbInputPaths; /* count of input paths */
98
int fLatex = 0; /* flag to indicate delatex (LaTeX mode); tested by LaBEGIN/CITEBEGIN */
99
int fWord = 0; /* flag for -w option (word only output) */
100
int fFollow = 1; /* flag to follow input/include */
101
int fCite = 0; /* flag to echo \cite and \ref args */
102
int fSpace = 0; /* flag to replace \cs with space */
103
int fForcetex = 0; /* flag to inhibit latex mode: when set, \begin{document} leaves fLatex clear */
109
%Start Define Display IncludeOnly Input Math Normal Control
110
%Start LaBegin LaDisplay LaEnd LaEnv LaFormula LaInclude
111
%Start LaMacro LaMacro2 LaVerbatim
114
<Normal>"%".* /* ignore comments */ ;
116
<Normal>"\\begin"{S}"{"{S}"document"{S}"}" {fLatex = !fForcetex; IGNORE;}
118
<Normal>"\\begin" /* environment start */ {LaBEGIN LaBegin; IGNORE;}
120
<LaBegin>{S}"{"{S}"verbatim"{S}"}" { if (BeginEnv("verbatim"))
127
<LaVerbatim>"\\end"{S}"{"{S}"verbatim"{S}"}" /* verbatim mode */ {BEGIN Normal; IGNORE;}
130
<LaBegin>{W} { if (BeginEnv(yytext))
136
<LaBegin>"\n" NEWLINE;
139
<LaEnv>"\\end" /* absorb some environments */ {LaBEGIN LaEnd; IGNORE;}
143
<LaEnd>{W} /* end environment */ { if (EndEnv(yytext))
147
<LaEnd>"}" {BEGIN LaEnv; IGNORE;}
151
<Normal>"\\bibitem" /* ignore args */ {LaBEGIN LaMacro2; IGNORE;}
152
<Normal>"\\bibliography" /* of these \cs */ {LaBEGIN LaMacro; IGNORE;}
153
<Normal>"\\bibstyle" {LaBEGIN LaMacro; IGNORE;}
154
<Normal>"\\cite" /* no state change when fCite is set */ {CITEBEGIN LaMacro2; IGNORE;}
155
<Normal>"\\documentstyle" {LaBEGIN LaMacro; IGNORE;}
156
<Normal>"\\end" {LaBEGIN LaMacro; IGNORE;}
157
<Normal>"\\footnote" /* no state change: only a space is printed */ {SPACE;}
158
<Normal>"\\index" /* always prints a space, unlike IGNORE */ {LaBEGIN LaMacro2; SPACE;}
159
<Normal>"\\label" {LaBEGIN LaMacro; IGNORE;}
160
<Normal>"\\pageref" /* no state change when fCite is set */ {CITEBEGIN LaMacro; IGNORE;}
161
<Normal>"\\pagestyle" {LaBEGIN LaMacro; IGNORE;}
162
<Normal>"\\ref" /* no state change when fCite is set */ {CITEBEGIN LaMacro; IGNORE;}
163
<Normal>"\\setcounter" {LaBEGIN LaMacro; IGNORE;}
164
<Normal>"\\verb" /* ignore \verb<ch>...<ch> */ { if (fLatex) {
167
while ((c = input()) != verbchar)
173
<LaMacro>"}" BEGIN Normal;
174
<LaMacro>"\n" NEWLINE;
176
<LaMacro2>"{" { cOpenBrace++; }
177
<LaMacro2>"}" { cOpenBrace--;
181
<LaMacro2>"\n" NEWLINE;
184
<Normal>"\\def" /* ignore def begin */ {BEGIN Define; IGNORE;}
185
<Define>"{" BEGIN Normal;
186
<Define>"\n" NEWLINE;
189
<Normal>"\\(" /* formula mode */ {LaBEGIN LaFormula; IGNORE;}
190
<LaFormula>"\\)" BEGIN Normal;
191
<LaFormula>"\n" NEWLINE;
194
<Normal>"\\[" /* display mode */ {LaBEGIN LaDisplay; IGNORE;}
195
<LaDisplay>"\\]" BEGIN Normal;
196
<LaDisplay>"\n" NEWLINE;
199
<Normal>"$$" /* display mode */ {BEGIN Display; IGNORE;}
200
<Display>"$$" BEGIN Normal;
201
<Display>"\n" NEWLINE;
204
<Normal>"$" /* math mode */ {BEGIN Math; IGNORE;}
205
<Math>"$" BEGIN Normal;
210
<Normal>"\\include" /* process files */ {LaBEGIN LaInclude; IGNORE;}
211
<LaInclude>[^{ \t\n}]+ { IncludeFile(yytext);
214
<LaInclude>"\n" NEWLINE;
217
<Normal>"\\includeonly" {BEGIN IncludeOnly; IGNORE;}
218
<IncludeOnly>[^{ \t,\n}]+ AddInclude(yytext);
219
<IncludeOnly>"}" { if (csbIncList == 0)
220
rgsbIncList[csbIncList++] = '\0';
223
<IncludeOnly>"\n" NEWLINE;
226
<Normal>"\\input" {BEGIN Input; IGNORE;}
227
<Input>[^{ \t\n}]+ { InputFile(yytext);
233
<Normal>\\(aa|AA|ae|AE|oe|OE|ss)[ \t]*[ \t\n}] /* handle ligatures */ {(void)printf("%.2s", yytext+1);}
234
<Normal>\\[OoijLl][ \t]*[ \t\n}] {(void)printf("%.1s", yytext+1);}
236
<Normal>\\[a-zA-Z@]+ /* ignore other \cs */ {BEGIN Control; IGNORE;}
239
<Control>\\[a-zA-Z@]+ IGNORE;
240
<Control>[a-zA-Z@0-9]*[-'=`][^ \t\n{]* IGNORE;
241
<Control>"\n" {BEGIN Normal; NEWLINE;}
242
<Control>[ \t]*[{]* {BEGIN Normal; IGNORE;}
243
<Control>. {yyless(0);BEGIN Normal;}
245
<Normal>[{}\\|] /* special characters */ IGNORE;
246
<Normal>[!?]"`" IGNORE;
249
<Normal>{W}[']*{W} { if (fWord)
250
(void)printf("%s\n", yytext);
254
<Normal>[0-9]+ if (!fWord) ECHO;
255
<Normal>(.|\n) if (!fWord) ECHO;
259
** Set sbProgName to the base of arg 0.
260
** Set the input paths.
262
** -c echo LaTeX \cite, \ref, and \pageref values
263
** -e <env-list> list of LaTeX environments to ignore
264
** -l force latex mode
265
** -n do not follow \input and \include
266
** -s replace control sequences with space
268
** -w word only output
269
** Set the list of LaTeX environments to ignore.
270
** Process each input file.
271
** If no input files are specified on the command line, process stdin.
278
char *pch, *sbEnvList = DEFAULTENV, sbBadOpt[2];
280
int fSawFile = 0, iArgs = 1;
282
/* get base name and decide what we are doing, detex or delatex */
284
char drive[_MAX_DRIVE], dir[_MAX_DIR];
285
char fname[_MAX_FNAME], ext[_MAX_EXT];
287
_wildcard(&cArgs, &rgsbArgs);
288
_response(&cArgs, &rgsbArgs);
290
_splitpath (rgsbArgs[0], drive, dir, fname, ext);
291
sbProgName = strlwr(fname);
294
kpse_set_program_name (rgsbArgs[0], NULL);
296
if ((sbProgName = rindex(rgsbArgs[0], '/')) != NULL)
299
sbProgName = rgsbArgs[0];
301
if (strcmp("delatex",sbProgName) == 0)
305
/* set rgsbInputPaths for use with TexOpen() */
309
/* process command line options */
310
while (iArgs < cArgs && *(pch = rgsbArgs[iArgs]) == CHOPT) {
317
sbEnvList = rgsbArgs[++iArgs];
340
Warning("unknown option ignored -", sbBadOpt);
345
SetEnvIgnore(sbEnvList);
347
/* process input files */
348
for (; iArgs < cArgs; iArgs++) {
350
if ((yyin = TexOpen(rgsbArgs[iArgs])) == NULL) {
351
Warning("can't open file", rgsbArgs[iArgs]);
358
/* if there were no input files, assume stdin */
362
if (isatty(fileno(stdin)))
369
if (YYSTATE != Normal)
370
ErrorExit("input contains an unterminated mode or environment");
380
** yywrap -- handles EOF for lex. Check to see if the stack of open files
381
** has anything on it. If it does, set yyin to the top value. If not
382
** return the termination signal for lex.
398
** yyless -- return characters to the input stream. Some systems don't have
405
int i = strlen(yytext);
407
while (i > n) unput(yytext[--i]);
408
yytext[yyleng = n] = '\0';
413
** SetEnvIgnore -- sets rgsbEnvIgnore to the values indicated by the
418
SetEnvIgnore(sbEnvList)
423
sb = SafeMalloc(strlen(sbEnvList) + 1, "malloc for SetEnvIgnore failed");
424
(void) strcpy(sb, sbEnvList);
425
csbEnvIgnore = SeparateList(sb, rgsbEnvIgnore, CHENVSEP, MAXENVS);
426
if (csbEnvIgnore == ERROR)
427
ErrorExit("The environtment list contains too many environments");
431
** BeginEnv -- checks to see if sbEnv is in the list rgsbEnvIgnore. If it
432
** is, sbCurrentEnv is set to sbEnv.
440
if (!fLatex) return(0);
441
for (i = 0; i < csbEnvIgnore; i++)
442
if (strcmp(sbEnv, rgsbEnvIgnore[i]) == 0) {
443
(void)strcpy(sbCurrentEnv, sbEnv);
450
** EndEnv -- checks to see if sbEnv is the current environment being ignored.
456
if (!fLatex) return(0);
457
if (strcmp(sbEnv, sbCurrentEnv) == 0)
463
** InputFile -- push the current yyin and open sbFile. If the open fails,
464
** the sbFile is ignored.
476
if ((yyin = TexOpen(sbFile)) == NULL) {
477
Warning("can't open \\input file", sbFile);
483
** IncludeFile -- if sbFile is not in the rgsbIncList, push current yyin
484
** and open sbFile. If the open fails, the sbFile is ignored.
498
if ((yyin = TexOpen(sbFile)) == NULL) {
499
Warning("can't open \\include file", sbFile);
505
** AddInclude -- adds sbFile to the rgsbIncList and increments csbIncList.
506
** If the include list is too long, sbFile is ignored.
515
if (csbIncList >= MAXINCLIST)
516
Warning("\\includeonly list is too long, ignoring", sbFile);
517
rgsbIncList[csbIncList] = SafeMalloc(strlen(sbFile) + 1, "malloc for AddInclude failed");
518
(void)strcpy(rgsbIncList[csbIncList++], sbFile);
522
** InList -- checks to see if sbFile is in the rgsbIncList. If there is
523
** no list, all files are assumed to be "in the list".
529
char *pch, sbBase[PATH_MAX];
532
if (csbIncList == 0) /* no list */
534
(void)strcpy(sbBase, sbFile);
535
if ((pch = rindex(sbBase, '.')) != NULL)
538
while ((i < csbIncList) && rgsbIncList[i])
539
if (strcmp(rgsbIncList[i++], sbBase) == 0)
545
** SetInputPaths -- sets rgsbInputPaths to the values indicated by the
546
** TEXINPUTS environment variable if set or else DEFAULTINPUTS. If
547
** the user's TEXINPUTS has a leading ':' prepend the DEFAULTINPUTS
548
** to the path, if there is a trailing ':' append the DEFAULTINPUTS.
549
** This is consistent with the most recent TeX. However, this
550
** routine does not honor the '//' construct (expand subdirs).
560
int cchDefaults, cchPaths;
562
cchDefaults = strlen(DEFAULTINPUTS);
564
if ((sb = getenv("TEXINPUT")) == NULL)
566
if ((sb = getenv("TEXINPUTS")) == NULL)
568
cchPaths = strlen(sb);
569
if (sb[0] == CHPATHSEP)
570
cchPaths += cchDefaults;
571
if (sb[strlen(sb) - 1] == CHPATHSEP)
572
cchPaths += cchDefaults;
573
sbPaths = SafeMalloc(cchPaths + 1, "malloc for SetInputPaths failed");
575
if (sb[0] == CHPATHSEP)
576
(void)strcat(sbPaths, DEFAULTINPUTS);
577
(void)strcat(sbPaths, sb);
578
if (sb[strlen(sb) - 1] == CHPATHSEP)
579
(void)strcat(sbPaths, DEFAULTINPUTS);
581
csbInputPaths = SeparateList(sbPaths, rgsbInputPaths, CHPATHSEP, MAXINPUTPATHS);
582
if (csbInputPaths == ERROR)
584
ErrorExit("TEXINPUT(S) environment variable has too many paths");
586
ErrorExit("TEXINPUTS environment variable has too many paths");
591
** SeparateList -- takes a chSep separated list sbList, replaces the
592
** chSep's with NULLs and sets rgsbList[i] to the beginning of
593
** the ith word in sbList. The number of words is returned. An
594
** ERROR is returned if there are more than csbMax words.
597
SeparateList(sbList, rgsbList, chSep, csbMax)
598
char *sbList, *rgsbList[], chSep;
603
while (sbList && *sbList && csbList < csbMax) {
604
rgsbList[csbList++] = sbList;
605
if (sbList = index(sbList, chSep))
608
return(sbList && *sbList ? ERROR : csbList);
612
** TexOpen -- tries to open sbFile in each of the rgsbInputPaths in turn.
613
** For each input path the following order is used:
614
** file.tex - must be as named, if not there go to the next path
615
** file.ext - random extension, try it
616
** file - base name, add .tex and try it
617
** file - try it as is
618
** Notice that if file exists in the first path and file.tex exists in
619
** one of the other paths, file in the first path is what is opened.
620
** If the sbFile begins with a '/', no paths are searched.
630
static char sbFullPath[PATH_MAX];
633
for (iPath = 0; iPath < csbInputPaths; iPath++) {
635
if (*sbFile == '/' || *sbFile == '\\' || strchr(sbFile, ':')) /* absolute path */
637
if (*sbFile == '/') /* absolute path */
640
(void)sprintf(sbFullPath, "%s", sbFile);
641
iPath = csbInputPaths; /* only check once */
643
(void)sprintf(sbFullPath, "%s/%s", rgsbInputPaths[iPath], sbFile);
646
while (pch = strchr(pch, '\\'))
650
/* If sbFile ends in .tex then it must be there */
651
if ((pch = rindex(sbFullPath, '.')) != NULL
652
&& (strcmp(pch, ".tex") == 0))
653
if ((fp = fopen(sbFullPath, "r")) != NULL)
658
/* if .<ext> then try to open it. the '.' represents */
659
/* the beginning of an extension if it is not the first */
660
/* character and it does not follow a '.' or a '/' */
661
if (pch != NULL && pch > &(sbFullPath[0])
662
&& *(pch - 1) != '.' && *(pch - 1) != '/'
663
&& (fp = fopen(sbFullPath, "r")) != NULL)
666
/* just base name, add .tex to the name */
667
sbNew = SafeMalloc(strlen(sbFullPath) + 5, "malloc for TexOpen failed");
668
(void)strcpy(sbNew, sbFullPath);
669
(void)strcat(sbNew, ".tex");
670
if ((fp = fopen(sbNew, "r")) != NULL)
673
/* try sbFile regardless */
674
if ((fp = fopen(sbFullPath, "r")) != NULL)
677
return((FILE *)NULL);
679
sbNew = kpse_find_file (sbFile, kpse_tex_format, false);
684
return fopen (sbNew, "r");
689
** SafeMalloc -- wrapper around malloc() to check for failure.
693
SafeMalloc(cch, sbMessage)
699
if ((sb = (char *)malloc((unsigned)cch)) == NULL)
700
ErrorExit(sbMessage);
705
** Warning -- print a warning message preceded by the program name.
712
(void)fprintf(stderr, "%s: warning: %s %s\n", sbProgName, sb1, sb2);
716
** ErrorExit -- print an error message preceded by the program name.
717
** Stdout is flushed and detex exits.
723
(void)fflush(stdout);
724
(void)fprintf(stderr, "%s: error: %s\n", sbProgName, sb1);
730
** OS2UsageExit -- print OS/2 usage message and exit.
735
(void)printf("\n%s [ -clnstw ] [ -e environment-list ] [ filename[.tex] ... ]\n",
737
puts(" -c echo LaTeX \\cite, \\ref, and \\pageref values\n \
738
-e <env-list> list of LaTeX environments to ignore\n \
739
-l force latex mode\n \
740
-n do not follow \\input and \\include\n \
741
-s replace control sequences with space\n \
742
-t force tex mode\n \
743
-w word only output");