2
* $Id: tex.c 666 2008-05-15 17:47:31Z dfishburn $
4
* Copyright (c) 2008, David Fishburn
6
* This source code is released for free distribution under the terms of the
7
* GNU General Public License.
9
* This module contains functions for generating tags for TeX language files.
11
* Tex language reference:
12
* http://en.wikibooks.org/wiki/TeX#The_Structure_of_TeX
18
#include "general.h" /* must always come first */
19
#include <ctype.h> /* to define isalpha () */
36
#define isType(token,t) (boolean) ((token)->type == (t))
37
#define isKeyword(token,k) (boolean) ((token)->keyword == (k))
43
typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
46
* Used to specify type of keyword.
48
typedef enum eKeywordId {
53
KEYWORD_subsubsection,
59
/* Used to determine whether keyword is valid for the token language and
62
typedef struct sKeywordDesc {
67
typedef enum eTokenType {
96
typedef struct sTokenInfo {
101
unsigned long lineNumber;
109
static langType Lang_js;
111
static jmp_buf Exception;
117
TEXTAG_SUBSUBSECTION,
124
/*
 * Tag kinds emitted by this parser, one entry per sectioning level.
 * makeConstTag () reads the "enabled", "name" and "letter" members of these
 * entries; the last initializer of each row is presumably the description
 * text (member name not visible in this excerpt -- confirm).
 * initialize () asserts that the number of entries equals TEXTAG_COUNT, so
 * the rows are expected to stay in step with the texKind enumeration.
 */
static kindOption TexKinds [] = {
125
{ TRUE, 'c', "chapter", "chapters" },
126
{ TRUE, 's', "section", "sections" },
127
{ TRUE, 'u', "subsection", "subsections" },
128
{ TRUE, 'b', "subsubsection", "subsubsections" },
129
{ TRUE, 'p', "part", "parts" },
130
{ TRUE, 'P', "paragraph", "paragraphs" },
131
{ TRUE, 'G', "subparagraph", "subparagraphs" }
134
/*
 * Keyword text paired with its keyword id. buildTexKeywordHash () walks this
 * table and registers each entry's name and id through addKeyword ().
 */
static const keywordDesc TexKeywordTable [] = {
135
/* keyword keyword ID */
136
{ "chapter", KEYWORD_chapter },
137
{ "section", KEYWORD_section },
138
{ "subsection", KEYWORD_subsection },
139
{ "subsubsection", KEYWORD_subsubsection },
140
{ "part", KEYWORD_part },
141
{ "paragraph", KEYWORD_paragraph },
142
{ "subparagraph", KEYWORD_subparagraph }
146
* FUNCTION DEFINITIONS
149
/*
 * Tests "c" against the identifier character set spelled out below:
 * letters, digits, '$', '_' and '#'. parseIdentifier () and readToken ()
 * use it to decide where an identifier starts and stops.
 */
static boolean isIdentChar (const int c)
152
(isalpha (c) || isdigit (c) || c == '$' ||
153
c == '_' || c == '#');
156
/*
 * Registers every entry of TexKeywordTable through addKeyword (), passing the
 * entry's name, the language handle Lang_js and the entry's id cast to int.
 *
 * NOTE(review): Lang_js is the language handle this TeX parser uses for both
 * addKeyword () and analyzeToken (); the "js" suffix looks inherited from
 * another parser -- confirm before renaming.
 */
static void buildTexKeywordHash (void)
158
/* element count of the table, computed at compile time */
const size_t count = sizeof (TexKeywordTable) /
159
sizeof (TexKeywordTable [0]);
161
for (i = 0 ; i < count ; ++i)
163
const keywordDesc* const p = &TexKeywordTable [i];
164
addKeyword (p->name, Lang_js, (int) p->id);
168
/*
 * Allocates a tokenInfo with xMalloc and initialises it: type and keyword are
 * set to TOKEN_UNDEFINED / KEYWORD_NONE, "string" and "scope" each get a
 * fresh vString from vStringNew (), and the current source line number and
 * input file position are recorded.
 *
 * The two vStrings are released again by deleteToken ().
 */
static tokenInfo *newToken (void)
170
tokenInfo *const token = xMalloc (1, tokenInfo);
172
token->type = TOKEN_UNDEFINED;
173
token->keyword = KEYWORD_NONE;
174
token->string = vStringNew ();
175
token->scope = vStringNew ();
176
token->lineNumber = getSourceLineNumber ();
177
token->filePosition = getInputFilePosition ();
182
/*
 * Releases the "string" and "scope" vStrings owned by the token.
 * NOTE(review): freeing of the tokenInfo structure itself is not visible in
 * this excerpt -- confirm it happens after these two calls.
 */
static void deleteToken (tokenInfo *const token)
184
vStringDelete (token->string);
185
vStringDelete (token->scope);
190
* Tag generation functions
193
/*
 * Prepares a tag entry for "token" when the requested kind is enabled in
 * TexKinds. The entry is initialised with the token's string as its name and
 * then receives the token's line number and file position plus the kind's
 * name and letter from TexKinds [kind].
 *
 * token: supplies the tag name and its location.
 * kind:  index into TexKinds.
 */
static void makeConstTag (tokenInfo *const token, const texKind kind)
195
if (TexKinds [kind].enabled )
197
const char *const name = vStringValue (token->string);
199
initTagEntry (&e, name);
201
e.lineNumber = token->lineNumber;
202
e.filePosition = token->filePosition;
203
e.kindName = TexKinds [kind].name;
204
e.kind = TexKinds [kind].letter;
210
/*
 * Emits a tag for "token" through makeConstTag () when the kind is enabled.
 * If the token carries a non-empty scope, token->string is first rewritten in
 * place to "<scope>.<string>", so the caller's token is modified.
 *
 * token: token whose string (and optional scope) forms the tag name.
 * kind:  index into TexKinds.
 */
static void makeTexTag (tokenInfo *const token, texKind kind)
214
if (TexKinds [kind].enabled)
217
* If a scope has been added to the token, change the token
218
* string to include the scope when making the tag.
220
if ( vStringLength (token->scope) > 0 )
222
/* build "<scope>.<string>" in a temporary, then copy it back */
fulltag = vStringNew ();
223
vStringCopy (fulltag, token->scope);
224
vStringCatS (fulltag, ".");
225
vStringCatS (fulltag, vStringValue (token->string));
226
vStringTerminate (fulltag);
227
vStringCopy (token->string, fulltag);
228
vStringDelete (fulltag);
230
makeConstTag (token, kind);
238
/*
 * Collects characters of a delimited string into "string" and terminates it.
 * Two paths are visible below: one reads a further character with fileGetc ()
 * and appends it, the other is taken when the character equals "delimiter".
 * NOTE(review): the loop and the conditions guarding these paths are not
 * visible in this excerpt.
 *
 * string:    destination buffer.
 * delimiter: the quote character that closes the string.
 */
static void parseString (vString *const string, const int delimiter)
248
c = fileGetc(); /* This may be a ' or ". */
249
vStringPut (string, c);
251
else if (c == delimiter)
254
vStringPut (string, c);
256
vStringTerminate (string);
260
* Reads an identifier beginning with "firstChar" and places it into
263
/*
 * Appends identifier characters to "string" for as long as isIdentChar ()
 * accepts them, terminates the string, and pushes the first rejected
 * character back with fileUngetc () so the next read sees it again.
 *
 * string:    destination buffer.
 * firstChar: first character of the identifier (presumably the initial value
 *            of "c"; that assignment is not visible in this excerpt).
 */
static void parseIdentifier (vString *const string, const int firstChar)
266
Assert (isIdentChar (c));
269
vStringPut (string, c);
271
} while (isIdentChar (c));
273
vStringTerminate (string);
275
fileUngetc (c); /* unget non-identifier character */
278
/*
 * Reads the next token from the input into "token".
 *
 * The token is first reset (type, keyword, string). The visible cases then
 * classify the input as follows:
 *   - EOF leaves the function through longjmp (Exception, ExceptionEOF);
 *     findTexTags () holds the matching setjmp.
 *   - single punctuation characters map directly to a TOKEN_* type.
 *   - a quoted string is collected by parseString ().
 *   - an identifier is collected by parseIdentifier (); in the backslash
 *     path analyzeToken () decides between TOKEN_KEYWORD and
 *     TOKEN_IDENTIFIER.
 *   - a '%' comment is skipped up to the end of the line.
 *   - any other character that is not an identifier character yields
 *     TOKEN_UNDEFINED.
 * NOTE(review): the case labels selecting the string, backslash and '%'
 * paths are not visible in this excerpt.
 */
static void readToken (tokenInfo *const token)
282
token->type = TOKEN_UNDEFINED;
283
token->keyword = KEYWORD_NONE;
284
vStringClear (token->string);
290
token->lineNumber = getSourceLineNumber ();
291
token->filePosition = getInputFilePosition ();
293
/* repeat while the character read is a tab, space or newline */
while (c == '\t' || c == ' ' || c == '\n');
297
/* end of input: unwind to the setjmp in findTexTags () */
case EOF: longjmp (Exception, (int)ExceptionEOF); break;
298
case '(': token->type = TOKEN_OPEN_PAREN; break;
299
case ')': token->type = TOKEN_CLOSE_PAREN; break;
300
case ';': token->type = TOKEN_SEMICOLON; break;
301
case ',': token->type = TOKEN_COMMA; break;
302
case '.': token->type = TOKEN_PERIOD; break;
303
case ':': token->type = TOKEN_COLON; break;
304
case '{': token->type = TOKEN_OPEN_CURLY; break;
305
case '}': token->type = TOKEN_CLOSE_CURLY; break;
306
case '=': token->type = TOKEN_EQUAL_SIGN; break;
307
case '[': token->type = TOKEN_OPEN_SQUARE; break;
308
case ']': token->type = TOKEN_CLOSE_SQUARE; break;
309
case '?': token->type = TOKEN_QUESTION_MARK; break;
310
case '*': token->type = TOKEN_STAR; break;
314
token->type = TOKEN_STRING;
315
parseString (token->string, c);
316
/* line number and position are taken again after the string is consumed */
token->lineNumber = getSourceLineNumber ();
317
token->filePosition = getInputFilePosition ();
322
* All Tex tags start with a backslash.
323
* Check if the next character is an alpha character
324
* else it is not a potential tex tag.
331
parseIdentifier (token->string, c);
332
token->lineNumber = getSourceLineNumber ();
333
token->filePosition = getInputFilePosition ();
334
/* look the identifier up among the keywords registered for Lang_js */
token->keyword = analyzeToken (token->string, Lang_js);
335
if (isKeyword (token, KEYWORD_NONE))
336
token->type = TOKEN_IDENTIFIER;
338
token->type = TOKEN_KEYWORD;
343
fileSkipToCharacter ('\n'); /* % are single line comments */
348
if (! isIdentChar (c))
349
token->type = TOKEN_UNDEFINED;
352
parseIdentifier (token->string, c);
353
token->lineNumber = getSourceLineNumber ();
354
token->filePosition = getInputFilePosition ();
355
token->type = TOKEN_IDENTIFIER;
361
/*
 * Copies every visible member of "src" into "dest": line number, file
 * position, type and keyword by assignment, and the "string" and "scope"
 * contents by vStringCopy (), so "dest" keeps its own vString buffers.
 */
static void copyToken (tokenInfo *const dest, tokenInfo *const src)
363
dest->lineNumber = src->lineNumber;
364
dest->filePosition = src->filePosition;
365
dest->type = src->type;
366
dest->keyword = src->keyword;
367
vStringCopy (dest->string, src->string);
368
vStringCopy (dest->scope, src->scope);
375
/*
 * Builds the tag name for a sectioning keyword and emits it with
 * makeTexTag ().
 *
 * A scratch token "name" receives a copy of the keyword token; identifier
 * tokens found inside "[...]" or "{...}" are joined with single spaces into
 * "fullname", which then replaces name->string before the tag is made.
 * "useLongName" gates whether identifiers inside the curly braces are
 * collected.
 *
 * token: current token, expected to be the keyword on entry.
 * kind:  tag kind to emit.
 *
 * NOTE(review): the format list below shows "\keyword*[short desc]{...}"
 * with '*' before '[', but the TOKEN_OPEN_SQUARE check comes before the
 * TOKEN_STAR check -- confirm that the starred form with a short
 * description is handled as intended.
 */
static boolean parseTag (tokenInfo *const token, texKind kind)
377
tokenInfo *const name = newToken ();
379
boolean useLongName = TRUE;
381
fullname = vStringNew ();
382
vStringClear (fullname);
385
* Tex tags are of these formats:
386
* \keyword{any number of words}
387
* \keyword[short desc]{any number of words}
388
* \keyword*[short desc]{any number of words}
390
* When a keyword is found, loop through all words within
391
* the curly braces for the tag name.
394
if (isType (token, TOKEN_KEYWORD))
396
copyToken (name, token);
400
if (isType (token, TOKEN_OPEN_SQUARE))
405
while (! isType (token, TOKEN_CLOSE_SQUARE) )
407
if (isType (token, TOKEN_IDENTIFIER))
409
/* separate successive words with one space */
if (fullname->length > 0)
410
vStringCatS (fullname, " ");
411
vStringCatS (fullname, vStringValue (token->string));
415
vStringTerminate (fullname);
416
vStringCopy (name->string, fullname);
417
makeTexTag (name, kind);
420
if (isType (token, TOKEN_STAR))
425
if (isType (token, TOKEN_OPEN_CURLY))
428
while (! isType (token, TOKEN_CLOSE_CURLY) )
430
if (isType (token, TOKEN_IDENTIFIER) && useLongName)
432
if (fullname->length > 0)
433
vStringCatS (fullname, " ");
434
vStringCatS (fullname, vStringValue (token->string));
440
vStringTerminate (fullname);
441
vStringCopy (name->string, fullname);
442
makeTexTag (name, kind);
447
vStringDelete (fullname);
451
/*
 * Dispatches a sectioning keyword to parseTag () with the tag kind that
 * belongs to that keyword. Tokens that are not of type TOKEN_KEYWORD are not
 * dispatched.
 *
 * token: the token being examined; parseTag () continues reading from it.
 *
 * Each keyword must map to its own kind so the tag is written with the
 * matching TexKinds entry. \subsection was previously passed on as
 * TEXTAG_SUBSUBSECTION, which made subsections indistinguishable from
 * subsubsections in the generated tags.
 */
static void parseTexFile (tokenInfo *const token)
457
if (isType (token, TOKEN_KEYWORD))
459
switch (token->keyword)
461
case KEYWORD_chapter:
462
parseTag (token, TEXTAG_CHAPTER);
464
case KEYWORD_section:
465
parseTag (token, TEXTAG_SECTION);
467
case KEYWORD_subsection:
468
parseTag (token, TEXTAG_SUBSECTION);
470
case KEYWORD_subsubsection:
471
parseTag (token, TEXTAG_SUBSUBSECTION);
474
parseTag (token, TEXTAG_PART);
476
case KEYWORD_paragraph:
477
parseTag (token, TEXTAG_PARAGRAPH);
479
case KEYWORD_subparagraph:
480
parseTag (token, TEXTAG_SUBPARAGRAPH);
489
/*
 * Parser initialisation hook (installed as def->initialize by TexParser ()).
 * Checks that TexKinds has exactly TEXTAG_COUNT entries and registers the
 * keywords of TexKeywordTable.
 *
 * language: the language handle passed in for this parser.
 */
static void initialize (const langType language)
491
Assert (sizeof (TexKinds) / sizeof (TexKinds [0]) == TEXTAG_COUNT);
493
buildTexKeywordHash ();
496
/*
 * Parser entry point (installed as def->parser by TexParser ()).
 * Creates a token, records the jump target with setjmp (), and keeps calling
 * parseTexFile () while no exception has been raised. readToken () raises
 * ExceptionEOF with longjmp () at end of input, which makes setjmp () return
 * a non-zero value and ends the loop.
 *
 * NOTE(review): ISO C only permits setjmp () as a controlling expression,
 * an operand of a comparison or of '!', or a stand-alone expression
 * statement; assigning its result to a variable as done below is outside
 * those contexts -- consider testing setjmp () directly in a condition.
 */
static void findTexTags (void)
498
tokenInfo *const token = newToken ();
499
exception_t exception;
501
exception = (exception_t) (setjmp (Exception));
502
while (exception == ExceptionNone)
503
parseTexFile (token);
508
/* Create parser definition structure */
509
/*
 * Creates the parser definition for the "Tex" language: files with the
 * "tex" extension, the tag kinds in TexKinds, findTexTags () as the parsing
 * routine and initialize () as the set-up hook.
 */
extern parserDefinition* TexParser (void)
511
/* NULL-terminated list of file extensions handled by this parser */
static const char *const extensions [] = { "tex", NULL };
512
parserDefinition *const def = parserNew ("Tex");
513
def->extensions = extensions;
515
* New definitions for parsing instead of regex
517
def->kinds = TexKinds;
518
def->kindCount = KIND_COUNT (TexKinds);
519
def->parser = findTexTags;
520
def->initialize = initialize;
524
/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */