~ubuntu-branches/ubuntu/karmic/exuberant-ctags/karmic-201010031914

Viewing changes to tex.c

Committer: Bazaar Package Importer
Author(s): Colin Watson
Date: 2009-07-14 15:05:23 UTC
mfrom: (1.1.3 upstream)
Revision ID: james.westby@ubuntu.com-20090714150523-rytn1psh622nb709

Tags: 1:5.8-1

http://bugs.debian.org/529215

* New upstream release. Debian bugs fixed:
- Add support for ASP classes (closes: #529215).
* Compress files using gzip -n (cf. debhelper 6.0.6).
* Add a watch file.

files added:
ant.c

configure.ac

debian/watch

dosbatch.c

flex.c

gnu_regex

gnu_regex/README.txt

gnu_regex/regcomp.c

gnu_regex/regex.c

gnu_regex/regex.h

gnu_regex/regex_internal.c

gnu_regex/regex_internal.h

gnu_regex/regexec.c

matlab.c

mk_mingw.mak

ocaml.c

tex.c

vhdl.c

files removed:
acconfig.h

configure.in

mk_ming.mak

files modified:
INSTALL.oth

MAINTAINERS

Makefile.in

NEWS

README

asp.c

config.h.in

configure

ctags.1

ctags.h

ctags.html

debian/changelog

debian/rules

eiffel.c

fortran.c

jscript.c

keyword.c

keyword.h

lisp.c

maintainer.mak

make.c

mk_bc5.mak

mk_mvc.mak

parsers.h

php.c

python.c

read.c

read.h

source.mak

sql.c

vstring.h

Show diffs side-by-side

added added

removed removed

tex.c

/*
*   $Id: tex.c 666 2008-05-15 17:47:31Z dfishburn $
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains functions for generating tags for TeX language files.
*
*   Tex language reference:
*       http://en.wikibooks.org/wiki/TeX#The_Structure_of_TeX
*/

/*
*   INCLUDE FILES
*/

#include "general.h" /* must always come first */

#include <ctype.h> /* to define isalpha () */

#include <setjmp.h>

#ifdef DEBUG

#include <stdio.h>

#endif

#include "debug.h"

#include "entry.h"

#include "keyword.h"

#include "parse.h"

#include "read.h"

#include "routines.h"

#include "vstring.h"

/*
*   MACROS
*/

/*
*   Token predicates.  Both take a pointer to a token and yield a boolean.
*   The whole replacement list is parenthesized so that the cast cannot
*   combine with a neighbouring operator (e.g. "sizeof") at the point of use.
*/

/* Nonzero when the token's "type" member equals "t". */
#define isType(token,t)		((boolean) ((token)->type == (t)))

/* Nonzero when the token's "keyword" member equals "k". */
#define isKeyword(token,k)	((boolean) ((token)->keyword == (k)))

/*
*   DATA DECLARATIONS
*/

/*
*   Values carried through the setjmp/longjmp pair built on "Exception".
*   ExceptionNone is what a direct setjmp () call yields; ExceptionEOF is
*   passed to longjmp () when the tokenizer reaches end of input.
*/
typedef enum eException {
	ExceptionNone,		/* normal flow, no jump has happened */
	ExceptionEOF		/* input exhausted while reading a token */
} exception_t;

/*
*   Used to specify type of keyword.
*
*   One ID per sectioning command recognized by this parser.  KEYWORD_NONE
*   marks a token that is not one of those commands.
*/
typedef enum eKeywordId {
	KEYWORD_NONE = -1,
	KEYWORD_chapter,
	KEYWORD_section,
	KEYWORD_subsection,
	KEYWORD_subsubsection,
	KEYWORD_part,
	KEYWORD_paragraph,
	KEYWORD_subparagraph
} keywordId;

/* Used to determine whether keyword is valid for the token language and

* what its ID is.

typedef struct sKeywordDesc {

const char *name;

keywordId id;

} keywordDesc;

/*
*   Classification assigned to a token by readToken ().  The enumerators
*   keep their declaration order, so their numeric values are unchanged.
*   Several of them (CHARACTER, OPERATOR, EXCLAMATION, FORWARD_SLASH, the
*   MXML/SGML ones, LESS_THAN, GREATER_THAN) are not assigned anywhere in
*   the visible part of this file.
*/
typedef enum eTokenType {
	/* default state; also the result for characters with no own type */
	TOKEN_UNDEFINED,
	TOKEN_CHARACTER,

	/* punctuation */
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,

	/* backslash command found in the keyword table */
	TOKEN_KEYWORD,

	TOKEN_OPEN_PAREN,
	TOKEN_OPERATOR,

	/* any other word, and quoted text */
	TOKEN_IDENTIFIER,
	TOKEN_STRING,

	TOKEN_PERIOD,

	/* delimiters of the mandatory argument */
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,

	TOKEN_EQUAL_SIGN,
	TOKEN_EXCLAMATION,
	TOKEN_FORWARD_SLASH,

	/* delimiters of the optional argument */
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,

	TOKEN_OPEN_MXML,
	TOKEN_CLOSE_MXML,
	TOKEN_CLOSE_SGML,
	TOKEN_LESS_THAN,
	TOKEN_GREATER_THAN,
	TOKEN_QUESTION_MARK,

	/* '*' of a starred command */
	TOKEN_STAR
} tokenType;

/*
*   One lexical token together with the position at which it was read.
*   Instances are created by newToken () and released by deleteToken ().
*/
typedef struct sTokenInfo {
	tokenType		type;			/* classification set by readToken () */
	keywordId		keyword;		/* KEYWORD_NONE unless type is TOKEN_KEYWORD */
	vString *		string;			/* token text; becomes the tag name */
	vString *		scope;			/* optional prefix joined with "." by makeTexTag () */
	unsigned long 	lineNumber;		/* source line recorded when the token was read */
	fpos_t 			filePosition;	/* input file position recorded at the same time */
} tokenInfo;

104

105

106

/*
*   DATA DEFINITIONS
*/

107

108

109

/*
*   Language ID under which this parser registers and looks up its keywords;
*   assigned from the argument of initialize ().
*   NOTE(review): despite the "_js" suffix this file only handles TeX -- the
*   name looks like a leftover and could be renamed file-wide.
*/
static langType Lang_js;

110

111

/*
*   Jump buffer armed by setjmp () in findTexTags (); readToken () longjmp ()s
*   to it with ExceptionEOF when the input is exhausted.
*/
static jmp_buf Exception;

112

113

/*
*   Index of each tag kind inside TexKinds []; the order here must match the
*   order of the initializers there.  TEXTAG_COUNT is the number of kinds and
*   is compared against the table size in initialize ().
*/
typedef enum {
	TEXTAG_CHAPTER,
	TEXTAG_SECTION,
	TEXTAG_SUBSECTION,
	TEXTAG_SUBSUBSECTION,
	TEXTAG_PART,
	TEXTAG_PARAGRAPH,
	TEXTAG_SUBPARAGRAPH,
	TEXTAG_COUNT
} texKind;

123

124

static kindOption TexKinds [] = {

125

{ TRUE, 'c', "chapter", "chapters" },

126

{ TRUE, 's', "section", "sections" },

127

{ TRUE, 'u', "subsection", "subsections" },

128

{ TRUE, 'b', "subsubsection", "subsubsections" },

129

{ TRUE, 'p', "part", "parts" },

130

{ TRUE, 'P', "paragraph", "paragraphs" },

131

{ TRUE, 'G', "subparagraph", "subparagraphs" }

132

};

133

134

static const keywordDesc TexKeywordTable [] = {

135

/* keyword keyword ID */

136

{ "chapter", KEYWORD_chapter },

137

{ "section", KEYWORD_section },

138

{ "subsection", KEYWORD_subsection },

139

{ "subsubsection", KEYWORD_subsubsection },

140

{ "part", KEYWORD_part },

141

{ "paragraph", KEYWORD_paragraph },

142

{ "subparagraph", KEYWORD_subparagraph }

143

};

144

145

146

/*
*   FUNCTION DEFINITIONS
*/

147

148

149

static boolean isIdentChar (const int c)

150

{

151

return (boolean)

152

(isalpha (c) || isdigit (c) || c == '$' ||

153

c == '_' || c == '#');

154

}

155

156

static void buildTexKeywordHash (void)

157

{

158

const size_t count = sizeof (TexKeywordTable) /

159

sizeof (TexKeywordTable [0]);

160

size_t i;

161

for (i = 0 ; i < count ; ++i)

162

{

163

const keywordDesc* const p = &TexKeywordTable [i];

164

addKeyword (p->name, Lang_js, (int) p->id);

165

}

166

}

167

168

static tokenInfo *newToken (void)

169

{

170

tokenInfo *const token = xMalloc (1, tokenInfo);

171

172

token->type = TOKEN_UNDEFINED;

173

token->keyword = KEYWORD_NONE;

174

token->string = vStringNew ();

175

token->scope = vStringNew ();

176

token->lineNumber = getSourceLineNumber ();

177

token->filePosition = getInputFilePosition ();

178

179

return token;

180

}

181

182

/*
*   Releases a token obtained from newToken (): both of its string buffers
*   first, then the token structure itself.  "token" must not be NULL.
*/
static void deleteToken (tokenInfo *const token)
{
	vStringDelete (token->string);
	vStringDelete (token->scope);
	eFree (token);
}

188

189

190

/*
*   Tag generation functions
*/

191

192

193

/*
*   Emits a tag entry named after "token->string" for the given kind, using
*   the line number and file position stored in the token.  Does nothing when
*   the kind is disabled in TexKinds.
*/
static void makeConstTag (tokenInfo *const token, const texKind kind)
{
	tagEntryInfo e;

	if (! TexKinds [kind].enabled)
		return;

	initTagEntry (&e, vStringValue (token->string));

	e.lineNumber   = token->lineNumber;
	e.filePosition = token->filePosition;
	e.kindName     = TexKinds [kind].name;
	e.kind         = TexKinds [kind].letter;

	makeTagEntry (&e);
}

209

210

/*
*   Emits a tag for "token" unless the kind is disabled.  When the token has
*   a non-empty scope, "token->string" is first rewritten in place to
*   "<scope>.<string>", so the caller's token is modified.
*/
static void makeTexTag (tokenInfo *const token, texKind kind)
{
	if (! TexKinds [kind].enabled)
		return;

	/*
	 * If a scope has been added to the token, change the token
	 * string to include the scope when making the tag.
	 */
	if (vStringLength (token->scope) > 0)
	{
		vString *const fulltag = vStringNew ();

		vStringCopy (fulltag, token->scope);
		vStringCatS (fulltag, ".");
		vStringCatS (fulltag, vStringValue (token->string));
		vStringTerminate (fulltag);
		vStringCopy (token->string, fulltag);
		vStringDelete (fulltag);
	}
	makeConstTag (token, kind);
}

233

234

235

/*
*   Parsing functions
*/

236

237

238

/*
*   Appends to "string" the characters read up to, but not including, the
*   next unescaped "delimiter"; the delimiter itself is consumed.  A
*   backslash is dropped and the character after it is stored literally, so
*   an escaped delimiter does not end the string.  Reading also stops at end
*   of input; EOF is never stored, not even right after a backslash.
*/
static void parseString (vString *const string, const int delimiter)
{
	for (;;)
	{
		int c = fileGetc ();

		if (c == '\\')
			c = fileGetc ();	/* This maybe a ' or ". */
		else if (c == delimiter)
			break;

		if (c == EOF)
			break;				/* also covers a backslash at end of input */

		vStringPut (string, c);
	}
	vStringTerminate (string);
}

258

259

260

* Read a C identifier beginning with "firstChar" and places it into

261

* "name".

262

263

static void parseIdentifier (vString *const string, const int firstChar)

264

{

265

int c = firstChar;

266

Assert (isIdentChar (c));

267

268

{

269

vStringPut (string, c);

270

c = fileGetc ();

271

} while (isIdentChar (c));

272

273

vStringTerminate (string);

274

if (!isspace (c))

275

fileUngetc (c); /* unget non-identifier character */

276

}

277

278

/*
*   Reads the next token from the input into "token".
*
*   The token is reset (undefined type, no keyword, empty string), then tabs,
*   spaces and newlines are skipped and '%' comments are discarded up to the
*   end of their line.  Single punctuation characters map to their own token
*   type; quoted text becomes TOKEN_STRING; a backslash followed by a letter
*   is read as a word and becomes TOKEN_KEYWORD or TOKEN_IDENTIFIER depending
*   on the keyword lookup; any other identifier character starts a
*   TOKEN_IDENTIFIER.  Everything else leaves the type TOKEN_UNDEFINED.
*
*   Does not return at end of input: it longjmp ()s to "Exception" with
*   ExceptionEOF instead.
*/
static void readToken (tokenInfo *const token)
{
	int c;

	token->type			= TOKEN_UNDEFINED;
	token->keyword		= KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = fileGetc ();
		token->lineNumber   = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();
	}
	while (c == '\t'  ||  c == ' ' ||  c == '\n');

	switch (c)
	{
		case EOF: longjmp (Exception, (int)ExceptionEOF);	break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;
		case '.': token->type = TOKEN_PERIOD;				break;
		case ':': token->type = TOKEN_COLON;				break;
		case '{': token->type = TOKEN_OPEN_CURLY;			break;
		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
		case '=': token->type = TOKEN_EQUAL_SIGN;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;
		case '?': token->type = TOKEN_QUESTION_MARK;		break;
		case '*': token->type = TOKEN_STAR;					break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '\\':
			/*
			 * All Tex tags start with a backslash.
			 * Check if the next character is an alpha character
			 * else it is not a potential tex tag.
			 */
			c = fileGetc ();
			if (! isalpha (c))
				fileUngetc (c);
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;

		case '%':
			fileSkipToCharacter ('\n'); /* % are single line comments */
			goto getNextChar;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->type = TOKEN_IDENTIFIER;
			}
			break;
	}
}

360

361

/*
*   Copies every field of "src" into "dest".  The two string buffers are
*   copied by content, so both tokens keep their own storage.
*/
static void copyToken (tokenInfo *const dest, tokenInfo *const src)
{
	dest->type			= src->type;
	dest->keyword		= src->keyword;
	dest->lineNumber	= src->lineNumber;
	dest->filePosition	= src->filePosition;
	vStringCopy (dest->string, src->string);
	vStringCopy (dest->scope, src->scope);
}

370

371

372

/*
*   Scanning functions
*/

373

374

375

/*
*   Makes a tag of kind "kind" for the sectioning command held in "token"
*   and advances "token" past the command's arguments.  Always returns TRUE.
*
*   Tex tags are of these formats:
*     \keyword{any number of words}
*     \keyword[short desc]{any number of words}
*     \keyword*[short desc]{any number of words}
*
*   The tag name is made of the identifier tokens, joined by single spaces,
*   found in the optional [short desc] when there is one, otherwise in the
*   {...} argument.  The position recorded for the tag is that of the
*   keyword token.
*
*   NOTE(review): readToken () longjmp ()s out at end of input, so a command
*   left unterminated at EOF skips the cleanup at the bottom of this function
*   and leaks "name" and "fullname".
*/
static boolean parseTag (tokenInfo *const token, texKind kind)
{
	tokenInfo *const name = newToken ();
	vString *const fullname = vStringNew ();
	boolean useLongName = TRUE;

	if (isType (token, TOKEN_KEYWORD))
	{
		copyToken (name, token);
		readToken (token);
	}

	/*
	 * In the starred form the '*' directly follows the keyword, i.e. it
	 * comes before an optional [short desc].  Skip it here so that the
	 * \keyword*[short desc]{...} format listed above is recognized.
	 */
	if (isType (token, TOKEN_STAR))
		readToken (token);

	if (isType (token, TOKEN_OPEN_SQUARE))
	{
		useLongName = FALSE;

		readToken (token);
		while (! isType (token, TOKEN_CLOSE_SQUARE))
		{
			if (isType (token, TOKEN_IDENTIFIER))
			{
				if (fullname->length > 0)
					vStringCatS (fullname, " ");
				vStringCatS (fullname, vStringValue (token->string));
			}
			readToken (token);
		}
		vStringTerminate (fullname);
		vStringCopy (name->string, fullname);
		makeTexTag (name, kind);
	}

	/* Kept for input that places the '*' after the optional argument. */
	if (isType (token, TOKEN_STAR))
		readToken (token);

	if (isType (token, TOKEN_OPEN_CURLY))
	{
		/*
		 * When a keyword is found, loop through all words within
		 * the curly braces for the tag name.
		 */
		readToken (token);
		while (! isType (token, TOKEN_CLOSE_CURLY))
		{
			if (isType (token, TOKEN_IDENTIFIER) && useLongName)
			{
				if (fullname->length > 0)
					vStringCatS (fullname, " ");
				vStringCatS (fullname, vStringValue (token->string));
			}
			readToken (token);
		}
		if (useLongName)
		{
			vStringTerminate (fullname);
			vStringCopy (name->string, fullname);
			makeTexTag (name, kind);
		}
	}

	deleteToken (name);
	vStringDelete (fullname);
	return TRUE;
}

450

451

/*
*   Main scanning loop: reads tokens one after another and hands every
*   sectioning keyword to parseTag () with the matching tag kind.
*
*   The loop has no exit of its own; it ends when readToken () longjmp ()s
*   to "Exception" at end of input.
*/
static void parseTexFile (tokenInfo *const token)
{
	do
	{
		readToken (token);

		if (isType (token, TOKEN_KEYWORD))
		{
			switch (token->keyword)
			{
				case KEYWORD_chapter:
					parseTag (token, TEXTAG_CHAPTER);
					break;
				case KEYWORD_section:
					parseTag (token, TEXTAG_SECTION);
					break;
				case KEYWORD_subsection:
					/* was TEXTAG_SUBSUBSECTION, which mislabelled the tag */
					parseTag (token, TEXTAG_SUBSECTION);
					break;
				case KEYWORD_subsubsection:
					parseTag (token, TEXTAG_SUBSUBSECTION);
					break;
				case KEYWORD_part:
					parseTag (token, TEXTAG_PART);
					break;
				case KEYWORD_paragraph:
					parseTag (token, TEXTAG_PARAGRAPH);
					break;
				case KEYWORD_subparagraph:
					parseTag (token, TEXTAG_SUBPARAGRAPH);
					break;
				default:
					break;
			}
		}
	} while (TRUE);
}

488

489

static void initialize (const langType language)

490

{

491

Assert (sizeof (TexKinds) / sizeof (TexKinds [0]) == TEXTAG_COUNT);

492

Lang_js = language;

493

buildTexKeywordHash ();

494

}

495

496

static void findTexTags (void)

497

{

498

tokenInfo *const token = newToken ();

499

exception_t exception;

500

501

exception = (exception_t) (setjmp (Exception));

502

while (exception == ExceptionNone)

503

parseTexFile (token);

504

505

deleteToken (token);

506

}

507

508

/* Create parser definition stucture */

509

extern parserDefinition* TexParser (void)

510

{

511

static const char *const extensions [] = { "tex", NULL };

512

parserDefinition *const def = parserNew ("Tex");

513

def->extensions = extensions;

514

515

* New definitions for parsing instead of regex

516

517

def->kinds = TexKinds;

518

def->kindCount = KIND_COUNT (TexKinds);

519

def->parser = findTexTags;

520

def->initialize = initialize;

521

522

return def;

523

}

524

/* vi:set tabstop=4 shiftwidth=4 noexpandtab: */

Older »