2
* $Id: lregex.c,v 1.7 2006/05/30 04:37:12 darren Exp $
4
* Copyright (c) 2000-2003, Darren Hiebert
6
* This source code is released for free distribution under the terms of the
7
* GNU General Public License.
9
* This module contains functions for applying regular expression matching.
11
* The code for utilizing the GNU regex package with regard to processing the
12
* regex option and checking for regex matches was adapted from routines in
19
#include "general.h" /* must always come first */
26
# ifdef HAVE_SYS_TYPES_H
27
# include <sys/types.h> /* declare off_t (not known to regex.h on FreeBSD) */
44
/* Back-references \0 through \9 */
45
#define BACK_REFERENCE_COUNT 10
47
#if defined (HAVE_REGCOMP) && !defined (REGCOMP_BROKEN)
51
#define REGEX_NAME "Regex"
56
#if defined (POSIX_REGEX)
65
enum pType { PTRN_TAG, PTRN_CALLBACK };
76
regexCallback function;
84
regexPattern *patterns;
92
static boolean regexBroken = FALSE;
94
/* Array of pattern sets, indexed by language */
95
static patternSet* Sets = NULL;
96
static int SetUpper = -1; /* upper language index in list */
99
* FUNCTION DEFINITIONS
102
/*
 * Releases everything stored in the pattern set of `language`. For each
 * entry the compiled regex is released (regfree() in POSIX_REGEX builds,
 * then eFree() of the regex_t itself); tag entries additionally release
 * their name pattern. Finally the pattern array is freed and reset to NULL.
 */
static void clearPatternSet (const langType language)
104
/* NOTE(review): every other SetUpper guard visible in this file (matchRegex,
 * disableRegexKinds, enableRegexKind, printRegexKinds) uses `<=`, and
 * SetUpper is described as the upper language index in the list. With `<`
 * the set of the highest-indexed language is never cleared; this looks like
 * an off-by-one. Confirm and change to `language <= SetUpper`. */
if (language < SetUpper)
106
patternSet* const set = Sets + language;
108
for (i = 0 ; i < set->count ; ++i)
110
#if defined (POSIX_REGEX)
111
regfree (set->patterns [i].pattern);
113
eFree (set->patterns [i].pattern);
114
set->patterns [i].pattern = NULL;
116
/* NOTE(review): only the name pattern is released for tag entries in the
 * lines visible here; the kind name and description pointers stored by
 * addCompiledTagPattern are not visibly freed. Confirm whether they leak. */
if (set->patterns [i].type == PTRN_TAG)
118
eFree (set->patterns [i].u.tag.name_pattern);
119
set->patterns [i].u.tag.name_pattern = NULL;
122
if (set->patterns != NULL)
123
eFree (set->patterns);
124
set->patterns = NULL;
130
* Regex pseudo-parser
133
/*
 * Builds a tag entry for `name` and fills in its kind letter and kind name
 * from `kind`. Both arguments must be non-NULL and `name` must be non-empty
 * (asserted). What happens to the entry afterwards is on lines not shown
 * here.
 */
static void makeRegexTag (
134
const vString* const name, const struct sKind* const kind)
139
Assert (name != NULL && vStringLength (name) > 0);
140
Assert (kind != NULL);
141
initTagEntry (&e, vStringValue (name));
142
e.kind = kind->letter;
143
e.kindName = kind->name;
149
* Regex pattern definition
152
/* Take a string like "/blah/" and turn it into "blah", making sure
153
* that the first and last characters are the same, and handling
154
* quoted separator characters. Actually, stops on the occurrence of
155
* an unquoted separator. Also turns "\t" into a Tab character.
156
* Returns pointer to terminating separator. Works in place. Null
157
* terminates name string.
159
static char* scanSeparators (char* name)
163
/* Tracks whether the previous character was an unconsumed backslash. */
boolean quoted = FALSE;
165
/* Scanning starts one past the first character; `sep` (set on a line not
 * shown here) is presumably that first character, i.e. the separator. */
for (++name ; *name != '\0' ; ++name)
171
else if (*name == 't')
175
/* Something else is quoted, so preserve the quote. */
181
else if (*name == '\\')
183
else if (*name == sep)
194
/* Parse `regexp', in form "/regex/name/[k,Kind/]flags" (where the separator
195
* character is whatever the first character of `regexp' is), by breaking it
196
* up into null terminated strings, removing the separators, and expanding
197
* '\t' into tabs. When complete, `regexp' points to the line matching
198
* pattern, a pointer to the name matching pattern is written to `name', a
199
* pointer to the kinds is written to `kinds' (possibly NULL), and a pointer
200
* to the trailing flags is written to `flags'. If the pattern is not in the
201
* correct format, a false value is returned.
203
static boolean parseTagRegex (
204
char* const regexp, char** const name,
205
char** const kinds, char** const flags)
207
boolean result = FALSE;
208
/* The first character of `regexp` is the separator for the whole option.
 * NOTE(review): it is widened from unsigned char, while the values it is
 * compared with below (**name, *third, *fourth) are plain char. Where char
 * is signed, a separator byte >= 0x80 would never compare equal; confirm
 * whether such separators need to be supported. */
const int separator = (unsigned char) regexp [0];
210
/* Per scanSeparators' description, *name now points at the separator that
 * terminates the line-matching pattern. */
*name = scanSeparators (regexp);
212
error (WARNING, "empty regexp");
213
else if (**name != separator)
214
error (WARNING, "%s: incomplete regexp", regexp);
217
char* const third = scanSeparators (*name);
219
error (WARNING, "%s: regexp missing name pattern", regexp);
220
/* NOTE(review): this is a plain `if`, not `else if`. If the warning above
 * is issued for an empty name pattern and control still reaches this test,
 * strlen (*name) - 1 wraps around and the index is out of bounds. Confirm
 * against the condition on the line not shown here. */
if ((*name) [strlen (*name) - 1] == '\\')
221
error (WARNING, "error in name pattern: \"%s\"", *name);
222
if (*third != separator)
223
error (WARNING, "%s: regexp missing final separator", regexp);
226
/* A further separator after the third field means an optional kinds field
 * precedes the flags (see the "/regex/name/[k,Kind/]flags" form). */
char* const fourth = scanSeparators (third);
227
if (*fourth == separator)
230
scanSeparators (fourth);
244
/*
 * Appends a tag-producing pattern to the pattern set of `language`.
 *
 * If `language` lies beyond SetUpper, the global Sets array is first grown
 * to `language + 1` entries and the new entries are initialized. The
 * pointers `pattern`, `name`, `kindName` and `description` are stored as
 * given (not copied), so the pattern set refers to the caller's allocations
 * from here on. The new kind starts out enabled.
 */
static void addCompiledTagPattern (
245
const langType language, regex_t* const pattern,
246
char* const name, const char kind, char* const kindName,
247
char *const description)
251
if (language > SetUpper)
254
Sets = xRealloc (Sets, (language + 1), patternSet);
255
for (i = SetUpper + 1 ; i <= language ; ++i)
257
Sets [i].patterns = NULL;
262
set = Sets + language;
263
/* Grow the per-language array by one slot and fill in the new last entry. */
set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
264
ptrn = &set->patterns [set->count];
267
ptrn->pattern = pattern;
268
ptrn->type = PTRN_TAG;
269
ptrn->u.tag.name_pattern = name;
270
ptrn->u.tag.kind.enabled = TRUE;
271
ptrn->u.tag.kind.letter = kind;
272
ptrn->u.tag.kind.name = kindName;
273
ptrn->u.tag.kind.description = description;
276
/*
 * Appends a callback pattern to the pattern set of `language`: on a match,
 * `callback` is invoked instead of a tag being generated (see
 * matchCallbackPattern). The Sets array is grown in the same way as in
 * addCompiledTagPattern, and `pattern` is stored as given, not copied.
 */
static void addCompiledCallbackPattern (
277
const langType language, regex_t* const pattern,
278
const regexCallback callback)
282
if (language > SetUpper)
285
Sets = xRealloc (Sets, (language + 1), patternSet);
286
for (i = SetUpper + 1 ; i <= language ; ++i)
288
Sets [i].patterns = NULL;
293
set = Sets + language;
294
/* Grow the per-language array by one slot and fill in the new last entry. */
set->patterns = xRealloc (set->patterns, (set->count + 1), regexPattern);
295
ptrn = &set->patterns [set->count];
298
ptrn->pattern = pattern;
299
ptrn->type = PTRN_CALLBACK;
300
ptrn->u.callback.function = callback;
303
#if defined (POSIX_REGEX)
305
/*
 * Compiles `regexp` with regcomp() into a heap-allocated regex_t.
 *
 * Compilation starts from REG_EXTENDED | REG_NEWLINE. Each character of
 * `flags` (which may be NULL) adjusts that: 'b' clears REG_EXTENDED, 'e'
 * sets it, 'i' sets REG_ICASE; any other character only produces a warning.
 * A compilation failure is reported as a warning with the text obtained
 * from regerror(). The test of `errcode` and the value returned in that
 * case are on lines not shown here.
 */
static regex_t* compileRegex (const char* const regexp, const char* const flags)
307
int cflags = REG_EXTENDED | REG_NEWLINE;
308
regex_t *result = NULL;
311
for (i = 0 ; flags != NULL && flags [i] != '\0' ; ++i)
313
switch ((int) flags [i])
315
case 'b': cflags &= ~REG_EXTENDED; break;
316
case 'e': cflags |= REG_EXTENDED; break;
317
case 'i': cflags |= REG_ICASE; break;
318
/* NOTE(review): the warning prints *flags, i.e. always the first flag
 * character, rather than the offending flags [i]. Likely a bug; confirm
 * and pass flags [i] instead. */
default: error (WARNING, "unknown regex flag: '%c'", *flags); break;
321
result = xMalloc (1, regex_t);
322
errcode = regcomp (result, regexp, cflags);
326
/* 256 is presumably the size of errmsg, declared on a line not shown;
 * verify the two stay in sync. */
regerror (errcode, result, errmsg, 256);
327
error (WARNING, "%s", errmsg);
337
/*
 * Splits the optional kinds field of a regex option into its parts and
 * returns them through the output parameters. Every string handed back is
 * freshly allocated (eStrdup/eMalloc). With a NULL or empty `kinds` the kind
 * name defaults to "regex".
 */
static void parseKinds (
338
const char* const kinds, char* const kind, char** const kindName,
344
if (kinds == NULL || kinds [0] == '\0')
347
*kindName = eStrdup ("regex");
349
/* This test is always true here: the branch above already handled NULL and
 * empty input. */
else if (kinds [0] != '\0')
351
const char* k = kinds;
352
/* A single character followed by ',' or the end of the string. */
if (k [0] != ',' && (k [1] == ',' || k [1] == '\0'))
359
*kindName = eStrdup ("regex");
362
const char *const comma = strchr (k, ',');
364
*kindName = eStrdup (k);
367
/* Copy the text before the comma; strncpy does not terminate the copy, so
 * the terminator is written explicitly below. */
*kindName = (char*) eMalloc (comma - k + 1);
368
strncpy (*kindName, k, comma - k);
369
(*kindName) [comma - k] = '\0';
372
*description = eStrdup (k);
378
/*
 * Prints one line describing the kind of tag pattern `pat [i]` to stdout:
 * optional indentation, the kind letter ('?' when the letter is NUL), the
 * description (or the kind name when there is no description), and the
 * marker " [off]" when the kind is disabled. `pat [i]` must be a tag
 * pattern (asserted).
 */
static void printRegexKind (const regexPattern *pat, unsigned int i, boolean indent)
380
const struct sKind *const kind = &pat [i].u.tag.kind;
381
const char *const indentation = indent ? " " : "";
382
Assert (pat [i].type == PTRN_TAG);
383
printf ("%s%c %s %s\n", indentation,
384
kind->letter != '\0' ? kind->letter : '?',
385
kind->description != NULL ? kind->description : kind->name,
386
kind->enabled ? "" : " [off]");
389
/*
 * Applies the parameter of a regex option to `language`:
 *   - NULL or empty: clears the patterns of the language;
 *   - not starting with '@': the parameter is itself a pattern definition;
 *   - "@file": each line read from `file` is added as a pattern definition.
 * A file that does not exist or cannot be opened only produces a warning.
 */
static void processLanguageRegex (const langType language,
390
const char* const parameter)
392
if (parameter == NULL || parameter [0] == '\0')
393
clearPatternSet (language);
394
else if (parameter [0] != '@')
395
addLanguageRegex (language, parameter);
396
else if (! doesFileExist (parameter + 1))
397
error (WARNING, "cannot open regex file");
400
const char* regexfile = parameter + 1;
401
FILE* const fp = fopen (regexfile, "r");
403
/* NOTE(review): the file name is passed in the format-string position of
 * error() (compare the other calls in this file, which pass a literal
 * format). A '%' in the path would be interpreted as a conversion; prefer
 * error (WARNING | PERROR, "%s", regexfile). */
error (WARNING | PERROR, regexfile);
406
vString* const regex = vStringNew ();
407
while (readLine (regex, fp))
408
addLanguageRegex (language, vStringValue (regex));
410
vStringDelete (regex);
416
* Regex pattern matching
419
#if defined (POSIX_REGEX)
421
/*
 * Expands the name pattern `out` into a new vString. A backslash followed
 * by a digit d is replaced by the text of sub-match d taken from `in` (using
 * the offsets in `pmatch`); other characters are copied, except that newline
 * and carriage return are dropped. The result is allocated with vStringNew()
 * and terminated before the end of the function.
 */
static vString* substitute (
422
const char* const in, const char* out,
423
const int nmatch, const regmatch_t* const pmatch)
425
vString* result = vStringNew ();
427
for (p = out ; *p != '\0' ; p++)
429
/* NOTE(review): (1) *p is a plain char; isdigit() requires a value
 * representable as unsigned char (or EOF), so the cast should go through
 * unsigned char. (2) `++p` consumes the backslash even when no digit
 * follows; if `out` ends in a backslash, p then rests on the terminator and
 * the loop's p++ appears to step past it. Confirm that callers reject such
 * input. */
if (*p == '\\' && isdigit ((int) *++p))
431
const int dig = *p - '0';
432
/* `0 < dig` means "\0" is never expanded, and a reference to a group that
 * is out of range or did not participate in the match expands to nothing. */
if (0 < dig && dig < nmatch && pmatch [dig].rm_so != -1)
434
const int diglen = pmatch [dig].rm_eo - pmatch [dig].rm_so;
435
vStringNCatS (result, in + pmatch [dig].rm_so, diglen);
438
else if (*p != '\n' && *p != '\r')
439
vStringPut (result, *p);
441
vStringTerminate (result);
445
/*
 * Handles a successful match of a tag pattern: expands the name pattern
 * against `line`, strips leading and trailing whitespace from the result,
 * and creates a tag when the name is non-empty. An empty expansion is
 * reported as a warning carrying the input file name and line number. The
 * temporary name string is always deleted.
 */
static void matchTagPattern (const vString* const line,
446
const regexPattern* const patbuf,
447
const regmatch_t* const pmatch)
449
vString *const name = substitute (vStringValue (line),
450
patbuf->u.tag.name_pattern, BACK_REFERENCE_COUNT, pmatch);
451
vStringStripLeading (name);
452
vStringStripTrailing (name);
453
if (vStringLength (name) > 0)
454
makeRegexTag (name, &patbuf->u.tag.kind);
456
error (WARNING, "%s:%ld: null expansion of name pattern \"%s\"",
457
getInputFileName (), getInputLineNumber (),
458
patbuf->u.tag.name_pattern);
459
vStringDelete (name);
462
/*
 * Handles a successful match of a callback pattern: converts the leading
 * sub-matches of `pmatch` into (start, length) pairs and passes them,
 * together with the line text, to the registered callback.
 */
static void matchCallbackPattern (
463
const vString* const line, const regexPattern* const patbuf,
464
const regmatch_t* const pmatch)
466
regexMatch matches [BACK_REFERENCE_COUNT];
467
unsigned int count = 0;
469
/* Copying stops at the first sub-match that did not participate
 * (rm_so == -1), so later groups that did match are not passed on. */
for (i = 0 ; i < BACK_REFERENCE_COUNT && pmatch [i].rm_so != -1 ; ++i)
471
matches [i].start = pmatch [i].rm_so;
472
matches [i].length = pmatch [i].rm_eo - pmatch [i].rm_so;
475
/* `count` is presumably updated inside the loop on a line not shown here;
 * verify it reflects the number of entries filled in above. */
patbuf->u.callback.function (vStringValue (line), matches, count);
478
/*
 * Runs one compiled pattern against `line` with regexec(), requesting up to
 * BACK_REFERENCE_COUNT sub-matches, and dispatches on the pattern type to
 * matchTagPattern or matchCallbackPattern. Any other pattern type trips an
 * assertion. The test of `match` and the assignment of `result` are on
 * lines not shown here.
 */
static boolean matchRegexPattern (const vString* const line,
479
const regexPattern* const patbuf)
481
boolean result = FALSE;
482
regmatch_t pmatch [BACK_REFERENCE_COUNT];
483
const int match = regexec (patbuf->pattern, vStringValue (line),
484
BACK_REFERENCE_COUNT, pmatch, 0);
488
if (patbuf->type == PTRN_TAG)
489
matchTagPattern (line, patbuf, pmatch);
490
else if (patbuf->type == PTRN_CALLBACK)
491
matchCallbackPattern (line, patbuf, pmatch);
494
/* A string literal is never NULL, so this assertion always fails; the
 * literal only serves as the message. */
Assert ("invalid pattern type" == NULL);
503
/* PUBLIC INTERFACE */
505
/* Match against all patterns for specified language. Returns true if at least
506
* one pattern matched.
508
extern boolean matchRegex (const vString* const line, const langType language)
510
boolean result = FALSE;
511
/* Nothing to do for LANG_IGNORE, for a language beyond the allocated sets,
 * or for a language without patterns. */
if (language != LANG_IGNORE && language <= SetUpper &&
512
Sets [language].count > 0)
514
const patternSet* const set = Sets + language;
516
/* Every pattern of the set is tried in order against the line. */
for (i = 0 ; i < set->count ; ++i)
517
if (matchRegexPattern (line, set->patterns + i))
523
/*
 * Consumes the current input file line by line until fileReadLine() returns
 * NULL. No matching is done in the lines visible here; presumably the
 * patterns are applied as a side effect of reading each line (confirm in
 * the reader).
 */
extern void findRegexTags (void)
525
/* merely read all lines of the file */
526
while (fileReadLine () != NULL)
530
#endif /* HAVE_REGEX */
532
/*
 * Registers a tag pattern for `language`: compiles `regex` using `flags`,
 * parses `kinds` into letter, name and description, and stores the compiled
 * pattern together with a private copy of `name`. `regex` and `name` must
 * be non-NULL (asserted). The parameters carry __unused__ presumably
 * because the body is compiled only when regex support is available.
 */
extern void addTagRegex (
533
const langType language __unused__,
534
const char* const regex __unused__,
535
const char* const name __unused__,
536
const char* const kinds __unused__,
537
const char* const flags __unused__)
540
Assert (regex != NULL);
541
Assert (name != NULL);
544
regex_t* const cp = compileRegex (regex, flags);
550
parseKinds (kinds, &kind, &kindName, &description);
551
/* The name pattern is duplicated because the pattern set keeps (and
 * clearPatternSet later frees) the pointer it is given. */
addCompiledTagPattern (language, cp, eStrdup (name),
552
kind, kindName, description);
558
/*
 * Registers a callback pattern for `language`: compiles `regex` using
 * `flags` and stores it with `callback`, which is invoked on each match.
 * `regex` must be non-NULL (asserted).
 */
extern void addCallbackRegex (
559
const langType language __unused__,
560
const char* const regex __unused__,
561
const char* const flags __unused__,
562
const regexCallback callback __unused__)
565
Assert (regex != NULL);
568
regex_t* const cp = compileRegex (regex, flags);
570
addCompiledCallbackPattern (language, cp, callback);
575
/*
 * Adds a pattern given in option form ("/regex/name/[kinds/]flags") to
 * `language`. The definition is split in place by parseTagRegex, so it is
 * duplicated first; on a successful parse the pieces are handed to
 * addTagRegex.
 */
extern void addLanguageRegex (
576
const langType language __unused__, const char* const regex __unused__)
581
/* Work on a private copy: `regex` is const and parseTagRegex writes
 * terminators into the buffer it is given. */
char *const regex_pat = eStrdup (regex);
582
char *name, *kinds, *flags;
583
if (parseTagRegex (regex_pat, &name, &kinds, &flags))
585
addTagRegex (language, regex_pat, name, kinds, flags);
593
* Regex option parsing
596
/*
 * Handles an option of the form "regex-<LANG>": looks up the language named
 * after the dash and applies `parameter` to it via processLanguageRegex. An
 * unknown language, or a build without regex support, only produces a
 * warning.
 */
extern boolean processRegexOption (const char *const option,
597
const char *const parameter __unused__)
599
boolean handled = FALSE;
600
const char* const dash = strchr (option, '-');
601
/* NOTE(review): strncmp is limited to the text before the dash, so any
 * prefix of "regex" matches ("r-x", "reg-x"), and an option starting with
 * '-' matches too because the length is 0. Confirm whether the caller
 * guarantees a full "regex" prefix; otherwise also check that
 * dash - option equals strlen ("regex"). */
if (dash != NULL && strncmp (option, "regex", dash - option) == 0)
605
language = getNamedLanguage (dash + 1);
606
if (language == LANG_IGNORE)
607
error (WARNING, "unknown language in --%s option", option);
609
processLanguageRegex (language, parameter);
611
error (WARNING, "regex support not available; required for --%s option",
619
/*
 * Marks the kind of every tag pattern of `language` as disabled. Callback
 * patterns are left untouched. Does nothing when the language has no
 * patterns or lies beyond SetUpper.
 */
extern void disableRegexKinds (const langType language __unused__)
622
/* NOTE(review): unlike matchRegex, there is no LANG_IGNORE check here; if
 * LANG_IGNORE is negative, Sets [language] would be indexed out of bounds.
 * Confirm callers never pass it. */
if (language <= SetUpper && Sets [language].count > 0)
624
patternSet* const set = Sets + language;
626
for (i = 0 ; i < set->count ; ++i)
627
if (set->patterns [i].type == PTRN_TAG)
628
set->patterns [i].u.tag.kind.enabled = FALSE;
633
/*
 * Sets the enabled state of every tag pattern of `language` whose kind
 * letter equals `kind` to `mode`. `result` starts as FALSE; it is presumably
 * set when at least one pattern matched (on a line not shown here).
 */
extern boolean enableRegexKind (
634
const langType language __unused__,
635
const int kind __unused__, const boolean mode __unused__)
637
boolean result = FALSE;
639
if (language <= SetUpper && Sets [language].count > 0)
641
patternSet* const set = Sets + language;
643
/* All patterns sharing the letter are switched, not just the first one. */
for (i = 0 ; i < set->count ; ++i)
644
if (set->patterns [i].type == PTRN_TAG &&
645
set->patterns [i].u.tag.kind.letter == kind)
647
set->patterns [i].u.tag.kind.enabled = mode;
655
/*
 * Prints one line per tag pattern of `language` via printRegexKind,
 * optionally indented. Callback patterns are skipped. Patterns that share a
 * kind are each printed; no de-duplication is done in the lines visible
 * here.
 */
extern void printRegexKinds (const langType language __unused__, boolean indent)
658
if (language <= SetUpper && Sets [language].count > 0)
660
patternSet* const set = Sets + language;
662
for (i = 0 ; i < set->count ; ++i)
663
if (set->patterns [i].type == PTRN_TAG)
664
printRegexKind (set->patterns, i, indent);
669
/*
 * Walks every language index from 0 up to and including SetUpper; the
 * per-language cleanup is on lines not shown here.
 * NOTE(review): if that cleanup is clearPatternSet, its `language < SetUpper`
 * guard would skip the last index visited by this loop. Confirm.
 */
extern void freeRegexResources (void)
673
for (i = 0 ; i <= SetUpper ; ++i)
682
/* Check for broken regcomp() on Cygwin */
683
/*
 * Run-time sanity check of the regex library, compiled in only when both
 * HAVE_REGEX and CHECK_REGCOMP are defined: compiles a trivial pattern and
 * warns if regcomp() reports failure. The handling after the warning
 * (presumably setting regexBroken) is on lines not shown here.
 */
extern void checkRegex (void)
685
#if defined (HAVE_REGEX) && defined (CHECK_REGCOMP)
688
/* With cflags 0 the slashes are ordinary characters of a basic regular
 * expression, not delimiters. */
if (regcomp (&patbuf, "/hello/", 0) != 0)
690
error (WARNING, "Disabling broken regex");
696
/* vi:set tabstop=4 shiftwidth=4: */