1
/* Copyright (C) 1995 Bjoern Beutel. */
3
/* Description. =============================================================*/
5
/* This module contains function to compile and execute pattern matching
6
* strings (regular expressions). */
8
/* Includes. ================================================================*/
18
/* Constants. ===============================================================*/
20
#define SPECIAL_CHARS ".[]-^()*?+|\\" /* Characters with a special meaning. */
22
/* These are the instructions for matching a pattern.
24
* A pattern is a 0-terminated sequence of CHARs, defined as follows:
25
* - P[] is the coded pattern string.
26
* - PI is the current index into P[].
27
* - PIS[] is the stack of indexes into P[], used for backtracking.
28
* - T[] is the text to be examined.
29
* - TI is the current index into T[].
30
* - TIS[] is the stack of indexes into T[], used for backtracking.
31
* - SI is the stack index, used for PIS[] and TIS[].
32
* - VB[I] and VE[I] store beginning and end, resp., for variable no. I. */
36
PAT_JUMP = 1, /* PI += (byte_t) P[PI]; */
37
PAT_JUMP_NOW, /* SI++; PIS[SI] = PI+1; TIS[SI] = TI; PI += (byte_t) P[PI]; */
38
PAT_JUMP_LATER, /* SP++; PIS[SI] = PI + (byte_t) P[PI]; TIS[SI] = I; PI++; */
39
PAT_MATCH_ANY, /* if T[TI] != EOS then TI++; else fail; */
40
PAT_MATCH_CLASS, /* if (T[TI] in {P[ PI + 1 ],..,P[ PI + P[PI] ]})
41
* { TI++; PI += P[PI]+1; } else fail; */
42
PAT_MATCH_NOT_CLASS, /* if (T[TI] in {P[ PI + 1 ],..,P[ PI + P[PI] ]})
43
* fail; else { TI++; PI += P[PI]+1; } */
44
PAT_START_VAR_0, /* VB[0] = I; */
45
PAT_START_VAR_1, /* VB[1] = I; */
46
PAT_START_VAR_2, /* VB[2] = I; */
47
PAT_START_VAR_3, /* VB[3] = I; */
48
PAT_START_VAR_4, /* VB[4] = I; */
49
PAT_END_VAR_0, /* VE[0] = I; */
50
PAT_END_VAR_1, /* VE[1] = I; */
51
PAT_END_VAR_2, /* VE[2] = I; */
52
PAT_END_VAR_3, /* VE[3] = I; */
53
PAT_END_VAR_4 /* VE[4] = I; */
56
/* Types. ===================================================================*/
58
typedef struct {string_t string, pattern;} pattern_state_t;
60
/* Global variables. ========================================================*/
62
string_t pattern_var[ PATTERN_VAR_MAX ]; /* Pattern variables. */
64
/* Variables. ===============================================================*/
66
static pattern_state_t *stack; /* Stack used for backtracking. */
67
static int_t stack_size;
69
/* Forwards. ================================================================*/
71
static void compile_pattern_local( text_t *text,
73
bool_t *may_be_empty );
75
/* Functions. ===============================================================*/
78
is_pattern_char( string_t s )
80
return ((*s == '\\' && s[1] != EOS && strchr( SPECIAL_CHARS, s[1] ) != NULL)
81
|| (IS_PRINT( *s ) && strchr( SPECIAL_CHARS, *s ) == NULL));
84
/*---------------------------------------------------------------------------*/
87
pattern_char( string_t *string_p )
88
/* See if *STRING_P points to a valid char or to an escape sequence.
89
* Return the character code. Show an error if not valid. */
95
if (s[0] == '\\' && s[1] != EOS && strchr( SPECIAL_CHARS, s[1] ) != NULL)
100
else if (IS_PRINT( s[0] ) && strchr( SPECIAL_CHARS, s[0] ) == NULL)
102
c = TO_LOWER( s[0] );
105
else if (s[0] == EOS)
106
complain( "Unexpected end of pattern." );
108
complain( "Invalid char \"%c\" in pattern.", s[0] );
113
/*---------------------------------------------------------------------------*/
117
/* Return OFFS as a char. */
119
/* See if OFFS can be stored in a char. */
120
if ((byte_t) offs != offs || offs == 0)
121
complain( "Pattern too complex." );
123
return (byte_t) offs;
126
/*---------------------------------------------------------------------------*/
129
compile_char_class( text_t *text, string_t *string_p )
130
/* Compile a character class at *STRING_P.
131
* Save the resulting pattern in TEXT. */
134
int_t ca, ce, c; /* First char, last char and current char for ranges. */
142
add_char_to_text( text, PAT_MATCH_NOT_CLASS );
145
add_char_to_text( text, PAT_MATCH_CLASS );
146
patch_index = text->string_size;
148
/* Read chars and ranges. */
152
complain( "Missing \"]\" in pattern." );
153
ca = ORD( pattern_char( &s ) );
157
ce = ORD( pattern_char( &s ) );
159
complain( "Invalid range \"%c-%c\" in pattern.", ca, ce );
163
for (c = ca; c <= ce; c++)
164
add_char_to_text( text, (char_t) c );
167
insert_char_in_text( text, offset( text->string_size - patch_index ),
171
/*---------------------------------------------------------------------------*/
174
check_greedy( string_t *string_p )
176
if (**string_p == '?')
185
/*---------------------------------------------------------------------------*/
188
compile_atom( text_t *text, string_t *string_p, bool_t *may_be_empty )
189
/* Compile an atom at *STRING_P.
190
* Save the resulting pattern in TEXT.
191
* MAY_BE_EMPTY becomes TRUE iff the atom may match an empty string. */
195
bool_t may_be_empty_local, greedy;
198
*may_be_empty = TRUE;
201
may_be_empty_local = FALSE;
202
start = text->string_size;
204
compile_char_class( text, &s );
208
add_char_to_text( text, PAT_MATCH_ANY );
213
compile_pattern_local( text, &s, &may_be_empty_local );
215
complain( "Missing \")\" in pattern." );
217
else if (is_pattern_char( s ))
218
add_char_to_text( text, pattern_char( &s ) );
221
length = text->string_size - start;
223
/* There may be a postfix operator. */
227
greedy = check_greedy( &s );
229
insert_char_in_text( text, PAT_JUMP_LATER, start );
231
insert_char_in_text( text, PAT_JUMP_NOW, start );
232
insert_char_in_text( text, offset( 2 + length ), start + 1 );
237
if (may_be_empty_local)
238
complain( "Pattern argument for \"*\" may match empty string." );
239
greedy = check_greedy( &s );
240
insert_char_in_text( text, PAT_JUMP, start );
241
insert_char_in_text( text, offset( 2 + length ), start + 1 );
243
add_char_to_text( text, PAT_JUMP_NOW );
245
add_char_to_text( text, PAT_JUMP_LATER );
246
add_char_to_text( text, offset( -length ) );
251
if (may_be_empty_local)
252
complain( "Pattern argument for \"+\" may match empty string.");
253
greedy = check_greedy( &s );
255
add_char_to_text( text, PAT_JUMP_NOW );
257
add_char_to_text( text, PAT_JUMP_LATER );
258
add_char_to_text( text, offset( -length ) );
259
*may_be_empty = FALSE;
262
*may_be_empty &= may_be_empty_local;
267
/*---------------------------------------------------------------------------*/
270
compile_pattern_local (text_t *text, string_t *string_p, bool_t *may_be_empty)
271
/* Convert STRING_P to a pattern to be used as input to "match_pattern".
272
* If PATTERN_VAR_NO != -1, mark the pattern so the string matching this
273
* pattern will be stored in PATTERN_VAR[ PATTERN_VAR_NO ].
274
* The result pattern must be freed after usage. */
278
bool_t may_be_empty_local;
281
start = text->string_size;
282
compile_atom( text, &s, may_be_empty );
283
length = text->string_size - start;
288
/* Add jump from start of last alternative to start of this alternative. */
289
insert_char_in_text( text, PAT_JUMP_LATER, start++ );
290
insert_char_in_text( text, offset( length + 4 ), start++ );
292
/* Compile this alternative. */
293
start = text->string_size;
294
compile_atom( text, &s, &may_be_empty_local );
295
length = text->string_size - start;
296
*may_be_empty |= may_be_empty_local;
298
/* Add jump from end of last alternative to end of this alternative. */
299
insert_char_in_text( text, PAT_JUMP, start++ );
301
insert_char_in_text( text, offset( length + 4 ), start++ );
303
insert_char_in_text( text, offset( length + 2 ), start++ );
308
/*---------------------------------------------------------------------------*/
311
compile_pattern( string_t string, int_t pattern_var_no )
312
/* Convert STRING to a pattern to be used as input to "match_pattern".
313
* If PATTERN_VAR_NO != -1, mark the pattern so the string matching this
314
* pattern will be stored in PATTERN_VAR[ PATTERN_VAR_NO ].
315
* The result pattern must be freed after usage. */
321
if (pattern_var_no != -1)
322
add_char_to_text( text, PAT_START_VAR_0 + pattern_var_no );
323
compile_pattern_local( text, &string, &may_be_empty );
325
complain( "Illegal char \"%c\" in pattern.", *string );
326
if (pattern_var_no != -1)
327
add_char_to_text( text, PAT_END_VAR_0 + pattern_var_no );
328
return text_to_string( &text );
331
/*---------------------------------------------------------------------------*/
334
match_pattern( string_t string, string_t pattern )
335
/* Test whether STRING matches PATTERN (a string of chars compiled with
336
* "compile_pattern") and set substrings in PATTERN_VAR.
337
* The substrings can be freed after usage. */
339
struct {string_t start; string_t end;} var[ PATTERN_VAR_MAX ];
341
bool_t found_mismatch;
346
found_mismatch = FALSE;
348
/* Clear all variables. */
349
for (i = 0; i < PATTERN_VAR_MAX; i++)
350
var[i].start = var[i].end = NULL;
352
while (! found_mismatch)
360
for (i = 0; i < PATTERN_VAR_MAX; i++)
362
if (var[i].start != NULL && var[i].end != NULL)
364
free_mem( &pattern_var[i] );
365
pattern_var[i] = new_string( var[i].start, var[i].end );
371
found_mismatch = TRUE;
374
pattern += (byte_t) *pattern - 1;
377
if (sp == stack_size)
379
stack_size = renew_vector( &stack, sizeof( pattern_state_t ),
382
stack[ sp ].string = string;
383
stack[ sp ].pattern = pattern + 1;
385
pattern += (byte_t) *pattern - 1;
388
if (sp == stack_size)
390
stack_size = renew_vector( &stack, sizeof( pattern_state_t ),
393
stack[ sp ].string = string;
394
stack[ sp ].pattern = pattern + (byte_t) *pattern - 1;
399
if (*string++ == EOS)
400
found_mismatch = TRUE;
402
case PAT_MATCH_CLASS:
404
found_mismatch = TRUE;
408
pattern += (byte_t) *pattern + 1;
409
while (index < pattern && TO_LOWER( *string ) != *index)
412
if (index >= pattern)
413
found_mismatch = TRUE;
416
case PAT_MATCH_NOT_CLASS:
418
found_mismatch = TRUE;
422
pattern += (byte_t) *pattern + 1;
423
while (index < pattern && TO_LOWER( *string ) != *index)
427
found_mismatch = TRUE;
430
case PAT_START_VAR_0:
431
case PAT_START_VAR_1:
432
case PAT_START_VAR_2:
433
case PAT_START_VAR_3:
434
case PAT_START_VAR_4:
435
var[ code - PAT_START_VAR_0 ].start = string;
442
var[ code - PAT_END_VAR_0 ].end = string;
445
if (code != TO_LOWER( *string ))
446
found_mismatch = TRUE;
451
/* If this path was not successful and there is another path, try it. */
452
if (found_mismatch && sp > 0)
455
string = stack[ sp ].string;
456
pattern = stack[ sp ].pattern;
457
found_mismatch = FALSE;
463
/*---------------------------------------------------------------------------*/
466
terminate_patterns( void )
467
/* Terminate this module. */
471
for (i = 0; i < PATTERN_VAR_MAX; i++)
472
free_mem( &pattern_var[i] );
477
/* End of file. =============================================================*/