1
1
/* Extended regular expression matching and search library.
2
Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
2
Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3
3
This file is part of the GNU C Library.
4
4
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
16
16
You should have received a copy of the GNU Lesser General Public
17
17
License along with the GNU C Library; if not, write to the Free
18
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18
Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21
21
#ifndef _REGEX_INTERNAL_H
22
22
#define _REGEX_INTERNAL_H 1
24
24
#include <assert.h>
27
/* Don't include this here. On some systems it sets RE_DUP_MAX to a
28
* lower value than GNU regex allows. Instead, include it in
29
* regex.c, before include of <regex.h>, which correctly
30
* #undefs RE_DUP_MAX and sets it to the right value.
35
27
#include <stdlib.h>
36
28
#include <string.h>
30
#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
31
# include <langinfo.h>
38
33
#if defined HAVE_LOCALE_H || defined _LIBC
39
34
# include <locale.h>
45
40
# include <wctype.h>
46
41
#endif /* HAVE_WCTYPE_H || _LIBC */
48
44
/* In case that the system doesn't have isblank(). */
49
45
#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
50
46
# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
50
* This is a freaking mess. On glibc systems you have to define
51
* a magic constant to get isblank() out of <ctype.h>, since it's
52
* a C99 function. To heck wiht all that and borrow a page from
59
return (c == ' ' || c == '\t');
54
64
# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
117
133
# define __attribute(arg)
120
#if _LIBC || __GNUC__ >= 3
121
# define BE(expr, val) __builtin_expect (expr, val)
123
# define BE(expr, val) (expr)
136
/* Error messages (regcomp.c), with hackery to support very old compilers. */
137
#ifdef NO_TOKEN_PASTING
138
#define RE_ERRMSG(idx) __re_error_msgid[(int) (idx)]
139
#define ERRMSG_TYPE char * const /* pointers */
140
#define ERRMSG_OFFSET(base,offset) ((base)+1) /* array index */
141
#define ERRMSG_SEPARATOR ,
143
#define RE_ERRMSG(idx) (__re_error_msgid + __re_error_msgid_idx[(int) (idx)])
144
#define ERRMSG_TYPE char /* one string */
145
#define ERRMSG_OFFSET(base,offset) ((base)+(offset)) /* substring index */
146
#define ERRMSG_SEPARATOR "\0"
127
extern const char __re_error_msgid[] attribute_hidden;
149
extern const ERRMSG_TYPE __re_error_msgid[] attribute_hidden;
128
150
extern const size_t __re_error_msgid_idx[] attribute_hidden;
130
152
/* Number of bits in an unsinged int. */
156
178
#define NEXT_NEWLINE_CONSTRAINT 0x0020
157
179
#define PREV_BEGBUF_CONSTRAINT 0x0040
158
180
#define NEXT_ENDBUF_CONSTRAINT 0x0080
159
#define DUMMY_CONSTRAINT 0x0100
181
#define WORD_DELIM_CONSTRAINT 0x0100
182
#define NOT_WORD_DELIM_CONSTRAINT 0x0200
163
186
INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
164
187
WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
165
188
WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
189
INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
166
190
LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
167
191
LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
168
192
BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
169
193
BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
170
WORD_DELIM = DUMMY_CONSTRAINT
194
WORD_DELIM = WORD_DELIM_CONSTRAINT,
195
NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
171
196
} re_context_type;
199
224
OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
200
225
OP_ALT = EPSILON_BIT | 2,
201
226
OP_DUP_ASTERISK = EPSILON_BIT | 3,
202
OP_DUP_PLUS = EPSILON_BIT | 4,
203
OP_DUP_QUESTION = EPSILON_BIT | 5,
204
ANCHOR = EPSILON_BIT | 6,
227
ANCHOR = EPSILON_BIT | 4,
206
229
/* Tree type, these are used only by tree. */
209
233
/* Token type, these are used only by token. */
210
OP_OPEN_BRACKET = 17,
211
237
OP_CLOSE_BRACKET,
212
238
OP_CHARSET_RANGE,
296
322
unsigned int duplicated : 1;
297
323
unsigned int opt_subexp : 1;
298
324
#ifdef RE_ENABLE_I18N
325
unsigned int accept_mb : 1;
299
326
/* These 2 bits can be moved into the union if needed (e.g. if running out
300
327
of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
301
328
unsigned int mb_partial : 1;
400
425
static void re_string_destruct (re_string_t *pstr) internal_function;
401
426
# ifdef RE_ENABLE_I18N
402
427
static int re_string_elem_size_at (const re_string_t *pstr, int idx)
428
internal_function __attribute ((pure));
404
429
static inline int re_string_char_size_at (const re_string_t *pstr, int idx)
430
internal_function __attribute ((pure));
406
431
static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx)
432
internal_function __attribute ((pure));
408
433
# endif /* RE_ENABLE_I18N */
409
434
static unsigned int re_string_context_at (const re_string_t *input, int idx,
410
int eflags) internal_function;
436
internal_function __attribute ((pure));
411
437
static unsigned char re_string_peek_byte_case (const re_string_t *pstr,
412
int idx) internal_function;
439
internal_function __attribute ((pure));
413
440
static unsigned char re_string_fetch_byte_case (re_string_t *pstr)
441
internal_function __attribute ((pure));
416
443
#define re_string_peek_byte(pstr, offset) \
417
444
((pstr)->mbs[(pstr)->cur_idx + offset])
433
460
#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
434
461
/* SunOS 4.1.x realloc doesn't accept null pointers: pre-Standard C. Sigh. */
435
462
#define re_realloc(p,t,n) ((p != NULL) ? (t *) realloc (p,(n)*sizeof(t)) : (t *) calloc(n,sizeof(t)))
437
463
#define re_free(p) free (p)
439
465
struct bin_tree_t
441
467
struct bin_tree_t *parent;
442
468
struct bin_tree_t *left;
443
469
struct bin_tree_t *right;
470
struct bin_tree_t *first;
471
struct bin_tree_t *next;
445
475
/* `node_idx' is the index in dfa->nodes, if `type' == 0.
446
476
Otherwise `type' indicate the type of this node. */
447
re_token_type_t type;
452
re_node_set eclosure;
454
479
typedef struct bin_tree_t bin_tree_t;
496
521
unsigned int hash;
497
522
re_node_set nodes;
523
re_node_set non_eps_nodes;
524
re_node_set inveclosure;
498
525
re_node_set *entrance_nodes;
499
struct re_dfastate_t **trtable;
526
struct re_dfastate_t **trtable, **word_trtable;
500
527
unsigned int context : 4;
501
528
unsigned int halt : 1;
502
529
/* If this state can accept `multi byte'.
506
533
/* If this state has backreference node(s). */
507
534
unsigned int has_backref : 1;
508
535
unsigned int has_constraint : 1;
509
unsigned int word_trtable : 1;
511
537
typedef struct re_dfastate_t re_dfastate_t;
515
/* start <= node < end */
520
539
struct re_state_table_entry
645
659
int str_tree_storage_idx;
647
661
/* number of subexpressions `re_nsub' is in regex_t. */
649
662
unsigned int state_hash_mask;
650
663
int states_alloc;
652
665
int nbackref; /* The number of backreference in this dfa. */
653
667
/* Bitmap expressing which backreference is used. */
654
668
unsigned int used_bkref_map;
669
unsigned int completed_bkref_map;
655
671
unsigned int has_plural_match : 1;
656
672
/* If this dfa has "multibyte node", which is a backreference or
657
673
a node which can accept multibyte character or multi character
685
702
static reg_errcode_t re_node_set_merge (re_node_set *dest,
686
703
const re_node_set *src) internal_function;
687
704
static int re_node_set_insert (re_node_set *set, int elem) internal_function;
705
static int re_node_set_insert_last (re_node_set *set,
706
int elem) internal_function;
688
707
static int re_node_set_compare (const re_node_set *set1,
689
const re_node_set *set2) internal_function;
690
static int re_node_set_contains (const re_node_set *set, int elem) internal_function;
708
const re_node_set *set2)
709
internal_function __attribute ((pure));
710
static int re_node_set_contains (const re_node_set *set, int elem)
711
internal_function __attribute ((pure));
691
712
static void re_node_set_remove_at (re_node_set *set, int idx) internal_function;
692
713
#define re_node_set_remove(set,id) \
693
714
(re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
694
715
#define re_node_set_empty(p) ((p)->nelem = 0)
695
716
#define re_node_set_free(set) re_free ((set)->elems)
696
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode) internal_function;
717
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token) internal_function;
697
718
static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
698
719
const re_node_set *nodes) internal_function;
699
720
static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,