~vcs-imports/gawk/master

731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
1
/* Extended regular expression matching and search library.
   Copyright (C) 2002-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
19
20
#ifndef _REGEX_INTERNAL_H
21
#define _REGEX_INTERNAL_H 1
22
23
#include <assert.h>
24
#include <ctype.h>
25
#include <stdio.h>
26
#include <stdlib.h>
27
#include <string.h>
28
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
29
#include <langinfo.h>
30
#include <locale.h>
31
#include <wchar.h>
32
#include <wctype.h>
33
#include <stdbool.h>
34
#include <stdint.h>
35
731.18.33 by Arnold D. Robbins
Sync and update regex from GNULIB.
36
#include <intprops.h>
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
37
38
/* Locking primitives used for the 'lock' member of re_dfa_t.  Each
   branch below supplies the same five macros: lock_define, lock_init,
   lock_fini, lock_lock and lock_unlock.  The final branch is the
   lock-free fallback, where lock_init always yields 0.  */
#ifdef _LIBC
# include <libc-lock.h>
# define lock_define(name) __libc_lock_define (, name)
# define lock_init(lock) (__libc_lock_init (lock), 0)
# define lock_fini(lock) ((void) 0)
# define lock_lock(lock) __libc_lock_lock (lock)
# define lock_unlock(lock) __libc_lock_unlock (lock)
#elif defined GNULIB_LOCK && !defined USE_UNLOCKED_IO
# include "glthread/lock.h"
  /* Use gl_lock_define if empty macro arguments are known to work.
     Otherwise, fall back on less-portable substitutes.  */
# if ((defined __GNUC__ && !defined __STRICT_ANSI__) \
      || (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__))
#  define lock_define(name) gl_lock_define (, name)
# elif USE_POSIX_THREADS
#  define lock_define(name) pthread_mutex_t name;
# elif USE_PTH_THREADS
#  define lock_define(name) pth_mutex_t name;
# elif USE_SOLARIS_THREADS
#  define lock_define(name) mutex_t name;
# elif USE_WINDOWS_THREADS
#  define lock_define(name) gl_lock_t name;
# else
#  define lock_define(name)
# endif
# define lock_init(lock) glthread_lock_init (&(lock))
# define lock_fini(lock) glthread_lock_destroy (&(lock))
# define lock_lock(lock) glthread_lock_lock (&(lock))
# define lock_unlock(lock) glthread_lock_unlock (&(lock))
#elif defined GNULIB_PTHREAD && !defined USE_UNLOCKED_IO
# include <pthread.h>
# define lock_define(name) pthread_mutex_t name;
# define lock_init(lock) pthread_mutex_init (&(lock), 0)
# define lock_fini(lock) pthread_mutex_destroy (&(lock))
# define lock_lock(lock) pthread_mutex_lock (&(lock))
# define lock_unlock(lock) pthread_mutex_unlock (&(lock))
#else
# define lock_define(name)
# define lock_init(lock) 0
# define lock_fini(lock) ((void) 0)
  /* The 'dfa' avoids an "unused variable 'dfa'" warning from GCC.  */
# define lock_lock(lock) ((void) dfa)
# define lock_unlock(lock) ((void) 0)
#endif
82
83
/* Fallback in case the system doesn't have isblank(): only space and
   horizontal tab count as blank.  CH may be evaluated twice.  */
#if !defined _LIBC && ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK))
# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
#endif
87
88
#ifdef _LIBC
89
# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
90
#  define _RE_DEFINE_LOCALE_FUNCTIONS 1
91
#   include <locale/localeinfo.h>
92
#   include <locale/coll-lookup.h>
93
# endif
94
#endif
95
96
/* This is for other GNU distributions with internationalized messages.
   Without libintl/NLS (and outside glibc), gettext degrades to the
   identity on its argument.  */
#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
# include <libintl.h>
# ifdef _LIBC
#  undef gettext
#  define gettext(msgid) \
  __dcgettext (_libc_intl_domainname, msgid, LC_MESSAGES)
# endif
#else
# undef gettext
# define gettext(msgid) (msgid)
#endif
108
109
#ifndef gettext_noop
/* This define is so xgettext can find the internationalizable
   strings.  It expands to its argument unchanged.  */
# define gettext_noop(String) String
#endif
114
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
115
/* Enable the multibyte/wide-character code paths when building inside
   glibc, or when MB_CUR_MAX and the wide-character classification
   facilities are available.  */
#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE) || _LIBC
# define RE_ENABLE_I18N
#endif
118
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
119
/* Number of ASCII characters.  */
#define ASCII_CHARS 0x80

/* Number of single byte characters.  */
#define SBC_MAX (UCHAR_MAX + 1)

#define COLL_ELEM_LEN_MAX 8

/* The character which represents newline.  */
#define NEWLINE_CHAR '\n'
#define WIDE_NEWLINE_CHAR L'\n'
130
131
/* Rename to standard API for using out of glibc: the glibc-internal
   double-underscore names are mapped onto their public counterparts.  */
#ifndef _LIBC
# undef __wctype
# undef __iswalnum
# undef __iswctype
# undef __towlower
# undef __towupper
# define __wctype wctype
# define __iswalnum iswalnum
# define __iswctype iswctype
# define __towlower towlower
# define __towupper towupper
# define __btowc btowc
# define __mbrtowc mbrtowc
# define __wcrtomb wcrtomb
# define __regfree regfree
#endif /* not _LIBC */
148
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
149
/* Provide SSIZE_MAX when the host headers do not, derived from
   SIZE_MAX.  */
#ifndef SSIZE_MAX
# define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
#endif
152
153
/* The type of indexes into strings.  This is signed, not size_t,
   since the API requires indexes to fit in regoff_t anyway, and using
   signed integers makes the code a bit smaller and presumably faster.
   The traditional GNU regex implementation uses int for indexes.
   The POSIX-compatible implementation uses a possibly-wider type.
   The name 'Idx' is three letters to minimize the hassle of
   reindenting a lot of regex code that formerly used 'int'.  */
typedef regoff_t Idx;
#ifdef _REGEX_LARGE_OFFSETS
# define IDX_MAX SSIZE_MAX
#else
# define IDX_MAX INT_MAX
#endif
166
167
/* A hash value, suitable for computing hash tables.  */
168
typedef __re_size_t re_hashval_t;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
169
170
/* An integer used to represent a set of bits.  It must be unsigned,
   and must be at least as wide as unsigned int.  */
typedef unsigned long int bitset_word_t;
/* All bits set in a bitset_word_t.  */
#define BITSET_WORD_MAX ULONG_MAX
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
175
176
/* Number of bits in a bitset_word_t.  For portability to hosts with
   padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)';
   instead, deduce it directly from BITSET_WORD_MAX.  Avoid
   greater-than-32-bit integers and unconditional shifts by more than
   31 bits, as they're not portable.  */
#if BITSET_WORD_MAX == 0xffffffffUL
# define BITSET_WORD_BITS 32
#elif BITSET_WORD_MAX >> 31 >> 4 == 1
# define BITSET_WORD_BITS 36
#elif BITSET_WORD_MAX >> 31 >> 16 == 1
# define BITSET_WORD_BITS 48
#elif BITSET_WORD_MAX >> 31 >> 28 == 1
# define BITSET_WORD_BITS 60
#elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1
# define BITSET_WORD_BITS 64
#elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1
# define BITSET_WORD_BITS 72
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1
# define BITSET_WORD_BITS 128
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1
# define BITSET_WORD_BITS 256
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1
# define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */
# if BITSET_WORD_BITS <= SBC_MAX
#  error "Invalid SBC_MAX"
# endif
#else
# error "Add case for new bitset_word_t size"
#endif
205
206
/* Number of bitset_word_t values in a bitset_t.  */
207
#define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS)
208
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
209
typedef bitset_word_t bitset_t[BITSET_WORDS];
210
typedef bitset_word_t *re_bitset_ptr_t;
211
typedef const bitset_word_t *re_const_bitset_ptr_t;
212
213
/* Context-constraint flags; each occupies its own bit so that they can
   be OR-ed together (see re_context_type).  */
#define PREV_WORD_CONSTRAINT 0x0001
#define PREV_NOTWORD_CONSTRAINT 0x0002
#define NEXT_WORD_CONSTRAINT 0x0004
#define NEXT_NOTWORD_CONSTRAINT 0x0008
#define PREV_NEWLINE_CONSTRAINT 0x0010
#define NEXT_NEWLINE_CONSTRAINT 0x0020
#define PREV_BEGBUF_CONSTRAINT 0x0040
#define NEXT_ENDBUF_CONSTRAINT 0x0080
#define WORD_DELIM_CONSTRAINT 0x0100
#define NOT_WORD_DELIM_CONSTRAINT 0x0200
223
224
typedef enum
225
{
226
  INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
227
  WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
228
  WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
229
  INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
230
  LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
231
  LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
232
  BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
233
  BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
234
  WORD_DELIM = WORD_DELIM_CONSTRAINT,
235
  NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
236
} re_context_type;
237
238
typedef struct
239
{
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
240
  Idx alloc;
241
  Idx nelem;
242
  Idx *elems;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
243
} re_node_set;
244
245
/* Kinds of token, node and tree element.  The numeric values are
   significant: the epsilon operators are built from EPSILON_BIT.  */
typedef enum
{
  NON_TYPE = 0,

  /* Node type, These are used by token, node, tree.  */
  CHARACTER = 1,
  END_OF_RE = 2,
  SIMPLE_BRACKET = 3,
  OP_BACK_REF = 4,
  OP_PERIOD = 5,
#ifdef RE_ENABLE_I18N
  COMPLEX_BRACKET = 6,
  OP_UTF8_PERIOD = 7,
#endif /* RE_ENABLE_I18N */

  /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
     when the debugger shows values of this enum type.  */
#define EPSILON_BIT 8
  OP_OPEN_SUBEXP = EPSILON_BIT | 0,
  OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
  OP_ALT = EPSILON_BIT | 2,
  OP_DUP_ASTERISK = EPSILON_BIT | 3,
  ANCHOR = EPSILON_BIT | 4,

  /* Tree type, these are used only by tree. */
  CONCAT = 16,
  SUBEXP = 17,

  /* Token type, these are used only by token.  */
  OP_DUP_PLUS = 18,
  OP_DUP_QUESTION,
  OP_OPEN_BRACKET,
  OP_CLOSE_BRACKET,
  OP_CHARSET_RANGE,
  OP_OPEN_DUP_NUM,
  OP_CLOSE_DUP_NUM,
  OP_NON_MATCH_LIST,
  OP_OPEN_COLL_ELEM,
  OP_CLOSE_COLL_ELEM,
  OP_OPEN_EQUIV_CLASS,
  OP_CLOSE_EQUIV_CLASS,
  OP_OPEN_CHAR_CLASS,
  OP_CLOSE_CHAR_CLASS,
  OP_WORD,
  OP_NOTWORD,
  OP_SPACE,
  OP_NOTSPACE,
  BACK_SLASH

} re_token_type_t;
295
296
#ifdef RE_ENABLE_I18N
/* Contents of a bracket expression that needs multibyte handling
   (referenced from re_token_t as 'mbcset' for COMPLEX_BRACKET).  Each
   array below has a matching element count further down.  */
typedef struct
{
  /* Multibyte characters.  */
  wchar_t *mbchars;

  /* Collating symbols.  */
# ifdef _LIBC
  int32_t *coll_syms;
# endif

  /* Equivalence classes. */
# ifdef _LIBC
  int32_t *equiv_classes;
# endif

  /* Range expressions. */
# ifdef _LIBC
  uint32_t *range_starts;
  uint32_t *range_ends;
# else /* not _LIBC */
  wchar_t *range_starts;
  wchar_t *range_ends;
# endif /* not _LIBC */

  /* Character classes. */
  wctype_t *char_classes;

  /* If this character set is the non-matching list.  */
  unsigned int non_match : 1;

  /* # of multibyte characters.  */
  Idx nmbchars;

  /* # of collating symbols.  */
  Idx ncoll_syms;

  /* # of equivalence classes. */
  Idx nequiv_classes;

  /* # of range expressions. */
  Idx nranges;

  /* # of character classes. */
  Idx nchar_classes;
} re_charset_t;
#endif /* RE_ENABLE_I18N */
343
344
typedef struct
345
{
346
  union
347
  {
348
    unsigned char c;		/* for CHARACTER */
349
    re_bitset_ptr_t sbcset;	/* for SIMPLE_BRACKET */
350
#ifdef RE_ENABLE_I18N
351
    re_charset_t *mbcset;	/* for COMPLEX_BRACKET */
352
#endif /* RE_ENABLE_I18N */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
353
    Idx idx;			/* for BACK_REF */
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
354
    re_context_type ctx_type;	/* for ANCHOR */
355
  } opr;
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
356
#if __GNUC__ >= 2 && !defined __STRICT_ANSI__
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
357
  re_token_type_t type : 8;
358
#else
359
  re_token_type_t type;
360
#endif
361
  unsigned int constraint : 10;	/* context constraint */
362
  unsigned int duplicated : 1;
363
  unsigned int opt_subexp : 1;
364
#ifdef RE_ENABLE_I18N
365
  unsigned int accept_mb : 1;
366
  /* These 2 bits can be moved into the union if needed (e.g. if running out
367
     of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
368
  unsigned int mb_partial : 1;
369
#endif
370
  unsigned int word_char : 1;
371
} re_token_t;
372
373
/* Nonzero when TYPE has EPSILON_BIT set, as OP_OPEN_SUBEXP through
   ANCHOR do by construction.  */
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
374
375
struct re_string_t
376
{
377
  /* Indicate the raw buffer which is the original string passed as an
378
     argument of regexec(), re_search(), etc..  */
379
  const unsigned char *raw_mbs;
380
  /* Store the multibyte string.  In case of "case insensitive mode" like
381
     REG_ICASE, upper cases of the string are stored, otherwise MBS points
382
     the same address that RAW_MBS points.  */
383
  unsigned char *mbs;
384
#ifdef RE_ENABLE_I18N
385
  /* Store the wide character string which is corresponding to MBS.  */
386
  wint_t *wcs;
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
387
  Idx *offsets;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
388
  mbstate_t cur_state;
389
#endif
390
  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
391
     raw_mbs[raw_mbs_idx + i].  */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
392
  Idx raw_mbs_idx;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
393
  /* The length of the valid characters in the buffers.  */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
394
  Idx valid_len;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
395
  /* The corresponding number of bytes in raw_mbs array.  */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
396
  Idx valid_raw_len;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
397
  /* The length of the buffers MBS and WCS.  */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
398
  Idx bufs_len;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
399
  /* The index in MBS, which is updated by re_string_fetch_byte.  */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
400
  Idx cur_idx;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
401
  /* length of RAW_MBS array.  */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
402
  Idx raw_len;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
403
  /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN.  */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
404
  Idx len;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
405
  /* End of the buffer may be shorter than its length in the cases such
406
     as re_match_2, re_search_2.  Then, we use STOP for end of the buffer
407
     instead of LEN.  */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
408
  Idx raw_stop;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
409
  /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS.  */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
410
  Idx stop;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
411
412
  /* The context of mbs[0].  We store the context independently, since
413
     the context of mbs[0] may be different from raw_mbs[0], which is
414
     the beginning of the input string.  */
415
  unsigned int tip_context;
416
  /* The translation passed as a part of an argument of re_compile_pattern.  */
417
  RE_TRANSLATE_TYPE trans;
418
  /* Copy of re_dfa_t's word_char.  */
419
  re_const_bitset_ptr_t word_char;
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
420
  /* true if REG_ICASE.  */
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
421
  unsigned char icase;
422
  unsigned char is_utf8;
423
  unsigned char map_notascii;
424
  unsigned char mbs_allocated;
425
  unsigned char offsets_needed;
426
  unsigned char newline_anchor;
427
  unsigned char word_ops_used;
428
  int mb_cur_max;
429
};
430
typedef struct re_string_t re_string_t;
431
432
433
/* Forward declaration; the structure itself is defined further down.  */
struct re_dfa_t;
typedef struct re_dfa_t re_dfa_t;
435
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
436
/* Outside glibc, IS_IN (anything) is always false.  */
#ifndef _LIBC
# define IS_IN(libc) false
#endif
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
439
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
440
/* Byte of PSTR's MBS buffer located OFFSET positions after the current
   index; the index itself is not advanced.  OFFSET is parenthesized so
   that an argument such as 'a & b' is added as a whole.  */
#define re_string_peek_byte(pstr, offset) \
  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
442
/* Accessors for re_string_t.  PSTR is a pointer to the string object;
   IDX is an index into its MBS/WCS buffers.  */

/* Return the byte at the current index, then advance the index.  */
#define re_string_fetch_byte(pstr) \
  ((pstr)->mbs[(pstr)->cur_idx++])
/* True if IDX is VALID_LEN or the wide character stored at IDX is not
   WEOF.  */
#define re_string_first_byte(pstr, idx) \
  ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
/* True if neither WCS[IDX] nor its successor (when there is one within
   VALID_LEN) is WEOF.  */
#define re_string_is_single_byte_char(pstr, idx) \
  ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
				|| (pstr)->wcs[(idx) + 1] != WEOF))
/* True once the current index has reached STOP.  */
#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
#define re_string_get_buffer(pstr) ((pstr)->mbs)
#define re_string_length(pstr) ((pstr)->len)
#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
456
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
457
#if defined _LIBC || HAVE_ALLOCA
458
# include <alloca.h>
459
#endif
460
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
461
#ifndef _LIBC
# if HAVE_ALLOCA
/* The OS usually guarantees only one guard page at the bottom of the stack,
   and a page size can be as small as 4096 bytes.  So we cannot safely
   allocate anything larger than 4096 bytes.  Also care for the possibility
   of a few compiler-allocated temporary stack slots.  */
#  define __libc_use_alloca(n) ((n) < 4032)
# else
/* alloca is implemented with malloc, so just use malloc.  */
#  define __libc_use_alloca(n) 0
#  undef alloca
#  define alloca(n) malloc (n)
# endif
#endif
475
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
476
/* MALLOC_0_IS_NONNULL is 1 inside glibc; elsewhere it defaults to 0
   unless the build has already defined it.  */
#ifdef _LIBC
# define MALLOC_0_IS_NONNULL 1
#elif !defined MALLOC_0_IS_NONNULL
# define MALLOC_0_IS_NONNULL 0
#endif
481
482
/* Larger and smaller of two values.  Each argument may be evaluated
   twice, so avoid arguments with side effects.  */
#ifndef MAX
# define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
488
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
489
/* Typed allocation helpers: T is the element type and N the element
   count.  The size N * sizeof (T) is not checked for overflow here.  */
#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
#define re_free(p) free (p)
492
493
struct bin_tree_t
494
{
495
  struct bin_tree_t *parent;
496
  struct bin_tree_t *left;
497
  struct bin_tree_t *right;
498
  struct bin_tree_t *first;
499
  struct bin_tree_t *next;
500
501
  re_token_t token;
502
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
503
  /* 'node_idx' is the index in dfa->nodes, if 'type' == 0.
504
     Otherwise 'type' indicate the type of this node.  */
505
  Idx node_idx;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
506
};
507
typedef struct bin_tree_t bin_tree_t;
508
509
#define BIN_TREE_STORAGE_SIZE \
510
  ((1024 - sizeof (void *)) / sizeof (bin_tree_t))
511
512
struct bin_tree_storage_t
513
{
514
  struct bin_tree_storage_t *next;
515
  bin_tree_t data[BIN_TREE_STORAGE_SIZE];
516
};
517
typedef struct bin_tree_storage_t bin_tree_storage_t;
518
519
/* Context flags, one bit each.  */
#define CONTEXT_WORD 1
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)

/* Tests on a context value C.  The IS_*_CONTEXT forms yield the masked
   bit (nonzero when set); IS_ORDINARY_CONTEXT is true when no bit is
   set.  */
#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
529
530
/* Character classification helpers.  A "word" character is an
   alphanumeric or an underscore; the WIDE variants take wide
   characters.  CH may be evaluated twice by the WORD_CHAR forms.  */
#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
#define IS_WIDE_WORD_CHAR(ch) (__iswalnum (ch) || (ch) == L'_')
#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
534
535
/* Nonzero if CONTEXT, the context preceding a position, violates any
   PREV_* bit set in CONSTRAINT.  CONSTRAINT is parenthesized at every
   use so that an argument such as 'A | B' is masked as a whole, in
   line with NOT_SATISFY_NEXT_CONSTRAINT.  */
#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
 ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
540
541
/* Nonzero if CONTEXT, the context following a position, violates any
   NEXT_* bit set in CONSTRAINT.  */
#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
 ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
546
547
struct re_dfastate_t
548
{
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
549
  re_hashval_t hash;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
550
  re_node_set nodes;
551
  re_node_set non_eps_nodes;
552
  re_node_set inveclosure;
553
  re_node_set *entrance_nodes;
554
  struct re_dfastate_t **trtable, **word_trtable;
555
  unsigned int context : 4;
556
  unsigned int halt : 1;
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
557
  /* If this state can accept "multi byte".
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
558
     Note that we refer to multibyte characters, and multi character
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
559
     collating elements as "multi byte".  */
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
560
  unsigned int accept_mb : 1;
561
  /* If this state has backreference node(s).  */
562
  unsigned int has_backref : 1;
563
  unsigned int has_constraint : 1;
564
};
565
typedef struct re_dfastate_t re_dfastate_t;
566
567
struct re_state_table_entry
568
{
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
569
  Idx num;
570
  Idx alloc;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
571
  re_dfastate_t **array;
572
};
573
574
/* Array type used in re_sub_match_last_t and re_sub_match_top_t.  */
575
576
typedef struct
577
{
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
578
  Idx next_idx;
579
  Idx alloc;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
580
  re_dfastate_t **array;
581
} state_array_t;
582
583
/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.  */
584
585
typedef struct
586
{
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
587
  Idx node;
588
  Idx str_idx; /* The position NODE match at.  */
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
589
  state_array_t path;
590
} re_sub_match_last_t;
591
592
/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
593
   And information about the node, whose type is OP_CLOSE_SUBEXP,
594
   corresponding to NODE is stored in LASTS.  */
595
596
typedef struct
597
{
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
598
  Idx str_idx;
599
  Idx node;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
600
  state_array_t *path;
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
601
  Idx alasts; /* Allocation size of LASTS.  */
602
  Idx nlasts; /* The number of LASTS.  */
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
603
  re_sub_match_last_t **lasts;
604
} re_sub_match_top_t;
605
606
struct re_backref_cache_entry
607
{
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
608
  Idx node;
609
  Idx str_idx;
610
  Idx subexp_from;
611
  Idx subexp_to;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
612
  char more;
613
  char unused;
614
  unsigned short int eps_reachable_subexps_map;
615
};
616
617
typedef struct
618
{
619
  /* The string object corresponding to the input string.  */
620
  re_string_t input;
621
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
622
  const re_dfa_t *const dfa;
623
#else
624
  const re_dfa_t *dfa;
625
#endif
626
  /* EFLAGS of the argument of regexec.  */
627
  int eflags;
628
  /* Where the matching ends.  */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
629
  Idx match_last;
630
  Idx last_node;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
631
  /* The state log used by the matcher.  */
632
  re_dfastate_t **state_log;
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
633
  Idx state_log_top;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
634
  /* Back reference cache.  */
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
635
  Idx nbkref_ents;
636
  Idx abkref_ents;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
637
  struct re_backref_cache_entry *bkref_ents;
638
  int max_mb_elem_len;
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
639
  Idx nsub_tops;
640
  Idx asub_tops;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
641
  re_sub_match_top_t **sub_tops;
642
} re_match_context_t;
643
644
typedef struct
645
{
646
  re_dfastate_t **sifted_states;
647
  re_dfastate_t **limited_states;
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
648
  Idx last_node;
649
  Idx last_str_idx;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
650
  re_node_set limits;
651
} re_sift_context_t;
652
653
struct re_fail_stack_ent_t
654
{
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
655
  Idx idx;
656
  Idx node;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
657
  regmatch_t *regs;
658
  re_node_set eps_via_nodes;
659
};
660
661
struct re_fail_stack_t
662
{
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
663
  Idx num;
664
  Idx alloc;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
665
  struct re_fail_stack_ent_t *stack;
666
};
667
668
struct re_dfa_t
669
{
670
  re_token_t *nodes;
671
  size_t nodes_alloc;
672
  size_t nodes_len;
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
673
  Idx *nexts;
674
  Idx *org_indices;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
675
  re_node_set *edests;
676
  re_node_set *eclosures;
677
  re_node_set *inveclosures;
678
  struct re_state_table_entry *state_table;
679
  re_dfastate_t *init_state;
680
  re_dfastate_t *init_state_word;
681
  re_dfastate_t *init_state_nl;
682
  re_dfastate_t *init_state_begbuf;
683
  bin_tree_t *str_tree;
684
  bin_tree_storage_t *str_tree_storage;
685
  re_bitset_ptr_t sb_char;
686
  int str_tree_storage_idx;
687
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
688
  /* number of subexpressions 're_nsub' is in regex_t.  */
689
  re_hashval_t state_hash_mask;
690
  Idx init_node;
691
  Idx nbackref; /* The number of backreference in this dfa.  */
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
692
693
  /* Bitmap expressing which backreference is used.  */
694
  bitset_word_t used_bkref_map;
695
  bitset_word_t completed_bkref_map;
696
697
  unsigned int has_plural_match : 1;
698
  /* If this dfa has "multibyte node", which is a backreference or
699
     a node which can accept multibyte character or multi character
700
     collating element.  */
701
  unsigned int has_mb_node : 1;
702
  unsigned int is_utf8 : 1;
703
  unsigned int map_notascii : 1;
704
  unsigned int word_ops_used : 1;
705
  int mb_cur_max;
706
  bitset_t word_char;
707
  reg_syntax_t syntax;
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
708
  Idx *subexp_map;
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
709
#ifdef DEBUG
710
  char* re_str;
711
#endif
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
712
  lock_define (lock)
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
713
};
714
715
/* Helpers for re_node_set.  SET and P are pointers to a re_node_set.  */

/* Zero every member of *SET.  */
#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
/* Remove ID from SET, at the position given by re_node_set_contains
   minus one.  */
#define re_node_set_remove(set,id) \
  (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
/* Reset the element count; the ELEMS storage is left in place.  */
#define re_node_set_empty(p) ((p)->nelem = 0)
/* Release the ELEMS storage of SET.  */
#define re_node_set_free(set) re_free ((set)->elems)
720

721
722
/* Kind of a single element in a bracket expression; selects how
   bracket_elem_t is interpreted.  */
typedef enum
{
  SB_CHAR,
  MB_CHAR,
  EQUIV_CLASS,
  COLL_SYM,
  CHAR_CLASS
} bracket_elem_type;
730
731
typedef struct
732
{
733
  bracket_elem_type type;
734
  union
735
  {
736
    unsigned char ch;
737
    unsigned char *name;
738
    wchar_t wch;
739
  } opr;
740
} bracket_elem_t;
741
742
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
743
/* Functions for bitset_t operation.  */
744
745
static inline void
746
bitset_set (bitset_t set, Idx i)
747
{
748
  set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS;
749
}
750
751
static inline void
752
bitset_clear (bitset_t set, Idx i)
753
{
754
  set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS);
755
}
756
757
static inline bool
758
bitset_contain (const bitset_t set, Idx i)
759
{
760
  return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1;
761
}
762
763
static inline void
764
bitset_empty (bitset_t set)
765
{
766
  memset (set, '\0', sizeof (bitset_t));
767
}
768
769
static inline void
770
bitset_set_all (bitset_t set)
771
{
772
  memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS));
773
  if (SBC_MAX % BITSET_WORD_BITS != 0)
774
    set[BITSET_WORDS - 1] =
775
      ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1;
776
}
777
778
static inline void
779
bitset_copy (bitset_t dest, const bitset_t src)
780
{
781
  memcpy (dest, src, sizeof (bitset_t));
782
}
783
784
static inline void
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
785
bitset_not (bitset_t set)
786
{
787
  int bitset_i;
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
788
  for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i)
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
789
    set[bitset_i] = ~set[bitset_i];
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
790
  if (SBC_MAX % BITSET_WORD_BITS != 0)
791
    set[BITSET_WORDS - 1] =
792
      ((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1)
793
       & ~set[BITSET_WORDS - 1]);
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
794
}
795
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
796
static inline void
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
797
bitset_merge (bitset_t dest, const bitset_t src)
798
{
799
  int bitset_i;
800
  for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
801
    dest[bitset_i] |= src[bitset_i];
802
}
803
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
804
static inline void
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
805
bitset_mask (bitset_t dest, const bitset_t src)
806
{
807
  int bitset_i;
808
  for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
809
    dest[bitset_i] &= src[bitset_i];
810
}
811
812
#ifdef RE_ENABLE_I18N
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
813
/* Functions for re_string.  */
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
814
static int
731.11.228 by Arnold D. Robbins
Sync support with GLIBC and GNULIB.
815
__attribute__ ((pure, unused))
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
816
re_string_char_size_at (const re_string_t *pstr, Idx idx)
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
817
{
818
  int byte_idx;
819
  if (pstr->mb_cur_max == 1)
820
    return 1;
821
  for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
822
    if (pstr->wcs[idx + byte_idx] != WEOF)
823
      break;
824
  return byte_idx;
825
}
826
827
static wint_t
731.11.228 by Arnold D. Robbins
Sync support with GLIBC and GNULIB.
828
__attribute__ ((pure, unused))
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
829
re_string_wchar_at (const re_string_t *pstr, Idx idx)
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
830
{
831
  if (pstr->mb_cur_max == 1)
832
    return (wint_t) pstr->mbs[idx];
833
  return (wint_t) pstr->wcs[idx];
834
}
835
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
836
# ifdef _LIBC
837
#  include <locale/weight.h>
838
# endif
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
839
840
static int
731.11.228 by Arnold D. Robbins
Sync support with GLIBC and GNULIB.
841
__attribute__ ((pure, unused))
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
842
re_string_elem_size_at (const re_string_t *pstr, Idx idx)
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
843
{
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
844
# ifdef _LIBC
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
845
  const unsigned char *p, *extra;
846
  const int32_t *table, *indirect;
847
  uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
848
849
  if (nrules != 0)
850
    {
851
      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
852
      extra = (const unsigned char *)
853
	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
854
      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
855
						_NL_COLLATE_INDIRECTMB);
856
      p = pstr->mbs + idx;
857
      findidx (table, indirect, extra, &p, pstr->len - idx);
858
      return p - pstr->mbs - idx;
859
    }
860
  else
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
861
# endif /* _LIBC */
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
862
    return 1;
863
}
864
#endif /* RE_ENABLE_I18N */
865
731.17.1 by Arnold D. Robbins
Switch to GNULIB regex.
866
/* FALLTHROUGH marks an intentional fall through from one switch case
   into the next; write it as a statement: 'FALLTHROUGH;'.

   It expands to the 'fallthrough' statement attribute whenever the
   compiler implements it, and to a no-op expression otherwise.  The
   attribute is detected with __has_attribute where that operator
   exists, because a bare '__GNUC__ < 7' test misclassifies Clang,
   which reports itself as GCC 4 while recent releases do implement
   the attribute.  The GCC version test is kept as a fallback.  A
   definition supplied earlier (e.g. by config.h) is left alone.  */
#ifndef FALLTHROUGH
# ifdef __has_attribute
#  if __has_attribute (__fallthrough__)
#   define FALLTHROUGH __attribute__ ((__fallthrough__))
#  endif
# endif
# ifndef FALLTHROUGH
#  if defined __GNUC__ && 7 <= __GNUC__
#   define FALLTHROUGH __attribute__ ((__fallthrough__))
#  else
#   define FALLTHROUGH ((void) 0)
#  endif
# endif
#endif
873
731.1.10 by Arnold D. Robbins
Add small regex fix. Add support directory.
874
#endif /*  _REGEX_INTERNAL_H */