2
* Copyright (c) 2002 by The XFree86 Project, Inc.
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE XFREE86 PROJECT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* Except as contained in this notice, the name of the XFree86 Project shall
23
* not be used in advertising or otherwise to promote the sale, use or other
24
* dealings in this Software without prior written authorization from the
27
* Author: Paulo C�sar Pereira de Andrade
30
/* $XFree86: xc/programs/xedit/lisp/re/re.c,v 1.9 2002/12/11 04:44:28 paulo Exp $ */
39
/* Information used when generating the final form of the compiled re.
47
/* Start offset of special repetition instruction */
50
/* Jump offset of special repetition instruction */
53
/* Just a flag, to know if this nesting is for a special repetition */
56
int bas; /* Alternatives/repetitions depth */
57
int par; /* Open parenthesis counter */
58
int ref; /* Backreference counter */
60
rec_pat *apat; /* Alternatives duplicate patterns
61
* if a special repetition is found,
62
* this is done to somewhat simplify
63
* the bytecode engine and still allow
64
* some complex (and time consuming)
71
/* This structure is not associated with re_cod as it's data only matters
72
* to the current match search.
75
unsigned char *bas; /* Base string pointer */
76
unsigned char *str; /* String to search for pattern */
77
unsigned char *end; /* Where to stop searching */
78
unsigned char *cod; /* Pointer in the re_cod structure */
79
long off; /* Number of used entries in so/eo etc */
81
/* Match offset/nesting information */
82
long so[MAX_DEPTH]; /* (s)tart of (m)atch */
83
long eo[MAX_DEPTH]; /* (e)nd of (m)atch */
84
long sv[MAX_DEPTH]; /* (s)a(v)e match end offset */
85
long re[MAX_DEPTH]; /* (re)petition count */
86
long ss[MAX_DEPTH]; /* (s)ave (s)tart of match */
87
unsigned char *rcod[MAX_DEPTH]; /* restart position in regex code */
88
unsigned char *rstr[MAX_DEPTH]; /* restart position in string */
90
/* Group/backreference information */
99
static void reinit(void);
100
static int rec_check(re_inf*, int);
101
static int rec_code(re_inf*, ReCode);
102
static int rec_byte(re_inf*, int);
103
static int rec_byte_byte(re_inf*, int, int);
104
static int rec_code_byte(re_inf*, ReCode, int);
105
static int rec_length(re_inf*, int);
106
static int rec_code_byte_byte(re_inf*, ReCode, int, int);
107
static int rec_build_alt(re_inf*, rec_alt*);
108
static int rec_build_pat(re_inf*, rec_pat*);
109
static int rec_build_rng(re_inf*, rec_rng*);
110
static int rec_build_grp(re_inf*, rec_grp*);
111
static int rec_build_stl(re_inf*, rec_stl*);
112
static int rec_build_rep(re_inf*, rec_rep*);
113
static int rec_inc_spc(re_inf*);
114
static int rec_dec_spc(re_inf*);
115
static int rec_add_spc(re_inf*, int);
116
static int rec_off_spc(re_inf*);
117
static int rec_alt_spc(re_inf*, int);
118
static int rec_rep_spc(re_inf*, int);
120
static void redump(re_cod*);
126
unsigned char re__alnum[256];
127
unsigned char re__odigit[256];
128
unsigned char re__ddigit[256];
129
unsigned char re__xdigit[256];
130
unsigned char re__control[256];
136
recomp(re_cod *preg, const char *pattern, int flags)
144
inf.alt = irec_comp(pattern,
145
flags & RE_PEND ? preg->re_endp :
146
pattern + strlen(pattern),
152
inf.len = inf.spc = 0;
159
for (i = 0; i < MAX_DEPTH; i++)
162
/* First byte is runtime modifier flags */
163
if (rec_byte(&inf, flags & (RE_NEWLINE | RE_NOSUB)) == 0 &&
164
rec_byte(&inf, 0xff) == 0 &&
165
rec_build_alt(&inf, inf.alt) == 0 &&
166
rec_rep_spc(&inf, 0) == 0 &&
167
rec_code(&inf, Re_Done) == 0) {
168
/* Number of possible references, loops will not leave this
169
* value correct, but it is cheap to read it from the second
170
* byte, instead of adding several extra checks in the bytecode. */
172
inf.cod[1] = inf.ref - 1;
174
/* Public structure member */
175
preg->re_nsub = inf.ref;
178
irec_free_alt(inf.alt);
182
else if (flags & RE_DUMP)
190
reexec(const re_cod *preg, const char *string,
191
int nmatch, re_mat pmat[], int flags)
193
unsigned char *ptr, *str, newline, nosub;
194
int len, si, ci, bas, i, j, k, l, m;
197
if (preg == NULL || preg->cod == NULL || nmatch < 0 ||
198
((flags & RE_STARTEND) &&
199
(pmat == NULL || pmat[0].rm_eo < pmat[0].rm_so)))
202
eng.str = (unsigned char*)string;
203
if (flags & RE_STARTEND) {
204
eng.end = eng.str + pmat[0].rm_eo;
205
eng.str += pmat[0].rm_so;
208
eng.end = eng.str + strlen(string);
210
nosub = preg->cod[0] & RE_NOSUB;
211
newline = preg->cod[0] & RE_NEWLINE;
212
eng.cod = preg->cod + 2;
214
if (!nosub && preg->cod[1] != 0xff) {
215
for (i = 0; i <= preg->cod[1]; i++) {
221
/* Setup to search for start of match from the first character */
223
eng.eo[0] = eng.sv[0] = -1;
224
eng.rcod[0] = eng.cod;
225
eng.rstr[0] = eng.str + 1;
228
for (ci = si = 1;;) {
231
/****************************************************
233
****************************************************/
235
if (eng.str == eng.end || (newline && eng.str[0] == '\n'))
238
case Re_AnyEatAnyTimes:
240
for (ptr = eng.str; ptr < eng.end; ptr++) {
247
si = eng.end - eng.str;
250
si = eng.end > eng.str;
251
if (newline && si && eng.str[0] == '\n')
254
case Re_AnyEatAtLeast:
256
for (ptr = eng.str; ptr < eng.end; ptr++) {
263
si = eng.end - eng.str;
270
if (eng.str >= eng.end)
272
if (re__odigit[eng.str[0]])
276
if (eng.str >= eng.end || re__odigit[eng.str[0]])
280
if (eng.str >= eng.end)
282
if (re__ddigit[eng.str[0]])
286
if (eng.str >= eng.end || re__ddigit[eng.str[0]])
290
if (eng.str >= eng.end)
292
if (re__xdigit[eng.str[0]])
296
if (eng.str >= eng.end || re__xdigit[eng.str[0]])
300
if (eng.str >= eng.end)
302
if (eng.str[0] == ' ' || eng.str[0] == '\t')
306
if (eng.str >= eng.end)
308
if (eng.str[0] != ' ' && eng.str[0] != '\t')
312
if (eng.str >= eng.end)
314
if (eng.str[0] == '\t')
318
if (eng.str >= eng.end)
320
if (eng.str[0] == '\n')
324
if (eng.str >= eng.end)
326
if (eng.str[0] >= 'a' && eng.str[0] <= 'z')
330
if (eng.str >= eng.end)
332
if (eng.str[0] >= 'A' && eng.str[0] <= 'Z')
336
if (eng.str >= eng.end)
338
if (re__alnum[eng.str[0]])
342
if (eng.str >= eng.end)
344
if (re__alnum[eng.str[0]])
348
if (eng.str >= eng.end)
350
if (re__control[eng.str[0]])
354
if (eng.str >= eng.end || re__control[eng.str[0]])
358
/****************************************************
359
* One byte codes, match special emtpy strings *
360
****************************************************/
362
if (eng.str == eng.bas) {
363
if ((flags & RE_NOTBOL)) {
364
/* String does not start at the beginning of a line */
372
if (newline && eng.str[-1] == '\n') {
378
if (eng.str == eng.end) {
379
if (flags & RE_NOTEOL)
380
/* String does not finish at the end of a line */
385
if (newline && eng.str[0] == '\n') {
391
if (eng.str >= eng.end ||
392
(eng.str > eng.bas &&
393
(re__alnum[eng.str[-1]])))
395
if (re__alnum[eng.str[0]]) {
401
if (eng.str == eng.bas ||
402
(eng.str < eng.end &&
403
re__alnum[eng.str[0]]))
405
if (re__alnum[eng.str[-1]]) {
411
/****************************************************
412
* One byte code, one byte argument *
413
****************************************************/
415
if (eng.str >= eng.end)
417
if (eng.str[0] == eng.cod[1]) {
423
if (eng.str >= eng.end)
425
if (eng.str[0] != eng.cod[1]) {
430
case Re_SearchLiteral:
431
for (str = eng.str; str < eng.end; str++) {
432
if (*str == eng.cod[1]) {
438
/* This bytecode only happens in the toplevel */
439
eng.so[0] = str - eng.bas;
443
/****************************************************
444
* One byte code, two bytes argument *
445
****************************************************/
447
if (eng.str >= eng.end)
449
if (eng.str[0] == eng.cod[1] || eng.str[0] == eng.cod[2]) {
454
case Re_CaseLiteralNot:
455
if (eng.str >= eng.end)
457
if (eng.str[0] != eng.cod[1] && eng.str[0] != eng.cod[2]) {
462
case Re_SearchCaseLiteral:
463
for (str = eng.str; str < eng.end; str++) {
464
if (*str == eng.cod[1] || *str == eng.cod[2]) {
470
eng.so[0] = str - eng.bas;
474
/****************************************************
475
* One byte codes, two arguments, n bytes *
476
****************************************************/
481
len = (len & 0x7f) + (eng.cod[2] << 7);
485
if (eng.end - eng.str < len)
489
for (k = len; k > 0; k--) {
490
if (*ptr++ != *str++)
496
case Re_SearchString:
500
len = (len & 0x7f) + (eng.cod[2] << 7);
504
for (str = eng.str; eng.end - str >= len; str = eng.str++) {
505
for (ptr = eng.cod + i, str = eng.str, k = len; k > 0; k--)
506
if (*ptr++ != *str++)
509
/* Substring found */
515
eng.so[0] = eng.end - eng.bas;
523
len = (len & 0x7f) + (eng.cod[2] << 7);
529
/* Check if there are at least len/2 bytes left, string
530
* is represented as two bytes, lower and upper case */
531
if (eng.end - eng.str < len)
535
for (k = len; k > 0; str++, ptr += 2, k--) {
536
if (*str != ptr[0] && *str != ptr[1])
542
case Re_SearchCaseString:
546
len = (len & 0x7f) + (eng.cod[2] << 7);
551
for (str = eng.str; eng.end - str >= len; str = eng.str++) {
552
for (ptr = eng.cod + i, str = eng.str, k = len;
553
k > 0; k--, ptr += 2, str++)
554
if (ptr[0] != str[0] && ptr[1] != str[0])
557
/* Substring found */
563
eng.so[0] = eng.end - eng.bas;
568
/* Number of strings */
571
/* Where to jump after match */
572
bas = eng.cod[2] | (eng.cod[3] << 8);
575
ptr = eng.cod + k + 4;
576
l = eng.end - eng.str;
577
for (j = 0; j < k; j++) {
578
len = eng.cod[j + 4];
580
for (i = 0; i < len; i++)
581
if (ptr[i] != str[i])
594
case Re_CaseStringList:
595
/* Number of strings */
598
/* Where to jump after match */
599
bas = eng.cod[2] | (eng.cod[3] << 8);
602
ptr = eng.cod + k + 4;
603
l = eng.end - eng.str;
604
for (j = 0; j < k; j++) {
605
len = eng.cod[j + 4];
606
if ((len >> 1) <= l) {
607
for (i = m = 0; i < len; m++, i += 2)
608
if (ptr[i] != str[m] && ptr[i + 1] != str[m])
622
case Re_LargeStringList:
623
/* Where to jump after match */
624
bas = eng.cod[1] | (eng.cod[2] << 8);
628
/* First entry in index map */
630
i = (int)str[0] << 1;
631
j = ptr[i] | (ptr[i + 1] << 8);
633
/* No entry with this byte */
636
/* Bytes left in input */
637
l = eng.end - eng.str;
639
/* First entry matching initial byte */
644
ptr += len + 1, len = ptr[0]) {
646
for (i = 1; i < len; i++) {
647
if (ptr[i + 1] != str[i])
658
case Re_LargeCaseStringList:
659
/* Where to jump after match */
660
bas = eng.cod[1] | (eng.cod[2] << 8);
664
/* First entry in index map */
666
i = (int)str[0] << 1;
667
j = ptr[i] | (ptr[i + 1] << 8);
669
/* No entry with this byte */
672
/* Bytes left in input */
673
l = eng.end - eng.str;
675
/* First entry matching initial byte */
679
str[0] == ptr[1] || str[0] == ptr[2];
680
ptr += len + 1, len = ptr[0]) {
681
if ((k = (len >> 1)) <= l) {
682
for (i = 2, j = 1; i < len; i += 2, j++) {
683
if (ptr[i + 1] != str[j] && ptr[i + 2] != str[j])
695
/****************************************************
696
* Character range matching *
697
****************************************************/
699
if (eng.str < eng.end && eng.cod[eng.str[0] + 1]) {
705
if (eng.str >= eng.end || eng.cod[eng.str[0] + 1])
710
/****************************************************
712
****************************************************/
716
eng.gso[eng.goff] = eng.str - eng.bas;
720
eng.geo[eng.goff] = eng.str - eng.bas;
725
eng.geo[eng.goff] = eng.str - eng.bas;
726
eng.cod += 2; /* + Update + bas */
729
/****************************************************
731
****************************************************/
737
if (k < j || eng.end - eng.str < len)
741
for (l = len; l > 0; l--) {
742
if (*ptr++ != *str++)
749
/****************************************************
750
* Alternatives handling *
751
****************************************************/
754
if (++eng.off >= MAX_DEPTH)
757
/* Get offset of next alternative */
758
i = eng.cod[1] | (eng.cod[2] << 8);
760
/* Setup for next alternative if the current fails */
761
eng.rcod[eng.off] = eng.cod + i + 1; /* + Alt */
763
/* If fail, test the next alternative in the same string */
764
eng.rstr[eng.off] = eng.str;
766
/* Setup match offsets */
767
if (eng.so[bas] <= eng.eo[bas])
768
eng.so[eng.off] = eng.eo[bas];
770
eng.so[eng.off] = eng.so[bas];
771
eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;
773
/* Save start of possible previous matches */
774
eng.ss[eng.off] = eng.so[bas];
779
/* Go try the first alternative */
784
/* Check if matched and if it is a better match */
785
if (eng.sv[eng.off] - eng.so[eng.off] <
786
eng.eo[eng.off] - eng.so[eng.off])
787
eng.sv[eng.off] = eng.eo[eng.off];
789
/* Get offset of next alternative */
790
i = eng.cod[1] | (eng.cod[2] << 8);
792
/* Setup for next alternative if the current fails */
793
eng.rcod[eng.off] = eng.cod + i + 1; /* + AltNext */
795
/* Setup match offset */
796
eng.eo[eng.off] = eng.so[eng.off] - 1;
798
/* Reset string for next alternative */
799
eng.str = eng.rstr[eng.off];
804
/* Go try the next alternative */
809
/* Check if matched and if it is a better match */
810
if (eng.sv[eng.off] - eng.so[eng.off] <
811
eng.eo[eng.off] - eng.so[eng.off])
812
eng.sv[eng.off] = eng.eo[eng.off];
814
/* If there is a match */
815
if (eng.sv[eng.off] >= eng.so[eng.off]) {
816
eng.so[bas] = eng.ss[eng.off];
817
eng.eo[bas] = eng.sv[eng.off];
818
eng.str = eng.bas + eng.eo[bas];
820
/* Pop stack and skip code */
824
/* Go try next regular expression pattern */
828
/* Failed, reset string and pop stack */
829
eng.str = eng.rstr[eng.off];
834
/****************************************************
836
****************************************************/
838
/* Note that the repetition counter is not
839
* updated for <re>*, <re>+ and <re>?
840
* it is easy to updated, but since it is not
841
* really useful, code to do it was removed
842
* to save a few cpu cicles. */
844
#define REPETITION_SETUP() \
845
if (++eng.off >= MAX_DEPTH) \
846
return (RE_ASSERT); \
848
/* Return here for recovery if match fail */ \
849
eng.rcod[eng.off] = eng.cod; \
851
/* Setup match offsets */ \
852
if (eng.so[bas] <= eng.eo[bas]) \
853
eng.so[eng.off] = eng.eo[bas]; \
855
eng.so[eng.off] = eng.so[bas]; \
856
eng.ss[eng.off] = eng.so[bas]; \
857
eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;\
859
/* Skip repetition instruction */ \
865
if (eng.off == bas) {
866
/* First iteration */
870
if (eng.eo[eng.off] >= eng.so[eng.off] &&
871
eng.eo[eng.off] > eng.sv[eng.off]) {
872
/* Update offset of match */
873
eng.sv[eng.off] = eng.eo[eng.off];
875
/* Skip repetition instruction */
879
/* Match failed but it is ok */
880
len = eng.cod[2] | (eng.cod[3] << 8);
881
eng.so[bas] = eng.ss[eng.off];
882
if (eng.sv[eng.off] >= eng.so[eng.off])
883
/* Something matched earlier, update */
884
eng.eo[bas] = eng.sv[eng.off];
885
else if (eng.eo[bas] < eng.so[bas])
887
eng.eo[bas] = eng.so[bas];
889
/* Try next pattern at correct offset */
890
eng.str = eng.bas + eng.eo[bas];
892
/* Pop stack and skip code */
901
if (eng.off == bas) {
902
/* First iteration */
906
/* Matched or first iteration is done */
907
len = eng.cod[2] | (eng.cod[3] << 8);
908
eng.so[bas] = eng.ss[eng.off];
909
if (eng.eo[eng.off] > eng.so[eng.off]) {
910
/* Something matched earlier, update */
911
eng.eo[bas] = eng.eo[eng.off];
912
eng.str = eng.bas + eng.eo[bas];
913
/* Don't need to update counter */
917
if (eng.eo[bas] < eng.so[bas])
918
eng.eo[bas] = eng.so[bas];
920
/* Try next pattern at correct offset */
921
eng.str = eng.bas + eng.eo[bas];
934
if (eng.off == bas) {
935
/* First iteration */
939
if (eng.eo[eng.off] >= eng.so[eng.off] &&
940
eng.eo[eng.off] > eng.sv[eng.off]) {
941
/* Update offset of match */
942
eng.sv[eng.off] = eng.eo[eng.off];
944
/* Skip repetition instruction */
948
/* Last try failed */
949
len = eng.cod[2] | (eng.cod[3] << 8);
950
if (eng.sv[eng.off] >= eng.so[eng.off]) {
951
/* Something matched earlier, update */
952
eng.so[bas] = eng.ss[eng.off];
953
eng.eo[bas] = eng.sv[eng.off];
954
eng.str = eng.bas + eng.eo[bas];
957
/* Do it here, so that the fail label does
958
* not need to do too expensive work for
959
* simple patterns. */
960
eng.so[bas] = eng.str - eng.bas;
962
/* Zero matches, pop stack and restart */
967
/* Pop stack and skip code */
975
/****************************************************
976
* Repetition with arguments *
977
****************************************************/
979
#define COMPLEX_REPETITION_SETUP_0() \
983
#define COMPLEX_REPETITION_SETUP() \
984
/* First iteration */ \
985
if (++eng.off >= MAX_DEPTH) \
986
return (RE_ASSERT); \
988
/* Remeber number or repetitions */ \
989
eng.re[eng.off] = 0; \
991
/* Return here for recovery if match fail */ \
992
eng.rcod[eng.off] = eng.cod; \
994
/* Setup match offsets */ \
995
if (eng.so[bas] <= eng.eo[bas]) \
996
eng.so[eng.off] = eng.eo[bas]; \
998
eng.so[eng.off] = eng.so[bas]; \
999
eng.sv[eng.off] = eng.eo[eng.off] = eng.so[eng.off] - 1;\
1000
eng.ss[eng.off] = eng.so[bas]; \
1002
/* Skip repetition instruction */ \
1005
COMPLEX_REPETITION_SETUP_0();
1006
if (eng.off == bas) {
1007
/* First iteration */
1008
COMPLEX_REPETITION_SETUP();
1011
if (eng.eo[eng.off] >= eng.so[eng.off] &&
1012
eng.eo[eng.off] > eng.sv[eng.off]) {
1013
/* Update offset of match */
1014
eng.sv[eng.off] = eng.eo[eng.off];
1016
/* Update repetition counter */
1017
if (++eng.re[eng.off] == i) {
1018
/* Matched the required times */
1019
eng.so[bas] = eng.ss[eng.off];
1020
eng.eo[bas] = eng.sv[eng.off];
1021
eng.str = eng.bas + eng.eo[bas];
1024
k = eng.cod[3] | (eng.cod[4] << 8);
1027
/* Pop stack and go for next pattern */
1032
/* Skip repetition instruction */
1036
/* Do it here, so that the fail label does
1037
* not need to do too expensive work for
1038
* simple patterns. */
1039
eng.so[bas] = eng.str - eng.bas;
1041
/* Pop stack and restart */
1049
COMPLEX_REPETITION_SETUP_0();
1050
if (eng.off == bas) {
1051
/* First iteration */
1052
COMPLEX_REPETITION_SETUP();
1055
if (eng.eo[eng.off] >= eng.so[eng.off] &&
1056
eng.eo[eng.off] > eng.sv[eng.off]) {
1057
/* Update offset of match */
1058
eng.sv[eng.off] = eng.eo[eng.off];
1060
/* Update repetition counter */
1063
/* Skip repetition instruction and try again */
1068
if (eng.re[eng.off] < i) {
1069
/* Do it here, so that the fail label does
1070
* not need to do too expensive work for
1071
* simple patterns. */
1072
eng.so[bas] = eng.str - eng.bas;
1074
/* Didn't match required number of times */
1079
/* Matched minimum number of times */
1080
eng.eo[bas] = eng.sv[eng.off];
1081
eng.str = eng.bas + eng.eo[bas];
1082
k = eng.cod[3] | (eng.cod[4] << 8);
1084
/* Update code and pop stack */
1093
COMPLEX_REPETITION_SETUP_0();
1094
if (eng.off == bas) {
1095
/* First iteration */
1096
COMPLEX_REPETITION_SETUP();
1099
if (eng.eo[eng.off] >= eng.so[eng.off] &&
1100
eng.eo[eng.off] > eng.sv[eng.off]) {
1101
/* Update offset of match */
1102
eng.sv[eng.off] = eng.eo[eng.off];
1104
/* Update repetition counter */
1105
if (++eng.re[eng.off] == i) {
1106
/* Matched the maximum times */
1107
eng.so[bas] = eng.ss[eng.off];
1108
eng.eo[bas] = eng.sv[eng.off];
1109
eng.str = eng.bas + eng.eo[bas];
1111
k = eng.cod[3] | (eng.cod[4] << 8);
1113
/* Update code and pop stack */
1119
/* Skip repetition instruction and try again */
1123
/* No matches, but zero matches are ok */
1124
k = eng.cod[3] | (eng.cod[4] << 8);
1125
if (eng.sv[eng.off] >= eng.so[eng.off]) {
1126
/* Something matched earlier, update */
1127
eng.so[bas] = eng.ss[eng.off];
1128
eng.eo[bas] = eng.sv[eng.off];
1129
eng.str = eng.bas + eng.eo[bas];
1133
if (eng.eo[bas] < eng.so[bas])
1134
eng.eo[bas] = eng.so[bas];
1136
/* Try next pattern at correct offset */
1137
eng.str = eng.bas + eng.eo[bas];
1140
/* Pop stack and update code */
1149
if (eng.off == bas) {
1150
/* First iteration */
1151
COMPLEX_REPETITION_SETUP();
1154
if (eng.eo[eng.off] >= eng.so[eng.off] &&
1155
eng.eo[eng.off] > eng.sv[eng.off]) {
1156
/* Update offset of match */
1157
eng.sv[eng.off] = eng.eo[eng.off];
1159
/* Update repetition counter */
1160
if (++eng.re[eng.off] == eng.cod[2]) {
1161
/* Matched the maximum times */
1162
eng.so[bas] = eng.ss[eng.off];
1163
eng.eo[bas] = eng.sv[eng.off];
1164
eng.str = eng.bas + eng.eo[bas];
1165
k = eng.cod[4] | (eng.cod[5] << 8);
1167
/* Update code and pop stack */
1173
/* Skip repetition instruction and try again */
1178
if (eng.re[eng.off] < eng.cod[1]) {
1179
/* Do it here, so that the fail label does
1180
* not need to do too expensive work for
1181
* simple patterns. */
1182
eng.so[bas] = eng.str - eng.bas;
1184
/* Didn't match required number of times */
1189
/* Matched minimum number of times */
1190
eng.so[bas] = eng.ss[eng.off];
1191
eng.eo[bas] = eng.sv[eng.off];
1192
eng.str = eng.bas + eng.eo[bas];
1193
k = eng.cod[4] | (eng.cod[5] << 8);
1195
/* Update code and pop stack */
1204
/****************************************************
1205
* Special repetition handling *
1206
****************************************************/
1207
case Re_AnyAnyTimes:
1208
/* code(1) + bas(1) + gbas(1) + jump(2) */
1210
if (eng.off == bas) {
1211
/* First iteration */
1212
if (++eng.off >= MAX_DEPTH)
1215
/* Return here for recovery if match fail */
1216
eng.rcod[eng.off] = eng.cod;
1218
/* If fail, test the next pattern at the same point */
1219
eng.rstr[eng.off] = eng.str;
1221
/* Setup match offsets */
1222
eng.so[eng.off] = eng.str - eng.bas;
1223
eng.eo[eng.off] = eng.so[eng.off] - 1;
1226
/* Use the repetition counter to store start of
1227
* skipped string, to later check if skipping a
1229
eng.re[eng.off] = eng.so[eng.off];
1231
/* Save start of possible previous matches */
1232
eng.ss[eng.off] = eng.so[bas];
1234
/* Skip repetition instruction */
1238
/* -1 as an unsigned char */
1239
if (eng.cod[2] != 0xff)
1240
eng.goff = eng.cod[2];
1245
ptr = eng.bas + eng.re[eng.off];
1246
str = eng.bas + eng.so[eng.off];
1247
for (; ptr < str; ptr++)
1249
eng.cod = eng.rcod[0];
1250
eng.so[0] = ptr - eng.bas + 1;
1251
eng.eo[0] = eng.so[0] - 1;
1252
eng.rstr[0] = eng.str = ptr + 1;
1256
/* If looping, don't do too many noops */
1257
eng.re[eng.off] = ptr - eng.bas;
1260
if (eng.eo[eng.off] >= eng.so[eng.off]) {
1261
/* Note that this is only true if all possibly
1262
* nested special repetitions also matched. */
1264
if (eng.goff >= 0) {
1265
if (eng.cod[5] == Re_Update)
1266
eng.gso[eng.goff] = eng.eo[bas] +
1267
(eng.so[bas] > eng.eo[bas]);
1268
else if (eng.geo[eng.goff] < eng.so[eng.off])
1269
eng.geo[eng.goff] = eng.so[eng.off];
1272
/* Jump relative offset */
1273
len = eng.cod[3] | (eng.cod[4] << 8);
1275
/* Restore offset from where started trying */
1276
eng.so[bas] = eng.ss[eng.off];
1277
eng.eo[bas] = eng.eo[eng.off];
1279
/* Pop stack and skip code */
1284
/* Only give up if the entire string was scanned */
1285
if (eng.str < eng.end) {
1286
/* Update restart point for next pattern */
1287
eng.str = ++eng.rstr[eng.off];
1289
/* Reset start of nested match */
1290
eng.so[eng.off] = eng.str - eng.bas;
1292
/* Skip repetition instruction */
1296
/* Entire string scanned and failed */
1298
/* Jump relative offset */
1299
len = eng.cod[3] | (eng.cod[4] << 8);
1301
/* Restore offset from where started trying */
1302
eng.so[bas] = eng.ss[eng.off];
1303
eng.eo[bas] = eng.ss[eng.off] - 1;
1305
/* Pop stack and skip code */
1313
/* This is significantly different than matching <re>.*<re>
1314
* because it may need to restart several times since it is
1315
* possible to find too many false positives, for example:
1316
* a.*b => once one "a" is found, scan all
1317
* the remaining string searching for a "b"
1318
* a.?b => the string may have too many "a"s, but the
1319
* first occurrences of "a" may not be followed
1320
* by any-character and a "b" or a single "b".
1324
if (eng.off == bas) {
1325
/* First iteration */
1326
if (++eng.off >= MAX_DEPTH)
1329
/* Return here for recovery if match fail */
1330
eng.rcod[eng.off] = eng.cod;
1332
/* First try without eating a byte */
1333
eng.rstr[eng.off] = eng.str;
1335
/* Remember this is the first try if match fail */
1336
eng.re[eng.off] = 0;
1338
/* Setup match offsets */
1339
eng.so[eng.off] = eng.str - eng.bas;
1340
eng.eo[eng.off] = eng.so[eng.off] - 1;
1342
/* Save start of possible previous matches */
1343
eng.ss[eng.off] = eng.so[bas];
1345
/* Skip repetition instruction */
1349
/* -1 as an unsigned char */
1350
if (eng.cod[2] != 0xff)
1351
eng.goff = eng.cod[2];
1355
if (eng.eo[eng.off] >= eng.so[eng.off]) {
1356
/* Something matched */
1358
if (eng.goff >= 0) {
1359
if (eng.cod[5] == Re_Update)
1360
eng.gso[eng.goff] = eng.eo[bas] +
1361
(eng.so[bas] > eng.eo[bas]);
1362
else if (eng.geo[eng.goff] < eng.so[eng.off])
1363
eng.geo[eng.goff] = eng.so[eng.off];
1366
/* Jump relative offset */
1367
len = eng.cod[3] | (eng.cod[4] << 8);
1369
/* Update offset of match */
1370
eng.eo[bas] = eng.eo[eng.off];
1372
/* Pop stack and skip code */
1376
else if (eng.re[eng.off] == 0 &&
1377
(!newline || eng.rstr[eng.off][1] != '\n')) {
1378
/* Try this time skiping a byte */
1381
/* Reset string, skip code and go try one time more */
1382
eng.str = ++eng.rstr[eng.off];
1386
/* Failed to match */
1388
/* Update offsets */
1389
eng.eo[bas] = eng.ss[eng.off];
1390
eng.so[bas] = eng.eo[bas] + 1;
1392
eng.str = eng.rstr[eng.off] + (eng.re[eng.off] == 0);
1394
/* Pop stack and return to toplevel code */
1396
if (eng.str >= eng.end)
1398
eng.cod = eng.rcod[bas];
1403
/* .+ almost identical to .* but requires eating at least one byte */
1406
if (eng.off == bas) {
1407
/* First iteration */
1408
if (++eng.off >= MAX_DEPTH)
1411
/* Return here for recovery if match fail */
1412
eng.rcod[eng.off] = eng.cod;
1414
/* Skip one byte for the restart string */
1415
if (newline && eng.str[0] == '\n') {
1416
/* Cannot skip newline */
1417
eng.cod = eng.rcod[0];
1418
eng.rstr[0] = ++eng.str;
1419
eng.so[0] = eng.str - eng.bas;
1420
eng.eo[0] = eng.so[0] - 1;
1424
eng.rstr[eng.off] = ++eng.str;
1426
/* Setup match offsets */
1427
eng.so[eng.off] = eng.str - eng.bas;
1428
eng.eo[eng.off] = eng.so[eng.off] - 1;
1431
/* Use the repetition counter to store start of
1432
* skipped string, to later check if skipping a
1434
eng.re[eng.off] = eng.so[eng.off];
1436
/* Save start of possible previous matches */
1437
eng.ss[eng.off] = eng.so[bas];
1439
/* Skip repetition instruction */
1443
/* -1 as an unsigned char */
1444
if (eng.cod[2] != 0xff)
1445
eng.goff = eng.cod[2];
1450
ptr = eng.bas + eng.re[eng.off];
1451
str = eng.bas + eng.so[eng.off];
1452
for (; ptr < str; ptr++)
1454
eng.cod = eng.rcod[0];
1455
eng.so[0] = ptr - eng.bas + 1;
1456
eng.eo[0] = eng.so[0] - 1;
1457
eng.rstr[0] = eng.str = ptr + 1;
1461
/* If looping, don't do too many noops */
1462
eng.re[eng.off] = ptr - eng.bas;
1465
if (eng.eo[eng.off] >= eng.so[eng.off]) {
1466
/* Note that this is only true if all possibly
1467
* nested special repetitions also matched. */
1469
if (eng.goff >= 0) {
1470
if (eng.cod[5] == Re_Update)
1471
eng.gso[eng.goff] = eng.eo[bas] +
1472
(eng.so[bas] > eng.eo[bas]);
1473
else if (eng.geo[eng.goff] < eng.so[eng.off])
1474
eng.geo[eng.goff] = eng.so[eng.off];
1477
/* Jump relative offset */
1478
len = eng.cod[3] | (eng.cod[4] << 8);
1480
/* Restore offset from where started trying */
1481
eng.so[bas] = eng.ss[eng.off];
1482
eng.eo[bas] = eng.eo[eng.off];
1484
/* Pop stack and skip code */
1489
/* Only give up if the entire string was scanned */
1490
if (eng.str < eng.end) {
1491
/* Update restart point for next pattern */
1492
eng.str = ++eng.rstr[eng.off];
1494
/* Reset start of nested match */
1495
eng.so[eng.off] = eng.str - eng.bas;
1497
/* Skip repetition instruction */
1501
/* Entire string scanned and failed */
1503
/* Jump relative offset */
1504
len = eng.cod[3] | (eng.cod[4] << 8);
1506
/* Restore offset from where started trying */
1507
eng.so[bas] = eng.ss[eng.off];
1508
eng.eo[bas] = eng.ss[eng.off] - 1;
1510
/* Pop stack and skip code */
1519
/****************************************************
1520
* Repetition matched! *
1521
****************************************************/
1523
/* eng.cod[1] is toplevel offset of repetition */
1524
if (eng.off > eng.cod[1])
1525
/* If still needs to try matches */
1526
eng.cod -= eng.cod[2];
1528
eng.cod += 3; /* + RepJump + bas + len-size */
1531
case Re_RepLongJump:
1532
/* eng.cod[1] is toplevel offset of repetition */
1533
if (eng.off > eng.cod[1])
1534
/* If still needs to try matches */
1535
eng.cod -= eng.cod[2] | (eng.cod[3] << 8);
1537
eng.cod += 4; /* + RepLongJump + bas + len-size */
1540
/****************************************************
1542
****************************************************/
1544
if (eng.eo[eng.off] >= eng.so[eng.off]) {
1545
eng.so[0] = eng.ss[eng.off];
1546
eng.eo[0] = eng.eo[eng.off];
1552
if (eng.eo[eng.off] >= eng.so[eng.off]) {
1553
eng.so[0] = eng.ss[eng.off];
1554
eng.eo[0] = eng.eo[eng.off];
1563
/* Fatal internal error */
1569
/* Surely won't match */
1571
eng.eo[0] = eng.so[0] - 1;
1578
/* If the entire string scanned */
1579
if (++eng.str > eng.end) {
1580
eng.eo[0] = eng.so[0] - 1;
1584
/* Update start of possible match after restart */
1585
if (eng.eo[0] >= eng.so[0]) {
1587
eng.str = eng.rstr[0];
1589
eng.so[0] = eng.str - eng.bas;
1590
eng.eo[0] = eng.so[eng.off] - 1;
1593
/* Just trying at next byte */
1597
/* Remember this match failed */
1598
eng.eo[eng.off] = eng.so[eng.off] - 1;
1601
eng.cod = eng.rcod[eng.off];
1606
/* If first match */
1607
if (eng.eo[eng.off] < eng.so[eng.off]) {
1609
eng.rstr[0] = eng.str + 1;
1610
eng.so[eng.off] = eng.eo[eng.off] = eng.str - eng.bas;
1612
eng.eo[eng.off] += si;
1623
if (flags & RE_STARTEND)
1624
len = pmat[0].rm_so;
1628
if (preg->cod[1] != 0xff)
1629
eng.goff = preg->cod[1];
1630
pmat[0].rm_so = eng.so[0];
1631
pmat[0].rm_eo = eng.eo[0];
1632
for (i = 1; i < nmatch; i++) {
1633
if (i - 1 <= eng.goff) {
1634
pmat[i].rm_so = eng.gso[i - 1];
1635
pmat[i].rm_eo = eng.geo[i - 1];
1643
/* Update offsets, since the match was done in a substring */
1644
j = eng.goff + 2 > nmatch ? nmatch : eng.goff + 2;
1645
for (i = 0; i < j; i++) {
1646
pmat[i].rm_so += len;
1647
pmat[i].rm_eo += len;
1652
/* Already know these values, allow compiling the regex with
1653
* RE_NOSUB to use parenthesis only for grouping, but avoiding
1654
* the runtime overhead of keeping track of the subexpression
1656
pmat[0].rm_so = eng.so[0] + len;
1657
pmat[0].rm_eo = eng.eo[0] + len;
1661
return (eng.so[0] <= eng.eo[0] ? 0 : RE_NOMATCH);
1665
reerror(int ecode, const re_cod *preg, char *ebuffer, int ebuffer_size)
1667
static char *errors[] = {
1669
"Failed to match", /* NOMATCH */
1671
/* Errors not generated */
1672
"Invalid regular expression", /* BADPAT */
1673
"Invalid collating element", /* ECOLLATE */
1674
"Invalid character class", /* ECTYPE */
1676
"`\' applied to unescapable character", /* EESCAPE */
1677
"Invalid backreference number", /* ESUBREG */
1678
"Brackets `[ ]' not balanced", /* EBRACK */
1679
"Parentheses `( )' not balanced", /* EPAREN */
1680
"Braces `{ }' not balanced", /* EBRACE */
1681
"Invalid repetition count(s) in `{ }'", /* BADBR */
1682
"Invalid character range in `[ ]'", /* ERANGE */
1683
"Out of memory", /* ESPACE */
1684
"`?', `*', or `+' operand invalid", /* BADRPT */
1685
"Empty (sub)expression", /* EMPTY */
1686
"Assertion error - you found a bug", /* ASSERT */
1687
"Invalid argument" /* INVARG */
1691
if (ecode >= 0 && ecode < sizeof(errors) / sizeof(errors[0]))
1692
str = errors[ecode];
1694
str = "Unknown error";
1696
return (snprintf(ebuffer, ebuffer_size, "%s", str));
1710
static int first = 1;
1718
for (i = '0'; i <= '7'; i++)
1719
re__alnum[i] = re__odigit[i] = re__ddigit[i] = re__xdigit[i] = 1;
1721
for (; i <= '9'; i++)
1722
re__alnum[i] = re__ddigit[i] = re__xdigit[i] = 1;
1724
for (i = 'a'; i <= 'f'; i++)
1725
re__alnum[i] = re__xdigit[i] = 1;
1726
for (; i <= 'z'; i++)
1729
for (i = 'A'; i <= 'F'; i++)
1730
re__alnum[i] = re__xdigit[i] = 1;
1731
for (; i <= 'Z'; i++)
1734
for (i = 1; i < 32; i++)
1736
re__control[127] = 1;
1737
/* Don't show tabs as control characters */
1738
re__control['\t'] = 0;
1742
rec_check(re_inf *inf, int count)
1744
if (inf->len + count >= inf->spc) {
1748
if ((spc = (count % 64)) != 0)
1750
spc += count + inf->spc;
1751
if ((cod = realloc(inf->cod, spc)) == NULL)
1752
return (inf->ecode = RE_ESPACE);
1757
return (inf->ecode);
1761
rec_code(re_inf *inf, ReCode code)
1763
if (rec_check(inf, 1) == 0)
1764
inf->cod[inf->len++] = code;
1766
return (inf->ecode);
1770
rec_byte(re_inf *inf, int value)
1772
if (rec_check(inf, 1) == 0)
1773
inf->cod[inf->len++] = value;
1775
return (inf->ecode);
1779
rec_code_byte(re_inf *inf, ReCode code, int value)
1781
if (rec_check(inf, 2) == 0) {
1782
inf->cod[inf->len++] = code;
1783
inf->cod[inf->len++] = value;
1786
return (inf->ecode);
1790
rec_length(re_inf *inf, int length)
1794
if (length >= 16384)
1795
return (inf->ecode = RE_ESPACE);
1798
hi = length & 0xff00;
1799
two = ((length > 0x7f) != 0) + 1;
1802
hi |= (lo & 0x80) != 0;
1806
if (rec_check(inf, two) == 0) {
1807
inf->cod[inf->len++] = lo;
1809
inf->cod[inf->len++] = hi >> 8;
1812
return (inf->ecode);
1816
rec_byte_byte(re_inf *inf, int value0, int value1)
1818
if (rec_check(inf, 2) == 0) {
1819
inf->cod[inf->len++] = value0;
1820
inf->cod[inf->len++] = value1;
1823
return (inf->ecode);
1827
rec_code_byte_byte(re_inf *inf, ReCode code, int value0, int value1)
1829
if (rec_check(inf, 3) == 0) {
1830
inf->cod[inf->len++] = code;
1831
inf->cod[inf->len++] = value0;
1832
inf->cod[inf->len++] = value1;
1835
return (inf->ecode);
1839
rec_build_alt(re_inf *inf, rec_alt *alt)
1841
int offset, value, bas = inf->bas + 1;
1845
if (rec_inc_spc(inf))
1846
return (inf->ecode);
1848
/* A real a list of alternatives */
1849
rec_code(inf, Re_Alt);
1851
offset = inf->len; /* Remember current offset */
1852
rec_byte_byte(inf, 0, 0); /* Reserve two bytes for retry address */
1853
while (alt && inf->ecode == 0) {
1854
if (rec_build_pat(inf, alt->pat))
1857
if (alt && inf->ecode == 0) {
1858
/* Handle (hyper)complex repetitions */
1859
if (inf->bas != bas) {
1860
/* Duplicate patterns up to end of expression */
1861
rec_build_pat(inf, inf->apat);
1862
/* Restore engine state for next alternative(s) */
1863
rec_alt_spc(inf, bas - 1);
1866
/* If the jump would be so long */
1867
if ((value = inf->len - offset) >= 16384) {
1868
inf->ecode = RE_ESPACE;
1871
inf->cod[offset] = value & 0xff;
1872
inf->cod[offset + 1] = (value & 0xff00) >> 8;
1874
rec_code(inf, Re_AltNext);
1876
rec_byte_byte(inf, 0, 0);
1879
if (inf->ecode == 0) {
1880
/* Handle (hyper)complex repetitions */
1881
if (inf->bas != bas) {
1882
/* Duplicate patterns up to end of expression */
1883
rec_build_pat(inf, inf->apat);
1884
/* Restore engine state for next alternative(s) */
1885
rec_alt_spc(inf, bas - 1);
1888
/* If the jump would be so long */
1889
if ((value = inf->len - offset) >= 16384)
1890
return (inf->ecode = RE_ESPACE);
1891
inf->cod[offset] = value & 0xff;
1892
inf->cod[offset + 1] = (value & 0xff00) >> 8;
1893
/* Last jump is here */
1894
rec_code(inf, Re_AltDone);
1899
/* Single alternative */
1900
rec_build_pat(inf, alt->pat);
1903
return (inf->ecode);
1907
rec_build_pat(re_inf *inf, rec_pat *pat)
1910
int length, offset = 0, distance, jump = 0, bas = 0;
1912
while (pat && inf->ecode == 0) {
1915
if (pat->type == Rep_Group && !inf->par && rec_code(inf, Re_Open))
1916
return (inf->ecode);
1917
if (rec_inc_spc(inf))
1918
return (inf->ecode);
1920
if (rec_build_rep(inf, pat->rep))
1922
/* Reserve space to jump after repetition done */
1924
rec_byte_byte(inf, 0, 0);
1926
switch (pat->type) {
1927
case Rep_AnyAnyTimes:
1929
case Rep_AnyAtLeast:
1930
if (rec_add_spc(inf, pat->type == Rep_AnyMaybe))
1931
return (inf->ecode);
1932
if (rec_code(inf, (ReCode)pat->type) == 0 &&
1933
rec_byte(inf, inf->bas - 1) == 0 &&
1934
rec_byte(inf, inf->ref - 1) == 0)
1938
case Rep_LiteralNot:
1939
case Rep_SearchLiteral:
1940
rec_code_byte(inf, (ReCode)pat->type, pat->data.chr);
1942
case Rep_CaseLiteral:
1943
case Rep_CaseLiteralNot:
1944
case Rep_SearchCaseLiteral:
1945
rec_code_byte_byte(inf, (ReCode)pat->type,
1946
pat->data.cse.lower, pat->data.cse.upper);
1950
if (rec_code(inf, (ReCode)pat->type) == 0)
1951
rec_build_rng(inf, pat->data.rng);
1954
case Rep_SearchString:
1955
case Rep_CaseString:
1956
case Rep_SearchCaseString:
1957
rec_code(inf, (ReCode)pat->type);
1958
length = strlen((char*)pat->data.str);
1959
if (rec_length(inf, length) == 0 && rec_check(inf, length) == 0) {
1960
memcpy(inf->cod + inf->len, pat->data.str, length);
1965
case Rep_AnyEatAnyTimes:
1966
case Rep_AnyEatMaybe:
1967
case Rep_AnyEatAtLeast:
1983
case Rep_ControlNot:
1988
rec_code(inf, (ReCode)pat->type);
1991
rec_code_byte(inf, Re_Backref, pat->data.chr);
1994
if (pat->rep == NULL && !inf->par && rec_code(inf, Re_Open))
1997
inf->apat = pat->next;
1998
rec_build_grp(inf, pat->data.grp);
2001
case Rep_StringList:
2002
rec_build_stl(inf, pat->data.stl);
2007
if (rec_dec_spc(inf))
2008
return (inf->ecode);
2010
if (rec_rep_spc(inf, bas))
2011
return (inf->ecode);
2013
distance = inf->len - offset;
2014
if (distance > 255) {
2015
if (rec_code(inf, Re_RepLongJump) ||
2016
rec_byte(inf, inf->bas) ||
2017
rec_byte(inf, distance & 0xff) ||
2018
rec_byte(inf, (distance & 0xff00) >> 8))
2021
else if (rec_code(inf, Re_RepJump) ||
2022
rec_byte(inf, inf->bas) ||
2023
rec_byte(inf, distance))
2025
distance = inf->len - offset;
2026
inf->cod[jump] = distance & 0xff;
2027
inf->cod[jump + 1] = (distance & 0xff00) >> 8;
2032
return (inf->ecode);
2036
rec_build_rng(re_inf *inf, rec_rng *rng)
2038
if (rec_check(inf, sizeof(rng->range)) == 0) {
2039
memcpy(inf->cod + inf->len, rng->range, sizeof(rng->range));
2040
inf->len += sizeof(rng->range);
2043
return (inf->ecode);
2047
rec_build_grp(re_inf *inf, rec_grp *grp)
2051
if (!(inf->flags & RE_NOSUB)) {
2055
if (rec_build_alt(inf, grp->alt) == 0) {
2058
rec_code_byte(inf, Re_Update, inf->ref - 1);
2060
rec_code(inf, Re_Close);
2066
rec_build_alt(inf, grp->alt);
2068
return (inf->ecode);
2072
rec_build_stl(re_inf *inf, rec_stl *stl)
2077
/* Calculate jump distance information */
2078
rlen = stl->tlen + stl->nstrs + 4;
2079
/* + code + nstrs + place-offset + data-length */
2081
if (stl->nstrs >= LARGE_STL_COUNT) {
2082
rlen += 511; /* Don't write number of strings */
2083
code = stl->type == Rep_StringList ?
2084
Re_LargeStringList : Re_LargeCaseStringList;
2087
code = (ReCode)stl->type;
2090
return (inf->ecode = RE_ESPACE);
2091
if (rec_check(inf, rlen) ||
2092
rec_code(inf, code))
2093
return (inf->ecode);
2095
/* Space is allocated, just write the data */
2096
if (stl->nstrs < LARGE_STL_COUNT)
2097
inf->cod[inf->len++] = stl->nstrs;
2099
inf->cod[inf->len++] = rlen & 0xff;
2100
inf->cod[inf->len++] = (rlen & 0xff00) >> 8;
2102
if (stl->nstrs < LARGE_STL_COUNT) {
2103
for (i = 0; i < stl->nstrs; i++)
2104
inf->cod[inf->len++] = stl->lens[i];
2105
for (i = 0; i < stl->nstrs; i++) {
2108
memcpy(inf->cod + inf->len, stl->strs[i], len);
2113
inf->cod[inf->len++] = (long)stl->strs[i];
2115
inf->cod[inf->len++] = (long)stl->strs[i] & 0xff;
2116
inf->cod[inf->len++] = ((long)stl->strs[i] & 0xff00) >> 8;
2122
/* The string length goes before the string itself */
2125
/* Fill everything with an invalid jump address */
2126
memset(inf->cod + inf->len, 0xff, 512);
2127
for (i = len = 0, j = -1; i < stl->nstrs; i++) {
2128
chl = stl->lens[i] > 2 ? stl->strs[i][0] : (long)stl->strs[i] & 0xff;
2130
inf->cod[inf->len + (chl << 1)] = len & 0xff;
2131
inf->cod[inf->len + (chl << 1) + 1] = (len & 0xff00) >> 8;
2132
if (code == Re_LargeCaseStringList) {
2133
chu = stl->lens[i] > 2 ?
2134
stl->strs[i][1] : ((long)(stl->strs[i]) & 0xff00) >> 8;
2135
inf->cod[inf->len + (chu << 1)] = len & 0xff;
2136
inf->cod[inf->len + (chu << 1) + 1] = (len & 0xff00) >> 8;
2140
len += stl->lens[i] + 1;
2144
for (i = 0; i < stl->nstrs; i++) {
2146
inf->cod[inf->len++] = len;
2148
memcpy(inf->cod + inf->len, stl->strs[i], len);
2153
inf->cod[inf->len++] = (long)stl->strs[i];
2155
inf->cod[inf->len++] = (long)stl->strs[i] & 0xff;
2156
inf->cod[inf->len++] = ((long)stl->strs[i] & 0xff00) >> 8;
2162
return (inf->ecode);
2166
rec_build_rep(re_inf *inf, rec_rep *rep)
2169
switch (rep->type) {
2173
rec_code(inf, (ReCode)rep->type);
2176
if (rec_code(inf, Re_Exact) == 0)
2177
rec_byte(inf, rep->mine);
2180
if (rec_code(inf, Re_Min) == 0)
2181
rec_byte(inf, rep->mine);
2184
if (rec_code(inf, Re_Max) == 0)
2185
rec_byte(inf, rep->maxc);
2188
if (rec_code(inf, Re_MinMax) == 0 &&
2189
rec_byte(inf, rep->mine) == 0)
2190
rec_byte(inf, rep->maxc);
2193
/* It is incremented in rec_build_pat */
2194
rec_byte(inf, inf->bas - 1);
2197
return (inf->ecode);
2201
rec_inc_spc(re_inf *inf)
2203
if (++inf->bas >= MAX_DEPTH)
2204
return (inf->ecode = RE_ESPACE);
2206
return (inf->ecode);
2210
rec_dec_spc(re_inf *inf)
2213
return (inf->ecode = RE_ASSERT);
2215
return (inf->ecode);
2219
rec_add_spc(re_inf *inf, int maybe)
2221
if (++inf->bas >= MAX_DEPTH)
2222
return (inf->ecode = RE_ESPACE);
2223
inf->sp[inf->bas] = maybe + 1;
2225
return (inf->ecode);
2228
/* Could be joined with rec_rep_spc, code almost identical */
2230
rec_alt_spc(re_inf *inf, int top)
2232
int distance, i, bas = inf->bas;
2234
while ((inf->bas > top) && inf->sp[inf->bas]) {
2235
/* Jump to this repetition for cleanup */
2236
distance = inf->len - inf->sr[inf->bas];
2238
/* This will generate a jump to a jump decision opcode */
2239
inf->sj[inf->bas] = inf->len;
2241
if (distance > 255) {
2242
if (rec_code(inf, Re_RepLongJump) ||
2243
rec_byte(inf, inf->bas - 1) ||
2244
rec_byte(inf, distance & 0xff) ||
2245
rec_byte(inf, (distance & 0xff00) >> 8))
2248
else if (rec_code(inf, Re_RepJump) ||
2249
rec_byte(inf, inf->bas - 1) ||
2250
rec_byte(inf, distance))
2253
/* Top of stack value before repetition, or end condition value */
2259
if (inf->ecode == 0 && i <= bas && inf->sp[i]) {
2260
/* Only the repetition at the bottom jump to code after testing
2261
* all possibilities */
2262
distance = inf->len - inf->sr[i];
2263
inf->cod[inf->sr[i] + 3] = distance & 0xff;
2264
inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2266
/* The bottom jump is here */
2267
if (rec_code(inf, inf->sp[i] == 1 ? Re_DoneIf : Re_MaybeDone))
2268
return (inf->ecode);
2270
/* Generate jumps to the previous special repetition */
2271
for (++i; i <= bas; i++) {
2273
distance = inf->sj[i] - inf->sr[i];
2274
inf->cod[inf->sr[i] + 3] = distance & 0xff;
2275
inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2280
return (inf->ecode);
2284
rec_rep_spc(re_inf *inf, int top)
2286
int distance, i, bas = inf->bas;
2288
while (inf->bas > top) {
2289
if (inf->sp[inf->bas]) {
2290
/* Jump to this repetition for cleanup */
2291
distance = inf->len - inf->sr[inf->bas];
2293
/* This will generate a jump to a jump decision opcode */
2294
inf->sj[inf->bas] = inf->len;
2296
if (distance > 255) {
2297
if (rec_code(inf, Re_RepLongJump) ||
2298
rec_byte(inf, inf->bas - 1) ||
2299
rec_byte(inf, distance & 0xff) ||
2300
rec_byte(inf, (distance & 0xff00) >> 8))
2303
else if (rec_code(inf, Re_RepJump) ||
2304
rec_byte(inf, inf->bas - 1) ||
2305
rec_byte(inf, distance))
2309
/* Top of stack value before repetition, or end condition value */
2313
/* Find first special repetition offset. XXX This should be a noop */
2314
for (i = 0; i < bas; i++)
2318
if (inf->ecode == 0 && i <= bas && inf->sp[i]) {
2319
/* Only the repetition at the bottom jump to code after testing
2320
* all possibilities */
2321
distance = inf->len - inf->sr[i];
2322
inf->cod[inf->sr[i] + 3] = distance & 0xff;
2323
inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2325
/* Generate jumps to the previous special repetition */
2326
for (++i; i <= bas; i++) {
2328
distance = inf->sj[i] - inf->sr[i];
2329
inf->cod[inf->sr[i] + 3] = distance & 0xff;
2330
inf->cod[inf->sr[i] + 4] = (distance & 0xff00) >> 8;
2335
return (inf->ecode);
2339
rec_off_spc(re_inf *inf)
2341
/* The jump address before the three bytes instruction */
2342
inf->sr[inf->bas] = inf->len - 3;
2343
/* Don't know yet where to go after done with the special
2344
* repetition, just reserve two bytes for the jump address. */
2345
return (rec_byte_byte(inf, 0, 0));
2350
redump(re_cod *code)
2353
unsigned char *cod = code->cod, *stl;
2355
if (cod[0] & RE_NOSUB)
2357
if (cod[0] & RE_NEWLINE)
2358
printf("Newline\n");
2361
printf("%d backrefs\n", cod[0] + 1);
2372
printf("Update (%d)", (int)*cod++);
2376
i = cod[0] | cod[1];
2382
i = cod[0] | cod[1];
2390
printf("-> Anytimes %d", (int)*cod++);
2391
i = cod[0] | (cod[1] << 8);
2395
case Re_AnyEatAnyTimes:
2396
printf("Any-eat-anytimes");
2398
case Re_AnyAnyTimes:
2399
printf("-> Any-anytimes %d", (int)*cod++);
2400
printf(" (%d)", (int)*cod++);
2401
i = cod[0] | (cod[1] << 8);
2405
case Re_AnyEatMaybe:
2406
printf("Any-eat-maybe");
2409
printf("-> Any-maybe %d", (int)*cod++);
2410
printf(" (%d)", (int)*cod++);
2411
i = cod[0] | (cod[1] << 8);
2416
printf("-> Any-atleast %d", (int)*cod++);
2417
printf(" (%d)", (int)*cod++);
2418
i = cod[0] | (cod[1] << 8);
2422
case Re_AnyEatAtLeast:
2423
printf("Any-eat-atleast");
2426
printf("-> Maybe %d", (int)*cod++);
2427
i = cod[0] | (cod[1] << 8);
2432
printf("-> Atleast %d", (int)*cod++);
2433
i = cod[0] | (cod[1] << 8);
2438
printf("-> Exact ");
2441
printf(" %d", (int)*cod++);
2442
i = cod[0] | (cod[1] << 8);
2450
printf(" %d", (int)*cod++);
2451
i = cod[0] | (cod[1] << 8);
2459
printf(" %d", (int)*cod++);
2460
i = cod[0] | (cod[1] << 8);
2465
printf("-> Min-max ");
2470
printf(" %d", (int)*cod++);
2471
i = cod[0] | (cod[1] << 8);
2476
printf("<- Rep-jump %d ", (int)*cod++);
2480
case Re_RepLongJump:
2481
printf("<- Rep-long-jump %d ", (int)*cod++);
2482
i = cod[0] | (cod[1] << 8);
2492
printf("Odigit-not");
2498
printf("Digit-not");
2504
printf("Xdigit-not");
2510
printf("Space-not");
2528
printf("Alnum-not");
2534
printf("Control-not");
2552
printf("Range-not ");
2554
for (i = 0; i < 256; i += 32) {
2555
for (j = k = 0; j < 32; j++)
2556
k |= (*cod++ & 1) << (31 - j);
2561
printf("Literal %c", *cod++);
2564
printf("Literal-not %c", *cod++);
2566
case Re_SearchLiteral:
2567
printf("Search-literal %c", *cod++);
2569
case Re_CaseLiteral:
2570
printf("Case-literal %c", *cod++);
2573
case Re_CaseLiteralNot:
2574
printf("Case-literal-not %c", *cod++);
2577
case Re_SearchCaseLiteral:
2578
printf("Search-case-literal %c", *cod++);
2584
case Re_SearchString:
2585
printf("Search-string ");
2588
printf("Case-string ");
2590
case Re_SearchCaseString:
2591
printf("Search-case-string ");
2595
i = (i & 0x7f) | (*cod++ << 7);
2596
for (j = 0; j < i; j++)
2600
printf("String-list");
2602
case Re_CaseStringList:
2603
printf("Case-string-list");
2608
for (i = 0; i < j; i++) {
2610
putchar(i ? ',' : ' ');
2611
fwrite(stl, k, 1, stdout);
2616
case Re_LargeStringList:
2617
printf("Large-string-list");
2619
i = cod[0] | (cod[1] << 8);
2621
for (i = 0, cod += 514; cod < stl; i++) {
2623
putchar(i ? ',' : ' ');
2624
fwrite(cod, k, 1, stdout);
2629
case Re_LargeCaseStringList:
2630
printf("Large-case-string-list");
2631
goto large_string_list;
2633
printf("Backref %d", (int)*cod++);
2639
printf("Maybe-done");