492
by jan at mysql
switch to lpeg to parse the PROXY ... queries |
1 |
/*
|
2 |
** $Id: lpeg.c,v 1.86 2008/03/07 17:20:19 roberto Exp $
|
|
3 |
** LPeg - PEG pattern matching for Lua
|
|
4 |
** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
|
|
5 |
** written by Roberto Ierusalimschy
|
|
6 |
*/
|
|
7 |
||
8 |
||
9 |
#include <assert.h> |
|
10 |
#include <limits.h> |
|
11 |
#include <stdio.h> |
|
12 |
#include <string.h> |
|
13 |
||
14 |
#include "lua.h" |
|
15 |
#include "lauxlib.h" |
|
16 |
||
17 |
||
18 |
#define VERSION "0.8"
|
|
19 |
#define PATTERN_T "pattern"
|
|
20 |
||
21 |
/* maximum call/backtrack levels */
|
|
22 |
#define MAXBACK 400
|
|
23 |
||
24 |
/* initial size for capture's list */
|
|
25 |
#define IMAXCAPTURES 600
|
|
26 |
||
27 |
||
28 |
/* index, on Lua stack, for subject */
|
|
29 |
#define SUBJIDX 2
|
|
30 |
||
31 |
/* number of fixed arguments to 'match' (before capture arguments) */
|
|
32 |
#define FIXEDARGS 3
|
|
33 |
||
34 |
/* index, on Lua stack, for substitution value cache */
|
|
35 |
#define subscache(ptop) ((ptop) + 1)
|
|
36 |
||
37 |
/* index, on Lua stack, for capture list */
|
|
38 |
#define caplistidx(ptop) ((ptop) + 2)
|
|
39 |
||
40 |
/* index, on Lua stack, for pattern's fenv */
|
|
41 |
#define penvidx(ptop) ((ptop) + 3)
|
|
42 |
||
43 |
||
44 |
||
45 |
typedef unsigned char byte; |
|
46 |
||
47 |
||
48 |
#define CHARSETSIZE ((UCHAR_MAX/CHAR_BIT) + 1)
|
|
49 |
||
50 |
||
51 |
typedef byte Charset[CHARSETSIZE]; |
|
52 |
||
53 |
||
54 |
typedef const char *(*PattFunc) (const void *ud, |
|
55 |
const char *o, /* string start */ |
|
56 |
const char *s, /* current position */ |
|
57 |
const char *e); /* string end */ |
|
58 |
||
59 |
||
60 |
/* Virtual Machine's instructions */
|
|
61 |
typedef enum Opcode { |
|
62 |
IAny, IChar, ISet, IZSet, |
|
63 |
ITestAny, ITestChar, ITestSet, ITestZSet, |
|
64 |
ISpan, ISpanZ, |
|
65 |
IRet, IEnd, |
|
66 |
IChoice, IJmp, ICall, IOpenCall, |
|
67 |
ICommit, IPartialCommit, IBackCommit, IFailTwice, IFail, IGiveup, |
|
68 |
IFunc, |
|
69 |
IFullCapture, IEmptyCapture, IEmptyCaptureIdx, |
|
70 |
IOpenCapture, ICloseCapture, ICloseRunTime |
|
71 |
} Opcode; |
|
72 |
||
73 |
||
74 |
#define ISJMP 1
|
|
75 |
#define ISCHECK 2
|
|
76 |
#define ISTEST 4
|
|
77 |
#define ISNOFAIL 8
|
|
78 |
#define ISCAPTURE 16
|
|
79 |
#define ISMOVABLE 32
|
|
80 |
#define ISFENVOFF 64
|
|
81 |
#define HASCHARSET 128
|
|
82 |
||
83 |
static const byte opproperties[] = { |
|
84 |
/* IAny */ ISCHECK, |
|
85 |
/* IChar */ ISCHECK, |
|
86 |
/* ISet */ ISCHECK | HASCHARSET, |
|
87 |
/* IZSet */ ISCHECK | HASCHARSET, |
|
88 |
/* ITestAny */ ISJMP | ISTEST | ISNOFAIL, |
|
89 |
/* ITestChar */ ISJMP | ISTEST | ISNOFAIL, |
|
90 |
/* ITestSet */ ISJMP | ISTEST | ISNOFAIL | HASCHARSET, |
|
91 |
/* ITestZSet */ ISJMP | ISTEST | ISNOFAIL | HASCHARSET, |
|
92 |
/* ISpan */ ISNOFAIL | HASCHARSET, |
|
93 |
/* ISpanZ */ ISNOFAIL | HASCHARSET, |
|
94 |
/* IRet */ 0, |
|
95 |
/* IEnd */ 0, |
|
96 |
/* IChoice */ ISJMP, |
|
97 |
/* IJmp */ ISJMP | ISNOFAIL, |
|
98 |
/* ICall */ ISJMP, |
|
99 |
/* IOpenCall */ ISFENVOFF, |
|
100 |
/* ICommit */ ISJMP, |
|
101 |
/* IPartialCommit */ ISJMP, |
|
102 |
/* IBackCommit */ ISJMP, |
|
103 |
/* IFailTwice */ 0, |
|
104 |
/* IFail */ 0, |
|
105 |
/* IGiveup */ 0, |
|
106 |
/* IFunc */ 0, |
|
107 |
/* IFullCapture */ ISCAPTURE | ISNOFAIL | ISFENVOFF, |
|
108 |
/* IEmptyCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE, |
|
109 |
/* IEmptyCaptureIdx */ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF, |
|
110 |
/* IOpenCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF, |
|
111 |
/* ICloseCapture */ ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF, |
|
112 |
/* ICloseRunTime */ ISCAPTURE | ISFENVOFF |
|
113 |
};
|
|
114 |
||
115 |
||
116 |
typedef union Instruction { |
|
117 |
struct Inst { |
|
118 |
byte code; |
|
119 |
byte aux; |
|
120 |
short offset; |
|
121 |
} i; |
|
122 |
PattFunc f; |
|
123 |
byte buff[1]; |
|
124 |
} Instruction; |
|
125 |
||
126 |
static const Instruction giveup = {{IGiveup, 0, 0}}; |
|
127 |
||
128 |
#define getkind(op) ((op)->i.aux & 0xF)
|
|
129 |
#define getoff(op) (((op)->i.aux >> 4) & 0xF)
|
|
130 |
||
131 |
#define dest(p,x) ((x) + ((p)+(x))->i.offset)
|
|
132 |
||
133 |
#define MAXOFF 0xF
|
|
134 |
||
135 |
#define isprop(op,p) (opproperties[(op)->i.code] & (p))
|
|
136 |
#define isjmp(op) isprop(op, ISJMP)
|
|
137 |
#define iscapture(op) isprop(op, ISCAPTURE)
|
|
138 |
#define ischeck(op) isprop(op, ISCHECK)
|
|
139 |
#define istest(op) isprop(op, ISTEST)
|
|
140 |
#define isnofail(op) isprop(op, ISNOFAIL)
|
|
141 |
#define ismovable(op) isprop(op, ISMOVABLE)
|
|
142 |
#define isfenvoff(op) isprop(op, ISFENVOFF)
|
|
143 |
#define hascharset(op) isprop(op, HASCHARSET)
|
|
144 |
||
145 |
||
146 |
/* kinds of captures */
|
|
147 |
typedef enum CapKind { |
|
148 |
Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction, |
|
149 |
Cquery, Cstring, Csubst, Caccum, Cruntime |
|
150 |
} CapKind; |
|
151 |
||
152 |
#define iscapnosize(k) ((k) == Cposition || (k) == Cconst)
|
|
153 |
||
154 |
||
155 |
typedef struct Capture { |
|
156 |
const char *s; /* position */ |
|
157 |
short idx; |
|
158 |
byte kind; |
|
159 |
byte siz; |
|
160 |
} Capture; |
|
161 |
||
162 |
||
163 |
/* maximum size (in elements) for a pattern */
|
|
164 |
#define MAXPATTSIZE (SHRT_MAX - 10)
|
|
165 |
||
166 |
||
167 |
/* size (in elements) for an instruction plus extra l bytes */
|
|
168 |
#define instsize(l) (((l) - 1)/sizeof(Instruction) + 2)
|
|
169 |
||
170 |
||
171 |
/* size (in elements) for a ISet instruction */
|
|
172 |
#define CHARSETINSTSIZE instsize(CHARSETSIZE)
|
|
173 |
||
174 |
||
175 |
||
176 |
#define loopset(v,b) { int v; for (v = 0; v < CHARSETSIZE; v++) b; }
|
|
177 |
||
178 |
||
179 |
#define testchar(st,c) (((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))
|
|
180 |
#define setchar(st,c) ((st)[(c) >> 3] |= (1 << ((c) & 7)))
|
|
181 |
||
182 |
||
183 |
||
184 |
static int sizei (const Instruction *i) { |
|
185 |
if (hascharset(i)) return CHARSETINSTSIZE; |
|
186 |
else if (i->i.code == IFunc) return i->i.offset; |
|
187 |
else return 1; |
|
188 |
}
|
|
189 |
||
190 |
||
191 |
static const char *val2str (lua_State *L, int idx) { |
|
192 |
const char *k = lua_tostring(L, idx); |
|
193 |
if (k != NULL) |
|
194 |
return lua_pushfstring(L, "rule '%s'", k); |
|
195 |
else
|
|
196 |
return lua_pushfstring(L, "rule <a %s>", luaL_typename(L, -1)); |
|
197 |
}
|
|
198 |
||
199 |
||
200 |
static int getposition (lua_State *L, int t, int i) { |
|
201 |
int res; |
|
202 |
lua_getfenv(L, -1); |
|
203 |
lua_rawgeti(L, -1, i); /* get key from pattern's environment */ |
|
204 |
lua_gettable(L, t); /* get position from positions table */ |
|
205 |
res = lua_tointeger(L, -1); |
|
206 |
if (res == 0) { /* key has no registered position? */ |
|
207 |
lua_rawgeti(L, -2, i); /* get key again */ |
|
208 |
return luaL_error(L, "%s is not defined in given grammar", val2str(L, -1)); |
|
209 |
}
|
|
210 |
lua_pop(L, 2); /* remove environment and position */ |
|
211 |
return res; |
|
212 |
}
|
|
213 |
||
214 |
||
215 |
||
216 |
/*
|
|
217 |
** {======================================================
|
|
218 |
** Printing patterns
|
|
219 |
** =======================================================
|
|
220 |
*/
|
|
221 |
||
222 |
||
223 |
static void printcharset (const Charset st) { |
|
224 |
int i; |
|
225 |
printf("["); |
|
226 |
for (i = 0; i <= UCHAR_MAX; i++) { |
|
227 |
int first = i; |
|
228 |
while (testchar(st, i) && i <= UCHAR_MAX) i++; |
|
229 |
if (i - 1 == first) /* unary range? */ |
|
230 |
printf("(%02x)", first); |
|
231 |
else if (i - 1 > first) /* non-empty range? */ |
|
232 |
printf("(%02x-%02x)", first, i - 1); |
|
233 |
}
|
|
234 |
printf("]"); |
|
235 |
}
|
|
236 |
||
237 |
||
238 |
static void printcapkind (int kind) { |
|
239 |
const char *const modes[] = { |
|
240 |
"close", "position", "constant", "backref", |
|
241 |
"argument", "simple", "table", "function", |
|
242 |
"query", "string", "substitution", "accumulator", |
|
243 |
"runtime"}; |
|
244 |
printf("%s", modes[kind]); |
|
245 |
}
|
|
246 |
||
247 |
||
248 |
static void printjmp (const Instruction *op, const Instruction *p) { |
|
249 |
printf("-> %ld", (long)(dest(0, p) - op)); |
|
250 |
}
|
|
251 |
||
252 |
||
253 |
static void printinst (const Instruction *op, const Instruction *p) { |
|
254 |
const char *const names[] = { |
|
255 |
"any", "char", "set", "zset", |
|
256 |
"testany", "testchar", "testset", "testzset", |
|
257 |
"span", "spanz", |
|
258 |
"ret", "end", |
|
259 |
"choice", "jmp", "call", "open_call", |
|
260 |
"commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup", |
|
261 |
"func", |
|
262 |
"fullcapture", "emptycapture", "emptycaptureidx", "opencapture", |
|
263 |
"closecapture", "closeruntime" |
|
264 |
};
|
|
265 |
printf("%02ld: %s ", (long)(p - op), names[p->i.code]); |
|
266 |
switch ((Opcode)p->i.code) { |
|
267 |
case IChar: { |
|
268 |
printf("'%c'", p->i.aux); |
|
269 |
break; |
|
270 |
}
|
|
271 |
case ITestChar: { |
|
272 |
printf("'%c'", p->i.aux); |
|
273 |
printjmp(op, p); |
|
274 |
break; |
|
275 |
}
|
|
276 |
case IAny: { |
|
277 |
printf("* %d", p->i.aux); |
|
278 |
break; |
|
279 |
}
|
|
280 |
case ITestAny: { |
|
281 |
printf("* %d", p->i.aux); |
|
282 |
printjmp(op, p); |
|
283 |
break; |
|
284 |
}
|
|
285 |
case IFullCapture: case IOpenCapture: |
|
286 |
case IEmptyCapture: case IEmptyCaptureIdx: |
|
287 |
case ICloseCapture: case ICloseRunTime: { |
|
288 |
printcapkind(getkind(p)); |
|
289 |
printf("(n = %d) (off = %d)", getoff(p), p->i.offset); |
|
290 |
break; |
|
291 |
}
|
|
292 |
case ISet: case IZSet: case ISpan: case ISpanZ: { |
|
293 |
printcharset((p+1)->buff); |
|
294 |
break; |
|
295 |
}
|
|
296 |
case ITestSet: case ITestZSet: { |
|
297 |
printcharset((p+1)->buff); |
|
298 |
printjmp(op, p); |
|
299 |
break; |
|
300 |
}
|
|
301 |
case IOpenCall: { |
|
302 |
printf("-> %d", p->i.offset); |
|
303 |
break; |
|
304 |
}
|
|
305 |
case IChoice: { |
|
306 |
printjmp(op, p); |
|
307 |
printf(" (%d)", p->i.aux); |
|
308 |
break; |
|
309 |
}
|
|
310 |
case IJmp: case ICall: case ICommit: |
|
311 |
case IPartialCommit: case IBackCommit: { |
|
312 |
printjmp(op, p); |
|
313 |
break; |
|
314 |
}
|
|
315 |
default: break; |
|
316 |
}
|
|
317 |
printf("\n"); |
|
318 |
}
|
|
319 |
||
320 |
||
321 |
static void printpatt (Instruction *p) { |
|
322 |
Instruction *op = p; |
|
323 |
for (;;) { |
|
324 |
printinst(op, p); |
|
325 |
if (p->i.code == IEnd) break; |
|
326 |
p += sizei(p); |
|
327 |
}
|
|
328 |
}
|
|
329 |
||
330 |
||
331 |
static void printcap (Capture *cap) { |
|
332 |
printcapkind(cap->kind); |
|
333 |
printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s); |
|
334 |
}
|
|
335 |
||
336 |
||
337 |
static void printcaplist (Capture *cap) { |
|
338 |
for (; cap->s; cap++) printcap(cap); |
|
339 |
}
|
|
340 |
||
341 |
/* }====================================================== */
|
|
342 |
||
343 |
||
344 |
||
345 |
||
346 |
/*
|
|
347 |
** {======================================================
|
|
348 |
** Virtual Machine
|
|
349 |
** =======================================================
|
|
350 |
*/
|
|
351 |
||
352 |
||
353 |
typedef struct Stack { |
|
354 |
const char *s; |
|
355 |
const Instruction *p; |
|
356 |
int caplevel; |
|
357 |
} Stack; |
|
358 |
||
359 |
||
360 |
static int runtimecap (lua_State *L, Capture *close, Capture *ocap, |
|
361 |
const char *o, const char *s, int ptop); |
|
362 |
||
363 |
||
364 |
static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { |
|
365 |
Capture *newc; |
|
366 |
if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) |
|
367 |
luaL_error(L, "too many captures"); |
|
368 |
newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); |
|
369 |
memcpy(newc, cap, captop * sizeof(Capture)); |
|
370 |
lua_replace(L, caplistidx(ptop)); |
|
371 |
return newc; |
|
372 |
}
|
|
373 |
||
374 |
||
375 |
static void adddyncaptures (const char *s, Capture *base, int n, int fd) { |
|
376 |
int i; |
|
377 |
assert(base[0].kind == Cruntime && base[0].siz == 0); |
|
378 |
base[0].idx = fd; /* first returned capture */ |
|
379 |
for (i = 1; i < n; i++) { /* add extra captures */ |
|
380 |
base[i].siz = 1; /* mark it as closed */ |
|
381 |
base[i].s = s; |
|
382 |
base[i].kind = Cruntime; |
|
383 |
base[i].idx = fd + i; /* stack index */ |
|
384 |
}
|
|
385 |
base[n].kind = Cclose; /* add closing entry */ |
|
386 |
base[n].siz = 1; |
|
387 |
base[n].s = s; |
|
388 |
}
|
|
389 |
||
390 |
||
391 |
static const char *match (lua_State *L, |
|
392 |
const char *o, const char *s, const char *e, |
|
393 |
Instruction *op, Capture *capture, int ptop) { |
|
394 |
Stack stackbase[MAXBACK]; |
|
395 |
Stack *stacklimit = stackbase + MAXBACK; |
|
396 |
Stack *stack = stackbase; /* point to first empty slot in stack */ |
|
397 |
int capsize = IMAXCAPTURES; |
|
398 |
int captop = 0; /* point to first empty slot in captures */ |
|
399 |
const Instruction *p = op; |
|
400 |
stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++; |
|
401 |
for (;;) { |
|
402 |
#if defined(DEBUG)
|
|
403 |
printf("s: |%s| stck: %d c: %d ", s, stack - stackbase, captop); |
|
404 |
printinst(op, p); |
|
405 |
#endif
|
|
406 |
switch ((Opcode)p->i.code) { |
|
407 |
case IEnd: { |
|
408 |
assert(stack == stackbase + 1); |
|
409 |
capture[captop].kind = Cclose; |
|
410 |
capture[captop].s = NULL; |
|
411 |
return s; |
|
412 |
}
|
|
413 |
case IGiveup: { |
|
414 |
assert(stack == stackbase); |
|
415 |
return NULL; |
|
416 |
}
|
|
417 |
case IRet: { |
|
418 |
assert(stack > stackbase && (stack - 1)->s == NULL); |
|
419 |
p = (--stack)->p; |
|
420 |
continue; |
|
421 |
}
|
|
422 |
case IAny: { |
|
423 |
int n = p->i.aux; |
|
424 |
if (n > e - s) goto fail; |
|
425 |
else { p++; s += n; } |
|
426 |
continue; |
|
427 |
}
|
|
428 |
case ITestAny: { |
|
429 |
int n = p->i.aux; |
|
430 |
if (n > e - s) p += p->i.offset; |
|
431 |
else { p++; s += n; } |
|
432 |
continue; |
|
433 |
}
|
|
434 |
case IChar: { |
|
435 |
if ((byte)*s != p->i.aux || s >= e) goto fail; |
|
436 |
else { p++; s++; } |
|
437 |
continue; |
|
438 |
}
|
|
439 |
case ITestChar: { |
|
440 |
if ((byte)*s != p->i.aux || s >= e) p += p->i.offset; |
|
441 |
else { p++; s++; } |
|
442 |
continue; |
|
443 |
}
|
|
444 |
case ISet: { |
|
445 |
int c = (unsigned char)*s; |
|
446 |
if (!testchar((p+1)->buff, c)) goto fail; |
|
447 |
else { p += CHARSETINSTSIZE; s++; } |
|
448 |
continue; |
|
449 |
}
|
|
450 |
case ITestSet: { |
|
451 |
int c = (unsigned char)*s; |
|
452 |
if (!testchar((p+1)->buff, c)) p += p->i.offset; |
|
453 |
else { p += CHARSETINSTSIZE; s++; } |
|
454 |
continue; |
|
455 |
}
|
|
456 |
case IZSet: { |
|
457 |
int c = (unsigned char)*s; |
|
458 |
if (!testchar((p+1)->buff, c) || s >= e) goto fail; |
|
459 |
else { p += CHARSETINSTSIZE; s++; } |
|
460 |
continue; |
|
461 |
}
|
|
462 |
case ITestZSet: { |
|
463 |
int c = (unsigned char)*s; |
|
464 |
if (!testchar((p+1)->buff, c) || s >= e) p += p->i.offset; |
|
465 |
else { p += CHARSETINSTSIZE; s++; } |
|
466 |
continue; |
|
467 |
}
|
|
468 |
case ISpan: { |
|
469 |
for (;; s++) { |
|
470 |
int c = (unsigned char)*s; |
|
471 |
if (!testchar((p+1)->buff, c)) break; |
|
472 |
}
|
|
473 |
p += CHARSETINSTSIZE; |
|
474 |
continue; |
|
475 |
}
|
|
476 |
case ISpanZ: { |
|
477 |
for (; s < e; s++) { |
|
478 |
int c = (unsigned char)*s; |
|
479 |
if (!testchar((p+1)->buff, c)) break; |
|
480 |
}
|
|
481 |
p += CHARSETINSTSIZE; |
|
482 |
continue; |
|
483 |
}
|
|
484 |
case IFunc: { |
|
485 |
const char *r = (p+1)->f((p+2)->buff, o, s, e); |
|
486 |
if (r == NULL) goto fail; |
|
487 |
s = r; |
|
488 |
p += p->i.offset; |
|
489 |
continue; |
|
490 |
}
|
|
491 |
case IJmp: { |
|
492 |
p += p->i.offset; |
|
493 |
continue; |
|
494 |
}
|
|
495 |
case IChoice: { |
|
496 |
if (stack >= stacklimit) |
|
497 |
return (luaL_error(L, "too many pending calls/choices"), (char *)0); |
|
498 |
stack->p = dest(0, p); |
|
499 |
stack->s = s - p->i.aux; |
|
500 |
stack->caplevel = captop; |
|
501 |
stack++; |
|
502 |
p++; |
|
503 |
continue; |
|
504 |
}
|
|
505 |
case ICall: { |
|
506 |
if (stack >= stacklimit) |
|
507 |
return (luaL_error(L, "too many pending calls/choices"), (char *)0); |
|
508 |
stack->s = NULL; |
|
509 |
stack->p = p + 1; /* save return address */ |
|
510 |
stack++; |
|
511 |
p += p->i.offset; |
|
512 |
continue; |
|
513 |
}
|
|
514 |
case ICommit: { |
|
515 |
assert(stack > stackbase && (stack - 1)->s != NULL); |
|
516 |
stack--; |
|
517 |
p += p->i.offset; |
|
518 |
continue; |
|
519 |
}
|
|
520 |
case IPartialCommit: { |
|
521 |
assert(stack > stackbase && (stack - 1)->s != NULL); |
|
522 |
(stack - 1)->s = s; |
|
523 |
(stack - 1)->caplevel = captop; |
|
524 |
p += p->i.offset; |
|
525 |
continue; |
|
526 |
}
|
|
527 |
case IBackCommit: { |
|
528 |
assert(stack > stackbase && (stack - 1)->s != NULL); |
|
529 |
s = (--stack)->s; |
|
530 |
p += p->i.offset; |
|
531 |
continue; |
|
532 |
}
|
|
533 |
case IFailTwice: |
|
534 |
assert(stack > stackbase); |
|
535 |
stack--; |
|
536 |
/* go through */
|
|
537 |
case IFail: |
|
538 |
fail: { /* pattern failed: try to backtrack */ |
|
539 |
do { /* remove pending calls */ |
|
540 |
assert(stack > stackbase); |
|
541 |
s = (--stack)->s; |
|
542 |
} while (s == NULL); |
|
543 |
captop = stack->caplevel; |
|
544 |
p = stack->p; |
|
545 |
continue; |
|
546 |
}
|
|
547 |
case ICloseRunTime: { |
|
548 |
int fr = lua_gettop(L) + 1; /* stack index of first result */ |
|
549 |
int ncap = runtimecap(L, capture + captop, capture, o, s, ptop); |
|
550 |
lua_Integer res = lua_tointeger(L, fr) - 1; /* offset */ |
|
551 |
int n = lua_gettop(L) - fr; /* number of new captures */ |
|
552 |
if (res == -1) { /* may not be a number */ |
|
553 |
if (!lua_toboolean(L, fr)) { /* false value? */ |
|
554 |
lua_settop(L, fr - 1); /* remove results */ |
|
555 |
goto fail; /* and fail */ |
|
556 |
}
|
|
557 |
else if (lua_isboolean(L, fr)) /* true? */ |
|
558 |
res = s - o; /* keep current position */ |
|
559 |
}
|
|
560 |
if (res < s - o || res > e - o) |
|
561 |
luaL_error(L, "invalid position returned by match-time capture"); |
|
562 |
s = o + res; /* update current position */ |
|
563 |
captop -= ncap; /* remove nested captures */ |
|
564 |
lua_remove(L, fr); /* remove first result (offset) */ |
|
565 |
if (n > 0) { /* captures? */ |
|
566 |
if ((captop += n + 1) >= capsize) { |
|
567 |
capture = doublecap(L, capture, captop, ptop); |
|
568 |
capsize = 2 * captop; |
|
569 |
}
|
|
570 |
adddyncaptures(s, capture + captop - n - 1, n, fr); |
|
571 |
}
|
|
572 |
p++; |
|
573 |
continue; |
|
574 |
}
|
|
575 |
case ICloseCapture: { |
|
576 |
const char *s1 = s - getoff(p); |
|
577 |
assert(captop > 0); |
|
578 |
if (capture[captop - 1].siz == 0 && |
|
579 |
s1 - capture[captop - 1].s < UCHAR_MAX) { |
|
580 |
capture[captop - 1].siz = s1 - capture[captop - 1].s + 1; |
|
581 |
p++; |
|
582 |
continue; |
|
583 |
}
|
|
584 |
else { |
|
585 |
capture[captop].siz = 1; /* mark entry as closed */ |
|
586 |
goto capture; |
|
587 |
}
|
|
588 |
}
|
|
589 |
case IEmptyCapture: case IEmptyCaptureIdx: |
|
590 |
capture[captop].siz = 1; /* mark entry as closed */ |
|
591 |
goto capture; |
|
592 |
case IOpenCapture: |
|
593 |
capture[captop].siz = 0; /* mark entry as open */ |
|
594 |
goto capture; |
|
595 |
case IFullCapture: |
|
596 |
capture[captop].siz = getoff(p) + 1; /* save capture size */ |
|
597 |
capture: { |
|
598 |
capture[captop].s = s - getoff(p); |
|
599 |
capture[captop].idx = p->i.offset; |
|
600 |
capture[captop].kind = getkind(p); |
|
601 |
if (++captop >= capsize) { |
|
602 |
capture = doublecap(L, capture, captop, ptop); |
|
603 |
capsize = 2 * captop; |
|
604 |
}
|
|
605 |
p++; |
|
606 |
continue; |
|
607 |
}
|
|
608 |
case IOpenCall: { |
|
609 |
lua_rawgeti(L, penvidx(ptop), p->i.offset); |
|
610 |
luaL_error(L, "reference to %s outside a grammar", val2str(L, -1)); |
|
611 |
}
|
|
612 |
default: assert(0); return NULL; |
|
613 |
}
|
|
614 |
}
|
|
615 |
}
|
|
616 |
||
617 |
/* }====================================================== */
|
|
618 |
||
619 |
||
620 |
/*
|
|
621 |
** {======================================================
|
|
622 |
** Verifier
|
|
623 |
** =======================================================
|
|
624 |
*/
|
|
625 |
||
626 |
||
627 |
static int verify (lua_State *L, Instruction *op, const Instruction *p, |
|
628 |
Instruction *e, int postable, int rule) { |
|
629 |
static const char dummy[] = ""; |
|
630 |
Stack back[MAXBACK]; |
|
631 |
int backtop = 0; /* point to first empty slot in back */ |
|
632 |
while (p != e) { |
|
633 |
switch ((Opcode)p->i.code) { |
|
634 |
case IRet: { |
|
635 |
p = back[--backtop].p; |
|
636 |
continue; |
|
637 |
}
|
|
638 |
case IChoice: { |
|
639 |
if (backtop >= MAXBACK) |
|
640 |
return luaL_error(L, "too many pending calls/choices"); |
|
641 |
back[backtop].p = dest(0, p); |
|
642 |
back[backtop++].s = dummy; |
|
643 |
p++; |
|
644 |
continue; |
|
645 |
}
|
|
646 |
case ICall: { |
|
647 |
assert((p + 1)->i.code != IRet); /* no tail call */ |
|
648 |
if (backtop >= MAXBACK) |
|
649 |
return luaL_error(L, "too many pending calls/choices"); |
|
650 |
back[backtop].s = NULL; |
|
651 |
back[backtop++].p = p + 1; |
|
652 |
goto dojmp; |
|
653 |
}
|
|
654 |
case IOpenCall: { |
|
655 |
int i; |
|
656 |
if (postable == 0) /* grammar still not fixed? */ |
|
657 |
goto fail; /* to be verified later */ |
|
658 |
for (i = 0; i < backtop; i++) { |
|
659 |
if (back[i].s == NULL && back[i].p == p + 1) |
|
660 |
return luaL_error(L, "%s is left recursive", val2str(L, rule)); |
|
661 |
}
|
|
662 |
if (backtop >= MAXBACK) |
|
663 |
return luaL_error(L, "too many pending calls/choices"); |
|
664 |
back[backtop].s = NULL; |
|
665 |
back[backtop++].p = p + 1; |
|
666 |
p = op + getposition(L, postable, p->i.offset); |
|
667 |
continue; |
|
668 |
}
|
|
669 |
case IBackCommit: |
|
670 |
case ICommit: { |
|
671 |
assert(backtop > 0 && p->i.offset > 0); |
|
672 |
backtop--; |
|
673 |
goto dojmp; |
|
674 |
}
|
|
675 |
case IPartialCommit: { |
|
676 |
assert(backtop > 0); |
|
677 |
if (p->i.offset > 0) goto dojmp; /* forward jump */ |
|
678 |
else { /* loop will be detected when checking corresponding rule */ |
|
679 |
assert(postable != 0); |
|
680 |
backtop--; |
|
681 |
p++; /* just go on now */ |
|
682 |
continue; |
|
683 |
}
|
|
684 |
}
|
|
685 |
case ITestAny: |
|
686 |
case ITestChar: /* all these cases jump for empty subject */ |
|
687 |
case ITestSet: |
|
688 |
case ITestZSet: |
|
689 |
case IJmp: |
|
690 |
dojmp: { |
|
691 |
p += p->i.offset; |
|
692 |
continue; |
|
693 |
}
|
|
694 |
case IAny: |
|
695 |
case IChar: |
|
696 |
case ISet: |
|
697 |
case IZSet: |
|
698 |
case IFailTwice: /* assume that first level failed; try to backtrack */ |
|
699 |
goto fail; |
|
700 |
case IFail: { |
|
701 |
if (p->i.aux) { /* is an 'and' predicate? */ |
|
702 |
assert((p - 1)->i.code == IBackCommit && (p - 1)->i.offset == 2); |
|
703 |
p++; /* pretend it succeeded and go ahead */ |
|
704 |
continue; |
|
705 |
}
|
|
706 |
/* else go through */
|
|
707 |
}
|
|
708 |
fail: { /* pattern failed: try to backtrack */ |
|
709 |
do { |
|
710 |
if (backtop-- == 0) |
|
711 |
return 1; /* no more backtracking */ |
|
712 |
} while (back[backtop].s == NULL); |
|
713 |
p = back[backtop].p; |
|
714 |
continue; |
|
715 |
}
|
|
716 |
case ISpan: case ISpanZ: |
|
717 |
case IOpenCapture: case ICloseCapture: |
|
718 |
case IEmptyCapture: case IEmptyCaptureIdx: |
|
719 |
case IFullCapture: { |
|
720 |
p += sizei(p); |
|
721 |
continue; |
|
722 |
}
|
|
723 |
case ICloseRunTime: { |
|
724 |
goto fail; /* be liberal in this case */ |
|
725 |
}
|
|
726 |
case IFunc: { |
|
727 |
const char *r = (p+1)->f((p+2)->buff, dummy, dummy, dummy); |
|
728 |
if (r == NULL) goto fail; |
|
729 |
p += p->i.offset; |
|
730 |
continue; |
|
731 |
}
|
|
732 |
case IEnd: /* cannot happen (should stop before it) */ |
|
733 |
default: assert(0); return 0; |
|
734 |
}
|
|
735 |
}
|
|
736 |
assert(backtop == 0); |
|
737 |
return 0; |
|
738 |
}
|
|
739 |
||
740 |
||
741 |
static void checkrule (lua_State *L, Instruction *op, int from, int to, |
|
742 |
int postable, int rule) { |
|
743 |
int i; |
|
744 |
int lastopen = 0; /* more recent OpenCall seen in the code */ |
|
745 |
for (i = from; i < to; i += sizei(op + i)) { |
|
746 |
if (op[i].i.code == IPartialCommit && op[i].i.offset < 0) { /* loop? */ |
|
747 |
int start = dest(op, i); |
|
748 |
assert(op[start - 1].i.code == IChoice && dest(op, start - 1) == i + 1); |
|
749 |
if (start <= lastopen) { /* loop does contain an open call? */ |
|
750 |
if (!verify(L, op, op + start, op + i, postable, rule)) /* check body */ |
|
751 |
luaL_error(L, "possible infinite loop in %s", val2str(L, rule)); |
|
752 |
}
|
|
753 |
}
|
|
754 |
else if (op[i].i.code == IOpenCall) |
|
755 |
lastopen = i; |
|
756 |
}
|
|
757 |
assert(op[i - 1].i.code == IRet); |
|
758 |
verify(L, op, op + from, op + to - 1, postable, rule); |
|
759 |
}
|
|
760 |
||
761 |
||
762 |
||
763 |
||
764 |
/* }====================================================== */
|
|
765 |
||
766 |
||
767 |
||
768 |
||
769 |
/*
|
|
770 |
** {======================================================
|
|
771 |
** Building Patterns
|
|
772 |
** =======================================================
|
|
773 |
*/
|
|
774 |
||
775 |
enum charsetanswer { NOINFO, ISCHARSET, VALIDSTARTS }; |
|
776 |
||
777 |
typedef struct CharsetTag { |
|
778 |
enum charsetanswer tag; |
|
779 |
Charset cs; |
|
780 |
} CharsetTag; |
|
781 |
||
782 |
||
783 |
static Instruction *getpatt (lua_State *L, int idx, int *size); |
|
784 |
||
785 |
||
786 |
static void check2test (Instruction *p, int n) { |
|
787 |
assert(ischeck(p)); |
|
788 |
p->i.code += ITestAny - IAny; |
|
789 |
p->i.offset = n; |
|
790 |
}
|
|
791 |
||
792 |
||
793 |
/*
|
|
794 |
** invert array slice p[0]-p[e] (both inclusive)
|
|
795 |
*/
|
|
796 |
static void invert (Instruction *p, int e) { |
|
797 |
int i; |
|
798 |
for (i = 0; i < e; i++, e--) { |
|
799 |
Instruction temp = p[i]; |
|
800 |
p[i] = p[e]; |
|
801 |
p[e] = temp; |
|
802 |
}
|
|
803 |
}
|
|
804 |
||
805 |
||
806 |
/*
|
|
807 |
** rotate array slice p[0]-p[e] (both inclusive) 'n' steps
|
|
808 |
** to the 'left'
|
|
809 |
*/
|
|
810 |
static void rotate (Instruction *p, int e, int n) { |
|
811 |
invert(p, n - 1); |
|
812 |
invert(p + n, e - n); |
|
813 |
invert(p, e); |
|
814 |
}
|
|
815 |
||
816 |
||
817 |
#define op_step(p) ((p)->i.code == IAny ? (p)->i.aux : 1)
|
|
818 |
||
819 |
||
820 |
static int skipchecks (Instruction *p, int up, int *pn) { |
|
821 |
int i, n = 0; |
|
822 |
for (i = 0; ischeck(p + i); i += sizei(p + i)) { |
|
823 |
int st = op_step(p + i); |
|
824 |
if (n + st > MAXOFF - up) break; |
|
825 |
n += st; |
|
826 |
}
|
|
827 |
*pn = n; |
|
828 |
return i; |
|
829 |
}
|
|
830 |
||
831 |
||
832 |
#define ismovablecap(op) (ismovable(op) && getoff(op) < MAXOFF)
|
|
833 |
||
834 |
static void optimizecaptures (Instruction *p) { |
|
835 |
int i; |
|
836 |
int limit = 0; |
|
837 |
for (i = 0; p[i].i.code != IEnd; i += sizei(p + i)) { |
|
838 |
if (isjmp(p + i) && dest(p, i) >= limit) |
|
839 |
limit = dest(p, i) + 1; /* do not optimize jump targets */ |
|
840 |
else if (i >= limit && ismovablecap(p + i) && ischeck(p + i + 1)) { |
|
841 |
int end, n, j; /* found a border capture|check */ |
|
842 |
int maxoff = getoff(p + i); |
|
843 |
int start = i; |
|
844 |
/* find first capture in the group */
|
|
845 |
while (start > limit && ismovablecap(p + start - 1)) { |
|
846 |
start--; |
|
847 |
if (getoff(p + start) > maxoff) maxoff = getoff(p + start); |
|
848 |
}
|
|
849 |
end = skipchecks(p + i + 1, maxoff, &n) + i; /* find last check */ |
|
850 |
if (n == 0) continue; /* first check is too big to move across */ |
|
851 |
assert(n <= MAXOFF && start <= i && i < end); |
|
852 |
for (j = start; j <= i; j++) |
|
853 |
p[j].i.aux += (n << 4); /* correct offset of captures to be moved */ |
|
854 |
rotate(p + start, end - start, i - start + 1); /* move them up */ |
|
855 |
i = end; |
|
856 |
assert(ischeck(p + start) && iscapture(p + i)); |
|
857 |
}
|
|
858 |
}
|
|
859 |
}
|
|
860 |
||
861 |
||
862 |
static int target (Instruction *p, int i) { |
|
863 |
while (p[i].i.code == IJmp) i += p[i].i.offset; |
|
864 |
return i; |
|
865 |
}
|
|
866 |
||
867 |
||
868 |
static void optimizejumps (Instruction *p) { |
|
869 |
int i; |
|
870 |
for (i = 0; p[i].i.code != IEnd; i += sizei(p + i)) { |
|
871 |
if (isjmp(p + i)) |
|
872 |
p[i].i.offset = target(p, dest(p, i)) - i; |
|
873 |
}
|
|
874 |
}
|
|
875 |
||
876 |
||
877 |
static void optimizechoice (Instruction *p) { |
|
878 |
assert(p->i.code == IChoice); |
|
879 |
if (ischeck(p + 1)) { |
|
880 |
int lc = sizei(p + 1); |
|
881 |
rotate(p, lc, 1); |
|
882 |
assert(ischeck(p) && (p + lc)->i.code == IChoice); |
|
883 |
(p + lc)->i.aux = op_step(p); |
|
884 |
check2test(p, (p + lc)->i.offset); |
|
885 |
(p + lc)->i.offset -= lc; |
|
886 |
}
|
|
887 |
}
|
|
888 |
||
889 |
||
890 |
/*
|
|
891 |
** A 'headfail' pattern is a pattern that can only fails in its first
|
|
892 |
** instruction, which must be a check.
|
|
893 |
*/
|
|
894 |
static int isheadfail (Instruction *p) { |
|
895 |
if (!ischeck(p)) return 0; |
|
896 |
/* check that other operations cannot fail */
|
|
897 |
for (p += sizei(p); p->i.code != IEnd; p += sizei(p)) |
|
898 |
if (!isnofail(p)) return 0; |
|
899 |
return 1; |
|
900 |
}
|
|
901 |
||
902 |
||
903 |
#define checkpattern(L, idx) ((Instruction *)luaL_checkudata(L, idx, PATTERN_T))
|
|
904 |
||
905 |
||
906 |
static int jointable (lua_State *L, int p1) { |
|
907 |
int n, n1, i; |
|
908 |
lua_getfenv(L, p1); |
|
909 |
n1 = lua_objlen(L, -1); /* number of elements in p1's env */ |
|
910 |
lua_getfenv(L, -2); |
|
911 |
if (n1 == 0 || lua_equal(L, -2, -1)) { |
|
912 |
lua_pop(L, 2); |
|
913 |
return 0; /* no need to change anything */ |
|
914 |
}
|
|
915 |
n = lua_objlen(L, -1); /* number of elements in p's env */ |
|
916 |
if (n == 0) { |
|
917 |
lua_pop(L, 1); /* removes p env */ |
|
918 |
lua_setfenv(L, -2); /* p now shares p1's env */ |
|
919 |
return 0; /* no need to correct anything */ |
|
920 |
}
|
|
921 |
lua_createtable(L, n + n1, 0); |
|
922 |
/* stack: p; p1 env; p env; new p env */
|
|
923 |
for (i = 1; i <= n; i++) { |
|
924 |
lua_rawgeti(L, -2, i); |
|
925 |
lua_rawseti(L, -2, i); |
|
926 |
}
|
|
927 |
for (i = 1; i <= n1; i++) { |
|
928 |
lua_rawgeti(L, -3, i); |
|
929 |
lua_rawseti(L, -2, n + i); |
|
930 |
}
|
|
931 |
lua_setfenv(L, -4); /* new table becomes p env */ |
|
932 |
lua_pop(L, 2); /* remove p1 env and old p env */ |
|
933 |
return n; |
|
934 |
}
|
|
935 |
||
936 |
||
937 |
#define copypatt(p1,p2,sz) memcpy(p1, p2, (sz) * sizeof(Instruction));
|
|
938 |
||
939 |
#define pattsize(L,idx) (lua_objlen(L, idx)/sizeof(Instruction) - 1)
|
|
940 |
||
941 |
||
942 |
static int addpatt (lua_State *L, Instruction *p, int p1idx) { |
|
943 |
Instruction *p1 = (Instruction *)lua_touserdata(L, p1idx); |
|
944 |
int sz = pattsize(L, p1idx); |
|
945 |
int corr = jointable(L, p1idx); |
|
946 |
copypatt(p, p1, sz + 1); |
|
947 |
if (corr != 0) { |
|
948 |
Instruction *px; |
|
949 |
for (px = p; px < p + sz; px += sizei(px)) { |
|
950 |
if (isfenvoff(px) && px->i.offset != 0) |
|
951 |
px->i.offset += corr; |
|
952 |
}
|
|
953 |
}
|
|
954 |
return sz; |
|
955 |
}
|
|
956 |
||
957 |
||
958 |
static void setinstaux (Instruction *i, Opcode op, int offset, int aux) { |
|
959 |
i->i.code = op; |
|
960 |
i->i.offset = offset; |
|
961 |
i->i.aux = aux; |
|
962 |
}
|
|
963 |
||
964 |
#define setinst(i,op,off) setinstaux(i,op,off,0)
|
|
965 |
||
966 |
#define setinstcap(i,op,idx,k,n) setinstaux(i,op,idx,((k) | ((n) << 4)))
|
|
967 |
||
968 |
||
969 |
static int value2fenv (lua_State *L, int vidx) { |
|
970 |
lua_createtable(L, 1, 0); |
|
971 |
lua_pushvalue(L, vidx); |
|
972 |
lua_rawseti(L, -2, 1); |
|
973 |
lua_setfenv(L, -2); |
|
974 |
return 1; |
|
975 |
}
|
|
976 |
||
977 |
||
978 |
static Instruction *newpatt (lua_State *L, size_t n) { |
|
979 |
Instruction *p; |
|
980 |
if (n >= MAXPATTSIZE - 1) |
|
981 |
luaL_error(L, "pattern too big"); |
|
982 |
p = (Instruction *)lua_newuserdata(L, (n + 1) * sizeof(Instruction)); |
|
983 |
luaL_getmetatable(L, PATTERN_T); |
|
984 |
lua_setmetatable(L, -2); |
|
985 |
setinst(p + n, IEnd, 0); |
|
986 |
return p; |
|
987 |
}
|
|
988 |
||
989 |
||
990 |
static void fillcharset (Instruction *p, Charset cs) { |
|
991 |
switch (p[0].i.code) { |
|
992 |
case IZSet: case ITestZSet: |
|
993 |
assert(testchar(p[1].buff, '\0')); |
|
994 |
/* go through */
|
|
995 |
case ISet: case ITestSet: { |
|
996 |
loopset(i, cs[i] = p[1].buff[i]); |
|
997 |
break; |
|
998 |
}
|
|
999 |
case IChar: case ITestChar: { |
|
1000 |
loopset(i, cs[i] = 0); |
|
1001 |
setchar(cs, p[0].i.aux); |
|
1002 |
break; |
|
1003 |
}
|
|
1004 |
default: { /* any char may start unhandled instructions */ |
|
1005 |
loopset(i, cs[i] = 0xff); |
|
1006 |
break; |
|
1007 |
}
|
|
1008 |
}
|
|
1009 |
}
|
|
1010 |
||
1011 |
||
1012 |
/*
|
|
1013 |
** Function 'tocharset' gets information about which chars may be a
|
|
1014 |
** valid start for a pattern.
|
|
1015 |
*/
|
|
1016 |
||
1017 |
static enum charsetanswer tocharset (Instruction *p, CharsetTag *c) { |
|
1018 |
if (ischeck(p)) { |
|
1019 |
fillcharset(p, c->cs); |
|
1020 |
if ((p + sizei(p))->i.code == IEnd && op_step(p) == 1) |
|
1021 |
c->tag = ISCHARSET; |
|
1022 |
else
|
|
1023 |
c->tag = VALIDSTARTS; |
|
1024 |
}
|
|
1025 |
else
|
|
1026 |
c->tag = NOINFO; |
|
1027 |
return c->tag; |
|
1028 |
}
|
|
1029 |
||
1030 |
||
1031 |
static int exclusiveset (Charset c1, Charset c2) { |
|
1032 |
/* non-empty intersection? */
|
|
1033 |
loopset(i, {if ((c1[i] & c2[i]) != 0) return 0;}); |
|
1034 |
return 1; /* no intersection */ |
|
1035 |
}
|
|
1036 |
||
1037 |
||
1038 |
static int exclusive (CharsetTag *c1, CharsetTag *c2) { |
|
1039 |
if (c1->tag == NOINFO || c2->tag == NOINFO) |
|
1040 |
return 0; /* one of them is not filled */ |
|
1041 |
else return exclusiveset(c1->cs, c2->cs); |
|
1042 |
}
|
|
1043 |
||
1044 |
||
1045 |
#define correctset(p) { if (testchar(p[1].buff, '\0')) p->i.code++; }
|
|
1046 |
||
1047 |
||
1048 |
static Instruction *newcharset (lua_State *L) { |
|
1049 |
Instruction *p = newpatt(L, CHARSETINSTSIZE); |
|
1050 |
p[0].i.code = ISet; |
|
1051 |
loopset(i, p[1].buff[i] = 0); |
|
1052 |
return p; |
|
1053 |
}
|
|
1054 |
||
1055 |
||
1056 |
static int set_l (lua_State *L) { |
|
1057 |
size_t l; |
|
1058 |
const char *s = luaL_checklstring(L, 1, &l); |
|
1059 |
if (l == 1) |
|
1060 |
getpatt(L, 1, NULL); /* a unit set is equivalent to a literal */ |
|
1061 |
else { |
|
1062 |
Instruction *p = newcharset(L); |
|
1063 |
while (l--) { |
|
1064 |
setchar(p[1].buff, (unsigned char)(*s)); |
|
1065 |
s++; |
|
1066 |
}
|
|
1067 |
correctset(p); |
|
1068 |
}
|
|
1069 |
return 1; |
|
1070 |
}
|
|
1071 |
||
1072 |
||
1073 |
static int range_l (lua_State *L) { |
|
1074 |
int arg; |
|
1075 |
int top = lua_gettop(L); |
|
1076 |
Instruction *p = newcharset(L); |
|
1077 |
for (arg = 1; arg <= top; arg++) { |
|
1078 |
int c; |
|
1079 |
size_t l; |
|
1080 |
const char *r = luaL_checklstring(L, arg, &l); |
|
1081 |
luaL_argcheck(L, l == 2, arg, "range must have two characters"); |
|
1082 |
for (c = (byte)r[0]; c <= (byte)r[1]; c++) |
|
1083 |
setchar(p[1].buff, c); |
|
1084 |
}
|
|
1085 |
correctset(p); |
|
1086 |
return 1; |
|
1087 |
}
|
|
1088 |
||
1089 |
||
1090 |
static int nter_l (lua_State *L) { |
|
1091 |
Instruction *p; |
|
1092 |
luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); |
|
1093 |
p = newpatt(L, 1); |
|
1094 |
setinst(p, IOpenCall, value2fenv(L, 1)); |
|
1095 |
return 1; |
|
1096 |
}
|
|
1097 |
||
1098 |
||
1099 |
||
1100 |
static int testpattern (lua_State *L, int idx) { |
|
1101 |
if (lua_touserdata(L, idx)) { /* value is a userdata? */ |
|
1102 |
if (lua_getmetatable(L, idx)) { /* does it have a metatable? */ |
|
1103 |
luaL_getmetatable(L, PATTERN_T); |
|
1104 |
if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */ |
|
1105 |
lua_pop(L, 2); /* remove both metatables */ |
|
1106 |
return 1; |
|
1107 |
}
|
|
1108 |
}
|
|
1109 |
}
|
|
1110 |
return 0; |
|
1111 |
}
|
|
1112 |
||
1113 |
||
1114 |
static Instruction *fix_l (lua_State *L, int t) { |
|
1115 |
Instruction *p; |
|
1116 |
int i; |
|
1117 |
int totalsize = 2; /* include initial call and jump */ |
|
1118 |
int n = 0; /* number of rules */ |
|
1119 |
int base = lua_gettop(L); |
|
1120 |
lua_newtable(L); /* to store relative positions of each rule */ |
|
1121 |
lua_pushinteger(L, 1); /* default initial rule */ |
|
1122 |
/* collect patterns and compute sizes */
|
|
1123 |
lua_pushnil(L); |
|
1124 |
while (lua_next(L, t) != 0) { |
|
1125 |
int l; |
|
1126 |
if (lua_tonumber(L, -2) == 1 && lua_isstring(L, -1)) { |
|
1127 |
lua_replace(L, base + 2); /* use this value as initial rule */ |
|
1128 |
continue; |
|
1129 |
}
|
|
1130 |
if (!testpattern(L, -1)) |
|
1131 |
luaL_error(L, "invalid field in grammar"); |
|
1132 |
l = pattsize(L, -1) + 1; /* space for pattern + ret */ |
|
1133 |
if (totalsize >= MAXPATTSIZE - l) |
|
1134 |
luaL_error(L, "grammar too large"); |
|
1135 |
luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules"); |
|
1136 |
lua_insert(L, -2); /* put key on top */ |
|
1137 |
lua_pushvalue(L, -1); /* duplicate key (for lua_next) */ |
|
1138 |
lua_pushvalue(L, -1); /* duplicate key (to index positions table)) */ |
|
1139 |
lua_pushinteger(L, totalsize); /* position for this rule */ |
|
1140 |
lua_settable(L, base + 1); /* store key=>position in positions table */ |
|
1141 |
totalsize += l; |
|
1142 |
n++; |
|
1143 |
}
|
|
1144 |
luaL_argcheck(L, n > 0, t, "empty grammar"); |
|
1145 |
p = newpatt(L, totalsize); /* create new pattern */ |
|
1146 |
p++; /* save space for call */ |
|
1147 |
setinst(p++, IJmp, totalsize - 1); /* after call, jumps to the end */ |
|
1148 |
for (i = 1; i <= n; i++) { /* copy all rules into new pattern */ |
|
1149 |
p += addpatt(L, p, base + 1 + i*2); |
|
1150 |
setinst(p++, IRet, 0); |
|
1151 |
}
|
|
1152 |
p -= totalsize; /* back to first position */ |
|
1153 |
totalsize = 2; /* go through each rule's position */ |
|
1154 |
for (i = 1; i <= n; i++) { /* check all rules */ |
|
1155 |
int l = pattsize(L, base + 1 + i*2) + 1; |
|
1156 |
checkrule(L, p, totalsize, totalsize + l, base + 1, base + 2 + i*2); |
|
1157 |
totalsize += l; |
|
1158 |
}
|
|
1159 |
lua_pushvalue(L, base + 2); /* get initial rule */ |
|
1160 |
lua_gettable(L, base + 1); /* get its position in postions table */ |
|
1161 |
i = lua_tonumber(L, -1); /* convert to number */ |
|
1162 |
lua_pop(L, 1); |
|
1163 |
if (i == 0) /* is it defined? */ |
|
1164 |
luaL_error(L, "initial rule not defined in given grammar"); |
|
1165 |
setinst(p, ICall, i); /* first instruction calls initial rule */ |
|
1166 |
/* correct calls */
|
|
1167 |
for (i = 0; i < totalsize; i += sizei(p + i)) { |
|
1168 |
if (p[i].i.code == IOpenCall) { |
|
1169 |
int pos = getposition(L, base + 1, p[i].i.offset); |
|
1170 |
p[i].i.code = (p[target(p, i + 1)].i.code == IRet) ? IJmp : ICall; |
|
1171 |
p[i].i.offset = pos - i; |
|
1172 |
}
|
|
1173 |
}
|
|
1174 |
optimizejumps(p); |
|
1175 |
lua_replace(L, t); /* put new pattern in old's position */ |
|
1176 |
lua_settop(L, base); /* remove rules and positions table */ |
|
1177 |
return p; |
|
1178 |
}
|
|
1179 |
||
1180 |
||
1181 |
static Instruction *any (lua_State *L, int n, int extra, int *offsetp) { |
|
1182 |
int offset = offsetp ? *offsetp : 0; |
|
1183 |
Instruction *p = newpatt(L, (n - 1)/UCHAR_MAX + extra + 1); |
|
1184 |
Instruction *p1 = p + offset; |
|
1185 |
for (; n > UCHAR_MAX; n -= UCHAR_MAX) |
|
1186 |
setinstaux(p1++, IAny, 0, UCHAR_MAX); |
|
1187 |
setinstaux(p1++, IAny, 0, n); |
|
1188 |
if (offsetp) *offsetp = p1 - p; |
|
1189 |
return p; |
|
1190 |
}
|
|
1191 |
||
1192 |
||
1193 |
static Instruction *getpatt (lua_State *L, int idx, int *size) { |
|
1194 |
Instruction *p; |
|
1195 |
switch (lua_type(L, idx)) { |
|
1196 |
case LUA_TSTRING: { |
|
1197 |
size_t i, len; |
|
1198 |
const char *s = lua_tolstring(L, idx, &len); |
|
1199 |
p = newpatt(L, len); |
|
1200 |
for (i = 0; i < len; i++) |
|
1201 |
setinstaux(p + i, IChar, 0, (unsigned char)s[i]); |
|
1202 |
lua_replace(L, idx); |
|
1203 |
break; |
|
1204 |
}
|
|
1205 |
case LUA_TNUMBER: { |
|
1206 |
int n = lua_tointeger(L, idx); |
|
1207 |
if (n == 0) /* empty pattern? */ |
|
1208 |
p = newpatt(L, 0); |
|
1209 |
else if (n > 0) |
|
1210 |
p = any(L, n, 0, NULL); |
|
1211 |
else if (-n <= UCHAR_MAX) { |
|
1212 |
p = newpatt(L, 2); |
|
1213 |
setinstaux(p, ITestAny, 2, -n); |
|
1214 |
setinst(p + 1, IFail, 0); |
|
1215 |
}
|
|
1216 |
else { |
|
1217 |
int offset = 2; /* space for ITestAny & IChoice */ |
|
1218 |
p = any(L, -n - UCHAR_MAX, 3, &offset); |
|
1219 |
setinstaux(p, ITestAny, offset + 1, UCHAR_MAX); |
|
1220 |
setinstaux(p + 1, IChoice, offset, UCHAR_MAX); |
|
1221 |
setinst(p + offset, IFailTwice, 0); |
|
1222 |
}
|
|
1223 |
lua_replace(L, idx); |
|
1224 |
break; |
|
1225 |
}
|
|
1226 |
case LUA_TBOOLEAN: { |
|
1227 |
if (lua_toboolean(L, idx)) /* true? */ |
|
1228 |
p = newpatt(L, 0); /* empty pattern (always succeeds) */ |
|
1229 |
else { |
|
1230 |
p = newpatt(L, 1); |
|
1231 |
setinst(p, IFail, 0); |
|
1232 |
}
|
|
1233 |
lua_replace(L, idx); |
|
1234 |
break; |
|
1235 |
}
|
|
1236 |
case LUA_TTABLE: { |
|
1237 |
p = fix_l(L, idx); |
|
1238 |
break; |
|
1239 |
}
|
|
1240 |
case LUA_TFUNCTION: { |
|
1241 |
p = newpatt(L, 2); |
|
1242 |
setinstcap(p, IOpenCapture, value2fenv(L, idx), Cruntime, 0); |
|
1243 |
setinstcap(p + 1, ICloseRunTime, 0, Cclose, 0); |
|
1244 |
lua_replace(L, idx); |
|
1245 |
break; |
|
1246 |
}
|
|
1247 |
default: { |
|
1248 |
p = checkpattern(L, idx); |
|
1249 |
break; |
|
1250 |
}
|
|
1251 |
}
|
|
1252 |
if (size) *size = pattsize(L, idx); |
|
1253 |
return p; |
|
1254 |
}
|
|
1255 |
||
1256 |
||
1257 |
static int getpattl (lua_State *L, int idx) { |
|
1258 |
int size; |
|
1259 |
getpatt(L, idx, &size); |
|
1260 |
return size; |
|
1261 |
}
|
|
1262 |
||
1263 |
||
1264 |
static int pattern_l (lua_State *L) { |
|
1265 |
lua_settop(L, 1); |
|
1266 |
getpatt(L, 1, NULL); |
|
1267 |
return 1; |
|
1268 |
}
|
|
1269 |
||
1270 |
||
1271 |
#define isany(p) ((p)->i.code == IAny && ((p) + 1)->i.code == IEnd)
|
|
1272 |
||
1273 |
||
1274 |
static int concat_l (lua_State *L) { |
|
1275 |
/* p1; p2; */
|
|
1276 |
int l1, l2; |
|
1277 |
Instruction *p1 = getpatt(L, 1, &l1); |
|
1278 |
Instruction *p2 = getpatt(L, 2, &l2); |
|
1279 |
if (isany(p1) && isany(p2)) |
|
1280 |
any(L, p1->i.aux + p2->i.aux, 0, NULL); |
|
1281 |
else { |
|
1282 |
Instruction *op = newpatt(L, l1 + l2); |
|
1283 |
Instruction *p = op + addpatt(L, op, 1); |
|
1284 |
addpatt(L, p, 2); |
|
1285 |
optimizecaptures(op); |
|
1286 |
}
|
|
1287 |
return 1; |
|
1288 |
}
|
|
1289 |
||
1290 |
||
1291 |
static int diff_l (lua_State *L) { |
|
1292 |
int l1, l2; |
|
1293 |
Instruction *p1 = getpatt(L, 1, &l1); |
|
1294 |
Instruction *p2 = getpatt(L, 2, &l2); |
|
1295 |
CharsetTag st1, st2; |
|
1296 |
if (tocharset(p1, &st1) == ISCHARSET && tocharset(p2, &st2) == ISCHARSET) { |
|
1297 |
Instruction *p = newcharset(L); |
|
1298 |
loopset(i, p[1].buff[i] = st1.cs[i] & ~st2.cs[i]); |
|
1299 |
correctset(p); |
|
1300 |
}
|
|
1301 |
else if (isheadfail(p2)) { |
|
1302 |
Instruction *p = newpatt(L, l2 + 1 + l1); |
|
1303 |
p += addpatt(L, p, 2); |
|
1304 |
check2test(p - l2, l2 + 1); |
|
1305 |
setinst(p++, IFail, 0); |
|
1306 |
addpatt(L, p, 1); |
|
1307 |
}
|
|
1308 |
else { /* !e2 . e1 */ |
|
1309 |
/* !e -> choice L1; e; failtwice; L1: ... */
|
|
1310 |
Instruction *p = newpatt(L, 1 + l2 + 1 + l1); |
|
1311 |
Instruction *pi = p; |
|
1312 |
setinst(p++, IChoice, 1 + l2 + 1); |
|
1313 |
p += addpatt(L, p, 2); |
|
1314 |
setinst(p++, IFailTwice, 0); |
|
1315 |
addpatt(L, p, 1); |
|
1316 |
optimizechoice(pi); |
|
1317 |
}
|
|
1318 |
return 1; |
|
1319 |
}
|
|
1320 |
||
1321 |
||
1322 |
static int unm_l (lua_State *L) { |
|
1323 |
lua_pushliteral(L, ""); |
|
1324 |
lua_insert(L, 1); |
|
1325 |
return diff_l(L); |
|
1326 |
}
|
|
1327 |
||
1328 |
||
1329 |
static int pattand_l (lua_State *L) { |
|
1330 |
/* &e -> choice L1; e; backcommit L2; L1: fail; L2: ... */
|
|
1331 |
int l1 = getpattl(L, 1); |
|
1332 |
Instruction *p = newpatt(L, 1 + l1 + 2); |
|
1333 |
setinst(p++, IChoice, 1 + l1 + 1); |
|
1334 |
p += addpatt(L, p, 1); |
|
1335 |
setinst(p++, IBackCommit, 2); |
|
1336 |
setinstaux(p, IFail, 0, 1); |
|
1337 |
return 1; |
|
1338 |
}
|
|
1339 |
||
1340 |
||
1341 |
static int firstpart (Instruction *p, int l) { |
|
1342 |
if (istest(p)) { |
|
1343 |
int e = p[0].i.offset - 1; |
|
1344 |
if ((p[e].i.code == IJmp || p[e].i.code == ICommit) && |
|
1345 |
e + p[e].i.offset == l) |
|
1346 |
return e + 1; |
|
1347 |
}
|
|
1348 |
else if (p[0].i.code == IChoice) { |
|
1349 |
int e = p[0].i.offset - 1; |
|
1350 |
if (p[e].i.code == ICommit && e + p[e].i.offset == l) |
|
1351 |
return e + 1; |
|
1352 |
}
|
|
1353 |
return 0; |
|
1354 |
}
|
|
1355 |
||
1356 |
||
1357 |
static Instruction *auxnew (lua_State *L, Instruction **op, int *size, |
|
1358 |
int extra) { |
|
1359 |
*op = newpatt(L, *size + extra); |
|
1360 |
jointable(L, 1); |
|
1361 |
*size += extra; |
|
1362 |
return *op + *size - extra; |
|
1363 |
}
|
|
1364 |
||
1365 |
||
1366 |
static int nofail (Instruction *p, int l) { |
|
1367 |
int i; |
|
1368 |
for (i = 0; i < l; i += sizei(p + i)) { |
|
1369 |
if (!isnofail(p + i)) return 0; |
|
1370 |
}
|
|
1371 |
return 1; |
|
1372 |
}
|
|
1373 |
||
1374 |
||
1375 |
static int interfere (Instruction *p1, int l1, CharsetTag *st2) { |
|
1376 |
if (nofail(p1, l1)) /* p1 cannot fail? */ |
|
1377 |
return 0; /* nothing can intefere with it */ |
|
1378 |
if (st2->tag == NOINFO) return 1; |
|
1379 |
switch (p1->i.code) { |
|
1380 |
case ITestChar: return testchar(st2->cs, p1->i.aux); |
|
1381 |
case ITestSet: return !exclusiveset(st2->cs, (p1 + 1)->buff); |
|
1382 |
default: assert(p1->i.code == ITestAny); return 1; |
|
1383 |
}
|
|
1384 |
}
|
|
1385 |
||
1386 |
||
1387 |
static Instruction *basicUnion (lua_State *L, Instruction *p1, int l1, |
|
1388 |
int l2, int *size, CharsetTag *st2) { |
|
1389 |
Instruction *op; |
|
1390 |
CharsetTag st1; |
|
1391 |
tocharset(p1, &st1); |
|
1392 |
if (st1.tag == ISCHARSET && st2->tag == ISCHARSET) { |
|
1393 |
Instruction *p = auxnew(L, &op, size, CHARSETINSTSIZE); |
|
1394 |
setinst(p, ISet, 0); |
|
1395 |
loopset(i, p[1].buff[i] = st1.cs[i] | st2->cs[i]); |
|
1396 |
correctset(p); |
|
1397 |
}
|
|
1398 |
else if (exclusive(&st1, st2) || isheadfail(p1)) { |
|
1399 |
Instruction *p = auxnew(L, &op, size, l1 + 1 + l2); |
|
1400 |
copypatt(p, p1, l1); |
|
1401 |
check2test(p, l1 + 1); |
|
1402 |
p += l1; |
|
1403 |
setinst(p++, IJmp, l2 + 1); |
|
1404 |
addpatt(L, p, 2); |
|
1405 |
}
|
|
1406 |
else { |
|
1407 |
/* choice L1; e1; commit L2; L1: e2; L2: ... */
|
|
1408 |
Instruction *p = auxnew(L, &op, size, 1 + l1 + 1 + l2); |
|
1409 |
setinst(p++, IChoice, 1 + l1 + 1); |
|
1410 |
copypatt(p, p1, l1); p += l1; |
|
1411 |
setinst(p++, ICommit, 1 + l2); |
|
1412 |
addpatt(L, p, 2); |
|
1413 |
optimizechoice(p - (1 + l1 + 1)); |
|
1414 |
}
|
|
1415 |
return op; |
|
1416 |
}
|
|
1417 |
||
1418 |
||
1419 |
static Instruction *separateparts (lua_State *L, Instruction *p1, int l1, |
|
1420 |
int l2, int *size, CharsetTag *st2) { |
|
1421 |
int sp = firstpart(p1, l1); |
|
1422 |
if (sp == 0) /* first part is entire p1? */ |
|
1423 |
return basicUnion(L, p1, l1, l2, size, st2); |
|
1424 |
else if ((p1 + sp - 1)->i.code == ICommit || !interfere(p1, sp, st2)) { |
|
1425 |
Instruction *p; |
|
1426 |
int init = *size; |
|
1427 |
int end = init + sp; |
|
1428 |
*size = end; |
|
1429 |
p = separateparts(L, p1 + sp, l1 - sp, l2, size, st2); |
|
1430 |
copypatt(p + init, p1, sp); |
|
1431 |
(p + end - 1)->i.offset = *size - (end - 1); |
|
1432 |
return p; |
|
1433 |
}
|
|
1434 |
else { /* must change back to non-optimized choice */ |
|
1435 |
Instruction *p; |
|
1436 |
int init = *size; |
|
1437 |
int end = init + sp + 1; /* needs one extra instruction (choice) */ |
|
1438 |
int sizefirst = sizei(p1); /* size of p1's first instruction (the test) */ |
|
1439 |
*size = end; |
|
1440 |
p = separateparts(L, p1 + sp, l1 - sp, l2, size, st2); |
|
1441 |
copypatt(p + init, p1, sizefirst); /* copy the test */ |
|
1442 |
(p + init)->i.offset++; /* correct jump (because of new instruction) */ |
|
1443 |
init += sizefirst; |
|
1444 |
setinstaux(p + init, IChoice, sp - sizefirst + 1, 1); init++; |
|
1445 |
copypatt(p + init, p1 + sizefirst, sp - sizefirst - 1); |
|
1446 |
init += sp - sizefirst - 1; |
|
1447 |
setinst(p + init, ICommit, *size - (end - 1)); |
|
1448 |
return p; |
|
1449 |
}
|
|
1450 |
}
|
|
1451 |
||
1452 |
||
1453 |
static int union_l (lua_State *L) { |
|
1454 |
int l1, l2; |
|
1455 |
int size = 0; |
|
1456 |
Instruction *p1 = getpatt(L, 1, &l1); |
|
1457 |
Instruction *p2 = getpatt(L, 2, &l2); |
|
1458 |
CharsetTag st2; |
|
1459 |
if (p1->i.code == IFail) /* check for identity element */ |
|
1460 |
lua_pushvalue(L, 2); |
|
1461 |
else if (p2->i.code == IFail) |
|
1462 |
lua_pushvalue(L, 1); |
|
1463 |
else { |
|
1464 |
tocharset(p2, &st2); |
|
1465 |
separateparts(L, p1, l1, l2, &size, &st2); |
|
1466 |
}
|
|
1467 |
return 1; |
|
1468 |
}
|
|
1469 |
||
1470 |
||
1471 |
static int repeatcharset (lua_State *L, Charset cs, int l1, int n) { |
|
1472 |
/* e; ...; e; span; */
|
|
1473 |
int i; |
|
1474 |
Instruction *p = newpatt(L, n*l1 + CHARSETINSTSIZE); |
|
1475 |
for (i = 0; i < n; i++) { |
|
1476 |
p += addpatt(L, p, 1); |
|
1477 |
}
|
|
1478 |
setinst(p, ISpan, 0); |
|
1479 |
loopset(k, p[1].buff[k] = cs[k]); |
|
1480 |
correctset(p); |
|
1481 |
return 1; |
|
1482 |
}
|
|
1483 |
||
1484 |
||
1485 |
static Instruction *repeatheadfail (lua_State *L, int l1, int n) { |
|
1486 |
/* e; ...; e; L2: e'(L1); jump L2; L1: ... */
|
|
1487 |
int i; |
|
1488 |
Instruction *p = newpatt(L, (n + 1)*l1 + 1); |
|
1489 |
Instruction *op = p; |
|
1490 |
for (i = 0; i < n; i++) { |
|
1491 |
p += addpatt(L, p, 1); |
|
1492 |
}
|
|
1493 |
p += addpatt(L, p, 1); |
|
1494 |
check2test(p - l1, l1 + 1); |
|
1495 |
setinst(p, IJmp, -l1); |
|
1496 |
return op; |
|
1497 |
}
|
|
1498 |
||
1499 |
||
1500 |
static Instruction *repeats (lua_State *L, Instruction *p1, int l1, int n) { |
|
1501 |
/* e; ...; e; choice L1; L2: e; partialcommit L2; L1: ... */
|
|
1502 |
int i; |
|
1503 |
Instruction *op = newpatt(L, (n + 1)*l1 + 2); |
|
1504 |
Instruction *p = op; |
|
1505 |
if (!verify(L, p1, p1, p1 + l1, 0, 0)) |
|
1506 |
luaL_error(L, "loop body may accept empty string"); |
|
1507 |
for (i = 0; i < n; i++) { |
|
1508 |
p += addpatt(L, p, 1); |
|
1509 |
}
|
|
1510 |
setinst(p++, IChoice, 1 + l1 + 1); |
|
1511 |
p += addpatt(L, p, 1); |
|
1512 |
setinst(p, IPartialCommit, -l1); |
|
1513 |
return op; |
|
1514 |
}
|
|
1515 |
||
1516 |
||
1517 |
static void optionalheadfail (lua_State *L, int l1, int n) { |
|
1518 |
Instruction *op = newpatt(L, n * l1); |
|
1519 |
Instruction *p = op; |
|
1520 |
int i; |
|
1521 |
for (i = 0; i < n; i++) { |
|
1522 |
p += addpatt(L, p, 1); |
|
1523 |
check2test(p - l1, (n - i)*l1); |
|
1524 |
}
|
|
1525 |
}
|
|
1526 |
||
1527 |
||
1528 |
static void optionals (lua_State *L, int l1, int n) { |
|
1529 |
/* choice L1; e; partialcommit L2; L2: ... e; L1: commit L3; L3: ... */
|
|
1530 |
int i; |
|
1531 |
Instruction *op = newpatt(L, n*(l1 + 1) + 1); |
|
1532 |
Instruction *p = op; |
|
1533 |
setinst(p++, IChoice, 1 + n*(l1 + 1)); |
|
1534 |
for (i = 0; i < n; i++) { |
|
1535 |
p += addpatt(L, p, 1); |
|
1536 |
setinst(p++, IPartialCommit, 1); |
|
1537 |
}
|
|
1538 |
setinst(p - 1, ICommit, 1); /* correct last commit */ |
|
1539 |
optimizechoice(op); |
|
1540 |
}
|
|
1541 |
||
1542 |
||
1543 |
static int star_l (lua_State *L) { |
|
1544 |
int l1; |
|
1545 |
int n = luaL_checkint(L, 2); |
|
1546 |
Instruction *p1 = getpatt(L, 1, &l1); |
|
1547 |
if (n >= 0) { |
|
1548 |
CharsetTag st; |
|
1549 |
Instruction *op; |
|
1550 |
if (tocharset(p1, &st) == ISCHARSET) |
|
1551 |
return repeatcharset(L, st.cs, l1, n); |
|
1552 |
if (isheadfail(p1)) |
|
1553 |
op = repeatheadfail(L, l1, n); |
|
1554 |
else
|
|
1555 |
op = repeats(L, p1, l1, n); |
|
1556 |
optimizecaptures(op); |
|
1557 |
optimizejumps(op); |
|
1558 |
}
|
|
1559 |
else { |
|
1560 |
if (isheadfail(p1)) |
|
1561 |
optionalheadfail(L, l1, -n); |
|
1562 |
else
|
|
1563 |
optionals(L, l1, -n); |
|
1564 |
}
|
|
1565 |
return 1; |
|
1566 |
}
|
|
1567 |
||
1568 |
||
1569 |
static int getlabel (lua_State *L, int labelidx) { |
|
1570 |
if (labelidx == 0) return 0; |
|
1571 |
else return value2fenv(L, labelidx); |
|
1572 |
}
|
|
1573 |
||
1574 |
||
1575 |
static int capture_aux (lua_State *L, int kind, int labelidx) { |
|
1576 |
int l1, n; |
|
1577 |
Instruction *p1 = getpatt(L, 1, &l1); |
|
1578 |
int lc = skipchecks(p1, 0, &n); |
|
1579 |
if (lc == l1) { /* got whole pattern? */ |
|
1580 |
/* may use a IFullCapture instruction at its end */
|
|
1581 |
Instruction *p = newpatt(L, l1 + 1); |
|
1582 |
int label = getlabel(L, labelidx); |
|
1583 |
p += addpatt(L, p, 1); |
|
1584 |
setinstcap(p, IFullCapture, label, kind, n); |
|
1585 |
}
|
|
1586 |
else { /* must use open-close pair */ |
|
1587 |
Instruction *op = newpatt(L, 1 + l1 + 1); |
|
1588 |
Instruction *p = op; |
|
1589 |
setinstcap(p++, IOpenCapture, getlabel(L, labelidx), kind, 0); |
|
1590 |
p += addpatt(L, p, 1); |
|
1591 |
setinstcap(p, ICloseCapture, 0, Cclose, 0); |
|
1592 |
optimizecaptures(op); |
|
1593 |
}
|
|
1594 |
return 1; |
|
1595 |
}
|
|
1596 |
||
1597 |
||
1598 |
static int capture_l (lua_State *L) { return capture_aux(L, Csimple, 0); } |
|
1599 |
static int tcapture_l (lua_State *L) { return capture_aux(L, Ctable, 0); } |
|
1600 |
static int capsubst_l (lua_State *L) { return capture_aux(L, Csubst, 0); } |
|
1601 |
static int capaccum_l (lua_State *L) { return capture_aux(L, Caccum, 0); } |
|
1602 |
||
1603 |
static int rcapture_l (lua_State *L) { |
|
1604 |
switch (lua_type(L, 2)) { |
|
1605 |
case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2); |
|
1606 |
case LUA_TTABLE: return capture_aux(L, Cquery, 2); |
|
1607 |
case LUA_TSTRING: return capture_aux(L, Cstring, 2); |
|
1608 |
default: return luaL_argerror(L, 2, "invalid replacement value"); |
|
1609 |
}
|
|
1610 |
}
|
|
1611 |
||
1612 |
||
1613 |
static int position_l (lua_State *L) { |
|
1614 |
Instruction *p = newpatt(L, 1); |
|
1615 |
setinstcap(p, IEmptyCapture, 0, Cposition, 0); |
|
1616 |
return 1; |
|
1617 |
}
|
|
1618 |
||
1619 |
||
1620 |
static int emptycap_aux (lua_State *L, int kind, const char *msg) { |
|
1621 |
int n = luaL_checkint(L, 1); |
|
1622 |
Instruction *p = newpatt(L, 1); |
|
1623 |
luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, msg); |
|
1624 |
setinstcap(p, IEmptyCapture, n, kind, 0); |
|
1625 |
return 1; |
|
1626 |
}
|
|
1627 |
||
1628 |
||
1629 |
static int backref_l (lua_State *L) { |
|
1630 |
return emptycap_aux(L, Cbackref, "invalid back reference"); |
|
1631 |
}
|
|
1632 |
||
1633 |
||
1634 |
static int argcap_l (lua_State *L) { |
|
1635 |
return emptycap_aux(L, Carg, "invalid argument index"); |
|
1636 |
}
|
|
1637 |
||
1638 |
||
1639 |
static int matchtime_l (lua_State *L) { |
|
1640 |
int l1 = getpattl(L, 1); |
|
1641 |
Instruction *op = newpatt(L, 1 + l1 + 1); |
|
1642 |
Instruction *p = op; |
|
1643 |
luaL_checktype(L, 2, LUA_TFUNCTION); |
|
1644 |
setinstcap(p++, IOpenCapture, value2fenv(L, 2), Cruntime, 0); |
|
1645 |
p += addpatt(L, p, 1); |
|
1646 |
setinstcap(p, ICloseRunTime, 0, Cclose, 0); |
|
1647 |
optimizecaptures(op); |
|
1648 |
return 1; |
|
1649 |
}
|
|
1650 |
||
1651 |
||
1652 |
static int capconst_l (lua_State *L) { |
|
1653 |
int i, j; |
|
1654 |
int n = lua_gettop(L); |
|
1655 |
Instruction *p = newpatt(L, n); |
|
1656 |
lua_createtable(L, n, 0); /* new environment for the new pattern */ |
|
1657 |
for (i = j = 1; i <= n; i++) { |
|
1658 |
if (lua_isnil(L, i)) |
|
1659 |
setinstcap(p++, IEmptyCaptureIdx, 0, Cconst, 0); |
|
1660 |
else { |
|
1661 |
setinstcap(p++, IEmptyCaptureIdx, j, Cconst, 0); |
|
1662 |
lua_pushvalue(L, i); |
|
1663 |
lua_rawseti(L, -2, j++); |
|
1664 |
}
|
|
1665 |
}
|
|
1666 |
lua_setfenv(L, -2); /* set environment */ |
|
1667 |
return 1; |
|
1668 |
}
|
|
1669 |
||
1670 |
||
1671 |
/* }====================================================== */
|
|
1672 |
||
1673 |
||
1674 |
||
1675 |
/*
|
|
1676 |
** {======================================================
|
|
1677 |
** User-Defined Patterns
|
|
1678 |
** =======================================================
|
|
1679 |
*/
|
|
1680 |
||
1681 |
static void newpattfunc (lua_State *L, PattFunc f, const void *ud, size_t l) { |
|
1682 |
int n = instsize(l) + 1; |
|
1683 |
Instruction *p = newpatt(L, n); |
|
1684 |
p[0].i.code = IFunc; |
|
1685 |
p[0].i.offset = n; |
|
1686 |
p[1].f = f; |
|
1687 |
memcpy(p[2].buff, ud, l); |
|
1688 |
}
|
|
1689 |
||
1690 |
||
1691 |
#include <ctype.h> |
|
1692 |
||
1693 |
static const char *span (const void *ud, const char *o, |
|
1694 |
const char *s, |
|
1695 |
const char *e) { |
|
1696 |
const char *u = (const char *)ud; |
|
1697 |
(void)o; (void)e; |
|
1698 |
return s + strspn(s, u); |
|
1699 |
}
|
|
1700 |
||
1701 |
||
1702 |
static int span_l (lua_State *L) { |
|
1703 |
const char *s = luaL_checkstring(L, 1); |
|
1704 |
newpattfunc(L, span, s, strlen(s) + 1); |
|
1705 |
return 1; |
|
1706 |
}
|
|
1707 |
||
1708 |
/* }====================================================== */
|
|
1709 |
||
1710 |
||
1711 |
||
1712 |
/*
|
|
1713 |
** {======================================================
|
|
1714 |
** Captures
|
|
1715 |
** =======================================================
|
|
1716 |
*/
|
|
1717 |
||
1718 |
||
1719 |
typedef struct CapState { |
|
1720 |
Capture *cap; /* current capture */ |
|
1721 |
Capture *ocap; /* (original) capture list */ |
|
1722 |
lua_State *L; |
|
1723 |
int ptop; /* index of last argument to 'match' */ |
|
1724 |
const char *s; /* original string */ |
|
1725 |
int valuecached; /* value stored in cache slot */ |
|
1726 |
} CapState; |
|
1727 |
||
1728 |
||
1729 |
#define captype(cap) ((cap)->kind)
|
|
1730 |
||
1731 |
#define isclosecap(cap) (captype(cap) == Cclose)
|
|
1732 |
||
1733 |
#define closeaddr(c) ((c)->s + (c)->siz - 1)
|
|
1734 |
||
1735 |
#define isfullcap(cap) ((cap)->siz != 0)
|
|
1736 |
||
1737 |
#define pushluaval(cs) lua_rawgeti((cs)->L, penvidx((cs)->ptop), (cs)->cap->idx)
|
|
1738 |
||
1739 |
#define pushsubject(cs, c) lua_pushlstring((cs)->L, (c)->s, (c)->siz - 1)
|
|
1740 |
||
1741 |
||
1742 |
#define updatecache(cs,v) { if ((v) != (cs)->valuecached) updatecache_(cs,v); }
|
|
1743 |
||
1744 |
||
1745 |
static void updatecache_ (CapState *cs, int v) { |
|
1746 |
lua_rawgeti(cs->L, penvidx(cs->ptop), v); |
|
1747 |
lua_replace(cs->L, subscache(cs->ptop)); |
|
1748 |
cs->valuecached = v; |
|
1749 |
}
|
|
1750 |
||
1751 |
||
1752 |
static int pushcapture (CapState *cs); |
|
1753 |
||
1754 |
||
1755 |
static Capture *findopen (Capture *cap) { |
|
1756 |
int n = 0; |
|
1757 |
for (;;) { |
|
1758 |
cap--; |
|
1759 |
if (isclosecap(cap)) n++; |
|
1760 |
else if (!isfullcap(cap)) |
|
1761 |
if (n-- == 0) return cap; |
|
1762 |
}
|
|
1763 |
}
|
|
1764 |
||
1765 |
||
1766 |
static Capture *findback (CapState *cs, Capture *cap, int n) { |
|
1767 |
int i; |
|
1768 |
for (i = 0; i < n; i++) { |
|
1769 |
if (cap == cs->ocap) |
|
1770 |
luaL_error(cs->L, "invalid back reference (%d)", n); |
|
1771 |
cap--; |
|
1772 |
if (isclosecap(cap)) |
|
1773 |
cap = findopen(cap); |
|
1774 |
else if (!isfullcap(cap)) |
|
1775 |
i--; /* does not count enclosing captures */ |
|
1776 |
}
|
|
1777 |
assert(!isclosecap(cap)); |
|
1778 |
return cap; |
|
1779 |
}
|
|
1780 |
||
1781 |
||
1782 |
static int pushallcaptures (CapState *cs, int addextra) { |
|
1783 |
Capture *co = cs->cap; |
|
1784 |
int n = 0; |
|
1785 |
if (isfullcap(cs->cap++)) { |
|
1786 |
pushsubject(cs, co); /* push whole match */ |
|
1787 |
return 1; |
|
1788 |
}
|
|
1789 |
while (!isclosecap(cs->cap)) |
|
1790 |
n += pushcapture(cs); |
|
1791 |
if (addextra || n == 0) { /* need extra? */ |
|
1792 |
lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */ |
|
1793 |
n++; |
|
1794 |
}
|
|
1795 |
cs->cap++; /* skip close entry */ |
|
1796 |
return n; |
|
1797 |
}
|
|
1798 |
||
1799 |
||
1800 |
static int simplecap (CapState *cs) { |
|
1801 |
int n; |
|
1802 |
lua_pushnil(cs->L); /* open space */ |
|
1803 |
n = pushallcaptures(cs, 1); |
|
1804 |
lua_replace(cs->L, -(n + 1)); /* put extra in previously opened slot */ |
|
1805 |
return n; |
|
1806 |
}
|
|
1807 |
||
1808 |
||
1809 |
static int backrefcap (CapState *cs) { |
|
1810 |
int n = (cs->cap)->idx; |
|
1811 |
Capture *curr = cs->cap; |
|
1812 |
Capture *backref = findback(cs, curr, n); |
|
1813 |
cs->cap = backref; |
|
1814 |
n = pushcapture(cs); |
|
1815 |
cs->cap = curr + 1; |
|
1816 |
return n; |
|
1817 |
}
|
|
1818 |
||
1819 |
||
1820 |
static int tablecap (CapState *cs) { |
|
1821 |
int n = 0; |
|
1822 |
lua_newtable(cs->L); |
|
1823 |
if (isfullcap(cs->cap++)) |
|
1824 |
return 1; /* table is empty */ |
|
1825 |
while (!isclosecap(cs->cap)) { |
|
1826 |
int i; |
|
1827 |
int k = pushcapture(cs); |
|
1828 |
for (i = k; i > 0; i--) |
|
1829 |
lua_rawseti(cs->L, -(i + 1), n + i); |
|
1830 |
n += k; |
|
1831 |
}
|
|
1832 |
cs->cap++; /* skip close entry */ |
|
1833 |
return 1; |
|
1834 |
}
|
|
1835 |
||
1836 |
||
1837 |
static int querycap (CapState *cs) { |
|
1838 |
int idx = cs->cap->idx; |
|
1839 |
int n = pushallcaptures(cs, 0); |
|
1840 |
if (n > 1) /* extra captures? */ |
|
1841 |
lua_pop(cs->L, n - 1); /* throw them away */ |
|
1842 |
updatecache(cs, idx); |
|
1843 |
lua_gettable(cs->L, subscache(cs->ptop)); |
|
1844 |
if (!lua_isnil(cs->L, -1)) |
|
1845 |
return 1; |
|
1846 |
else { |
|
1847 |
lua_pop(cs->L, 1); /* remove value */ |
|
1848 |
return 0; |
|
1849 |
}
|
|
1850 |
}
|
|
1851 |
||
1852 |
||
1853 |
static int accumcap (CapState *cs) { |
|
1854 |
lua_State *L = cs->L; |
|
1855 |
if (isfullcap(cs->cap++) || isclosecap(cs->cap) || pushcapture(cs) != 1) |
|
1856 |
return luaL_error(L, "no initial value for accumulator capture"); |
|
1857 |
while (!isclosecap(cs->cap)) { |
|
1858 |
int n; |
|
1859 |
if (captype(cs->cap) != Cfunction) |
|
1860 |
return luaL_error(L, "invalid (non function) capture to accumulate"); |
|
1861 |
pushluaval(cs); |
|
1862 |
lua_insert(L, -2); /* put function before previous capture */ |
|
1863 |
n = pushallcaptures(cs, 0); |
|
1864 |
lua_call(L, n + 1, 1); |
|
1865 |
}
|
|
1866 |
cs->cap++; |
|
1867 |
return 1; |
|
1868 |
}
|
|
1869 |
||
1870 |
||
1871 |
static int functioncap (CapState *cs) { |
|
1872 |
int n; |
|
1873 |
int top = lua_gettop(cs->L); |
|
1874 |
pushluaval(cs); |
|
1875 |
n = pushallcaptures(cs, 0); |
|
1876 |
lua_call(cs->L, n, LUA_MULTRET); |
|
1877 |
return lua_gettop(cs->L) - top; |
|
1878 |
}
|
|
1879 |
||
1880 |
||
1881 |
static int runtimecap (lua_State *L, Capture *close, Capture *ocap, |
|
1882 |
const char *o, const char *s, int ptop) { |
|
1883 |
CapState cs; |
|
1884 |
int n; |
|
1885 |
Capture *open = findopen(close); |
|
1886 |
assert(captype(open) == Cruntime); |
|
1887 |
close->kind = Cclose; |
|
1888 |
close->s = s; |
|
1889 |
cs.ocap = ocap; cs.cap = open; cs.L = L; |
|
1890 |
cs.s = o; cs.valuecached = 0; cs.ptop = ptop; |
|
1891 |
luaL_checkstack(L, 4, "too many runtime captures"); |
|
1892 |
pushluaval(&cs); |
|
1893 |
lua_pushvalue(L, SUBJIDX); /* push original subject */ |
|
1894 |
lua_pushinteger(L, s - o + 1); /* current position */ |
|
1895 |
n = pushallcaptures(&cs, 0); |
|
1896 |
lua_call(L, n + 2, LUA_MULTRET); |
|
1897 |
return close - open; |
|
1898 |
}
|
|
1899 |
||
1900 |
||
1901 |
||
1902 |
typedef struct StrAux { |
|
1903 |
const char *s; |
|
1904 |
const char *e; |
|
1905 |
} StrAux; |
|
1906 |
||
1907 |
#define MAXSTRCAPS 10
|
|
1908 |
||
1909 |
static int getstrcaps (CapState *cs, StrAux *cps, int n) { |
|
1910 |
int k = n++; |
|
1911 |
if (k < MAXSTRCAPS) cps[k].s = cs->cap->s; |
|
1912 |
if (!isfullcap(cs->cap++)) { |
|
1913 |
while (!isclosecap(cs->cap)) { |
|
1914 |
if (captype(cs->cap) != Csimple) |
|
1915 |
return luaL_error(cs->L, |
|
1916 |
"invalid capture #%d in replacement pattern", n); |
|
1917 |
n = getstrcaps(cs, cps, n); |
|
1918 |
}
|
|
1919 |
cs->cap++; /* skip close */ |
|
1920 |
}
|
|
1921 |
if (k < MAXSTRCAPS) cps[k].e = closeaddr(cs->cap - 1); |
|
1922 |
return n; |
|
1923 |
}
|
|
1924 |
||
1925 |
||
1926 |
static void stringcap (luaL_Buffer *b, CapState *cs) { |
|
1927 |
StrAux cps[MAXSTRCAPS]; |
|
1928 |
int n; |
|
1929 |
size_t len, i; |
|
1930 |
const char *c; |
|
1931 |
updatecache(cs, cs->cap->idx); |
|
1932 |
c = lua_tolstring(cs->L, subscache(cs->ptop), &len); |
|
1933 |
n = getstrcaps(cs, cps, 0) - 1; |
|
1934 |
for (i = 0; i < len; i++) { |
|
1935 |
if (c[i] != '%' || c[++i] < '0' || c[i] > '9') |
|
1936 |
luaL_addchar(b, c[i]); |
|
1937 |
else { |
|
1938 |
int l = c[i] - '0'; |
|
1939 |
if (l > n) |
|
1940 |
luaL_error(cs->L, "invalid capture index (%c)", c[i]); |
|
1941 |
luaL_addlstring(b, cps[l].s, cps[l].e - cps[l].s); |
|
1942 |
}
|
|
1943 |
}
|
|
1944 |
}
|
|
1945 |
||
1946 |
||
1947 |
static void substcap (CapState *cs) { |
|
1948 |
luaL_Buffer b; |
|
1949 |
const char *curr = (cs->cap - 1)->s; |
|
1950 |
luaL_buffinit(cs->L, &b); |
|
1951 |
while (!isclosecap(cs->cap)) { |
|
1952 |
int n; |
|
1953 |
const char *next = cs->cap->s; |
|
1954 |
luaL_addlstring(&b, curr, next - curr); |
|
1955 |
if (captype(cs->cap) == Cstring) |
|
1956 |
stringcap(&b, cs); /* add capture directly to buffer */ |
|
1957 |
else if ((n = pushcapture(cs)) == 0) { /* no capture? */ |
|
1958 |
curr = next; /* result keeps the original text */ |
|
1959 |
continue; |
|
1960 |
}
|
|
1961 |
else { |
|
1962 |
if (n > 1) lua_pop(cs->L, n - 1); /* ignore extra values */ |
|
1963 |
if (!lua_isstring(cs->L, -1)) |
|
1964 |
luaL_error(cs->L, "invalid replacement value (a %s)", |
|
1965 |
luaL_typename(cs->L, -1)); |
|
1966 |
luaL_addvalue(&b); /* add result to accumulator */ |
|
1967 |
}
|
|
1968 |
/* continue after match */
|
|
1969 |
curr = closeaddr(cs->cap - 1); |
|
1970 |
}
|
|
1971 |
luaL_addlstring(&b, curr, cs->cap->s - curr); |
|
1972 |
luaL_pushresult(&b); |
|
1973 |
cs->cap++; |
|
1974 |
}
|
|
1975 |
||
1976 |
||
1977 |
static int pushcapture (CapState *cs) { |
|
1978 |
luaL_checkstack(cs->L, 4, "too many unstored captures"); |
|
1979 |
switch (captype(cs->cap)) { |
|
1980 |
case Cposition: { |
|
1981 |
lua_pushinteger(cs->L, cs->cap->s - cs->s + 1); |
|
1982 |
cs->cap++; |
|
1983 |
return 1; |
|
1984 |
}
|
|
1985 |
case Cconst: { |
|
1986 |
pushluaval(cs); |
|
1987 |
cs->cap++; |
|
1988 |
return 1; |
|
1989 |
}
|
|
1990 |
case Carg: { |
|
1991 |
int arg = (cs->cap++)->idx; |
|
1992 |
if (arg + FIXEDARGS > cs->ptop) |
|
1993 |
return luaL_error(cs->L, "reference to absent argument #%d", arg); |
|
1994 |
lua_pushvalue(cs->L, arg + FIXEDARGS); |
|
1995 |
return 1; |
|
1996 |
}
|
|
1997 |
case Csimple: { |
|
1998 |
if (isfullcap(cs->cap)) { |
|
1999 |
pushsubject(cs, cs->cap); |
|
2000 |
cs->cap++; |
|
2001 |
return 1; |
|
2002 |
}
|
|
2003 |
else return simplecap(cs); |
|
2004 |
}
|
|
2005 |
case Cruntime: { |
|
2006 |
int i = 0; |
|
2007 |
while (!isclosecap(cs->cap++)) { |
|
2008 |
luaL_checkstack(cs->L, 4, "too many unstored captures"); |
|
2009 |
lua_pushvalue(cs->L, (cs->cap - 1)->idx); |
|
2010 |
i++; |
|
2011 |
}
|
|
2012 |
return i; |
|
2013 |
}
|
|
2014 |
case Cstring: { |
|
2015 |
luaL_Buffer b; |
|
2016 |
luaL_buffinit(cs->L, &b); |
|
2017 |
stringcap(&b, cs); |
|
2018 |
luaL_pushresult(&b); |
|
2019 |
return 1; |
|
2020 |
}
|
|
2021 |
case Csubst: { |
|
2022 |
if (isfullcap(cs->cap++)) /* no changes? */ |
|
2023 |
pushsubject(cs, cs->cap - 1); /* push original subject */ |
|
2024 |
else
|
|
2025 |
substcap(cs); |
|
2026 |
return 1; |
|
2027 |
}
|
|
2028 |
case Cbackref: return backrefcap(cs); |
|
2029 |
case Ctable: return tablecap(cs); |
|
2030 |
case Cfunction: return functioncap(cs); |
|
2031 |
case Cquery: return querycap(cs); |
|
2032 |
case Caccum: return accumcap(cs); |
|
2033 |
default: assert(0); return 0; |
|
2034 |
}
|
|
2035 |
}
|
|
2036 |
||
2037 |
||
2038 |
static int getcaptures (lua_State *L, const char *s, const char *r, int ptop) { |
|
2039 |
Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop)); |
|
2040 |
int n = 0; |
|
2041 |
if (!isclosecap(capture)) { /* is there any capture? */ |
|
2042 |
CapState cs; |
|
2043 |
cs.ocap = cs.cap = capture; cs.L = L; |
|
2044 |
cs.s = s; cs.valuecached = 0; cs.ptop = ptop; |
|
2045 |
do { /* collect their values */ |
|
2046 |
n += pushcapture(&cs); |
|
2047 |
} while (!isclosecap(cs.cap)); |
|
2048 |
}
|
|
2049 |
if (n == 0) { /* no capture values? */ |
|
2050 |
lua_pushinteger(L, r - s + 1); /* return only end position */ |
|
2051 |
n = 1; |
|
2052 |
}
|
|
2053 |
return n; |
|
2054 |
}
|
|
2055 |
||
2056 |
/* }====================================================== */
|
|
2057 |
||
2058 |
||
2059 |
static int version_l (lua_State *L) { |
|
2060 |
lua_pushstring(L, VERSION); |
|
2061 |
return 1; |
|
2062 |
}
|
|
2063 |
||
2064 |
||
2065 |
static int type_l (lua_State *L) { |
|
2066 |
if (testpattern(L, 1)) |
|
2067 |
lua_pushliteral(L, "pattern"); |
|
2068 |
else
|
|
2069 |
lua_pushnil(L); |
|
2070 |
return 1; |
|
2071 |
}
|
|
2072 |
||
2073 |
||
2074 |
static int printpat_l (lua_State *L) { |
|
2075 |
Instruction *p = getpatt(L, 1, NULL); |
|
2076 |
int n, i; |
|
2077 |
lua_getfenv(L, 1); |
|
2078 |
n = lua_objlen(L, -1); |
|
2079 |
printf("["); |
|
2080 |
for (i = 1; i <= n; i++) { |
|
2081 |
printf("%d = ", i); |
|
2082 |
lua_rawgeti(L, -1, i); |
|
2083 |
if (lua_isstring(L, -1)) |
|
2084 |
printf("%s ", lua_tostring(L, -1)); |
|
2085 |
else
|
|
2086 |
printf("%s ", lua_typename(L, lua_type(L, -1))); |
|
2087 |
lua_pop(L, 1); |
|
2088 |
}
|
|
2089 |
printf("]\n"); |
|
2090 |
printpatt(p); |
|
2091 |
return 0; |
|
2092 |
}
|
|
2093 |
||
2094 |
||
2095 |
static int matchl (lua_State *L) { |
|
2096 |
Capture capture[IMAXCAPTURES]; |
|
2097 |
const char *r; |
|
2098 |
size_t l; |
|
2099 |
Instruction *p = getpatt(L, 1, NULL); |
|
2100 |
const char *s = luaL_checklstring(L, SUBJIDX, &l); |
|
2101 |
int ptop = lua_gettop(L); |
|
2102 |
lua_Integer ii = luaL_optinteger(L, 3, 1); |
|
2103 |
size_t i = (ii > 0) ? |
|
2104 |
(((size_t)ii <= l) ? (size_t)ii - 1 : l) : |
|
2105 |
(((size_t)-ii <= l) ? l - ((size_t)-ii) : 0); |
|
2106 |
lua_pushnil(L); /* subscache */ |
|
2107 |
lua_pushlightuserdata(L, capture); /* caplistidx */ |
|
2108 |
lua_getfenv(L, 1); /* penvidx */ |
|
2109 |
r = match(L, s, s + i, s + l, p, capture, ptop); |
|
2110 |
if (r == NULL) { |
|
2111 |
lua_pushnil(L); |
|
2112 |
return 1; |
|
2113 |
}
|
|
2114 |
return getcaptures(L, s, r, ptop); |
|
2115 |
}
|
|
2116 |
||
2117 |
||
2118 |
static struct luaL_reg pattreg[] = { |
|
2119 |
{"match", matchl}, |
|
2120 |
{"print", printpat_l}, |
|
2121 |
{"C", capture_l}, |
|
2122 |
{"Ca", capaccum_l}, |
|
2123 |
{"Cc", capconst_l}, |
|
2124 |
{"Cp", position_l}, |
|
2125 |
{"Cb", backref_l}, |
|
2126 |
{"Carg", argcap_l}, |
|
2127 |
{"Cmt", matchtime_l}, |
|
2128 |
{"Cs", capsubst_l}, |
|
2129 |
{"Ct", tcapture_l}, |
|
2130 |
{"P", pattern_l}, |
|
2131 |
{"R", range_l}, |
|
2132 |
{"S", set_l}, |
|
2133 |
{"V", nter_l}, |
|
2134 |
{"span", span_l}, |
|
2135 |
{"type", type_l}, |
|
2136 |
{"version", version_l}, |
|
2137 |
{NULL, NULL} |
|
2138 |
};
|
|
2139 |
||
2140 |
||
2141 |
static struct luaL_reg metapattreg[] = { |
|
2142 |
{"__add", union_l}, |
|
2143 |
{"__pow", star_l}, |
|
2144 |
{"__sub", diff_l}, |
|
2145 |
{"__mul", concat_l}, |
|
2146 |
{"__div", rcapture_l}, |
|
2147 |
{"__unm", unm_l}, |
|
2148 |
{"__len", pattand_l}, |
|
2149 |
{NULL, NULL} |
|
2150 |
};
|
|
2151 |
||
2152 |
||
2153 |
int luaopen_lpeg (lua_State *L); |
|
2154 |
int luaopen_lpeg (lua_State *L) { |
|
2155 |
lua_newtable(L); |
|
2156 |
lua_replace(L, LUA_ENVIRONINDEX); /* empty env for new patterns */ |
|
2157 |
luaL_newmetatable(L, PATTERN_T); |
|
2158 |
luaL_register(L, NULL, metapattreg); |
|
2159 |
luaL_register(L, "lpeg", pattreg); |
|
2160 |
lua_pushliteral(L, "__index"); |
|
2161 |
lua_pushvalue(L, -2); |
|
2162 |
lua_settable(L, -4); |
|
2163 |
return 1; |
|
2164 |
}
|
|
2165 |