~roger-booth/mysql-proxy/laminator

492 by jan at mysql
switch to lpeg to parse the PROXY ... queries
1
/*
2
** $Id: lpeg.c,v 1.86 2008/03/07 17:20:19 roberto Exp $
3
** LPeg - PEG pattern matching for Lua
4
** Copyright 2007, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
5
** written by Roberto Ierusalimschy
6
*/
7
8
9
#include <assert.h>
10
#include <limits.h>
11
#include <stdio.h>
12
#include <string.h>
13
14
#include "lua.h"
15
#include "lauxlib.h"
16
17
18
#define VERSION		"0.8"
19
#define PATTERN_T	"pattern"
20
21
/* maximum call/backtrack levels */
22
#define MAXBACK		400
23
24
/* initial size for capture's list */
25
#define IMAXCAPTURES	600
26
27
28
/* index, on Lua stack, for subject */
29
#define SUBJIDX		2
30
31
/* number of fixed arguments to 'match' (before capture arguments) */
32
#define FIXEDARGS	3
33
34
/* index, on Lua stack, for substitution value cache */
35
#define subscache(ptop)	((ptop) + 1)
36
37
/* index, on Lua stack, for capture list */
38
#define caplistidx(ptop)	((ptop) + 2)
39
40
/* index, on Lua stack, for pattern's fenv */
41
#define penvidx(ptop)	((ptop) + 3)
42
43
44
45
typedef unsigned char byte;
46
47
48
#define CHARSETSIZE		((UCHAR_MAX/CHAR_BIT) + 1)
49
50
51
typedef byte Charset[CHARSETSIZE];
52
53
54
typedef const char *(*PattFunc) (const void *ud,
55
                                 const char *o,  /* string start */
56
                                 const char *s,  /* current position */
57
                                 const char *e); /* string end */
58
59
60
/* Virtual Machine's instructions */
61
typedef enum Opcode {
62
  IAny, IChar, ISet, IZSet,
63
  ITestAny, ITestChar, ITestSet, ITestZSet,
64
  ISpan, ISpanZ,
65
  IRet, IEnd,
66
  IChoice, IJmp, ICall, IOpenCall,
67
  ICommit, IPartialCommit, IBackCommit, IFailTwice, IFail, IGiveup,
68
  IFunc,
69
  IFullCapture, IEmptyCapture, IEmptyCaptureIdx,
70
  IOpenCapture, ICloseCapture, ICloseRunTime
71
} Opcode;
72
73
74
#define ISJMP		1
75
#define ISCHECK		2
76
#define ISTEST		4
77
#define ISNOFAIL	8
78
#define ISCAPTURE	16
79
#define ISMOVABLE	32
80
#define ISFENVOFF	64
81
#define HASCHARSET	128
82
83
static const byte opproperties[] = {
84
  /* IAny */		ISCHECK,
85
  /* IChar */		ISCHECK,
86
  /* ISet */		ISCHECK | HASCHARSET,
87
  /* IZSet */		ISCHECK | HASCHARSET,
88
  /* ITestAny */	ISJMP | ISTEST | ISNOFAIL,
89
  /* ITestChar */	ISJMP | ISTEST | ISNOFAIL,
90
  /* ITestSet */	ISJMP | ISTEST | ISNOFAIL | HASCHARSET,
91
  /* ITestZSet */	ISJMP | ISTEST | ISNOFAIL | HASCHARSET,
92
  /* ISpan */		ISNOFAIL | HASCHARSET,
93
  /* ISpanZ */		ISNOFAIL | HASCHARSET,
94
  /* IRet */		0,
95
  /* IEnd */		0,
96
  /* IChoice */		ISJMP,
97
  /* IJmp */		ISJMP | ISNOFAIL,
98
  /* ICall */		ISJMP,
99
  /* IOpenCall */	ISFENVOFF,
100
  /* ICommit */		ISJMP,
101
  /* IPartialCommit */	ISJMP,
102
  /* IBackCommit */	ISJMP,
103
  /* IFailTwice */	0,
104
  /* IFail */		0,
105
  /* IGiveup */		0,
106
  /* IFunc */		0,
107
  /* IFullCapture */	ISCAPTURE | ISNOFAIL | ISFENVOFF,
108
  /* IEmptyCapture */	ISCAPTURE | ISNOFAIL | ISMOVABLE,
109
  /* IEmptyCaptureIdx */ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF,
110
  /* IOpenCapture */	ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF,
111
  /* ICloseCapture */	ISCAPTURE | ISNOFAIL | ISMOVABLE | ISFENVOFF,
112
  /* ICloseRunTime */	ISCAPTURE | ISFENVOFF
113
};
114
115
116
typedef union Instruction {
117
  struct Inst {
118
    byte code;
119
    byte aux;
120
    short offset;
121
  } i;
122
  PattFunc f;
123
  byte buff[1];
124
} Instruction;
125
126
static const Instruction giveup = {{IGiveup, 0, 0}};
127
128
#define getkind(op)	((op)->i.aux & 0xF)
129
#define getoff(op)	(((op)->i.aux >> 4) & 0xF)
130
131
#define dest(p,x)	((x) + ((p)+(x))->i.offset)
132
133
#define MAXOFF		0xF
134
135
#define isprop(op,p)	(opproperties[(op)->i.code] & (p))
136
#define isjmp(op)	isprop(op, ISJMP)
137
#define iscapture(op) 	isprop(op, ISCAPTURE)
138
#define ischeck(op)	isprop(op, ISCHECK)
139
#define istest(op)	isprop(op, ISTEST)
140
#define isnofail(op)	isprop(op, ISNOFAIL)
141
#define ismovable(op)	isprop(op, ISMOVABLE)
142
#define isfenvoff(op)	isprop(op, ISFENVOFF)
143
#define hascharset(op)	isprop(op, HASCHARSET)
144
145
146
/* kinds of captures */
147
typedef enum CapKind {
148
  Cclose, Cposition, Cconst, Cbackref, Carg, Csimple, Ctable, Cfunction,
149
  Cquery, Cstring, Csubst, Caccum, Cruntime
150
} CapKind;
151
152
#define iscapnosize(k)	((k) == Cposition || (k) == Cconst)
153
154
155
typedef struct Capture {
156
  const char *s;  /* position */
157
  short idx;
158
  byte kind;
159
  byte siz;
160
} Capture;
161
162
163
/* maximum size (in elements) for a pattern */
164
#define MAXPATTSIZE	(SHRT_MAX - 10)
165
166
167
/* size (in elements) for an instruction plus extra l bytes */
168
#define instsize(l)	(((l) - 1)/sizeof(Instruction) + 2)
169
170
171
/* size (in elements) for a ISet instruction */
172
#define CHARSETINSTSIZE		instsize(CHARSETSIZE)
173
174
175
176
#define loopset(v,b)	{ int v; for (v = 0; v < CHARSETSIZE; v++) b; }
177
178
179
#define testchar(st,c)	(((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))
180
#define setchar(st,c)	((st)[(c) >> 3] |= (1 << ((c) & 7)))
181
182
183
184
static int sizei (const Instruction *i) {
185
  if (hascharset(i)) return CHARSETINSTSIZE;
186
  else if (i->i.code == IFunc) return i->i.offset;
187
  else return 1;
188
}
189
190
191
static const char *val2str (lua_State *L, int idx) {
192
  const char *k = lua_tostring(L, idx);
193
  if (k != NULL)
194
    return lua_pushfstring(L, "rule '%s'", k);
195
  else
196
    return lua_pushfstring(L, "rule <a %s>", luaL_typename(L, -1));
197
}
198
199
200
static int getposition (lua_State *L, int t, int i) {
201
  int res;
202
  lua_getfenv(L, -1);
203
  lua_rawgeti(L, -1, i);  /* get key from pattern's environment */
204
  lua_gettable(L, t);  /* get position from positions table */
205
  res = lua_tointeger(L, -1);
206
  if (res == 0) {  /* key has no registered position? */
207
    lua_rawgeti(L, -2, i);  /* get key again */
208
    return luaL_error(L, "%s is not defined in given grammar", val2str(L, -1));
209
  }
210
  lua_pop(L, 2);  /* remove environment and position */
211
  return res;
212
}
213
214
215
216
/*
217
** {======================================================
218
** Printing patterns
219
** =======================================================
220
*/
221
222
223
static void printcharset (const Charset st) {
224
  int i;
225
  printf("[");
226
  for (i = 0; i <= UCHAR_MAX; i++) {
227
    int first = i;
228
    while (testchar(st, i) && i <= UCHAR_MAX) i++;
229
    if (i - 1 == first)  /* unary range? */
230
      printf("(%02x)", first);
231
    else if (i - 1 > first)  /* non-empty range? */
232
      printf("(%02x-%02x)", first, i - 1);
233
  }
234
  printf("]");
235
}
236
237
238
static void printcapkind (int kind) {
239
  const char *const modes[] = {
240
    "close", "position", "constant", "backref",
241
    "argument", "simple", "table", "function",
242
    "query", "string", "substitution", "accumulator",
243
    "runtime"};
244
  printf("%s", modes[kind]);
245
}
246
247
248
static void printjmp (const Instruction *op, const Instruction *p) {
249
  printf("-> %ld", (long)(dest(0, p) - op));
250
}
251
252
253
static void printinst (const Instruction *op, const Instruction *p) {
254
  const char *const names[] = {
255
    "any", "char", "set", "zset",
256
    "testany", "testchar", "testset", "testzset",
257
    "span", "spanz",
258
    "ret", "end",
259
    "choice", "jmp", "call", "open_call",
260
    "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
261
     "func",
262
     "fullcapture", "emptycapture", "emptycaptureidx", "opencapture",
263
     "closecapture", "closeruntime"
264
  };
265
  printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
266
  switch ((Opcode)p->i.code) {
267
    case IChar: {
268
      printf("'%c'", p->i.aux);
269
      break;
270
    }
271
    case ITestChar: {
272
      printf("'%c'", p->i.aux);
273
      printjmp(op, p);
274
      break;
275
    }
276
    case IAny: {
277
      printf("* %d", p->i.aux);
278
      break;
279
    }
280
    case ITestAny: {
281
      printf("* %d", p->i.aux);
282
      printjmp(op, p);
283
      break;
284
    }
285
    case IFullCapture: case IOpenCapture:
286
    case IEmptyCapture: case IEmptyCaptureIdx:
287
    case ICloseCapture: case ICloseRunTime: {
288
      printcapkind(getkind(p));
289
      printf("(n = %d)  (off = %d)", getoff(p), p->i.offset);
290
      break;
291
    }
292
    case ISet: case IZSet: case ISpan: case ISpanZ: {
293
      printcharset((p+1)->buff);
294
      break;
295
    }
296
    case ITestSet: case ITestZSet: {
297
      printcharset((p+1)->buff);
298
      printjmp(op, p);
299
      break;
300
    }
301
    case IOpenCall: {
302
      printf("-> %d", p->i.offset);
303
      break;
304
    }
305
    case IChoice: {
306
      printjmp(op, p);
307
      printf(" (%d)", p->i.aux);
308
      break;
309
    }
310
    case IJmp: case ICall: case ICommit:
311
    case IPartialCommit: case IBackCommit: {
312
      printjmp(op, p);
313
      break;
314
    }
315
    default: break;
316
  }
317
  printf("\n");
318
}
319
320
321
static void printpatt (Instruction *p) {
322
  Instruction *op = p;
323
  for (;;) {
324
    printinst(op, p);
325
    if (p->i.code == IEnd) break;
326
    p += sizei(p);
327
  }
328
}
329
330
331
static void printcap (Capture *cap) {
332
  printcapkind(cap->kind);
333
  printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s);
334
}
335
336
337
static void printcaplist (Capture *cap) {
338
  for (; cap->s; cap++) printcap(cap);
339
}
340
341
/* }====================================================== */
342
343
344
345
346
/*
347
** {======================================================
348
** Virtual Machine
349
** =======================================================
350
*/
351
352
353
typedef struct Stack {
354
  const char *s;
355
  const Instruction *p;
356
  int caplevel;
357
} Stack;
358
359
360
static int runtimecap (lua_State *L, Capture *close, Capture *ocap,
361
                       const char *o, const char *s, int ptop);
362
363
364
static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
365
  Capture *newc;
366
  if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
367
    luaL_error(L, "too many captures");
368
  newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
369
  memcpy(newc, cap, captop * sizeof(Capture));
370
  lua_replace(L, caplistidx(ptop));
371
  return newc;
372
}
373
374
375
static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
376
  int i;
377
  assert(base[0].kind == Cruntime && base[0].siz == 0);
378
  base[0].idx = fd;  /* first returned capture */
379
  for (i = 1; i < n; i++) {  /* add extra captures */
380
    base[i].siz = 1;  /* mark it as closed */
381
    base[i].s = s;
382
    base[i].kind = Cruntime;
383
    base[i].idx = fd + i;  /* stack index */
384
  }
385
  base[n].kind = Cclose;  /* add closing entry */
386
  base[n].siz = 1;
387
  base[n].s = s;
388
}
389
390
391
static const char *match (lua_State *L,
392
                          const char *o, const char *s, const char *e,
393
                          Instruction *op, Capture *capture, int ptop) {
394
  Stack stackbase[MAXBACK];
395
  Stack *stacklimit = stackbase + MAXBACK;
396
  Stack *stack = stackbase;  /* point to first empty slot in stack */
397
  int capsize = IMAXCAPTURES;
398
  int captop = 0;  /* point to first empty slot in captures */
399
  const Instruction *p = op;
400
  stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
401
  for (;;) {
402
#if defined(DEBUG)
403
      printf("s: |%s| stck: %d c: %d  ", s, stack - stackbase, captop);
404
      printinst(op, p);
405
#endif
406
    switch ((Opcode)p->i.code) {
407
      case IEnd: {
408
        assert(stack == stackbase + 1);
409
        capture[captop].kind = Cclose;
410
        capture[captop].s = NULL;
411
        return s;
412
      }
413
      case IGiveup: {
414
        assert(stack == stackbase);
415
        return NULL;
416
      }
417
      case IRet: {
418
        assert(stack > stackbase && (stack - 1)->s == NULL);
419
        p = (--stack)->p;
420
        continue;
421
      }
422
      case IAny: {
423
        int n = p->i.aux;
424
        if (n > e - s) goto fail;
425
        else { p++; s += n; }
426
        continue;
427
      }
428
      case ITestAny: {
429
        int n = p->i.aux;
430
        if (n > e - s) p += p->i.offset;
431
        else { p++; s += n; }
432
        continue;
433
      }
434
      case IChar: {
435
        if ((byte)*s != p->i.aux || s >= e) goto fail;
436
        else { p++; s++; }
437
        continue;
438
      }
439
      case ITestChar: {
440
        if ((byte)*s != p->i.aux || s >= e) p += p->i.offset;
441
        else { p++; s++; }
442
        continue;
443
      }
444
      case ISet: {
445
        int c = (unsigned char)*s;
446
        if (!testchar((p+1)->buff, c)) goto fail;
447
        else { p += CHARSETINSTSIZE; s++; }
448
        continue;
449
      }
450
      case ITestSet: {
451
        int c = (unsigned char)*s;
452
        if (!testchar((p+1)->buff, c)) p += p->i.offset;
453
        else { p += CHARSETINSTSIZE; s++; }
454
        continue;
455
      }
456
      case IZSet: {
457
        int c = (unsigned char)*s;
458
        if (!testchar((p+1)->buff, c) || s >= e) goto fail;
459
        else { p += CHARSETINSTSIZE; s++; }
460
        continue;
461
      }
462
      case ITestZSet: {
463
        int c = (unsigned char)*s;
464
        if (!testchar((p+1)->buff, c) || s >= e) p += p->i.offset;
465
        else { p += CHARSETINSTSIZE; s++; }
466
        continue;
467
      }
468
      case ISpan: {
469
        for (;; s++) {
470
          int c = (unsigned char)*s;
471
          if (!testchar((p+1)->buff, c)) break;
472
        }
473
        p += CHARSETINSTSIZE;
474
        continue;
475
      }
476
      case ISpanZ: {
477
        for (; s < e; s++) {
478
          int c = (unsigned char)*s;
479
          if (!testchar((p+1)->buff, c)) break;
480
        }
481
        p += CHARSETINSTSIZE;
482
        continue;
483
      }
484
      case IFunc: {
485
        const char *r = (p+1)->f((p+2)->buff, o, s, e);
486
        if (r == NULL) goto fail;
487
        s = r;
488
        p += p->i.offset;
489
        continue;
490
      }
491
      case IJmp: {
492
        p += p->i.offset;
493
        continue;
494
      }
495
      case IChoice: {
496
        if (stack >= stacklimit)
497
          return (luaL_error(L, "too many pending calls/choices"), (char *)0);
498
        stack->p = dest(0, p);
499
        stack->s = s - p->i.aux;
500
        stack->caplevel = captop;
501
        stack++;
502
        p++;
503
        continue;
504
      }
505
      case ICall: {
506
        if (stack >= stacklimit)
507
          return (luaL_error(L, "too many pending calls/choices"), (char *)0);
508
        stack->s = NULL;
509
        stack->p = p + 1;  /* save return address */
510
        stack++;
511
        p += p->i.offset;
512
        continue;
513
      }
514
      case ICommit: {
515
        assert(stack > stackbase && (stack - 1)->s != NULL);
516
        stack--;
517
        p += p->i.offset;
518
        continue;
519
      }
520
      case IPartialCommit: {
521
        assert(stack > stackbase && (stack - 1)->s != NULL);
522
        (stack - 1)->s = s;
523
        (stack - 1)->caplevel = captop;
524
        p += p->i.offset;
525
        continue;
526
      }
527
      case IBackCommit: {
528
        assert(stack > stackbase && (stack - 1)->s != NULL);
529
        s = (--stack)->s;
530
        p += p->i.offset;
531
        continue;
532
      }
533
      case IFailTwice:
534
        assert(stack > stackbase);
535
        stack--;
536
        /* go through */
537
      case IFail:
538
      fail: { /* pattern failed: try to backtrack */
539
        do {  /* remove pending calls */
540
          assert(stack > stackbase);
541
          s = (--stack)->s;
542
        } while (s == NULL);
543
        captop = stack->caplevel;
544
        p = stack->p;
545
        continue;
546
      }
547
      case ICloseRunTime: {
548
        int fr = lua_gettop(L) + 1;  /* stack index of first result */
549
        int ncap = runtimecap(L, capture + captop, capture, o, s, ptop);
550
        lua_Integer res = lua_tointeger(L, fr) - 1;  /* offset */
551
        int n = lua_gettop(L) - fr;  /* number of new captures */
552
        if (res == -1) {  /* may not be a number */
553
          if (!lua_toboolean(L, fr)) {  /* false value? */
554
            lua_settop(L, fr - 1);  /* remove results */
555
            goto fail;  /* and fail */
556
          }
557
          else if (lua_isboolean(L, fr))  /* true? */
558
            res = s - o;  /* keep current position */
559
        }
560
        if (res < s - o || res > e - o)
561
          luaL_error(L, "invalid position returned by match-time capture");
562
        s = o + res;  /* update current position */
563
        captop -= ncap;  /* remove nested captures */
564
        lua_remove(L, fr);  /* remove first result (offset) */
565
        if (n > 0) {  /* captures? */
566
          if ((captop += n + 1) >= capsize) {
567
            capture = doublecap(L, capture, captop, ptop);
568
            capsize = 2 * captop;
569
          }
570
          adddyncaptures(s, capture + captop - n - 1, n, fr);
571
        }
572
        p++;
573
        continue;
574
      }
575
      case ICloseCapture: {
576
        const char *s1 = s - getoff(p);
577
        assert(captop > 0);
578
        if (capture[captop - 1].siz == 0 &&
579
            s1 - capture[captop - 1].s < UCHAR_MAX) {
580
          capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
581
          p++;
582
          continue;
583
        }
584
        else {
585
          capture[captop].siz = 1;  /* mark entry as closed */
586
          goto capture;
587
        }
588
      }
589
      case IEmptyCapture: case IEmptyCaptureIdx:
590
        capture[captop].siz = 1;  /* mark entry as closed */
591
        goto capture;
592
      case IOpenCapture:
593
        capture[captop].siz = 0;  /* mark entry as open */
594
        goto capture;
595
      case IFullCapture:
596
        capture[captop].siz = getoff(p) + 1;  /* save capture size */
597
      capture: {
598
        capture[captop].s = s - getoff(p);
599
        capture[captop].idx = p->i.offset;
600
        capture[captop].kind = getkind(p);
601
        if (++captop >= capsize) {
602
          capture = doublecap(L, capture, captop, ptop);
603
          capsize = 2 * captop;
604
        }
605
        p++;
606
        continue;
607
      }
608
      case IOpenCall: {
609
        lua_rawgeti(L, penvidx(ptop), p->i.offset);
610
        luaL_error(L, "reference to %s outside a grammar", val2str(L, -1));
611
      }
612
      default: assert(0); return NULL;
613
    }
614
  }
615
}
616
617
/* }====================================================== */
618
619
620
/*
621
** {======================================================
622
** Verifier
623
** =======================================================
624
*/
625
626
627
static int verify (lua_State *L, Instruction *op, const Instruction *p,
628
                   Instruction *e, int postable, int rule) {
629
  static const char dummy[] = "";
630
  Stack back[MAXBACK];
631
  int backtop = 0;  /* point to first empty slot in back */
632
  while (p != e) {
633
    switch ((Opcode)p->i.code) {
634
      case IRet: {
635
        p = back[--backtop].p;
636
        continue;
637
      }
638
      case IChoice: {
639
        if (backtop >= MAXBACK)
640
          return luaL_error(L, "too many pending calls/choices");
641
        back[backtop].p = dest(0, p);
642
        back[backtop++].s = dummy;
643
        p++;
644
        continue;
645
      }
646
      case ICall: {
647
        assert((p + 1)->i.code != IRet);  /* no tail call */
648
        if (backtop >= MAXBACK)
649
          return luaL_error(L, "too many pending calls/choices");
650
        back[backtop].s = NULL;
651
        back[backtop++].p = p + 1;
652
        goto dojmp;
653
      }
654
      case IOpenCall: {
655
        int i;
656
        if (postable == 0)  /* grammar still not fixed? */
657
          goto fail;  /* to be verified later */
658
        for (i = 0; i < backtop; i++) {
659
          if (back[i].s == NULL && back[i].p == p + 1)
660
            return luaL_error(L, "%s is left recursive", val2str(L, rule));
661
        }
662
        if (backtop >= MAXBACK)
663
          return luaL_error(L, "too many pending calls/choices");
664
        back[backtop].s = NULL;
665
        back[backtop++].p = p + 1;
666
        p = op + getposition(L, postable, p->i.offset);
667
        continue;
668
      }
669
      case IBackCommit:
670
      case ICommit: {
671
        assert(backtop > 0 && p->i.offset > 0);
672
        backtop--;
673
        goto dojmp;
674
      }
675
      case IPartialCommit: {
676
        assert(backtop > 0);
677
        if (p->i.offset > 0) goto dojmp;  /* forward jump */
678
        else {  /* loop will be detected when checking corresponding rule */
679
          assert(postable != 0);
680
          backtop--;
681
          p++;  /* just go on now */
682
          continue;
683
        }
684
      }
685
      case ITestAny:
686
      case ITestChar:  /* all these cases jump for empty subject */
687
      case ITestSet:
688
      case ITestZSet:
689
      case IJmp: 
690
      dojmp: {
691
        p += p->i.offset;
692
        continue;
693
      }
694
      case IAny:
695
      case IChar:
696
      case ISet:
697
      case IZSet:
698
      case IFailTwice:  /* assume that first level failed; try to backtrack */
699
        goto fail;
700
      case IFail: {
701
        if (p->i.aux) {  /* is an 'and' predicate? */
702
          assert((p - 1)->i.code == IBackCommit && (p - 1)->i.offset == 2);
703
          p++;  /* pretend it succeeded and go ahead */
704
          continue;
705
        }
706
        /* else go through */
707
      }
708
      fail: { /* pattern failed: try to backtrack */
709
        do {
710
          if (backtop-- == 0)
711
            return 1;  /* no more backtracking */
712
        } while (back[backtop].s == NULL);
713
        p = back[backtop].p;
714
        continue;
715
      }
716
      case ISpan: case ISpanZ:
717
      case IOpenCapture: case ICloseCapture:
718
      case IEmptyCapture: case IEmptyCaptureIdx:
719
      case IFullCapture: {
720
        p += sizei(p);
721
        continue;
722
      }
723
      case ICloseRunTime: {
724
        goto fail;  /* be liberal in this case */
725
      }
726
      case IFunc: {
727
        const char *r = (p+1)->f((p+2)->buff, dummy, dummy, dummy);
728
        if (r == NULL) goto fail;
729
        p += p->i.offset;
730
        continue;
731
      }
732
      case IEnd:  /* cannot happen (should stop before it) */
733
      default: assert(0); return 0;
734
    }
735
  }
736
  assert(backtop == 0);
737
  return 0;
738
}
739
740
741
static void checkrule (lua_State *L, Instruction *op, int from, int to,
742
                       int postable, int rule) {
743
  int i;
744
  int lastopen = 0;  /* more recent OpenCall seen in the code */
745
  for (i = from; i < to; i += sizei(op + i)) {
746
    if (op[i].i.code == IPartialCommit && op[i].i.offset < 0) {  /* loop? */
747
      int start = dest(op, i);
748
      assert(op[start - 1].i.code == IChoice && dest(op, start - 1) == i + 1);
749
      if (start <= lastopen) {  /* loop does contain an open call? */
750
        if (!verify(L, op, op + start, op + i, postable, rule)) /* check body */
751
          luaL_error(L, "possible infinite loop in %s", val2str(L, rule));
752
      }
753
    }
754
    else if (op[i].i.code == IOpenCall)
755
      lastopen = i;
756
  }
757
  assert(op[i - 1].i.code == IRet);
758
  verify(L, op, op + from, op + to - 1, postable, rule);
759
}
760
761
762
763
764
/* }====================================================== */
765
766
767
768
769
/*
770
** {======================================================
771
** Building Patterns
772
** =======================================================
773
*/
774
775
enum charsetanswer { NOINFO, ISCHARSET, VALIDSTARTS };
776
777
typedef struct CharsetTag {
778
  enum charsetanswer tag;
779
  Charset cs;
780
} CharsetTag;
781
782
783
static Instruction *getpatt (lua_State *L, int idx, int *size);
784
785
786
static void check2test (Instruction *p, int n) {
787
  assert(ischeck(p));
788
  p->i.code += ITestAny - IAny;
789
  p->i.offset = n;
790
}
791
792
793
/*
794
** invert array slice p[0]-p[e] (both inclusive)
795
*/
796
static void invert (Instruction *p, int e) {
797
  int i;
798
  for (i = 0; i < e; i++, e--) {
799
    Instruction temp = p[i];
800
    p[i] = p[e];
801
    p[e] = temp;
802
  }
803
}
804
805
806
/*
807
** rotate array slice p[0]-p[e] (both inclusive) 'n' steps
808
** to the 'left'
809
*/
810
static void rotate (Instruction *p, int e, int n) {
811
  invert(p, n - 1);
812
  invert(p + n, e - n);
813
  invert(p, e);
814
}
815
816
817
#define op_step(p)	((p)->i.code == IAny ? (p)->i.aux : 1)
818
819
820
static int skipchecks (Instruction *p, int up, int *pn) {
821
  int i, n = 0;
822
  for (i = 0; ischeck(p + i); i += sizei(p + i)) {
823
    int st = op_step(p + i);
824
    if (n + st > MAXOFF - up) break;
825
    n += st;
826
  }
827
  *pn = n;
828
  return i;
829
}
830
831
832
#define ismovablecap(op)	(ismovable(op) && getoff(op) < MAXOFF)
833
834
static void optimizecaptures (Instruction *p) {
835
  int i;
836
  int limit = 0;
837
  for (i = 0; p[i].i.code != IEnd; i += sizei(p + i)) {
838
    if (isjmp(p + i) && dest(p, i) >= limit)
839
      limit = dest(p, i) + 1;  /* do not optimize jump targets */
840
    else if (i >= limit && ismovablecap(p + i) && ischeck(p + i + 1)) {
841
      int end, n, j;  /* found a border capture|check */
842
      int maxoff = getoff(p + i);
843
      int start = i;
844
      /* find first capture in the group */
845
      while (start > limit && ismovablecap(p + start - 1)) {
846
        start--;
847
        if (getoff(p + start) > maxoff) maxoff = getoff(p + start);
848
      }
849
      end = skipchecks(p + i + 1, maxoff, &n) + i;  /* find last check */
850
      if (n == 0) continue;  /* first check is too big to move across */
851
      assert(n <= MAXOFF && start <= i && i < end);
852
      for (j = start; j <= i; j++)
853
        p[j].i.aux += (n << 4);  /* correct offset of captures to be moved */
854
      rotate(p + start, end - start, i - start + 1);  /* move them up */
855
      i = end;
856
      assert(ischeck(p + start) && iscapture(p + i));
857
    }
858
  }
859
}
860
861
862
static int target (Instruction *p, int i) {
863
  while (p[i].i.code == IJmp)  i += p[i].i.offset;
864
  return i;
865
}
866
867
868
static void optimizejumps (Instruction *p) {
869
  int i;
870
  for (i = 0; p[i].i.code != IEnd; i += sizei(p + i)) {
871
    if (isjmp(p + i))
872
      p[i].i.offset = target(p, dest(p, i)) - i;
873
  }
874
}
875
876
877
static void optimizechoice (Instruction *p) {
878
  assert(p->i.code == IChoice);
879
  if (ischeck(p + 1)) {
880
    int lc = sizei(p + 1);
881
    rotate(p, lc, 1);
882
    assert(ischeck(p) && (p + lc)->i.code == IChoice);
883
    (p + lc)->i.aux = op_step(p);
884
    check2test(p, (p + lc)->i.offset);
885
    (p + lc)->i.offset -= lc;
886
  }
887
}
888
889
890
/*
891
** A 'headfail' pattern is a pattern that can only fails in its first
892
** instruction, which must be a check.
893
*/
894
static int isheadfail (Instruction *p) {
895
  if (!ischeck(p)) return 0;
896
  /* check that other operations cannot fail */
897
  for (p += sizei(p); p->i.code != IEnd; p += sizei(p))
898
    if (!isnofail(p)) return 0;
899
  return 1;
900
}
901
902
903
#define checkpattern(L, idx) ((Instruction *)luaL_checkudata(L, idx, PATTERN_T))
904
905
906
static int jointable (lua_State *L, int p1) {
907
  int n, n1, i;
908
  lua_getfenv(L, p1);
909
  n1 = lua_objlen(L, -1);  /* number of elements in p1's env */
910
  lua_getfenv(L, -2);
911
  if (n1 == 0 || lua_equal(L, -2, -1)) {
912
    lua_pop(L, 2);
913
    return 0;  /* no need to change anything */
914
  }
915
  n = lua_objlen(L, -1);  /* number of elements in p's env */
916
  if (n == 0) {
917
    lua_pop(L, 1);  /* removes p env */
918
    lua_setfenv(L, -2);  /* p now shares p1's env */
919
    return 0;  /* no need to correct anything */
920
  }
921
  lua_createtable(L, n + n1, 0);
922
  /* stack: p; p1 env; p env; new p env */
923
  for (i = 1; i <= n; i++) {
924
    lua_rawgeti(L, -2, i);
925
    lua_rawseti(L, -2, i);
926
  }
927
  for (i = 1; i <= n1; i++) {
928
    lua_rawgeti(L, -3, i);
929
    lua_rawseti(L, -2, n + i);
930
  }
931
  lua_setfenv(L, -4);  /* new table becomes p env */
932
  lua_pop(L, 2);  /* remove p1 env and old p env */
933
  return n;
934
}
935
936
937
#define copypatt(p1,p2,sz)	memcpy(p1, p2, (sz) * sizeof(Instruction));
938
939
#define pattsize(L,idx)		(lua_objlen(L, idx)/sizeof(Instruction) - 1)
940
941
942
static int addpatt (lua_State *L, Instruction *p, int p1idx) {
943
  Instruction *p1 = (Instruction *)lua_touserdata(L, p1idx);
944
  int sz = pattsize(L, p1idx);
945
  int corr = jointable(L, p1idx);
946
  copypatt(p, p1, sz + 1);
947
  if (corr != 0) {
948
    Instruction *px;
949
    for (px = p; px < p + sz; px += sizei(px)) {
950
      if (isfenvoff(px) && px->i.offset != 0)
951
        px->i.offset += corr;
952
    }
953
  }
954
  return sz;
955
}
956
957
958
static void setinstaux (Instruction *i, Opcode op, int offset, int aux) {
959
  i->i.code = op;
960
  i->i.offset = offset;
961
  i->i.aux = aux;
962
}
963
964
#define setinst(i,op,off)	setinstaux(i,op,off,0)
965
966
#define setinstcap(i,op,idx,k,n)  setinstaux(i,op,idx,((k) | ((n) << 4)))
967
968
969
static int value2fenv (lua_State *L, int vidx) {
970
  lua_createtable(L, 1, 0);
971
  lua_pushvalue(L, vidx);
972
  lua_rawseti(L, -2, 1);
973
  lua_setfenv(L, -2);
974
  return 1;
975
}
976
977
978
static Instruction *newpatt (lua_State *L, size_t n) {
979
  Instruction *p;
980
  if (n >= MAXPATTSIZE - 1)
981
    luaL_error(L, "pattern too big");
982
  p = (Instruction *)lua_newuserdata(L, (n + 1) * sizeof(Instruction));
983
  luaL_getmetatable(L, PATTERN_T);
984
  lua_setmetatable(L, -2);
985
  setinst(p + n, IEnd, 0);
986
  return p;
987
}
988
989
990
static void fillcharset (Instruction *p, Charset cs) {
991
  switch (p[0].i.code) {
992
    case IZSet: case ITestZSet:
993
      assert(testchar(p[1].buff, '\0'));
994
      /* go through */
995
    case ISet: case ITestSet: {
996
      loopset(i, cs[i] = p[1].buff[i]);
997
      break;
998
    }
999
    case IChar: case ITestChar: {
1000
      loopset(i, cs[i] = 0);
1001
      setchar(cs, p[0].i.aux);
1002
      break;
1003
    }
1004
    default: {  /* any char may start unhandled instructions */
1005
      loopset(i, cs[i] = 0xff);
1006
      break;
1007
    }
1008
  }
1009
}
1010
1011
1012
/*
1013
** Function 'tocharset' gets information about which chars may be a
1014
** valid start for a pattern.
1015
*/
1016
1017
static enum charsetanswer tocharset (Instruction *p, CharsetTag *c) {
1018
  if (ischeck(p)) {
1019
    fillcharset(p, c->cs);
1020
    if ((p + sizei(p))->i.code == IEnd && op_step(p) == 1)
1021
      c->tag = ISCHARSET;
1022
    else
1023
      c->tag = VALIDSTARTS;
1024
  }
1025
  else
1026
    c->tag = NOINFO;
1027
  return c->tag;
1028
}
1029
1030
1031
static int exclusiveset (Charset c1, Charset c2) {
1032
  /* non-empty intersection? */
1033
  loopset(i, {if ((c1[i] & c2[i]) != 0) return 0;});
1034
  return 1;  /* no intersection */
1035
}
1036
1037
1038
static int exclusive (CharsetTag *c1, CharsetTag *c2) {
1039
  if (c1->tag == NOINFO || c2->tag == NOINFO)
1040
    return 0;  /* one of them is not filled */
1041
  else return exclusiveset(c1->cs, c2->cs);
1042
}
1043
1044
1045
#define correctset(p)	{ if (testchar(p[1].buff, '\0')) p->i.code++; }
1046
1047
1048
static Instruction *newcharset (lua_State *L) {
1049
  Instruction *p = newpatt(L, CHARSETINSTSIZE);
1050
  p[0].i.code = ISet;
1051
  loopset(i, p[1].buff[i] = 0);
1052
  return p;
1053
}
1054
1055
1056
static int set_l (lua_State *L) {
1057
  size_t l;
1058
  const char *s = luaL_checklstring(L, 1, &l);
1059
  if (l == 1)
1060
    getpatt(L, 1, NULL);  /* a unit set is equivalent to a literal */
1061
  else {
1062
    Instruction *p = newcharset(L);
1063
    while (l--) {
1064
      setchar(p[1].buff, (unsigned char)(*s));
1065
      s++;
1066
    }
1067
    correctset(p);
1068
  }
1069
  return 1;
1070
}
1071
1072
1073
static int range_l (lua_State *L) {
1074
  int arg;
1075
  int top = lua_gettop(L);
1076
  Instruction *p = newcharset(L);
1077
  for (arg = 1; arg <= top; arg++) {
1078
    int c;
1079
    size_t l;
1080
    const char *r = luaL_checklstring(L, arg, &l);
1081
    luaL_argcheck(L, l == 2, arg, "range must have two characters");
1082
    for (c = (byte)r[0]; c <= (byte)r[1]; c++)
1083
      setchar(p[1].buff, c);
1084
  }
1085
  correctset(p);
1086
  return 1;
1087
}
1088
1089
1090
static int nter_l (lua_State *L) {
1091
  Instruction *p;
1092
  luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected");
1093
  p = newpatt(L, 1);
1094
  setinst(p, IOpenCall, value2fenv(L, 1));
1095
  return 1;
1096
}
1097
1098
1099
1100
static int testpattern (lua_State *L, int idx) {
1101
  if (lua_touserdata(L, idx)) {  /* value is a userdata? */
1102
    if (lua_getmetatable(L, idx)) {  /* does it have a metatable? */
1103
      luaL_getmetatable(L, PATTERN_T);
1104
      if (lua_rawequal(L, -1, -2)) {  /* does it have the correct mt? */
1105
        lua_pop(L, 2);  /* remove both metatables */
1106
        return 1;
1107
      }
1108
    }
1109
  }
1110
  return 0;
1111
}
1112
1113
1114
static Instruction *fix_l (lua_State *L, int t) {
1115
  Instruction *p;
1116
  int i;
1117
  int totalsize = 2;  /* include initial call and jump */
1118
  int n = 0;  /* number of rules */
1119
  int base = lua_gettop(L);
1120
  lua_newtable(L);  /* to store relative positions of each rule */
1121
  lua_pushinteger(L, 1);  /* default initial rule */
1122
  /* collect patterns and compute sizes */
1123
  lua_pushnil(L);
1124
  while (lua_next(L, t) != 0) {
1125
    int l;
1126
    if (lua_tonumber(L, -2) == 1 && lua_isstring(L, -1)) {
1127
      lua_replace(L, base + 2);  /* use this value as initial rule */
1128
      continue;
1129
    }
1130
    if (!testpattern(L, -1))
1131
      luaL_error(L, "invalid field in grammar");
1132
    l = pattsize(L, -1) + 1;  /* space for pattern + ret */
1133
    if (totalsize >= MAXPATTSIZE - l)
1134
      luaL_error(L, "grammar too large");
1135
    luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules");
1136
    lua_insert(L, -2);  /* put key on top */
1137
    lua_pushvalue(L, -1);  /* duplicate key (for lua_next) */
1138
    lua_pushvalue(L, -1);  /* duplicate key (to index positions table)) */
1139
    lua_pushinteger(L, totalsize);  /* position for this rule */
1140
    lua_settable(L, base + 1);  /* store key=>position in positions table */
1141
    totalsize += l;
1142
    n++;
1143
  }
1144
  luaL_argcheck(L, n > 0, t, "empty grammar");
1145
  p = newpatt(L, totalsize);  /* create new pattern */
1146
  p++;  /* save space for call */
1147
  setinst(p++, IJmp, totalsize - 1);  /* after call, jumps to the end */
1148
  for (i = 1; i <= n; i++) {  /* copy all rules into new pattern */
1149
    p += addpatt(L, p, base + 1 + i*2);
1150
    setinst(p++, IRet, 0);
1151
  }
1152
  p -= totalsize;  /* back to first position */
1153
  totalsize = 2;  /* go through each rule's position */
1154
  for (i = 1; i <= n; i++) {  /* check all rules */
1155
    int l = pattsize(L, base + 1 + i*2) + 1;
1156
    checkrule(L, p, totalsize, totalsize + l, base + 1, base + 2 + i*2);
1157
    totalsize += l;
1158
  }
1159
  lua_pushvalue(L, base + 2);  /* get initial rule */
1160
  lua_gettable(L, base + 1);  /* get its position in postions table */
1161
  i = lua_tonumber(L, -1);  /* convert to number */
1162
  lua_pop(L, 1);
1163
  if (i == 0)  /* is it defined? */
1164
    luaL_error(L, "initial rule not defined in given grammar");
1165
  setinst(p, ICall, i);  /* first instruction calls initial rule */
1166
  /* correct calls */
1167
  for (i = 0; i < totalsize; i += sizei(p + i)) {
1168
    if (p[i].i.code == IOpenCall) {
1169
      int pos = getposition(L, base + 1, p[i].i.offset);
1170
      p[i].i.code = (p[target(p, i + 1)].i.code == IRet) ? IJmp : ICall;
1171
      p[i].i.offset = pos - i;
1172
    }
1173
  }
1174
  optimizejumps(p);
1175
  lua_replace(L, t);  /* put new pattern in old's position */
1176
  lua_settop(L, base);  /* remove rules and positions table */
1177
  return p;
1178
}
1179
1180
1181
static Instruction *any (lua_State *L, int n, int extra, int *offsetp) {
1182
  int offset = offsetp ? *offsetp : 0;
1183
  Instruction *p = newpatt(L, (n - 1)/UCHAR_MAX + extra + 1);
1184
  Instruction *p1 = p + offset;
1185
  for (; n > UCHAR_MAX; n -= UCHAR_MAX)
1186
    setinstaux(p1++, IAny, 0, UCHAR_MAX);
1187
  setinstaux(p1++, IAny, 0, n);
1188
  if (offsetp) *offsetp = p1 - p;
1189
  return p;
1190
}
1191
1192
1193
static Instruction *getpatt (lua_State *L, int idx, int *size) {
1194
  Instruction *p;
1195
  switch (lua_type(L, idx)) {
1196
    case LUA_TSTRING: {
1197
      size_t i, len;
1198
      const char *s = lua_tolstring(L, idx, &len);
1199
      p = newpatt(L, len);
1200
      for (i = 0; i < len; i++)
1201
        setinstaux(p + i, IChar, 0, (unsigned char)s[i]);
1202
      lua_replace(L, idx);
1203
      break;
1204
    }
1205
    case LUA_TNUMBER: {
1206
      int n = lua_tointeger(L, idx);
1207
      if (n == 0)  /* empty pattern? */
1208
        p = newpatt(L, 0);
1209
      else if (n > 0)
1210
        p = any(L, n, 0, NULL);
1211
      else if (-n <= UCHAR_MAX) {
1212
        p = newpatt(L, 2);
1213
        setinstaux(p, ITestAny, 2, -n);
1214
        setinst(p + 1, IFail, 0);
1215
      }
1216
      else {
1217
        int offset = 2;  /* space for ITestAny & IChoice */
1218
        p = any(L, -n - UCHAR_MAX, 3, &offset);
1219
        setinstaux(p, ITestAny, offset + 1, UCHAR_MAX);
1220
        setinstaux(p + 1, IChoice, offset, UCHAR_MAX);
1221
        setinst(p + offset, IFailTwice, 0);
1222
      }
1223
      lua_replace(L, idx);
1224
      break;
1225
    }
1226
    case LUA_TBOOLEAN: {
1227
      if (lua_toboolean(L, idx))  /* true? */
1228
        p = newpatt(L, 0);  /* empty pattern (always succeeds) */
1229
      else {
1230
        p = newpatt(L, 1);
1231
        setinst(p, IFail, 0);
1232
      }
1233
      lua_replace(L, idx);
1234
      break;
1235
    }
1236
    case LUA_TTABLE: {
1237
      p = fix_l(L, idx);
1238
      break;
1239
    }
1240
    case LUA_TFUNCTION: {
1241
      p = newpatt(L, 2);
1242
      setinstcap(p, IOpenCapture, value2fenv(L, idx), Cruntime, 0);
1243
      setinstcap(p + 1, ICloseRunTime, 0, Cclose, 0);
1244
      lua_replace(L, idx);
1245
      break;
1246
    }
1247
    default: {
1248
      p = checkpattern(L, idx);
1249
      break;
1250
    }
1251
  }
1252
  if (size) *size = pattsize(L, idx);
1253
  return p;
1254
}
1255
1256
1257
static int getpattl (lua_State *L, int idx) {
1258
  int size;
1259
  getpatt(L, idx, &size);
1260
  return size;
1261
}
1262
1263
1264
static int pattern_l (lua_State *L) {
1265
  lua_settop(L, 1);
1266
  getpatt(L, 1, NULL);
1267
  return 1;
1268
}
1269
1270
1271
#define isany(p)	((p)->i.code == IAny && ((p) + 1)->i.code == IEnd)
1272
1273
1274
static int concat_l (lua_State *L) {
1275
  /* p1; p2; */
1276
  int l1, l2;
1277
  Instruction *p1 = getpatt(L, 1, &l1);
1278
  Instruction *p2 = getpatt(L, 2, &l2);
1279
  if (isany(p1) && isany(p2))
1280
    any(L, p1->i.aux + p2->i.aux, 0, NULL);
1281
  else {
1282
    Instruction *op = newpatt(L, l1 + l2);
1283
    Instruction *p = op + addpatt(L, op, 1);
1284
    addpatt(L, p, 2);
1285
    optimizecaptures(op);
1286
  }
1287
  return 1;
1288
}
1289
1290
1291
static int diff_l (lua_State *L) {
1292
  int l1, l2;
1293
  Instruction *p1 = getpatt(L, 1, &l1);
1294
  Instruction *p2 = getpatt(L, 2, &l2);
1295
  CharsetTag st1, st2;
1296
  if (tocharset(p1, &st1) == ISCHARSET && tocharset(p2, &st2) == ISCHARSET) {
1297
    Instruction *p = newcharset(L);
1298
    loopset(i, p[1].buff[i] = st1.cs[i] & ~st2.cs[i]);
1299
    correctset(p);
1300
  }
1301
  else if (isheadfail(p2)) {
1302
    Instruction *p = newpatt(L, l2 + 1 + l1);
1303
    p += addpatt(L, p, 2);
1304
    check2test(p - l2, l2 + 1);
1305
    setinst(p++, IFail, 0);
1306
    addpatt(L, p, 1);
1307
  }
1308
  else {  /* !e2 . e1 */
1309
    /* !e -> choice L1; e; failtwice; L1: ... */
1310
    Instruction *p = newpatt(L, 1 + l2 + 1 + l1);
1311
    Instruction *pi = p;
1312
    setinst(p++, IChoice, 1 + l2 + 1);
1313
    p += addpatt(L, p, 2);
1314
    setinst(p++, IFailTwice, 0);
1315
    addpatt(L, p, 1);
1316
    optimizechoice(pi);
1317
  }
1318
  return 1;
1319
}
1320
1321
1322
static int unm_l (lua_State *L) {
1323
  lua_pushliteral(L, "");
1324
  lua_insert(L, 1);
1325
  return diff_l(L);
1326
}
1327
1328
1329
static int pattand_l (lua_State *L) {
1330
  /* &e -> choice L1; e; backcommit L2; L1: fail; L2: ... */
1331
  int l1 = getpattl(L, 1);
1332
  Instruction *p = newpatt(L, 1 + l1 + 2);
1333
  setinst(p++, IChoice, 1 + l1 + 1);
1334
  p += addpatt(L, p, 1);
1335
  setinst(p++, IBackCommit, 2);
1336
  setinstaux(p, IFail, 0, 1);
1337
  return 1;
1338
}
1339
1340
1341
static int firstpart (Instruction *p, int l) {
1342
  if (istest(p)) {
1343
    int e = p[0].i.offset - 1;
1344
    if ((p[e].i.code == IJmp || p[e].i.code == ICommit) &&
1345
        e + p[e].i.offset == l)
1346
      return e + 1;
1347
  }
1348
  else if (p[0].i.code == IChoice) {
1349
    int e = p[0].i.offset - 1;
1350
    if (p[e].i.code == ICommit && e + p[e].i.offset == l)
1351
      return e + 1;
1352
  }
1353
  return 0;
1354
}
1355
1356
1357
static Instruction *auxnew (lua_State *L, Instruction **op, int *size,
1358
                                         int extra) {
1359
  *op = newpatt(L, *size + extra);
1360
  jointable(L, 1);
1361
  *size += extra;
1362
  return *op + *size - extra;
1363
}
1364
1365
1366
static int nofail (Instruction *p, int l) {
1367
  int i;
1368
  for (i = 0; i < l; i += sizei(p + i)) {
1369
    if (!isnofail(p + i)) return 0;
1370
  }
1371
  return 1;
1372
}
1373
1374
1375
static int interfere (Instruction *p1, int l1, CharsetTag *st2) {
1376
  if (nofail(p1, l1))  /* p1 cannot fail? */
1377
    return 0;  /* nothing can intefere with it */
1378
  if (st2->tag == NOINFO) return 1;
1379
  switch (p1->i.code) {
1380
    case ITestChar: return testchar(st2->cs, p1->i.aux);
1381
    case ITestSet: return !exclusiveset(st2->cs, (p1 + 1)->buff);
1382
    default: assert(p1->i.code == ITestAny); return 1;
1383
  }
1384
}
1385
1386
1387
static Instruction *basicUnion (lua_State *L, Instruction *p1, int l1,
1388
                                int l2, int *size, CharsetTag *st2) {
1389
  Instruction *op;
1390
  CharsetTag st1;
1391
  tocharset(p1, &st1);
1392
  if (st1.tag == ISCHARSET && st2->tag == ISCHARSET) {
1393
    Instruction *p = auxnew(L, &op, size, CHARSETINSTSIZE);
1394
    setinst(p, ISet, 0);
1395
    loopset(i, p[1].buff[i] = st1.cs[i] | st2->cs[i]);
1396
    correctset(p);
1397
  }
1398
  else if (exclusive(&st1, st2) || isheadfail(p1)) {
1399
    Instruction *p = auxnew(L, &op, size, l1 + 1 + l2);
1400
    copypatt(p, p1, l1);
1401
    check2test(p, l1 + 1);
1402
    p += l1;
1403
    setinst(p++, IJmp, l2 + 1);
1404
    addpatt(L, p, 2);
1405
  }
1406
  else {
1407
    /* choice L1; e1; commit L2; L1: e2; L2: ... */
1408
    Instruction *p = auxnew(L, &op, size, 1 + l1 + 1 + l2);
1409
    setinst(p++, IChoice, 1 + l1 + 1);
1410
    copypatt(p, p1, l1); p += l1;
1411
    setinst(p++, ICommit, 1 + l2);
1412
    addpatt(L, p, 2);
1413
    optimizechoice(p - (1 + l1 + 1));
1414
  }
1415
  return op;
1416
}
1417
1418
1419
static Instruction *separateparts (lua_State *L, Instruction *p1, int l1,
1420
                                   int l2, int *size, CharsetTag *st2) {
1421
  int sp = firstpart(p1, l1);
1422
  if (sp == 0)  /* first part is entire p1? */
1423
    return basicUnion(L, p1, l1, l2, size, st2);
1424
  else if ((p1 + sp - 1)->i.code == ICommit || !interfere(p1, sp, st2)) {
1425
    Instruction *p;
1426
    int init = *size;
1427
    int end = init + sp;
1428
    *size = end;
1429
    p = separateparts(L, p1 + sp, l1 - sp, l2, size, st2);
1430
    copypatt(p + init, p1, sp);
1431
    (p + end - 1)->i.offset = *size - (end - 1);
1432
    return p;
1433
  }
1434
  else {  /* must change back to non-optimized choice */
1435
    Instruction *p;
1436
    int init = *size;
1437
    int end = init + sp + 1;  /* needs one extra instruction (choice) */
1438
    int sizefirst = sizei(p1);  /* size of p1's first instruction (the test) */
1439
    *size = end;
1440
    p = separateparts(L, p1 + sp, l1 - sp, l2, size, st2);
1441
    copypatt(p + init, p1, sizefirst);  /* copy the test */
1442
    (p + init)->i.offset++;  /* correct jump (because of new instruction) */
1443
    init += sizefirst;
1444
    setinstaux(p + init, IChoice, sp - sizefirst + 1, 1); init++;
1445
    copypatt(p + init, p1 + sizefirst, sp - sizefirst - 1);
1446
    init += sp - sizefirst - 1;
1447
    setinst(p + init, ICommit, *size - (end - 1));
1448
    return p;
1449
  }
1450
}
1451
1452
1453
static int union_l (lua_State *L) {
1454
  int l1, l2;
1455
  int size = 0;
1456
  Instruction *p1 = getpatt(L, 1, &l1);
1457
  Instruction *p2 = getpatt(L, 2, &l2);
1458
  CharsetTag st2;
1459
  if (p1->i.code == IFail)  /* check for identity element */
1460
    lua_pushvalue(L, 2);
1461
  else if (p2->i.code == IFail)
1462
    lua_pushvalue(L, 1);
1463
  else {
1464
    tocharset(p2, &st2);
1465
    separateparts(L, p1, l1, l2, &size, &st2);
1466
  }
1467
  return 1;
1468
}
1469
1470
1471
static int repeatcharset (lua_State *L, Charset cs, int l1, int n) {
1472
  /* e; ...; e; span; */
1473
  int i;
1474
  Instruction *p = newpatt(L, n*l1 + CHARSETINSTSIZE);
1475
  for (i = 0; i < n; i++) {
1476
    p += addpatt(L, p, 1);
1477
  }
1478
  setinst(p, ISpan, 0);
1479
  loopset(k, p[1].buff[k] = cs[k]);
1480
  correctset(p);
1481
  return 1;
1482
}
1483
1484
1485
static Instruction *repeatheadfail (lua_State *L, int l1, int n) {
1486
  /* e; ...; e; L2: e'(L1); jump L2; L1: ... */
1487
  int i;
1488
  Instruction *p = newpatt(L, (n + 1)*l1 + 1);
1489
  Instruction *op = p;
1490
  for (i = 0; i < n; i++) {
1491
    p += addpatt(L, p, 1);
1492
  }
1493
  p += addpatt(L, p, 1);
1494
  check2test(p - l1, l1 + 1);
1495
  setinst(p, IJmp, -l1);
1496
  return op;
1497
}
1498
1499
1500
static Instruction *repeats (lua_State *L, Instruction *p1, int l1, int n) {
1501
  /* e; ...; e; choice L1; L2: e; partialcommit L2; L1: ... */
1502
  int i;
1503
  Instruction *op = newpatt(L, (n + 1)*l1 + 2);
1504
  Instruction *p = op;
1505
  if (!verify(L, p1, p1, p1 + l1, 0, 0))
1506
    luaL_error(L, "loop body may accept empty string");
1507
  for (i = 0; i < n; i++) {
1508
    p += addpatt(L, p, 1);
1509
  }
1510
  setinst(p++, IChoice, 1 + l1 + 1);
1511
  p += addpatt(L, p, 1);
1512
  setinst(p, IPartialCommit, -l1);
1513
  return op;
1514
}
1515
1516
1517
static void optionalheadfail (lua_State *L, int l1, int n) {
1518
  Instruction *op = newpatt(L, n * l1);
1519
  Instruction *p = op;
1520
  int i;
1521
  for (i = 0; i < n; i++) {
1522
    p += addpatt(L, p, 1);
1523
    check2test(p - l1, (n - i)*l1);
1524
  }
1525
}
1526
1527
1528
static void optionals (lua_State *L, int l1, int n) {
1529
  /* choice L1; e; partialcommit L2; L2: ... e; L1: commit L3; L3: ... */
1530
  int i;
1531
  Instruction *op = newpatt(L, n*(l1 + 1) + 1);
1532
  Instruction *p = op;
1533
  setinst(p++, IChoice, 1 + n*(l1 + 1));
1534
  for (i = 0; i < n; i++) {
1535
    p += addpatt(L, p, 1);
1536
    setinst(p++, IPartialCommit, 1);
1537
  }
1538
  setinst(p - 1, ICommit, 1);  /* correct last commit */
1539
  optimizechoice(op);
1540
}
1541
1542
1543
static int star_l (lua_State *L) {
1544
  int l1;
1545
  int n = luaL_checkint(L, 2);
1546
  Instruction *p1 = getpatt(L, 1, &l1);
1547
  if (n >= 0) {
1548
    CharsetTag st;
1549
    Instruction *op;
1550
    if (tocharset(p1, &st) == ISCHARSET)
1551
      return repeatcharset(L, st.cs, l1, n);
1552
    if (isheadfail(p1))
1553
      op = repeatheadfail(L, l1, n);
1554
    else
1555
      op = repeats(L, p1, l1, n);
1556
    optimizecaptures(op);
1557
    optimizejumps(op);
1558
  }
1559
  else {
1560
    if (isheadfail(p1))
1561
      optionalheadfail(L, l1, -n);
1562
    else
1563
      optionals(L, l1, -n);
1564
  }
1565
  return 1;
1566
}
1567
1568
1569
static int getlabel (lua_State *L, int labelidx) {
1570
  if (labelidx == 0) return 0;
1571
  else return value2fenv(L, labelidx);
1572
}
1573
1574
1575
static int capture_aux (lua_State *L, int kind, int labelidx) {
1576
  int l1, n;
1577
  Instruction *p1 = getpatt(L, 1, &l1);
1578
  int lc = skipchecks(p1, 0, &n);
1579
  if (lc == l1) {  /* got whole pattern? */
1580
    /* may use a IFullCapture instruction at its end */
1581
    Instruction *p = newpatt(L, l1 + 1);
1582
    int label = getlabel(L, labelidx);
1583
    p += addpatt(L, p, 1);
1584
    setinstcap(p, IFullCapture, label, kind, n);
1585
  }
1586
  else {  /* must use open-close pair */
1587
    Instruction *op = newpatt(L, 1 + l1 + 1);
1588
    Instruction *p = op;
1589
    setinstcap(p++, IOpenCapture, getlabel(L, labelidx), kind, 0);
1590
    p += addpatt(L, p, 1);
1591
    setinstcap(p, ICloseCapture, 0, Cclose, 0);
1592
    optimizecaptures(op);
1593
  }
1594
  return 1;
1595
}
1596
1597
1598
static int capture_l (lua_State *L) { return capture_aux(L, Csimple, 0); }
1599
static int tcapture_l (lua_State *L) { return capture_aux(L, Ctable, 0); }
1600
static int capsubst_l (lua_State *L) { return capture_aux(L, Csubst, 0); }
1601
static int capaccum_l (lua_State *L) { return capture_aux(L, Caccum, 0); }
1602
1603
static int rcapture_l (lua_State *L) {
1604
  switch (lua_type(L, 2)) {
1605
    case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2);
1606
    case LUA_TTABLE: return capture_aux(L, Cquery, 2);
1607
    case LUA_TSTRING: return capture_aux(L, Cstring, 2);
1608
    default: return luaL_argerror(L, 2, "invalid replacement value");
1609
  }
1610
}
1611
1612
1613
static int position_l (lua_State *L) {
1614
  Instruction *p = newpatt(L, 1);
1615
  setinstcap(p, IEmptyCapture, 0, Cposition, 0);
1616
  return 1;
1617
}
1618
1619
1620
static int emptycap_aux (lua_State *L, int kind, const char *msg) {
1621
  int n = luaL_checkint(L, 1);
1622
  Instruction *p = newpatt(L, 1);
1623
  luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, msg);
1624
  setinstcap(p, IEmptyCapture, n, kind, 0);
1625
  return 1;
1626
}
1627
1628
1629
static int backref_l (lua_State *L) {
1630
  return emptycap_aux(L, Cbackref, "invalid back reference");
1631
}
1632
1633
1634
static int argcap_l (lua_State *L) {
1635
  return emptycap_aux(L, Carg, "invalid argument index");
1636
}
1637
1638
1639
static int matchtime_l (lua_State *L) {
1640
  int l1 = getpattl(L, 1);
1641
  Instruction *op = newpatt(L, 1 + l1 + 1);
1642
  Instruction *p = op;
1643
  luaL_checktype(L, 2, LUA_TFUNCTION);
1644
  setinstcap(p++, IOpenCapture, value2fenv(L, 2), Cruntime, 0);
1645
  p += addpatt(L, p, 1);
1646
  setinstcap(p, ICloseRunTime, 0, Cclose, 0);
1647
  optimizecaptures(op);
1648
  return 1;
1649
}
1650
1651
1652
static int capconst_l (lua_State *L) {
1653
  int i, j;
1654
  int n = lua_gettop(L);
1655
  Instruction *p = newpatt(L, n);
1656
  lua_createtable(L, n, 0);  /* new environment for the new pattern */
1657
  for (i = j = 1; i <= n; i++) {
1658
    if (lua_isnil(L, i))
1659
      setinstcap(p++, IEmptyCaptureIdx, 0, Cconst, 0);
1660
    else {
1661
      setinstcap(p++, IEmptyCaptureIdx, j, Cconst, 0);
1662
      lua_pushvalue(L, i);
1663
      lua_rawseti(L, -2, j++);
1664
    }
1665
  }
1666
  lua_setfenv(L, -2);   /* set environment */
1667
  return 1;
1668
}
1669
1670
1671
/* }====================================================== */
1672
1673
1674
1675
/*
1676
** {======================================================
1677
** User-Defined Patterns
1678
** =======================================================
1679
*/
1680
1681
static void newpattfunc (lua_State *L, PattFunc f, const void *ud, size_t l) {
1682
  int n = instsize(l) + 1;
1683
  Instruction *p = newpatt(L, n);
1684
  p[0].i.code = IFunc;
1685
  p[0].i.offset = n;
1686
  p[1].f = f;
1687
  memcpy(p[2].buff, ud, l);
1688
}
1689
1690
1691
#include <ctype.h>
1692
1693
static const char *span (const void *ud, const char *o,
1694
                                         const char *s,
1695
                                         const char *e) {
1696
  const char *u = (const char *)ud;
1697
  (void)o; (void)e;
1698
  return s + strspn(s, u);
1699
}
1700
1701
1702
static int span_l (lua_State *L) {
1703
  const char *s = luaL_checkstring(L, 1);
1704
  newpattfunc(L, span, s, strlen(s) + 1);
1705
  return 1;
1706
}
1707
1708
/* }====================================================== */
1709
1710
1711
1712
/*
1713
** {======================================================
1714
** Captures
1715
** =======================================================
1716
*/
1717
1718
1719
typedef struct CapState {
1720
  Capture *cap;  /* current capture */
1721
  Capture *ocap;  /* (original) capture list */
1722
  lua_State *L;
1723
  int ptop;  /* index of last argument to 'match' */
1724
  const char *s;  /* original string */
1725
  int valuecached;  /* value stored in cache slot */
1726
} CapState;
1727
1728
1729
#define captype(cap)	((cap)->kind)
1730
1731
#define isclosecap(cap)	(captype(cap) == Cclose)
1732
1733
#define closeaddr(c)	((c)->s + (c)->siz - 1)
1734
1735
#define isfullcap(cap)	((cap)->siz != 0)
1736
1737
#define pushluaval(cs) lua_rawgeti((cs)->L, penvidx((cs)->ptop), (cs)->cap->idx)
1738
1739
#define pushsubject(cs, c) lua_pushlstring((cs)->L, (c)->s, (c)->siz - 1)
1740
1741
1742
#define updatecache(cs,v) { if ((v) != (cs)->valuecached) updatecache_(cs,v); }
1743
1744
1745
static void updatecache_ (CapState *cs, int v) {
1746
  lua_rawgeti(cs->L, penvidx(cs->ptop), v);
1747
  lua_replace(cs->L, subscache(cs->ptop));
1748
  cs->valuecached = v;
1749
}
1750
1751
1752
static int pushcapture (CapState *cs);
1753
1754
1755
static Capture *findopen (Capture *cap) {
1756
  int n = 0;
1757
  for (;;) {
1758
    cap--;
1759
    if (isclosecap(cap)) n++;
1760
    else if (!isfullcap(cap))
1761
      if (n-- == 0) return cap;
1762
  }
1763
}
1764
1765
1766
static Capture *findback (CapState *cs, Capture *cap, int n) {
1767
  int i;
1768
  for (i = 0; i < n; i++) {
1769
    if (cap == cs->ocap)
1770
      luaL_error(cs->L, "invalid back reference (%d)", n);
1771
    cap--;
1772
    if (isclosecap(cap))
1773
      cap = findopen(cap);
1774
    else if (!isfullcap(cap))
1775
      i--;  /* does not count enclosing captures */
1776
  }
1777
  assert(!isclosecap(cap));
1778
  return cap;
1779
}
1780
1781
1782
static int pushallcaptures (CapState *cs, int addextra) {
1783
  Capture *co = cs->cap;
1784
  int n = 0;
1785
  if (isfullcap(cs->cap++)) {
1786
    pushsubject(cs, co);  /* push whole match */
1787
    return 1;
1788
  }
1789
  while (!isclosecap(cs->cap))
1790
    n += pushcapture(cs);
1791
  if (addextra || n == 0) {  /* need extra? */
1792
    lua_pushlstring(cs->L, co->s, cs->cap->s - co->s);  /* push whole match */
1793
    n++;
1794
  }
1795
  cs->cap++;  /* skip close entry */
1796
  return n;
1797
}
1798
1799
1800
static int simplecap (CapState *cs) {
1801
  int n;
1802
  lua_pushnil(cs->L);  /* open space */
1803
  n = pushallcaptures(cs, 1);
1804
  lua_replace(cs->L, -(n + 1));  /* put extra in previously opened slot */
1805
  return n;
1806
}
1807
1808
1809
static int backrefcap (CapState *cs) {
1810
  int n = (cs->cap)->idx;
1811
  Capture *curr = cs->cap;
1812
  Capture *backref = findback(cs, curr, n);
1813
  cs->cap = backref;
1814
  n = pushcapture(cs);
1815
  cs->cap = curr + 1;
1816
  return n;
1817
}
1818
1819
1820
static int tablecap (CapState *cs) {
1821
  int n = 0;
1822
  lua_newtable(cs->L);
1823
  if (isfullcap(cs->cap++))
1824
    return 1;  /* table is empty */
1825
  while (!isclosecap(cs->cap)) {
1826
    int i;
1827
    int k = pushcapture(cs);
1828
    for (i = k; i > 0; i--)
1829
      lua_rawseti(cs->L, -(i + 1), n + i);
1830
    n += k;
1831
  }
1832
  cs->cap++;  /* skip close entry */
1833
  return 1;
1834
}
1835
1836
1837
static int querycap (CapState *cs) {
1838
  int idx = cs->cap->idx;
1839
  int n = pushallcaptures(cs, 0);
1840
  if (n > 1)  /* extra captures? */
1841
    lua_pop(cs->L, n - 1);  /* throw them away */
1842
  updatecache(cs, idx);
1843
  lua_gettable(cs->L, subscache(cs->ptop));
1844
  if (!lua_isnil(cs->L, -1))
1845
    return 1;
1846
  else {
1847
    lua_pop(cs->L, 1);  /* remove value */
1848
    return 0;
1849
  }
1850
}
1851
1852
1853
static int accumcap (CapState *cs) {
1854
  lua_State *L = cs->L;
1855
  if (isfullcap(cs->cap++) || isclosecap(cs->cap) || pushcapture(cs) != 1)
1856
    return luaL_error(L, "no initial value for accumulator capture");
1857
  while (!isclosecap(cs->cap)) {
1858
    int n;
1859
    if (captype(cs->cap) != Cfunction)
1860
      return luaL_error(L, "invalid (non function) capture to accumulate");
1861
    pushluaval(cs);
1862
    lua_insert(L, -2);  /* put function before previous capture */
1863
    n = pushallcaptures(cs, 0);
1864
    lua_call(L, n + 1, 1);
1865
  }
1866
  cs->cap++;
1867
  return 1;
1868
}
1869
1870
1871
static int functioncap (CapState *cs) {
1872
  int n;
1873
  int top = lua_gettop(cs->L);
1874
  pushluaval(cs);
1875
  n = pushallcaptures(cs, 0);
1876
  lua_call(cs->L, n, LUA_MULTRET);
1877
  return lua_gettop(cs->L) - top;
1878
}
1879
1880
1881
static int runtimecap (lua_State *L, Capture *close, Capture *ocap,
1882
                       const char *o, const char *s, int ptop) {
1883
  CapState cs;
1884
  int n;
1885
  Capture *open = findopen(close);
1886
  assert(captype(open) == Cruntime);
1887
  close->kind = Cclose;
1888
  close->s = s;
1889
  cs.ocap = ocap; cs.cap = open; cs.L = L;
1890
  cs.s = o; cs.valuecached = 0; cs.ptop = ptop;
1891
  luaL_checkstack(L, 4, "too many runtime captures");
1892
  pushluaval(&cs);
1893
  lua_pushvalue(L, SUBJIDX);  /* push original subject */
1894
  lua_pushinteger(L, s - o + 1);  /* current position */
1895
  n = pushallcaptures(&cs, 0);
1896
  lua_call(L, n + 2, LUA_MULTRET);
1897
  return close - open;
1898
}
1899
1900
1901
1902
typedef struct StrAux {
1903
  const char *s;
1904
  const char *e;
1905
} StrAux;
1906
1907
#define MAXSTRCAPS	10
1908
1909
static int getstrcaps (CapState *cs, StrAux *cps, int n) {
1910
  int k = n++;
1911
  if (k < MAXSTRCAPS) cps[k].s = cs->cap->s;
1912
  if (!isfullcap(cs->cap++)) {
1913
    while (!isclosecap(cs->cap)) {
1914
      if (captype(cs->cap) != Csimple)
1915
        return luaL_error(cs->L,
1916
                          "invalid capture #%d in replacement pattern", n);
1917
      n = getstrcaps(cs, cps, n);
1918
    }
1919
    cs->cap++;  /* skip close */
1920
  }
1921
  if (k < MAXSTRCAPS) cps[k].e = closeaddr(cs->cap - 1);
1922
  return n;
1923
}
1924
1925
1926
static void stringcap (luaL_Buffer *b, CapState *cs) {
1927
  StrAux cps[MAXSTRCAPS];
1928
  int n;
1929
  size_t len, i;
1930
  const char *c;
1931
  updatecache(cs, cs->cap->idx);
1932
  c = lua_tolstring(cs->L, subscache(cs->ptop), &len);
1933
  n = getstrcaps(cs, cps, 0) - 1;
1934
  for (i = 0; i < len; i++) {
1935
    if (c[i] != '%' || c[++i] < '0' || c[i] > '9')
1936
      luaL_addchar(b, c[i]);
1937
    else {
1938
      int l = c[i] - '0';
1939
      if (l > n)
1940
        luaL_error(cs->L, "invalid capture index (%c)", c[i]);
1941
      luaL_addlstring(b, cps[l].s, cps[l].e - cps[l].s);
1942
    }
1943
  }
1944
}
1945
1946
1947
static void substcap (CapState *cs) {
1948
  luaL_Buffer b;
1949
  const char *curr = (cs->cap - 1)->s;
1950
  luaL_buffinit(cs->L, &b);
1951
  while (!isclosecap(cs->cap)) {
1952
    int n;
1953
    const char *next = cs->cap->s;
1954
    luaL_addlstring(&b, curr, next - curr);
1955
    if (captype(cs->cap) == Cstring)
1956
      stringcap(&b, cs);  /* add capture directly to buffer */
1957
    else if ((n = pushcapture(cs)) == 0) {  /* no capture? */
1958
      curr = next;  /* result keeps the original text */
1959
      continue;
1960
    }
1961
    else {
1962
      if (n > 1) lua_pop(cs->L, n - 1);  /* ignore extra values */
1963
      if (!lua_isstring(cs->L, -1))
1964
        luaL_error(cs->L, "invalid replacement value (a %s)",
1965
                          luaL_typename(cs->L, -1));
1966
      luaL_addvalue(&b);  /* add result to accumulator */
1967
    }
1968
    /* continue after match */
1969
    curr = closeaddr(cs->cap - 1);
1970
  }
1971
  luaL_addlstring(&b, curr, cs->cap->s - curr);
1972
  luaL_pushresult(&b);
1973
  cs->cap++;
1974
}
1975
1976
1977
static int pushcapture (CapState *cs) {
1978
  luaL_checkstack(cs->L, 4, "too many unstored captures");
1979
  switch (captype(cs->cap)) {
1980
    case Cposition: {
1981
      lua_pushinteger(cs->L, cs->cap->s - cs->s + 1);
1982
      cs->cap++;
1983
      return 1;
1984
    }
1985
    case Cconst: {
1986
      pushluaval(cs);
1987
      cs->cap++;
1988
      return 1;
1989
    }
1990
    case Carg: {
1991
      int arg = (cs->cap++)->idx;
1992
      if (arg + FIXEDARGS > cs->ptop)
1993
        return luaL_error(cs->L, "reference to absent argument #%d", arg);
1994
      lua_pushvalue(cs->L, arg + FIXEDARGS);
1995
      return 1;
1996
    }
1997
    case Csimple: {
1998
      if (isfullcap(cs->cap)) {
1999
        pushsubject(cs, cs->cap);
2000
        cs->cap++;
2001
        return 1;
2002
      }
2003
      else return simplecap(cs);
2004
    }
2005
    case Cruntime: {
2006
      int i = 0;
2007
      while (!isclosecap(cs->cap++)) {
2008
        luaL_checkstack(cs->L, 4, "too many unstored captures");
2009
        lua_pushvalue(cs->L, (cs->cap - 1)->idx);
2010
        i++;
2011
      }
2012
      return i;
2013
    }
2014
    case Cstring: {
2015
      luaL_Buffer b;
2016
      luaL_buffinit(cs->L, &b);
2017
      stringcap(&b, cs);
2018
      luaL_pushresult(&b);
2019
      return 1;
2020
    }
2021
    case Csubst: {
2022
      if (isfullcap(cs->cap++))  /* no changes? */
2023
        pushsubject(cs, cs->cap - 1);  /* push original subject */
2024
      else
2025
        substcap(cs);
2026
      return 1;
2027
    }
2028
    case Cbackref: return backrefcap(cs);
2029
    case Ctable: return tablecap(cs);
2030
    case Cfunction: return functioncap(cs);
2031
    case Cquery: return querycap(cs);
2032
    case Caccum: return accumcap(cs);
2033
    default: assert(0); return 0;
2034
  }
2035
}
2036
2037
2038
static int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
2039
  Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
2040
  int n = 0;
2041
  if (!isclosecap(capture)) {  /* is there any capture? */
2042
    CapState cs;
2043
    cs.ocap = cs.cap = capture; cs.L = L;
2044
    cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
2045
    do {  /* collect their values */
2046
      n += pushcapture(&cs);
2047
    } while (!isclosecap(cs.cap));
2048
  }
2049
  if (n == 0) {  /* no capture values? */
2050
    lua_pushinteger(L, r - s + 1);  /* return only end position */
2051
    n = 1;
2052
  }
2053
  return n;
2054
}
2055
2056
/* }====================================================== */
2057
2058
2059
static int version_l (lua_State *L) {
2060
  lua_pushstring(L, VERSION);
2061
  return 1;
2062
}
2063
2064
2065
static int type_l (lua_State *L) {
2066
  if (testpattern(L, 1))
2067
    lua_pushliteral(L, "pattern");
2068
  else
2069
    lua_pushnil(L);
2070
  return 1;
2071
}
2072
2073
2074
static int printpat_l (lua_State *L) {
2075
  Instruction *p = getpatt(L, 1, NULL);
2076
  int n, i;
2077
  lua_getfenv(L, 1);
2078
  n = lua_objlen(L, -1);
2079
  printf("[");
2080
  for (i = 1; i <= n; i++) {
2081
    printf("%d = ", i);
2082
    lua_rawgeti(L, -1, i);
2083
    if (lua_isstring(L, -1))
2084
      printf("%s  ", lua_tostring(L, -1));
2085
    else
2086
      printf("%s  ", lua_typename(L, lua_type(L, -1)));
2087
    lua_pop(L, 1);
2088
  }
2089
  printf("]\n");
2090
  printpatt(p);
2091
  return 0;
2092
}
2093
2094
2095
static int matchl (lua_State *L) {
2096
  Capture capture[IMAXCAPTURES];
2097
  const char *r;
2098
  size_t l;
2099
  Instruction *p = getpatt(L, 1, NULL);
2100
  const char *s = luaL_checklstring(L, SUBJIDX, &l);
2101
  int ptop = lua_gettop(L);
2102
  lua_Integer ii = luaL_optinteger(L, 3, 1);
2103
  size_t i = (ii > 0) ?
2104
             (((size_t)ii <= l) ? (size_t)ii - 1 : l) :
2105
             (((size_t)-ii <= l) ? l - ((size_t)-ii) : 0);
2106
  lua_pushnil(L);  /* subscache */
2107
  lua_pushlightuserdata(L, capture);  /* caplistidx */
2108
  lua_getfenv(L, 1);  /* penvidx */
2109
  r = match(L, s, s + i, s + l, p, capture, ptop);
2110
  if (r == NULL) {
2111
    lua_pushnil(L);
2112
    return 1;
2113
  }
2114
  return getcaptures(L, s, r, ptop);
2115
}
2116
2117
2118
static struct luaL_reg pattreg[] = {
2119
  {"match", matchl},
2120
  {"print", printpat_l},
2121
  {"C", capture_l},
2122
  {"Ca", capaccum_l},
2123
  {"Cc", capconst_l},
2124
  {"Cp", position_l},
2125
  {"Cb", backref_l},
2126
  {"Carg", argcap_l},
2127
  {"Cmt", matchtime_l},
2128
  {"Cs", capsubst_l},
2129
  {"Ct", tcapture_l},
2130
  {"P", pattern_l},
2131
  {"R", range_l},
2132
  {"S", set_l},
2133
  {"V", nter_l},
2134
  {"span", span_l},
2135
  {"type", type_l},
2136
  {"version", version_l},
2137
  {NULL, NULL}
2138
};
2139
2140
2141
static struct luaL_reg metapattreg[] = {
2142
  {"__add", union_l},
2143
  {"__pow", star_l},
2144
  {"__sub", diff_l},
2145
  {"__mul", concat_l},
2146
  {"__div", rcapture_l},
2147
  {"__unm", unm_l},
2148
  {"__len", pattand_l},
2149
  {NULL, NULL}
2150
};
2151
2152
2153
int luaopen_lpeg (lua_State *L);
2154
int luaopen_lpeg (lua_State *L) {
2155
  lua_newtable(L);
2156
  lua_replace(L, LUA_ENVIRONINDEX);  /* empty env for new patterns */
2157
  luaL_newmetatable(L, PATTERN_T);
2158
  luaL_register(L, NULL, metapattreg);
2159
  luaL_register(L, "lpeg", pattreg);
2160
  lua_pushliteral(L, "__index");
2161
  lua_pushvalue(L, -2);
2162
  lua_settable(L, -4);
2163
  return 1;
2164
}
2165