8
typedef struct Inst Inst;
12
long type; /* < 0x10000 ==> literal, otherwise action */
26
#define subid r.rsubid
27
#define rclass r.class
28
#define other r.rother
29
#define right r.rright
36
Inst *startinst; /* First inst. of program; might not be program[0] */
37
Inst *bstartinst; /* same for backwards machine */
39
typedef struct Ilist Ilist;
42
Inst *inst; /* Instruction of the thread */
44
Posn startp; /* first char of match */
49
Ilist *tl, *nl; /* This list, next list */
50
Ilist list[2][NLIST+1]; /* +1 for trailing null */
51
static Rangeset sempty;
56
* 0x100xx are operators, value == precedence
57
* 0x200xx are tokens, i.e. operands for operators
59
#define OPERATOR 0x10000 /* Bitmask of all operators */
60
#define START 0x10000 /* Start, used for marker on stack */
61
#define RBRA 0x10001 /* Right bracket, ) */
62
#define LBRA 0x10002 /* Left bracket, ( */
63
#define OR 0x10003 /* Alternation, | */
64
#define CAT 0x10004 /* Concatentation, implicit operator */
65
#define STAR 0x10005 /* Closure, * */
66
#define PLUS 0x10006 /* a+ == aa* */
67
#define QUEST 0x10007 /* a? == a|nothing, i.e. 0 or 1 a's */
68
#define ANY 0x20000 /* Any character but newline, . */
69
#define NOP 0x20001 /* No operation, internal use only */
70
#define BOL 0x20002 /* Beginning of line, ^ */
71
#define EOL 0x20003 /* End of line, $ */
72
#define CCLASS 0x20004 /* Character class, [] */
73
#define NCCLASS 0x20005 /* Negated character class, [^] */
74
#define END 0x20077 /* Terminate: match found */
76
#define ISATOR 0x10000
82
typedef struct Node Node;
90
Node andstack[NSTACK];
92
int atorstack[NSTACK];
94
int lastwasand; /* Last token was operand */
96
int subidstack[NSTACK];
100
Rune *exprp; /* pointer to next character in source expression */
101
#define DCLASS 10 /* allocation increment */
102
int nclass; /* number active */
103
int Nclass; /* high water mark */
107
int addinst(Ilist *l, Inst *inst, Rangeset *sep);
108
void newmatch(Rangeset*);
109
void bnewmatch(Rangeset*);
110
void pushand(Inst*, Inst*);
114
void startlex(Rune*);
119
void optimize(Inst*);
120
void bldcclass(void);
125
Strzero(&lastregexp);
130
regerror_c(Err e, int c)
132
Strzero(&lastregexp);
139
if(progp >= &program[NPROG])
158
/* Start with a low priority operator to prime parser */
160
while((token=lex()) != END){
161
if((token&ISATOR) == OPERATOR)
166
/* Close with a low priority operator */
173
--andp; /* points to first and only operand */
183
if(Strcmp(s, &lastregexp)==0)
185
for(i=0; i<nclass; i++)
190
startinst = realcompile(s->s);
194
bstartinst = realcompile(s->s);
196
Strduplstr(&lastregexp, s);
204
operator(CAT); /* catenate is implicit */
208
i->type = NCCLASS; /* UGH */
209
i->rclass = nclass-1; /* UGH */
218
if(t==RBRA && --nbra<0)
222
* if(++cursubid >= NSUBEXP)
225
cursubid++; /* silently ignored */
234
if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
235
lastwasand = TRUE; /* these look like operands */
243
sprint(buf, "regexp: can't happen: %s", s);
248
pushand(Inst *f, Inst *l)
250
if(andp >= &andstack[NSTACK])
251
cant("operand stack overflow");
260
if(atorp >= &atorstack[NSTACK])
261
cant("operator stack overflow");
263
if(cursubid >= NSUBEXP)
272
if(andp <= &andstack[0])
274
regerror_c(Emissop, op);
276
regerror(Ebadregexp);
283
if(atorp <= &atorstack[0])
284
cant("operator stack underflow");
295
while(pri==RBRA || atorp[-1]>=pri){
299
inst2 = newinst(RBRA);
300
inst2->subid = *subidp;
301
op1->last->next = inst2;
302
inst1 = newinst(LBRA);
303
inst1->subid = *subidp;
304
inst1->next = op1->first;
305
pushand(inst1, inst2);
306
return; /* must have been RBRA */
308
panic("unknown regexp operator");
313
inst2 = newinst(NOP);
314
op2->last->next = inst2;
315
op1->last->next = inst2;
317
inst1->right = op1->first;
318
inst1->left = op2->first;
319
pushand(inst1, inst2);
324
if(backwards && op2->first->type!=END)
325
t = op1, op1 = op2, op2 = t;
326
op1->last->next = op2->first;
327
pushand(op1->first, op2->last);
332
op2->last->next = inst1;
333
inst1->right = op2->first;
334
pushand(inst1, inst1);
339
op2->last->next = inst1;
340
inst1->right = op2->first;
341
pushand(op2->first, inst1);
346
inst2 = newinst(NOP);
348
inst1->right = op2->first;
349
op2->last->next = inst2;
350
pushand(inst1, inst2);
358
optimize(Inst *start)
362
for(inst=start; inst->type!=END; inst++){
364
while(target->type == NOP)
365
target = target->next;
376
dprint("operators\n");
377
for(ip = atorstack; ip<atorp; ip++)
378
dprint("0%o\n", *ip);
379
dprint("operands\n");
380
for(stk = andstack; stk<andp; stk++)
381
dprint("0%o\t0%o\n", stk->first->type, stk->last->type);
389
dprint("%d:\t0%o\t%d\t%d\n", l-program, l->type,
390
l->left-program, l->right-program);
410
if((c= *exprp++)=='n')
415
--exprp; /* In case we come here again */
454
if(exprp[0]==0 || (exprp[0]=='\\' && exprp[1]==0))
456
if(exprp[0] == '\\'){
462
return *exprp++|0x10000;
473
classp = emalloc(DCLASS*RUNESIZE);
476
/* we have already seen the '[' */
478
classp[n++] = '\n'; /* don't match newline in negate case */
483
while((c1 = nextrec()) != ']'){
489
if(n+4 >= na){ /* 3 runes plus NUL */
491
classp = erealloc(classp, na*RUNESIZE);
494
exprp++; /* eat '-' */
495
if((c2 = nextrec()) == ']')
497
classp[n+0] = Runemax;
505
if(nclass == Nclass){
507
class = erealloc(class, Nclass*sizeof(Rune*));
509
class[nclass++] = classp;
513
classmatch(int classno, int c, int negate)
520
if(p[1]<=c && c<=p[2])
530
* Note optimization in addinst:
531
* *l must be pending when addinst called; if *l has been looked
532
* at already, the optimization is a bug.
535
addinst(Ilist *l, Inst *inst, Rangeset *sep)
539
for(p = l; p->inst; p++){
541
if((sep)->p[0].p1 < p->se.p[0].p1)
542
p->se= *sep; /* this would be bug */
543
return 0; /* It's already there */
553
execute(File *f, Posn startp, Posn eof)
562
int startchar = startinst->type<OPERATOR? startinst->type : 0;
564
list[0][0].inst = list[1][0].inst = 0;
566
/* Execute machine once for each character */
572
case 0: /* let loop run one more click */
575
case 1: /* expired; wrap to beginning */
576
if(sel.p[0].p1>=0 || eof!=INFINITY)
578
list[0][0].inst = list[1][0].inst = 0;
584
}else if(((wrapped && p>=startp) || sel.p[0].p1>0) && nnl==0)
586
/* fast check for first char */
587
if(startchar && nnl==0 && c!=startchar)
594
if(sel.p[0].p1<0 && (!wrapped || p<startp || startp==eof)){
595
/* Add first instruction to this list */
597
if(addinst(tl, startinst, &sempty))
602
/* Execute machine until this list is empty */
603
for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */
606
default: /* regular character */
609
if(addinst(nl, inst->next, &tlp->se))
616
tlp->se.p[inst->subid].p1 = p;
621
tlp->se.p[inst->subid].p2 = p;
629
if(p==0 || filereadc(f, p - 1)=='\n'){
640
if(c>=0 && classmatch(inst->rclass, c, 0))
644
if(c>=0 && classmatch(inst->rclass, c, 1))
648
/* evaluate right choice later */
649
if(addinst(tl, inst->right, &tlp->se))
652
/* efficiency: advance and re-evaluate */
655
case END: /* Match! */
663
return sel.p[0].p1>=0;
667
newmatch(Rangeset *sp)
671
if(sel.p[0].p1<0 || sp->p[0].p1<sel.p[0].p1 ||
672
(sp->p[0].p1==sel.p[0].p1 && sp->p[0].p2>sel.p[0].p2))
673
for(i = 0; i<NSUBEXP; i++)
678
bexecute(File *f, Posn startp)
687
int startchar = bstartinst->type<OPERATOR? bstartinst->type : 0;
689
list[0][0].inst = list[1][0].inst = 0;
691
/* Execute machine once for each character, including terminal NUL */
694
if((c = filereadc(f, p - 1))==-1){
696
case 0: /* let loop run one more click */
699
case 1: /* expired; wrap to end */
703
list[0][0].inst = list[1][0].inst = 0;
709
}else if(((wrapped && p<=startp) || sel.p[0].p1>0) && nnl==0)
711
/* fast check for first char */
712
if(startchar && nnl==0 && c!=startchar)
719
if(sel.p[0].p1<0 && (!wrapped || p>startp)){
720
/* Add first instruction to this list */
721
/* the minus is so the optimizations in addinst work */
723
if(addinst(tl, bstartinst, &sempty))
728
/* Execute machine until this list is empty */
729
for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */
732
default: /* regular character */
735
if(addinst(nl, inst->next, &tlp->se))
742
tlp->se.p[inst->subid].p1 = p;
747
tlp->se.p[inst->subid].p2 = p;
762
if(p==f->b.nc || filereadc(f, p)=='\n')
766
if(c>=0 && classmatch(inst->rclass, c, 0))
770
if(c>=0 && classmatch(inst->rclass, c, 1))
774
/* evaluate right choice later */
775
if(addinst(tlp, inst->right, &tlp->se))
778
/* efficiency: advance and re-evaluate */
781
case END: /* Match! */
782
tlp->se.p[0].p1 = -tlp->se.p[0].p1; /* minus sign */
790
return sel.p[0].p1>=0;
794
bnewmatch(Rangeset *sp)
797
if(sel.p[0].p1<0 || sp->p[0].p1>sel.p[0].p2 || (sp->p[0].p1==sel.p[0].p2 && sp->p[0].p2<sel.p[0].p1))
798
for(i = 0; i<NSUBEXP; i++){ /* note the reversal; p1<=p2 */
799
sel.p[i].p1 = sp->p[i].p2;
800
sel.p[i].p2 = sp->p[i].p1;