55
55
/* Lens names for pretty printing */
56
56
static const char *const tags[] = {
57
"del", "store", "key", "label", "seq", "counter", "concat", "union",
57
"del", "store", "value", "key", "label", "seq", "counter",
58
59
"subtree", "star", "maybe", "rec"
77
#define BUG_LENS_TAG(lns) bug_lens_tag(lns, __FILE__, __LINE__)
79
static void bug_lens_tag(struct lens *lens, const char *file, int lineno) {
80
char *s = format_lens(lens);
82
if (lens != NULL && lens->info != NULL && lens->info->error != NULL) {
83
bug_on(lens->info->error, file, lineno, "Unexpected lens tag %s", s);
85
/* We are really screwed */
76
92
/* Construct a finite automaton from REGEXP and return it in *FA.
78
94
* Return NULL if REGEXP is valid, if the regexp REGEXP has syntax errors,
156
172
lens->nchildren += (l2->tag == tag) ? l2->nchildren : 1;
158
174
lens->recursive = l1->recursive || l2->recursive;
175
lens->rec_internal = l1->rec_internal || l2->rec_internal;
160
177
if (ALLOC_N(lens->children, lens->nchildren) < 0) {
161
178
lens->nchildren = 0;
186
203
if (ALLOC_N(types, lens->nchildren) < 0)
189
if (!lens->recursive) {
206
if (! lens->rec_internal) {
207
/* Inside a recursive lens, we assign types with lns_check_rec
208
* once we know the entire lens */
190
209
for (int t=0; t < ntypes; t++) {
210
if (lens->recursive && t == CTYPE)
191
212
for (int i=0; i < lens->nchildren; i++)
192
213
types[i] = ltype(lens->children[i], t);
193
214
ltype(lens, t) = (*combinator)(info, lens->nchildren, types);
199
219
for (int i=0; i < lens->nchildren; i++)
200
assert(tag != lens->children[i]->tag);
220
ensure(tag != lens->children[i]->tag, lens->info);
220
240
int recursive = l1->recursive || l2->recursive;
221
241
int ctype_nullable = l1->ctype_nullable || l2->ctype_nullable;
223
if (check && !recursive) {
224
244
struct value *exn = typecheck_union(info, l1, l2);
240
260
int recursive = l1->recursive || l2->recursive;
241
261
int ctype_nullable = l1->ctype_nullable && l2->ctype_nullable;
243
if (check && !recursive) {
244
264
struct value *exn = typecheck_concat(info, l1, l2);
249
268
if (l1->value && l2->value) {
250
269
return make_exn_value(info, "Multiple stores in concat");
313
332
lens->atype = subtree_atype(info, l->ktype, l->vtype);
314
333
lens->value = lens->key = 0;
315
334
lens->recursive = l->recursive;
335
lens->rec_internal = l->rec_internal;
316
336
if (! l->recursive)
317
337
lens->ctype_nullable = l->ctype_nullable;
318
338
return make_lens_value(lens);
321
341
struct value *lns_make_star(struct info *info, struct lens *l, int check) {
322
342
struct lens *lens;
324
if (check && !l->recursive) {
325
345
struct value *exn = typecheck_iter(info, l);
331
350
return make_exn_value(info, "Multiple stores in iteration");
339
358
ltype(lens, t) = regexp_iter(info, ltype(l, t), 0, -1);
341
360
lens->recursive = l->recursive;
361
lens->rec_internal = l->rec_internal;
342
362
lens->ctype_nullable = 1;
343
363
return make_lens_value(lens);
358
378
struct value *lns_make_maybe(struct info *info, struct lens *l, int check) {
359
379
struct lens *lens;
361
if (check && !l->recursive) {
362
382
struct value *exn = typecheck_maybe(info, l);
367
386
lens = make_lens_unop(L_MAYBE, info, l);
368
387
for (int t=0; t < ntypes; t++)
393
413
static struct regexp *restrict_regexp(struct regexp *r) {
394
414
char *nre = NULL;
415
struct regexp *result = NULL;
398
419
ret = fa_restrict_alphabet(r->pattern->str, strlen(r->pattern->str),
400
421
RESERVED_FROM, RESERVED_TO);
401
assert(nre_len == strlen(nre));
402
// FIXME: Tell the user what's wrong
422
ERR_NOMEM(ret == REG_ESPACE || ret < 0, r->info);
423
BUG_ON(ret != 0, r->info, NULL);
424
ensure(nre_len == strlen(nre), r->info);
406
426
ret = regexp_c_locale(&nre, &nre_len);
427
ERR_NOMEM(ret < 0, r->info);
412
r = make_regexp(r->info, nre, r->nocase);
413
if (regexp_compile(r) != 0)
429
result = make_regexp(r->info, nre, r->nocase);
431
BUG_ON(regexp_compile(result) != 0, r->info,
432
"Could not compile restricted regexp");
437
unref(result, regexp);
418
441
struct value *lns_make_prim(enum lens_tag tag, struct info *info,
472
495
lens->regexp = regexp;
473
496
lens->string = string;
474
497
lens->key = (tag == L_KEY || tag == L_LABEL || tag == L_SEQ);
475
lens->value = (tag == L_STORE);
476
lens->consumes_value = (tag == L_STORE);
498
lens->value = (tag == L_STORE || tag == L_VALUE);
499
lens->consumes_value = (tag == L_STORE || tag == L_VALUE);
477
500
lens->atype = regexp_make_empty(info);
478
501
/* Set the ctype */
479
502
if (tag == L_DEL || tag == L_STORE || tag == L_KEY) {
480
503
lens->ctype = ref(regexp);
481
504
lens->ctype_nullable = regexp_matches_empty(lens->ctype);
482
} else if (tag == L_LABEL || tag == L_SEQ || tag == L_COUNTER) {
505
} else if (tag == L_LABEL || tag == L_VALUE
506
|| tag == L_SEQ || tag == L_COUNTER) {
483
507
lens->ctype = regexp_make_empty(info);
484
508
lens->ctype_nullable = 1;
504
529
/* Set the vtype */
505
530
if (tag == L_STORE) {
506
531
lens->vtype = restrict_regexp(lens->regexp);
532
} else if (tag == L_VALUE) {
533
lens->vtype = make_regexp_literal(info, lens->string->str);
509
536
return make_lens_value(lens);
589
static struct value *ambig_check(struct info *info,
590
struct fa *fa1, struct fa *fa2,
591
struct regexp *r1, struct regexp *r2,
592
const char *msg, bool iterated) {
616
static struct value *
617
ambig_check(struct info *info, struct fa *fa1, struct fa *fa2,
618
enum lens_type typ, struct lens *l1, struct lens *l2,
619
const char *msg, bool iterated) {
593
620
char *upv, *pv, *v;
595
622
struct value *exn = NULL;
609
636
if (upv != NULL) {
610
char *e_u = escape(upv, pv - upv);
611
char *e_up = escape(upv, v - upv);
612
char *e_upv = escape(upv, -1);
613
char *e_pv = escape(pv, -1);
614
char *e_v = escape(v, -1);
615
char *s1 = regexp_escape(r1);
616
char *s2 = regexp_escape(r2);
637
char *e_u, *e_up, *e_upv, *e_pv, *e_v;
641
e_u = enc_format(upv, pv - upv);
642
e_up = enc_format(upv, v - upv);
643
e_upv = enc_format(upv, upv_len);
644
e_pv = enc_format(pv, strlen(pv));
645
e_v = enc_format(v, strlen(v));
646
lns_format_atype(l1, &s1);
647
lns_format_atype(l2, &s2);
649
e_u = escape(upv, pv - upv);
650
e_up = escape(upv, v - upv);
651
e_upv = escape(upv, -1);
652
e_pv = escape(pv, -1);
654
s1 = regexp_escape(ltype(l1, typ));
655
s2 = regexp_escape(ltype(l2, typ));
617
657
exn = make_exn_value(ref(info), "%s", msg);
619
659
exn_printf_line(exn, " Iterated regexp: /%s/", s1);
640
static struct value *ambig_concat_check(struct info *info, const char *msg,
641
struct regexp *r1, struct regexp *r2) {
680
static struct value *
681
ambig_concat_check(struct info *info, const char *msg,
682
enum lens_type typ, struct lens *l1, struct lens *l2) {
642
683
struct fa *fa1 = NULL;
643
684
struct fa *fa2 = NULL;
644
685
struct value *result = NULL;
686
struct regexp *r1 = ltype(l1, typ);
687
struct regexp *r2 = ltype(l2, typ);
646
689
if (r1 == NULL || r2 == NULL)
654
697
if (result != NULL)
657
result = ambig_check(info, fa1, fa2, r1, r2, msg, false);
700
result = ambig_check(info, fa1, fa2, typ, l1, l2, msg, false);
666
709
struct value *result = NULL;
668
711
result = ambig_concat_check(info, "ambiguous concatenation",
669
l1->ctype, l2->ctype);
670
713
if (result == NULL) {
671
714
result = ambig_concat_check(info, "ambiguous tree concatenation",
672
l1->atype, l2->atype);
674
717
if (result != NULL) {
675
718
char *fi = format_info(l1->info);
685
static struct value *ambig_iter_check(struct info *info, const char *msg,
728
static struct value *
729
ambig_iter_check(struct info *info, const char *msg,
730
enum lens_type typ, struct lens *l) {
687
731
struct fa *fas = NULL, *fa = NULL;
688
732
struct value *result = NULL;
733
struct regexp *r = ltype(l, typ);
697
742
fas = fa_iter(fa, 0, -1);
699
result = ambig_check(info, fa, fas, r, r, msg, true);
744
result = ambig_check(info, fa, fas, typ, l, l, msg, true);
707
752
static struct value *typecheck_iter(struct info *info, struct lens *l) {
708
753
struct value *result = NULL;
710
result = ambig_iter_check(info, "ambiguous iteration", l->ctype);
755
result = ambig_iter_check(info, "ambiguous iteration", CTYPE, l);
711
756
if (result == NULL) {
712
result = ambig_iter_check(info, "ambiguous tree iteration", l->atype);
757
result = ambig_iter_check(info, "ambiguous tree iteration", ATYPE, l);
714
759
if (result != NULL) {
715
760
char *fi = format_info(l->info);
723
768
/* Check (r)? as (<e>|r) where <e> is the empty language */
724
769
struct value *exn = NULL;
726
if (regexp_matches_empty(l->ctype)) {
771
if (l->ctype != NULL && regexp_matches_empty(l->ctype)) {
727
772
exn = make_exn_value(ref(info),
728
773
"illegal optional expression: /%s/ matches the empty word",
729
774
l->ctype->pattern->str);
736
781
depending on whether the current node has a non NULL value or not
738
783
if (exn == NULL && ! l->consumes_value) {
739
if (regexp_matches_empty(l->atype)) {
784
if (l->atype != NULL && regexp_matches_empty(l->atype)) {
740
785
exn = make_exn_value(ref(info),
741
786
"optional expression matches the empty tree but does not consume a value");
1070
static int lns_format_rec_atype(struct lens *l, char **buf) {
1073
if (l->rec_internal) {
1074
*buf = strdup("<<rec>>");
1075
return (*buf == NULL) ? -1 : 0;
1079
r = lns_format_atype(l->body, &c);
1082
r = xasprintf(buf, "<<rec:%s>>", c);
1084
return (r < 0) ? -1 : 0;
1022
1087
int lns_format_atype(struct lens *l, char **buf) {
1648
1720
struct regexp *loop = NULL;
1649
1721
for (int i=0; i < s->ntrans; i++) {
1650
1722
if (s == s->trans[i].to) {
1651
assert(loop == NULL);
1723
ensure(loop == NULL, rtn->info);
1652
1724
loop = s->trans[i].re;
1675
1747
struct regexp *result = NULL;
1676
1748
for (int i=0; i < prod->start->ntrans; i++) {
1677
1749
if (prod->start->trans[i].to == prod->end) {
1678
assert(result == NULL);
1750
ensure(result == NULL, rtn->info);
1679
1751
result = ref(prod->start->trans[i].re);
1766
1839
struct value *exn = NULL;
1767
1840
struct lens *acc = NULL;
1769
assert(l->tag == L_CONCAT || l->tag == L_UNION);
1842
ensure(l->tag == L_CONCAT || l->tag == L_UNION, l->info);
1770
1843
for (int i=0; i < l->nchildren; i++) {
1771
1844
exn = typecheck(l->children[i], check);
1772
1845
if (exn != NULL)
1779
1852
exn = (*make)(info, acc, ref(l->children[i]), check);
1782
assert(exn->tag == V_LENS);
1855
ensure(exn->tag == V_LENS, l->info);
1783
1856
acc = ref(exn->lens);
1784
1857
unref(exn, value);
1946
2021
/* The types in the order of approximation */
1947
2022
static const enum lens_type types[] = { KTYPE, VTYPE, ATYPE };
1949
assert(rec->tag == L_REC);
1950
assert(body->recursive);
1951
2023
struct value *result = NULL;
2025
ensure(rec->tag == L_REC, info);
2026
ensure(body->recursive, info);
2027
ensure(rec->rec_internal, info);
1953
2029
/* To help memory management, we avoid the cycle inherent ina recursive
1954
2030
* lens by using two instances of an L_REC lens. One is marked with
1955
2031
* rec_internal, and used inside the body of the lens. The other is the
1958
2034
* The internal instance of the recursive lens is REC, the external one
1959
2035
* is TOP, constructed below
1961
rec->rec_internal = 1;
1962
2037
rec->body = body; /* REC does not own BODY */
1964
2039
for (int i=0; i < ARRAY_CARDINALITY(types); i++) {
1969
2044
if (rec->atype == NULL) {
1970
result = make_exn_value(rec->info,
2045
result = make_exn_value(ref(rec->info),
1971
2046
"recursive lens generates the empty language for its %s",
1972
2047
rec->ctype == NULL ? "ctype" : "atype");