1337
1212
/* check if string matches the given pattern. returns <0 for
1338
1213
error, 0 for failure, and 1 for success */
1339
#if !REMOVE_SRE_MATCH_MACROS
1341
SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
1343
SRE_CHAR* end = (SRE_CHAR *)state->end;
1344
Py_ssize_t alloc_pos, ctx_pos = -1;
1345
Py_ssize_t i, ret = 0;
1347
unsigned int sigcount=0; /* Iteration Counter; for signalling */
1349
SRE_MATCH_CONTEXT* ctx;
1350
SRE_MATCH_CONTEXT* nextctx;
1352
TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
1354
DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
1355
ctx->last_ctx_pos = -1;
1356
ctx->jump = JUMP_NONE;
1357
ctx->pattern = pattern;
1358
ctx_pos = alloc_pos;
1362
ctx->ptr = (SRE_CHAR *)state->ptr;
1364
if (ctx->pattern[0] == SRE_OP_INFO) {
1365
/* optimization info block */
1366
/* <INFO> <1=skip> <2=flags> <3=min> ... */
1367
if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
1368
TRACE(("reject (got %d chars, need %d)\n",
1369
(end - ctx->ptr), ctx->pattern[3]));
1372
ctx->pattern += ctx->pattern[1] + 1;
1377
if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
1378
RETURN_ERROR(SRE_ERROR_INTERRUPTED);
1380
switch (*ctx->pattern++) {
1385
TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
1386
ctx->ptr, ctx->pattern[0]));
1387
i = ctx->pattern[0];
1389
state->lastindex = i/2 + 1;
1390
if (i > state->lastmark) {
1391
/* state->lastmark is the highest valid index in the
1392
state->mark array. If it is increased by more than 1,
1393
the intervening marks must be set to NULL to signal
1394
that these marks have not been encountered. */
1395
Py_ssize_t j = state->lastmark + 1;
1397
state->mark[j++] = NULL;
1398
state->lastmark = i;
1400
state->mark[i] = ctx->ptr;
1404
case SRE_OP_LITERAL:
1405
/* match literal string */
1406
/* <LITERAL> <code> */
1407
TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
1408
ctx->ptr, *ctx->pattern));
1409
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
1415
case SRE_OP_NOT_LITERAL:
1416
/* match anything that is not literal character */
1417
/* <NOT_LITERAL> <code> */
1418
TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
1419
ctx->ptr, *ctx->pattern));
1420
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
1426
case SRE_OP_SUCCESS:
1427
/* end of pattern */
1428
TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
1429
state->ptr = ctx->ptr;
1433
/* match at given position */
1435
TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
1436
if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
1441
case SRE_OP_CATEGORY:
1442
/* match at given category */
1443
/* <CATEGORY> <code> */
1444
TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
1445
ctx->ptr, *ctx->pattern));
1446
if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
1453
/* match anything (except a newline) */
1455
TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
1456
if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
1461
case SRE_OP_ANY_ALL:
1462
/* match anything */
1464
TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
1465
if (ctx->ptr >= end)
1471
/* match set member (or non_member) */
1472
/* <IN> <skip> <set> */
1473
TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
1474
if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
1476
ctx->pattern += ctx->pattern[0];
1480
case SRE_OP_LITERAL_IGNORE:
1481
TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
1482
ctx->pattern, ctx->ptr, ctx->pattern[0]));
1483
if (ctx->ptr >= end ||
1484
state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
1490
case SRE_OP_NOT_LITERAL_IGNORE:
1491
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
1492
ctx->pattern, ctx->ptr, *ctx->pattern));
1493
if (ctx->ptr >= end ||
1494
state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
1500
case SRE_OP_IN_IGNORE:
1501
TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
1503
|| !SRE_CHARSET(ctx->pattern+1,
1504
(SRE_CODE)state->lower(*ctx->ptr)))
1506
ctx->pattern += ctx->pattern[0];
1513
/* <JUMP> <offset> */
1514
TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
1515
ctx->ptr, ctx->pattern[0]));
1516
ctx->pattern += ctx->pattern[0];
1521
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
1522
TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
1524
ctx->u.rep = state->repeat;
1526
MARK_PUSH(ctx->lastmark);
1527
for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
1528
if (ctx->pattern[1] == SRE_OP_LITERAL &&
1530
(SRE_CODE) *ctx->ptr != ctx->pattern[2]))
1532
if (ctx->pattern[1] == SRE_OP_IN &&
1534
!SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
1536
state->ptr = ctx->ptr;
1537
DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
1540
MARK_POP_DISCARD(ctx->lastmark);
1541
RETURN_ON_ERROR(ret);
1545
MARK_POP_KEEP(ctx->lastmark);
1549
MARK_POP_DISCARD(ctx->lastmark);
1552
case SRE_OP_REPEAT_ONE:
1553
/* match repeated sequence (maximizing regexp) */
1555
/* this operator only works if the repeated item is
1556
exactly one character wide, and we're not already
1557
collecting backtracking points. for other cases,
1558
use the MAX_REPEAT operator */
1560
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1562
TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1563
ctx->pattern[1], ctx->pattern[2]));
1565
if (ctx->ptr + ctx->pattern[1] > end)
1566
RETURN_FAILURE; /* cannot match */
1568
state->ptr = ctx->ptr;
1570
ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1571
RETURN_ON_ERROR(ret);
1572
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1574
ctx->ptr += ctx->count;
1576
/* when we arrive here, count contains the number of
1577
matches, and ctx->ptr points to the tail of the target
1578
string. check if the rest of the pattern matches,
1579
and backtrack if not. */
1581
if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1584
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1585
/* tail is empty. we're finished */
1586
state->ptr = ctx->ptr;
1592
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
1593
/* tail starts with a literal. skip positions where
1594
the rest of the pattern cannot possibly match */
1595
ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
1597
while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
1598
(ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1602
if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1604
state->ptr = ctx->ptr;
1605
DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1606
ctx->pattern+ctx->pattern[0]);
1608
RETURN_ON_ERROR(ret);
1620
while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
1621
state->ptr = ctx->ptr;
1622
DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1623
ctx->pattern+ctx->pattern[0]);
1625
RETURN_ON_ERROR(ret);
1635
case SRE_OP_MIN_REPEAT_ONE:
1636
/* match repeated sequence (minimizing regexp) */
1638
/* this operator only works if the repeated item is
1639
exactly one character wide, and we're not already
1640
collecting backtracking points. for other cases,
1641
use the MIN_REPEAT operator */
1643
/* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1645
TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1646
ctx->pattern[1], ctx->pattern[2]));
1648
if (ctx->ptr + ctx->pattern[1] > end)
1649
RETURN_FAILURE; /* cannot match */
1651
state->ptr = ctx->ptr;
1653
if (ctx->pattern[1] == 0)
1656
/* count using pattern min as the maximum */
1657
ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1658
RETURN_ON_ERROR(ret);
1659
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1660
if (ret < (Py_ssize_t) ctx->pattern[1])
1661
/* didn't match minimum number of times */
1663
/* advance past minimum matches of repeat */
1665
ctx->ptr += ctx->count;
1668
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1669
/* tail is empty. we're finished */
1670
state->ptr = ctx->ptr;
1676
while ((Py_ssize_t)ctx->pattern[2] == SRE_MAX_REPEAT
1677
|| ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
1678
state->ptr = ctx->ptr;
1679
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1680
ctx->pattern+ctx->pattern[0]);
1682
RETURN_ON_ERROR(ret);
1685
state->ptr = ctx->ptr;
1686
ret = SRE_COUNT(state, ctx->pattern+3, 1);
1687
RETURN_ON_ERROR(ret);
1688
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1700
/* create repeat context. all the hard work is done
1701
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1702
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1703
TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1704
ctx->pattern[1], ctx->pattern[2]));
1706
/* install new repeat context */
1707
ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
1712
ctx->u.rep->count = -1;
1713
ctx->u.rep->pattern = ctx->pattern;
1714
ctx->u.rep->prev = state->repeat;
1715
ctx->u.rep->last_ptr = NULL;
1716
state->repeat = ctx->u.rep;
1718
state->ptr = ctx->ptr;
1719
DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
1720
state->repeat = ctx->u.rep->prev;
1721
PyObject_FREE(ctx->u.rep);
1724
RETURN_ON_ERROR(ret);
1729
case SRE_OP_MAX_UNTIL:
1730
/* maximizing repeat */
1731
/* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1733
/* FIXME: we probably need to deal with zero-width
1734
matches in here... */
1736
ctx->u.rep = state->repeat;
1738
RETURN_ERROR(SRE_ERROR_STATE);
1740
state->ptr = ctx->ptr;
1742
ctx->count = ctx->u.rep->count+1;
1744
TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
1745
ctx->ptr, ctx->count));
1747
if (ctx->count < ctx->u.rep->pattern[1]) {
1748
/* not enough matches */
1749
ctx->u.rep->count = ctx->count;
1750
DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1751
ctx->u.rep->pattern+3);
1753
RETURN_ON_ERROR(ret);
1756
ctx->u.rep->count = ctx->count-1;
1757
state->ptr = ctx->ptr;
1761
if ((ctx->count < ctx->u.rep->pattern[2] ||
1762
ctx->u.rep->pattern[2] == SRE_MAX_REPEAT) &&
1763
state->ptr != ctx->u.rep->last_ptr) {
1764
/* we may have enough matches, but if we can
1765
match another item, do so */
1766
ctx->u.rep->count = ctx->count;
1768
MARK_PUSH(ctx->lastmark);
1769
/* zero-width match protection */
1770
DATA_PUSH(&ctx->u.rep->last_ptr);
1771
ctx->u.rep->last_ptr = state->ptr;
1772
DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1773
ctx->u.rep->pattern+3);
1774
DATA_POP(&ctx->u.rep->last_ptr);
1776
MARK_POP_DISCARD(ctx->lastmark);
1777
RETURN_ON_ERROR(ret);
1780
MARK_POP(ctx->lastmark);
1782
ctx->u.rep->count = ctx->count-1;
1783
state->ptr = ctx->ptr;
1786
/* cannot match more repeated items here. make sure the tail matches */
1788
state->repeat = ctx->u.rep->prev;
1789
DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
1790
RETURN_ON_SUCCESS(ret);
1791
state->repeat = ctx->u.rep;
1792
state->ptr = ctx->ptr;
1795
case SRE_OP_MIN_UNTIL:
1796
/* minimizing repeat */
1797
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1799
ctx->u.rep = state->repeat;
1801
RETURN_ERROR(SRE_ERROR_STATE);
1803
state->ptr = ctx->ptr;
1805
ctx->count = ctx->u.rep->count+1;
1807
TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
1808
ctx->ptr, ctx->count, ctx->u.rep->pattern));
1810
if (ctx->count < ctx->u.rep->pattern[1]) {
1811
/* not enough matches */
1812
ctx->u.rep->count = ctx->count;
1813
DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1814
ctx->u.rep->pattern+3);
1816
RETURN_ON_ERROR(ret);
1819
ctx->u.rep->count = ctx->count-1;
1820
state->ptr = ctx->ptr;
1826
/* see if the tail matches */
1827
state->repeat = ctx->u.rep->prev;
1828
DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1830
RETURN_ON_ERROR(ret);
1834
state->repeat = ctx->u.rep;
1835
state->ptr = ctx->ptr;
1839
if (ctx->count >= ctx->u.rep->pattern[2]
1840
&& ctx->u.rep->pattern[2] != SRE_MAX_REPEAT)
1843
ctx->u.rep->count = ctx->count;
1844
DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1845
ctx->u.rep->pattern+3);
1847
RETURN_ON_ERROR(ret);
1850
ctx->u.rep->count = ctx->count-1;
1851
state->ptr = ctx->ptr;
1854
case SRE_OP_GROUPREF:
1855
/* match backreference */
1856
TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1857
ctx->ptr, ctx->pattern[0]));
1858
i = ctx->pattern[0];
1860
Py_ssize_t groupref = i+i;
1861
if (groupref >= state->lastmark) {
1864
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1865
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1866
if (!p || !e || e < p)
1869
if (ctx->ptr >= end || *ctx->ptr != *p)
1878
case SRE_OP_GROUPREF_IGNORE:
1879
/* match backreference */
1880
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1881
ctx->ptr, ctx->pattern[0]));
1882
i = ctx->pattern[0];
1884
Py_ssize_t groupref = i+i;
1885
if (groupref >= state->lastmark) {
1888
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1889
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1890
if (!p || !e || e < p)
1893
if (ctx->ptr >= end ||
1894
state->lower(*ctx->ptr) != state->lower(*p))
1903
case SRE_OP_GROUPREF_EXISTS:
1904
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1905
ctx->ptr, ctx->pattern[0]));
1906
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1907
i = ctx->pattern[0];
1909
Py_ssize_t groupref = i+i;
1910
if (groupref >= state->lastmark) {
1911
ctx->pattern += ctx->pattern[1];
1914
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1915
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1916
if (!p || !e || e < p) {
1917
ctx->pattern += ctx->pattern[1];
1926
/* assert subpattern */
1927
/* <ASSERT> <skip> <back> <pattern> */
1928
TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1929
ctx->ptr, ctx->pattern[1]));
1930
state->ptr = ctx->ptr - ctx->pattern[1];
1931
if (state->ptr < state->beginning)
1933
DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1934
RETURN_ON_FAILURE(ret);
1935
ctx->pattern += ctx->pattern[0];
1938
case SRE_OP_ASSERT_NOT:
1939
/* assert not subpattern */
1940
/* <ASSERT_NOT> <skip> <back> <pattern> */
1941
TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1942
ctx->ptr, ctx->pattern[1]));
1943
state->ptr = ctx->ptr - ctx->pattern[1];
1944
if (state->ptr >= state->beginning) {
1945
DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1947
RETURN_ON_ERROR(ret);
1951
ctx->pattern += ctx->pattern[0];
1954
case SRE_OP_FAILURE:
1955
/* immediate failure */
1956
TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1960
TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1962
RETURN_ERROR(SRE_ERROR_ILLEGAL);
1967
ctx_pos = ctx->last_ctx_pos;
1969
DATA_POP_DISCARD(ctx);
1972
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1975
case JUMP_MAX_UNTIL_2:
1976
TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1977
goto jump_max_until_2;
1978
case JUMP_MAX_UNTIL_3:
1979
TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1980
goto jump_max_until_3;
1981
case JUMP_MIN_UNTIL_2:
1982
TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1983
goto jump_min_until_2;
1984
case JUMP_MIN_UNTIL_3:
1985
TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1986
goto jump_min_until_3;
1988
TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1990
case JUMP_MAX_UNTIL_1:
1991
TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1992
goto jump_max_until_1;
1993
case JUMP_MIN_UNTIL_1:
1994
TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1995
goto jump_min_until_1;
1997
TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1999
case JUMP_REPEAT_ONE_1:
2000
TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
2001
goto jump_repeat_one_1;
2002
case JUMP_REPEAT_ONE_2:
2003
TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
2004
goto jump_repeat_one_2;
2005
case JUMP_MIN_REPEAT_ONE:
2006
TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
2007
goto jump_min_repeat_one;
2009
TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
2011
case JUMP_ASSERT_NOT:
2012
TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
2013
goto jump_assert_not;
2015
TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
2019
return ret; /* should never get here */
2022
#else /* REMOVE_SRE_MATCH_MACROS */
1214
/* TODO: Look into making alloc_pos a local variable as it should really be a return value for the ALLOC functions. */
1215
/* TODO: Look into removing op_code as it is only used when a bad op code is passed and only for debugging */
2023
1216
typedef struct {
2024
1217
SRE_CHAR *end; /* Pointer to the end of the input state->ptr */
2025
1218
Py_ssize_t alloc_pos; /* Offset in state->data_state of newly */
2500
1791
/* TODO: SRE_MATCH_ON_CHARSET */
2501
1792
/* TODO: SRE_MATCH_ON_BIGCHARSET */
2502
/* TODO: SRE_MATCH_ON_GROUPREF */
2503
/* TODO: SRE_MATCH_ON_GROUPREF_EXISTS */
2504
/* TODO: SRE_MATCH_ON_GROUPREF_IGNORE */
1794
Py_LOCAL_INLINE(Py_ssize_t)
1795
SRE_MATCH_ON_GROUPREF(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state)
1797
/* match backreference */
1799
/* <GROUPREF> <group> ... */
1800
Py_ssize_t group = (Py_ssize_t)self->ctx->pattern[0];
1802
/* For every group ID, there are 2 grouprefs, one for beginning, */
1803
/* one for end; so groupref is twice group */
1804
Py_ssize_t groupref = group + group;
1806
TRACE(("|%p|%p|GROUPREF %d\n", self->ctx->pattern, self->ctx->ptr,
1809
/* Check if the groupref is in the valid range */
1810
if (groupref >= state->lastmark) {
1811
/* Failed to Match; Pop Stack */
1812
return SRE_MATCH_RETURN_FAILURE(self);
1815
/* Get the beginning and end of the Group captured from the */
1818
SRE_CHAR *b = (SRE_CHAR *)state->mark[groupref];
1819
/* b is the beginning position in the Input Stream of the */
1820
/* previously matched group */
1822
SRE_CHAR *e = (SRE_CHAR *)state->mark[groupref + 1];
1823
/* e is the ending position in the Input Stream of the */
1824
/* previously matched group */
1826
/* Verify that the beginning and ending pointers are valid */
1827
/* and that the beginning occurs before the ending */
1828
if (!b || !e || e < b) {
1829
/* Failed to Match; Pop Stack */
1830
return SRE_MATCH_RETURN_FAILURE(self);
1834
/* For each matched character of the corresponding group: */
1836
/* Check it against the current Input Stream to */
1837
/* verify that it matches */
1838
/* Stop if the Input Stream is at its end */
1839
if (self->ctx->ptr >= self->end ||
1840
*self->ctx->ptr != *b) {
1842
/* Failed to Match; Pop Stack */
1843
return SRE_MATCH_RETURN_FAILURE(self);
1846
/* Increment the group match reference */
1849
/* Increment the Input Stream */
1855
/* Move to next Op Code */
1856
self->ctx->pattern++;
1858
return SRE_MATCH_PASS;
1861
Py_LOCAL_INLINE(Py_ssize_t)
1862
SRE_MATCH_ON_GROUPREF_EXISTS(SRE_MATCH_GLOBAL_CONTEXT *self,
1865
/* Match codeyes if group matched, otherwise match codeno */
1867
/* <GROUPREF_EXISTS> <group> <skipyes> codeyes (<JUMP> <skipno> */
1869
/* <skipyes> points to codeno, one after the <skipno>, if codeno */
1870
/* exists, otherwise to tail */
1871
/* <skipno> points to tail */
1872
/* Note: because <skipyes> is the SECOND parameter to */
1873
/* <GROUPREF_EXISTS>, it contains one more than its proper offset */
1874
/* from its position and is thus relative to the current value of */
1875
/* self->ctx->pattern */
1876
Py_ssize_t group = (Py_ssize_t)self->ctx->pattern[0];
1877
int skipyes = (int)self->ctx->pattern[1];
1879
/* For every group ID, there are 2 grouprefs, one for beginning, */
1880
/* one for end; so groupref is twice group */
1881
Py_ssize_t groupref = group + group;
1883
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", self->ctx->pattern,
1884
self->ctx->ptr, group));
1886
/* Check if the groupref is in the valid range */
1887
if (groupref >= state->lastmark) {
1888
/* Skip the codeyes and go directly to codeno */
1889
self->ctx->pattern += skipyes;
1891
/* Continue to codeno */
1892
return SRE_MATCH_PASS;
1896
/* Check to see if the indicated groupref has previously been */
1899
SRE_CHAR *b = (SRE_CHAR *)state->mark[groupref];
1900
/* b is the beginning position in the Input Stream of the */
1901
/* previously matched group */
1903
SRE_CHAR *e = (SRE_CHAR *)state->mark[groupref + 1];
1904
/* e is the ending position in the Input Stream of the */
1905
/* previously matched group */
1907
/* Verify that the beginning and ending pointers are valid */
1908
/* and that the beginning occurs before the ending */
1909
if (!b || !e || e < b) {
1910
/* Skip the codeyes and go directly to codeno */
1911
self->ctx->pattern += skipyes;
1913
/* Continue to codeno */
1914
return SRE_MATCH_PASS;
1918
/* Move to next Op Code, i.e. codeyes (skipping the 2 parameters) */
1919
self->ctx->pattern += 2;
1921
return SRE_MATCH_PASS;
1924
Py_LOCAL_INLINE(Py_ssize_t)
1925
SRE_MATCH_ON_GROUPREF_IGNORE(SRE_MATCH_GLOBAL_CONTEXT *self,
1928
/* match backreference, ignoring case */
1930
/* <GROUPREF_IGNORE> <group> ... */
1931
Py_ssize_t group = (Py_ssize_t)self->ctx->pattern[0];
1933
/* For every group ID, there are 2 grouprefs, one for beginning, */
1934
/* one for end; so groupref is twice group */
1935
Py_ssize_t groupref = group + group;
1937
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", self->ctx->pattern,
1938
self->ctx->ptr, group));
1940
/* Check if the groupref is in the valid range */
1941
if (groupref >= state->lastmark) {
1942
/* Failed to Match; Pop Stack */
1943
return SRE_MATCH_RETURN_FAILURE(self);
1946
/* Get the beginning and end of the Group captured from the */
1949
SRE_CHAR *b = (SRE_CHAR *)state->mark[groupref];
1950
/* b is the beginning position in the Input Stream of the */
1951
/* previously matched group */
1953
SRE_CHAR *e = (SRE_CHAR *)state->mark[groupref + 1];
1954
/* e is the ending position in the Input Stream of the */
1955
/* previously matched group */
1957
/* Verify that the beginning and ending pointers are valid */
1958
/* and that the beginning occurs before the ending */
1959
if (!b || !e || e < b) {
1960
/* Failed to Match; Pop Stack */
1961
return SRE_MATCH_RETURN_FAILURE(self);
1965
/* For each matched character of the corresponding group: */
1967
/* Check it against the current Input Stream to */
1968
/* verify that it matches case-insensitively */
1969
/* Stop if the Input Stream is at its end */
1970
if (self->ctx->ptr >= self->end ||
1971
state->lower(*self->ctx->ptr) != state->lower(*b)) {
1973
/* Failed to Match; Pop Stack */
1974
return SRE_MATCH_RETURN_FAILURE(self);
1977
/* Increment the group match reference */
1980
/* Increment the Input Stream */
1986
/* Move to next Op Code */
1987
self->ctx->pattern++;
1989
return SRE_MATCH_PASS;
2506
1992
Py_LOCAL_INLINE(Py_ssize_t)
2507
1993
SRE_MATCH_ON_IN(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state)
2509
1995
/* match set member (or non_member) */
2510
/* <IN> <skip> <set> */
1996
/* <IN> <skip> <set> tail */
1997
/* <skip> points to tail */
2511
1998
int skip = (int)self->ctx->pattern[0];
2512
1999
SRE_CODE *set = &self->ctx->pattern[1];
2514
2001
TRACE(("|%p|%p|IN\n", self->ctx->pattern, self->ctx->ptr));
2003
/* If not at the end of input, check the next character to see if */
2004
/* it is in the given Character Set */
2516
2005
if (self->ctx->ptr >= self->end ||
2517
2006
!SRE_CHARSET(set, *self->ctx->ptr)) {
2008
/* Failed to Match; Pop Context */
2518
2009
return SRE_MATCH_RETURN_FAILURE(self);
2519
/* TODO: Go to exit */
2522
2012
/* Move to next Op Code and Character */
2666
2165
return SRE_MATCH_PASS;
2669
/* TODO: SRE_MATCH_ON_MAX_UNTIL */
2670
/* TODO: SRE_MATCH_ON_MIN_UNTIL */
2168
Py_LOCAL_INLINE(Py_ssize_t)
2169
SRE_MATCH_ON_MAX_UNTIL(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state)
2171
/* maximizing repeat */
2172
/* <REPEAT> <skip> <1=min> <2=max> item <MAX/MIN_UNTIL> tail */
2173
/* <skip> points to <MAX/MIN_UNTIL> */
2174
/* skip is not used */
2175
int min; /* Initialized Later */
2176
int max; /* Initialized Later */
2177
SRE_CODE *item; /* Initialized Later */
2179
/* FIXME: we probably need to deal with zero-width
2180
matches in here... */
2182
/* Get the Repeat Context from the one created by the */
2183
/* corresponding (in terms of nesting, i.e. if it goes repeat */
2184
/* repeat until repeat until until, the outermost until */
2185
/* corresponding to the first repeat) repeat, which should have */
2186
/* been stored in the global state when the corresponding Repeat */
2187
/* was encountered */
2188
/* Store current Repeat Context in this Context */
2189
/* Note: This is done because although the Repeat Context is */
2190
/* created in the call before the Push Stack (i.e. in the Repeat */
2191
/* handler), because Push Stack is done generically, the current */
2192
/* Repeat Context is instead stored in the global State and then */
2193
/* placed in the newly created context here after the context has */
2194
/* been created or when returned to after another repeat match. */
2195
self->ctx->u.rep = state->repeat;
2197
/* Check to see if the Repeat Context is valid; if not, we have */
2198
/* Until without Repeat, and need to return an error immediately */
2199
/* as the pattern is invalid */
2200
if (!self->ctx->u.rep) {
2201
return SRE_MATCH_RETURN_ERROR(self, SRE_ERROR_STATE);
2204
/* Set the current input stream position to the one saved in the */
2205
/* Current Context */
2206
state->ptr = self->ctx->ptr;
2208
/* Set the count of sub-matches found (if first time, increments */
2209
/* from -1 to 0) to the current context */
2210
self->ctx->count = self->ctx->u.rep->count + 1;
2212
TRACE(("|%p|%p|MAX_UNTIL %d\n", self->ctx->pattern, self->ctx->ptr,
2215
/* The pattern in the Context's Repeat Context points to just */
2216
/* past the corresponding REPEAT op code; set the name aliases */
2218
min = (int)self->ctx->u.rep->pattern[1];
2219
max = (int)self->ctx->u.rep->pattern[2];
2220
item = &self->ctx->u.rep->pattern[3];
2222
if (self->ctx->count < min) {
2223
/* not enough matches */
2225
/* Put the number of matches back in the Repeat Context */
2226
self->ctx->u.rep->count = self->ctx->count;
2228
/* Parse the Repeated Pattern once again */
2229
/* Move the match pattern to item to be repeatedly matched */
2230
/* and create a new parse sub-context */
2231
/* Go to SRE_MATCH_ON_JUMP_JUMP_MAX_UNTIL_1 when stack is */
2233
return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state,
2237
else if ((self->ctx->count < max || max == SRE_MAX_REPEAT) &&
2238
state->ptr != self->ctx->u.rep->last_ptr) {
2239
/* we may have enough matches, but if we can
2240
match another item, do so */
2242
/* Put the number of matches back in the Repeat Context */
2243
self->ctx->u.rep->count = self->ctx->count;
2245
/* Save the state's current Mark */
2246
SRE_MATCH_LASTMARK_SAVE(self, state);
2248
/* Save the current Context's previous Mark into the current */
2250
SRE_MATCH_MARK_PUSH(self, state, self->ctx->lastmark);
2252
/* zero-width match protection */
2253
/* Save the previous Input Stream position stored in the */
2254
/* Context's Repeat Context into the current State's Stack */
2255
SRE_MATCH_DATA_PUSH(self, state, &self->ctx->u.rep->last_ptr);
2257
/* Set the Current Repeat Context's previous Input Stream to */
2258
/* the Current Input Stream position */
2259
self->ctx->u.rep->last_ptr = state->ptr;
2261
/* Parse the Repeated Pattern once again */
2262
/* Move the match pattern to item to be repeatedly matched */
2263
/* and create a new parse sub-context */
2264
/* Go to SRE_MATCH_ON_JUMP_JUMP_MAX_UNTIL_2 when stack is */
2266
return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state,
2271
/* cannot match more repeated items here. make sure the tail matches */
2274
/* Restore the previous Repeat Context to be the current */
2275
/* Repeat Context */
2276
state->repeat = self->ctx->u.rep->prev;
2278
/* Parse the tail */
2279
/* Move the match pattern to tail to verify that matches and */
2280
/* create a new parse sub-context */
2281
/* Go to SRE_MATCH_ON_JUMP_JUMP_MAX_UNTIL_3 when stack is */
2283
return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state,
2285
self->ctx->pattern);
2289
/* Handler for the MIN_UNTIL op code (minimizing repeat).
 *
 * Binds the active repeat context (state->repeat) to the current match
 * context, reports SRE_ERROR_STATE when there is none, and bumps the
 * repetition count.  While the count is below the repeat's minimum the
 * repeated item is matched again through a pushed sub-context; otherwise
 * the mark state is saved, the enclosing repeat context is made current
 * and the tail (self->ctx->pattern) is tried through a pushed sub-context.
 *
 * self:  global matcher context; self->ctx is the current match context.
 * state: matcher state (input position, repeat chain).
 * Returns whatever the error/push helpers return.
 */
Py_LOCAL_INLINE(Py_ssize_t)
2290
SRE_MATCH_ON_MIN_UNTIL(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state)
2292
/* minimizing repeat */
2293
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
2294
/* <skip> points to <MAX/MIN_UNTIL> */
2295
/* skip is not used */
2296
int min; /* Initialized Later */
2297
/* max is not used */
2298
SRE_CODE *item; /* Initialized Later */
2300
/* Get the Repeat Context from the one created by the */
2301
/* corresponding (in terms of nesting, i.e. if it goes repeat */
2302
/* repeat until repeat until until, the outermost until */
2303
/* corresponding to the first repeat) repeat, which should have */
2304
/* been stored in the global state when the corresponding Repeat */
2305
/* was encountered */
2306
/* Store current Repeat Context in this Context */
2307
/* Note: This is done because although the Repeat Context is */
2308
/* created in the call before the Push Stack (i.e. in the Repeat */
2309
/* handler), because Push Stack is done generically, the current */
2310
/* Repeat Context is instead stored in the global State and then */
2311
/* placed in the newly created context here after the context has */
2312
/* been created or when returned to after another repeat match. */
2313
self->ctx->u.rep = state->repeat;
2315
/* Check to see if the Repeat Context is valid; if not, we have */
2316
/* Until without Repeat, and need to return an error immediately */
2317
/* as the pattern is invalid */
2318
if (!self->ctx->u.rep) {
2319
return SRE_MATCH_RETURN_ERROR(self, SRE_ERROR_STATE);
2322
/* Set the current input stream position to the one saved in the */
2323
/* Current Context */
2324
state->ptr = self->ctx->ptr;
2326
/* Set the count of sub-matches found (if first time, increments */
2327
/* from -1 to 0) to the current context */
2328
self->ctx->count = self->ctx->u.rep->count + 1;
2330
TRACE(("|%p|%p|MIN_UNTIL %d %p\n", self->ctx->pattern,
2331
self->ctx->ptr, self->ctx->count,
2332
self->ctx->u.rep->pattern));
2334
/* The pattern in the Context's Repeat Context points to just */
2335
/* past the corresponding REPEAT op code; set the name aliases */
2337
/* pattern[1] is <min>, pattern[3] is the first code of the item */
min = (int)self->ctx->u.rep->pattern[1];
2338
item = &self->ctx->u.rep->pattern[3];
2340
if (self->ctx->count < min) {
2341
/* not enough matches */
2343
/* Put the number of matches back in the Repeat Context */
2344
self->ctx->u.rep->count = self->ctx->count;
2346
/* NOTE(review): the resume label named below is MAX_UNTIL_1, but */
/* this is the MIN_UNTIL handler -- presumably JUMP_MIN_UNTIL_1 is */
/* meant; confirm against the jump argument of the push call. */
/* Parse the Repeated Pattern once again */
2347
/* Move the match pattern to item to be repeatedly matched */
2348
/* and create a new parse sub-context */
2349
/* Go to SRE_MATCH_ON_JUMP_JUMP_MAX_UNTIL_1 when stack is */
2351
return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state,
2356
/* Save the state's current Mark */
2357
SRE_MATCH_LASTMARK_SAVE(self, state);
2359
/* see if the tail matches */
2361
/* Restore the previous Repeat Context to be the current Repeat */
2363
state->repeat = self->ctx->u.rep->prev;
2365
return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state,
2367
self->ctx->pattern);
2672
2370
Py_LOCAL_INLINE(Py_ssize_t)
2673
2371
SRE_MATCH_ON_NOT_LITERAL(SRE_MATCH_GLOBAL_CONTEXT *self,
2962
2752
return SRE_MATCH_PASS;
2965
/* TODO: SRE_MATCH_ON_JUMP_MAX_UNTIL_1 */
2966
/* TODO: SRE_MATCH_ON_JUMP_MAX_UNTIL_2 */
2967
/* TODO: SRE_MATCH_ON_JUMP_MAX_UNTIL_3 */
2968
/* TODO: SRE_MATCH_ON_JUMP_MIN_UNTIL_1 */
2969
/* TODO: SRE_MATCH_ON_JUMP_MIN_UNTIL_2 */
2970
/* TODO: SRE_MATCH_ON_JUMP_MIN_UNTIL_3 */
2971
/* TODO: SRE_MATCH_ON_JUMP_REPEAT */
2755
/* Resume handler for the JUMP_MAX_UNTIL_1 jump.
 *
 * Inspects self->ret: a negative value is reported as SRE_MATCH_FAIL.
 * Otherwise the context is popped with success, or -- on the no-match
 * path -- the repeat count is rolled back to count - 1, state->ptr is
 * reset to the position saved in the context, and the context is popped
 * with failure.
 * NOTE(review): presumably success is taken when self->ret is non-zero;
 * confirm against the guarding condition.
 */
Py_LOCAL_INLINE(Py_ssize_t)
2756
SRE_MATCH_ON_JUMP_MAX_UNTIL_1(SRE_MATCH_GLOBAL_CONTEXT *self,
2759
/* TODO: This code is EXACTLY the same as SRE_MATCH_ON_JUMP_MIN_UNTIL_1; merge functions! */
2760
/* maximizing repeat */
2761
/* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
2762
/* <skip> points to <MAX/MIN_UNTIL> */
2763
/* skip is not used */
2764
/* min is not used */
2765
/* max is not used */
2766
/* item is not used */
2768
TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", self->ctx->pattern,
2771
/* Check for Success or Error */
2773
if (self->ret < 0) {
2774
/* Error, Return immediately */
2775
return SRE_MATCH_FAIL;
2778
/* Pop stack with Success */
2779
return SRE_MATCH_RETURN_SUCCESS(self);
2783
/* We did not find a match, so Decrement the Match count and */
2784
/* Pop Stack with failure. */
2785
/* Decrement Match Count */
2786
self->ctx->u.rep->count = self->ctx->count - 1;
2788
/* Restore Input Stream position to be the one saved in this */
2789
/* Context; this will roll back the Input Stream to the last */
2790
/* point that matched. */
2791
state->ptr = self->ctx->ptr;
2793
/* Pop stack with Failure */
2794
return SRE_MATCH_RETURN_FAILURE(self);
2798
/* Resume handler for the JUMP_MAX_UNTIL_2 jump.
 *
 * First pops the saved last_ptr back into the repeat context.  On the
 * success/error path the saved mark is discarded and either
 * SRE_MATCH_FAIL (self->ret < 0) or a success pop is returned.  On the
 * no-match path the marks and lastmark are restored, the repeat count is
 * rolled back to count - 1, state->ptr is reset to the context's saved
 * position, the enclosing repeat context is made current, and the tail
 * (self->ctx->pattern) is tried through a pushed sub-context.
 */
Py_LOCAL_INLINE(Py_ssize_t)
2799
SRE_MATCH_ON_JUMP_MAX_UNTIL_2(SRE_MATCH_GLOBAL_CONTEXT *self,
2802
/* maximizing repeat */
2803
/* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
2804
/* <skip> points to <MAX/MIN_UNTIL> */
2805
/* skip is not used */
2806
/* min is not used */
2807
/* max is not used */
2808
/* item is not used */
2810
TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", self->ctx->pattern,
2813
/* Failed to match, roll back */
2814
/* Restore the previous Input Stream position stored in the */
2815
/* current State's Stack into the Context's Repeat Context */
2816
SRE_MATCH_DATA_POP(state, &self->ctx->u.rep->last_ptr);
2818
/* Check for Success or Error */
2820
/* Remove the previously saved Mark from the current */
2822
SRE_MATCH_MARK_POP_DISCARD(state, self->ctx->lastmark);
2824
if (self->ret < 0) {
2825
/* Error, Return immediately */
2826
return SRE_MATCH_FAIL;
2829
/* Pop stack with Success */
2830
return SRE_MATCH_RETURN_SUCCESS(self);
2834
/* Failed to Match So Far... */
2836
/* Restore the current Context's previous Mark from the */
2837
/* current State's Stack */
2838
SRE_MATCH_MARK_POP(state, self->ctx->lastmark);
2840
/* Restore the state's current Mark */
2841
SRE_MATCH_LASTMARK_RESTORE(self, state);
2843
/* No match, match one less pattern and store the new */
2844
/* count in the Context's Repeat Context */
2845
self->ctx->u.rep->count = self->ctx->count - 1;
2847
/* Update the current Input Stream position from the one */
2848
/* saved in the current Context */
2849
state->ptr = self->ctx->ptr;
2851
/* cannot match more repeated items here. make sure the
2854
/* Restore the previous Repeat Context to be the current */
2855
/* Repeat Context */
2856
state->repeat = self->ctx->u.rep->prev;
2858
/* Parse the tail */
2859
/* Move the match pattern to tail to verify that matches and */
2860
/* create a new parse sub-context */
2861
/* Go to SRE_MATCH_ON_JUMP_JUMP_MAX_UNTIL_3 when stack is */
2863
return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state,
2865
self->ctx->pattern);
2869
/* Resume handler for the JUMP_MAX_UNTIL_3 jump.
 *
 * Inspects self->ret: a negative value is reported as SRE_MATCH_FAIL.
 * Otherwise the context is popped with success, or -- on the no-match
 * path -- this context's repeat context is reinstated as state->repeat,
 * state->ptr is reset to the context's saved position, and the context
 * is popped with failure.
 */
Py_LOCAL_INLINE(Py_ssize_t)
2870
SRE_MATCH_ON_JUMP_MAX_UNTIL_3(SRE_MATCH_GLOBAL_CONTEXT *self,
2873
/* maximizing repeat */
2874
/* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
2875
/* <skip> points to <MAX/MIN_UNTIL> */
2876
/* skip is not used */
2877
/* min is not used */
2878
/* max is not used */
2879
/* item is not used */
2881
TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", self->ctx->pattern,
2884
/* Done searching through the Input Stream, did we find a match? */
2885
/* Check for Success or Error */
2887
if (self->ret < 0) {
2888
/* Error, Return immediately */
2889
return SRE_MATCH_FAIL;
2892
/* Pop stack with Success */
2893
return SRE_MATCH_RETURN_SUCCESS(self);
2897
/* Failed to Match, Pop stack */
2899
/* Restore the current Context's Repeat Context to be the */
2900
/* current Repeat Context */
2901
state->repeat = self->ctx->u.rep;
2903
/* Restore Input Stream position to be the one saved in this */
2904
/* Context; this will roll back the Input Stream to the last */
2905
/* point that matched. */
2906
state->ptr = self->ctx->ptr;
2908
/* Pop stack with Failure */
2909
return SRE_MATCH_RETURN_FAILURE(self);
2913
/* Resume handler for the JUMP_MIN_UNTIL_1 jump.
 *
 * Inspects self->ret: a negative value is reported as SRE_MATCH_FAIL.
 * Otherwise the context is popped with success, or -- on the no-match
 * path -- the repeat count is rolled back to count - 1, state->ptr is
 * reset to the position saved in the context, and the context is popped
 * with failure.
 * NOTE(review): presumably success is taken when self->ret is non-zero;
 * confirm against the guarding condition.
 */
Py_LOCAL_INLINE(Py_ssize_t)
2914
SRE_MATCH_ON_JUMP_MIN_UNTIL_1(SRE_MATCH_GLOBAL_CONTEXT *self,
2917
/* TODO: This code is EXACTLY the same as SRE_MATCH_ON_JUMP_MAX_UNTIL_1; merge functions! */
2918
/* minimizing repeat */
2919
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
2920
/* <skip> points to <MAX/MIN_UNTIL> */
2921
/* skip is not used */
2922
/* min is not used */
2923
/* max is not used */
2924
/* item is not used */
2926
TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", self->ctx->pattern,
2929
/* Check for Success or Error */
2931
if (self->ret < 0) {
2932
/* Error, Return immediately */
2933
return SRE_MATCH_FAIL;
2936
/* Pop stack with Success */
2937
return SRE_MATCH_RETURN_SUCCESS(self);
2941
/* We did not find a match, so Decrement the Match count and */
2942
/* Pop Stack with failure. */
2943
/* Decrement Match Count */
2944
self->ctx->u.rep->count = self->ctx->count - 1;
2946
/* Restore Input Stream position to be the one saved in this */
2947
/* Context; this will roll back the Input Stream to the last */
2948
/* point that matched. */
2949
state->ptr = self->ctx->ptr;
2951
/* Pop stack with Failure */
2952
return SRE_MATCH_RETURN_FAILURE(self);
2956
/* Resume handler for the JUMP_MIN_UNTIL_2 jump.
 *
 * Inspects self->ret: a negative value is reported as SRE_MATCH_FAIL,
 * and the success path pops the context with success.  On the no-match
 * path this context's repeat context is reinstated as state->repeat,
 * state->ptr and lastmark are restored, and -- unless the count has
 * reached <max> (with max != SRE_MAX_REPEAT), which pops with failure --
 * the count is stored back and the repeated item is matched once more
 * through a pushed sub-context.
 */
Py_LOCAL_INLINE(Py_ssize_t)
2957
SRE_MATCH_ON_JUMP_MIN_UNTIL_2(SRE_MATCH_GLOBAL_CONTEXT *self,
2960
/* minimizing repeat */
2961
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
2962
/* <skip> points to <MAX/MIN_UNTIL> */
2963
/* skip is not used */
2964
/* min is not used */
2965
int max; /* Initialized Later */
2966
SRE_CODE *item; /* Initialized Later */
2968
/* NOTE(review): the trace label below reads JUMP_MAX_UNTIL_2 although */
/* this is the MIN_UNTIL_2 handler -- looks like a copy/paste slip; */
/* the string is left unchanged here. */
TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", self->ctx->pattern,
2971
/* Check for Success or Error */
2973
if (self->ret < 0) {
2974
/* Error, Return immediately */
2975
return SRE_MATCH_FAIL;
2978
/* Pop stack with Success */
2979
return SRE_MATCH_RETURN_SUCCESS(self);
2983
/* Failed to Match So Far... */
2985
/* The pattern in the Context's Repeat Context points to */
2986
/* just past the corresponding REPEAT op code; set the name */
2987
/* aliases based on this */
2988
max = (int)self->ctx->u.rep->pattern[2];
2990
/* Restore the current Context's Repeat Context to be the */
2991
/* current Repeat Context */
2992
state->repeat = self->ctx->u.rep;
2994
/* Restore Input Stream position to be the one saved in this */
2995
/* Context; this will roll back the Input Stream to the last */
2996
/* point that matched. */
2997
state->ptr = self->ctx->ptr;
2999
/* Restore the state's current Mark */
3000
SRE_MATCH_LASTMARK_RESTORE(self, state);
3002
/* Check to see if we have too many matches and thus fail */
3003
if (self->ctx->count >= max && max != SRE_MAX_REPEAT) {
3004
/* Failed to Match, Pop stack */
3005
return SRE_MATCH_RETURN_FAILURE(self);
3008
/* We have not reached our match limit, try to match */
3011
/* The pattern in the Context's Repeat Context points to */
3012
/* just past the corresponding REPEAT op code; set the */
3013
/* name aliases based on this */
3014
item = &self->ctx->u.rep->pattern[3];
3016
/* Put the number of matches back in the Repeat Context */
3017
self->ctx->u.rep->count = self->ctx->count;
3019
/* Parse the Repeated Pattern once again */
3020
/* Move the match pattern to item to be repeatedly */
3021
/* matched and create a new parse sub-context */
3022
/* Go to SRE_MATCH_ON_JUMP_JUMP_MIN_UNTIL_3 when stack is */
3024
return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state,
3031
/* Resume handler for the JUMP_MIN_UNTIL_3 jump.
 *
 * Inspects self->ret: a negative value is reported as SRE_MATCH_FAIL.
 * Otherwise the context is popped with success, or -- on the no-match
 * path -- the repeat count is rolled back to count - 1, state->ptr is
 * reset to the context's saved position, and the context is popped with
 * failure.
 */
Py_LOCAL_INLINE(Py_ssize_t)
3032
SRE_MATCH_ON_JUMP_MIN_UNTIL_3(SRE_MATCH_GLOBAL_CONTEXT *self,
3035
/* minimizing repeat */
3036
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
3037
/* <skip> points to <MAX/MIN_UNTIL> */
3038
/* skip is not used */
3039
/* min is not used */
3040
/* max is not used */
3041
/* item is not used */
3043
/* NOTE(review): the trace label below reads JUMP_MAX_UNTIL_3 although */
/* this is the MIN_UNTIL_3 handler -- looks like a copy/paste slip; */
/* the string is left unchanged here. */
TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", self->ctx->pattern,
3046
/* If the tail matched or we encountered an unrecoverable error, */
3049
if (self->ret < 0) {
3050
/* Error, Return immediately */
3051
return SRE_MATCH_FAIL;
3054
/* Pop stack with Success */
3055
return SRE_MATCH_RETURN_SUCCESS(self);
3059
/* Tail did not Match, pop stack and see if we can try again */
3060
/* with another Match of item */
3062
/* No match, match one less pattern and store the new */
3063
/* count in the Context's Repeat Context */
3064
self->ctx->u.rep->count = self->ctx->count - 1;
3066
/* Update the current Input Stream position from the one */
3067
/* saved in the current Context */
3068
state->ptr = self->ctx->ptr;
3070
/* Failed to Match, Pop stack */
3071
return SRE_MATCH_RETURN_FAILURE(self);
3075
/* Resume handler for the JUMP_REPEAT jump.
 *
 * Unlinks this context's repeat context from the state's repeat chain
 * (state->repeat becomes its prev), frees it with PyObject_FREE, and then
 * reports the outcome held in self->ret: SRE_MATCH_FAIL when negative,
 * otherwise a success or failure pop of the context.
 * NOTE(review): self->ctx->u.rep is not cleared after the free; confirm
 * nothing reads it before the context is popped.
 */
Py_LOCAL_INLINE(Py_ssize_t)
3076
SRE_MATCH_ON_JUMP_REPEAT(SRE_MATCH_GLOBAL_CONTEXT *self,
3079
/* create repeat context. all the hard work is done
3080
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
3081
/* <REPEAT> <skip> <1=min> <2=max> item <MAX/MIN_UNTIL> tail */
3082
/* <skip> points to <MAX/MIN_UNTIL> */
3083
/* skip is not used */
3084
/* min is not used */
3085
/* max is not used */
3087
TRACE(("|%p|%p|JUMP_REPEAT\n", self->ctx->pattern, self->ctx->ptr));
3089
/* Roll back the Repeat State */
3090
/* prev must be read before the repeat context is freed below */
state->repeat = self->ctx->u.rep->prev;
3092
/* Free the memory used by the Repeat State */
3093
PyObject_FREE(self->ctx->u.rep);
3095
/* Check for errors */
3097
if (self->ret < 0) {
3098
return SRE_MATCH_FAIL;
3101
/* Successful Match */
3102
return SRE_MATCH_RETURN_SUCCESS(self);
3106
/* Failed to match */
3107
return SRE_MATCH_RETURN_FAILURE(self);
2973
3111
Py_LOCAL_INLINE(Py_ssize_t)
2974
3112
SRE_MATCH_ON_JUMP_REPEAT_ONE_1(SRE_MATCH_GLOBAL_CONTEXT *self,
3179
3352
return SRE_MATCH_FAIL;
3355
/* Pop Context with a Success Condition */
3182
3356
return SRE_MATCH_RETURN_SUCCESS(self);
3186
if (self->ctx->u.rep) {
3187
SRE_MATCH_MARK_POP_KEEP(state, self->ctx->lastmark);
3190
SRE_MATCH_LASTMARK_RESTORE(self, state);
3192
for (; self->ctx->pattern[0];
3193
self->ctx->pattern += self->ctx->pattern[0]) {
3194
if (self->ctx->pattern[1] == SRE_OP_LITERAL &&
3195
(self->ctx->ptr >= self->end ||
3196
(SRE_CODE) *self->ctx->ptr != self->ctx->pattern[2])) {
3197
/* Literal Miss-Match, continue */
3201
else if (self->ctx->pattern[1] == SRE_OP_IN &&
3202
(self->ctx->ptr >= self->end ||
3203
!SRE_CHARSET(self->ctx->pattern + 3,
3204
(SRE_CODE)*self->ctx->ptr))) {
3205
/* Character Class Miss-Match, continue */
3209
state->ptr = self->ctx->ptr;
3211
return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state,
3213
self->ctx->pattern +
3217
if (self->ctx->u.rep) {
3218
SRE_MATCH_MARK_POP_DISCARD(state, self->ctx->lastmark);
3221
return SRE_MATCH_RETURN_FAILURE(self);
3222
/* TODO: Go to exit */
3225
/* TODO: SRE_MATCH_ON_JUMP_ASSERT */
3226
/* TODO: SRE_MATCH_ON_JUMP_ASSERT_NOT */
3360
/* If there is a Repeat Context stored in this Context */
3361
if (self->ctx->u.rep) {
3362
/* Preserve the Mark left there by the original Branch */
3364
SRE_MATCH_MARK_POP_KEEP(state, self->ctx->lastmark);
3367
/* Restore the previous Mark stored in the Current Context to be */
3368
/* the current last Mark. */
3369
SRE_MATCH_LASTMARK_RESTORE(self, state);
3371
/* This is done as a do-while because it represents the return */
3372
/* to the for iteration, which would increment */
3373
/* self->ctx->pattern by skip before continuing */
3375
/* The last Branch option would or has failed to match */
3376
/* Increment the pattern to the next branch point */
3377
self->ctx->pattern += skip;
3379
/* Update the named aliases list */
3380
skip = (int)self->ctx->pattern[0];
3381
p_next_op = &self->ctx->pattern[1];
3382
literal = (SRE_CHAR)self->ctx->pattern[2];
3383
set = &self->ctx->pattern[3];
3385
/* Skip simple match cases; e.g. Literal and In op codes */
3386
/* If skip is the <FAILURE> op code, it will be zero and the */
3387
/* loop will exit */
3390
/* Check for Literal Miss-Match */
3391
((*p_next_op == SRE_OP_LITERAL &&
3392
/* Check for End of Input Stream */
3393
(self->ctx->ptr >= self->end ||
3394
/* Check for Literal Miss-match */
3395
(SRE_CODE)*self->ctx->ptr != literal)) ||
3397
/* Check for Character Class Miss-Match */
3398
(*p_next_op == SRE_OP_IN &&
3399
/* Check for End of Input Stream */
3400
(self->ctx->ptr >= self->end ||
3401
/* Check for Character not in Class */
3402
!SRE_CHARSET(set, (SRE_CODE)*self->ctx->ptr)))));
3404
/* If skip is the <FAILURE> op code, it will be zero and the */
3405
/* pseudo-recursion will be skipped. */
3407
/* Set the current input buffer position to that stored */
3408
/* in the Current Context */
3409
state->ptr = self->ctx->ptr;
3411
/* Move the match pattern to the next code block's op */
3412
/* code and create a new parse sub-context */
3413
/* Go to this function (SRE_MATCH_ON_JUMP_BRANCH) when */
3414
/* stack is popped */
3415
/*** Prepare for Pseudo-Recursion ***/
3416
/* Return non-zero on error */
3417
if (SRE_MATCH_DATA_ALLOC(self, state, &nextctx)) {
3418
return SRE_MATCH_FAIL;
3420
nextctx->last_ctx_pos = self->ctx_pos;
3421
nextctx->jump = JUMP_BRANCH;
3422
nextctx->pattern = p_next_op;
3423
self->ctx_pos = self->alloc_pos;
3424
self->ctx = nextctx;
3425
/* goto entrance; */
3427
/*** Restore after Pseudo-Recursion ***/
3428
self->good = SRE_MATCH_EXIT;
3429
return SRE_MATCH_PASS;
3430
return SRE_MATCH_ON_PUSH_CTX_AND_POSITION(self, state,
3435
/* There are no more codes sections in the Branch, and */
3436
/* none of the previous ones matched, so fail and pop */
3439
if (self->ctx->u.rep) {
3440
/* A mark was left in the Context's Repeat Context; */
3442
SRE_MATCH_MARK_POP_DISCARD(state, self->ctx->lastmark);
3445
/* Fail and Pop Context */
3446
return SRE_MATCH_RETURN_FAILURE(self);
3451
/* Resume handler for the JUMP_ASSERT jump.
 *
 * Inspects self->ret left by the asserted sub-pattern: a negative value
 * is reported as SRE_MATCH_FAIL, zero pops the context with failure, and
 * any other value advances self->ctx->pattern by <skip> (to the tail) and
 * returns SRE_MATCH_PASS so matching continues in the same context.
 */
Py_LOCAL_INLINE(Py_ssize_t)
3452
SRE_MATCH_ON_JUMP_ASSERT(SRE_MATCH_GLOBAL_CONTEXT *self,
3455
/* assert subpattern */
3456
/* <ASSERT> <skip> <back> <pattern> <SUCCESS> tail */
3457
/* <skip> points to tail */
3458
/* <back> is 0 for look-ahead */
3459
int skip = (int)self->ctx->pattern[0];
3460
/* back is not used */
3461
/* pattern is not used */
3463
TRACE(("|%p|%p|JUMP_ASSERT\n", self->ctx->pattern, self->ctx->ptr));
3465
/* Check for failure */
3466
if (self->ret < 0) {
3467
return SRE_MATCH_FAIL;
3469
else if (self->ret == 0) {
3470
return SRE_MATCH_RETURN_FAILURE(self);
3473
/* Advance Pattern to Next Op Code */
3474
self->ctx->pattern += skip;
3476
/* Successful Return */
3477
return SRE_MATCH_PASS;
3481
/* Resume handler for the JUMP_ASSERT_NOT jump.
 *
 * Inspects self->ret left by the negated sub-pattern: a negative value is
 * reported as SRE_MATCH_FAIL; on the failing path the context is popped
 * with failure; otherwise self->ctx->pattern is advanced by <skip> (to
 * the tail) and SRE_MATCH_PASS is returned.
 * NOTE(review): presumably the failure pop is taken when the sub-pattern
 * matched (self->ret non-zero); confirm against the guarding condition.
 */
Py_LOCAL_INLINE(Py_ssize_t)
3482
SRE_MATCH_ON_JUMP_ASSERT_NOT(SRE_MATCH_GLOBAL_CONTEXT *self,
3485
/* assert not subpattern */
3486
/* <ASSERT_NOT> <skip> <back> <pattern> <SUCCESS> tail */
3487
/* <skip> points to tail */
3488
/* <back> is 0 for look-ahead */
3489
int skip = (int)self->ctx->pattern[0];
3490
/* back is not used */
3491
/* pattern is not used */
3493
TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", self->ctx->pattern,
3496
/* Check for Errors, and if successful, fail because this is */
3499
if (self->ret < 0) {
3500
return SRE_MATCH_FAIL;
3503
return SRE_MATCH_RETURN_FAILURE(self);
3507
/* Advance Pattern to Next Op Code */
3508
self->ctx->pattern += skip;
3510
/* Successful Return */
3511
return SRE_MATCH_PASS;
3228
3515
Py_ssize_t (*SRE_MATCH_LOOKUP_TABLE[SRE_OP__COUNT])
3229
3516
(SRE_MATCH_GLOBAL_CONTEXT *self, SRE_STATE *state) = {
3429
3717
LOCAL(Py_ssize_t)
3430
SRE_MATCHa(SRE_STATE* state, SRE_CODE* pattern)
3433
unsigned int sigcount; /* Iteration Counter; used for signalling */
3434
SRE_MATCH_CONTEXT *nextctx; /* Next Context */
3436
SRE_MATCH_GLOBAL_CONTEXT self = {
3437
/* Member Initialization */
3438
0, /* end (init below) */
3441
0, /* ret (default to success) */
3442
JUMP_NONE, /* jump */
3443
0, /* ctx (pointer) */
3444
(unsigned int)-1 /* op_code */
3446
self.end = (SRE_CHAR *)state->end;
3448
TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
3450
SRE_MATCH_DATA_ALLOC(&self, state, &self.ctx);
3451
self.ctx->last_ctx_pos = -1;
3452
self.ctx->jump = JUMP_NONE;
3453
self.ctx->pattern = pattern;
3454
self.ctx_pos = self.alloc_pos;
3458
SRE_MATCH_PARSE_HEADER(&self, state);
3459
/* TODO: Go to exit on !self.good */
3463
/* Every 4096 iterations (sigcount & 0xfff == 0), accept an interrupt from the user */
3464
if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
3465
return SRE_MATCH_RETURN_ERROR(&self, SRE_ERROR_INTERRUPTED);
3466
/* TODO: when this is called, should return right away! */
3468
switch (*self.ctx->pattern++) {
3471
SRE_MATCH_ON_MARK(&self, state);
3474
case SRE_OP_LITERAL:
3475
if (!SRE_MATCH_ON_LITERAL(&self, state)) {
3476
/* TODO: Check for ret < 0 and if so return */
3481
case SRE_OP_NOT_LITERAL:
3482
if (!SRE_MATCH_ON_NOT_LITERAL(&self, state)) {
3483
/* TODO: Check for ret < 0 and if so return */
3488
case SRE_OP_SUCCESS:
3489
if (!SRE_MATCH_ON_SUCCESS(&self, state)) {
3490
/* TODO: Check for ret < 0 and if so return */
3493
/* Should never get here; always sets self.good to false */
3496
if (!SRE_MATCH_ON_AT(&self, state)) {
3497
/* TODO: Check for ret < 0 and if so return */
3502
case SRE_OP_CATEGORY:
3503
if (!SRE_MATCH_ON_CATEGORY(&self, state)) {
3504
/* TODO: Check for ret < 0 and if so return */
3510
if (!SRE_MATCH_ON_ANY(&self, state)) {
3511
/* TODO: Check for ret < 0 and if so return */
3516
case SRE_OP_ANY_ALL:
3517
if (!SRE_MATCH_ON_ANY_ALL(&self, state)) {
3518
/* TODO: Check for ret < 0 and if so return */
3524
if (!SRE_MATCH_ON_IN(&self, state)) {
3525
/* TODO: Check for ret < 0 and if so return */
3530
case SRE_OP_LITERAL_IGNORE:
3531
if (!SRE_MATCH_ON_LITERAL_IGNORE(&self, state)) {
3532
/* TODO: Check for ret < 0 and if so return */
3537
case SRE_OP_NOT_LITERAL_IGNORE:
3538
if (!SRE_MATCH_ON_NOT_LITERAL_IGNORE(&self, state)) {
3539
/* TODO: Check for ret < 0 and if so return */
3544
case SRE_OP_IN_IGNORE:
3545
if (!SRE_MATCH_ON_IN_IGNORE(&self, state)) {
3546
/* TODO: Check for ret < 0 and if so return */
3553
SRE_MATCH_ON_JUMP(&self, state);
3558
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
3559
TRACE(("|%p|%p|BRANCH\n", self.ctx->pattern, self.ctx->ptr));
3560
SRE_MATCH_LASTMARK_SAVE(&self, state);
3561
self.ctx->u.rep = state->repeat;
3562
if (self.ctx->u.rep)
3563
SRE_MATCH_MARK_PUSH(&self, state, self.ctx->lastmark);
3564
for (; self.ctx->pattern[0]; self.ctx->pattern += self.ctx->pattern[0]) {
3565
if (self.ctx->pattern[1] == SRE_OP_LITERAL &&
3566
(self.ctx->ptr >= self.end ||
3567
(SRE_CODE) *self.ctx->ptr != self.ctx->pattern[2]))
3569
if (self.ctx->pattern[1] == SRE_OP_IN &&
3570
(self.ctx->ptr >= self.end ||
3571
!SRE_CHARSET(self.ctx->pattern + 3, (SRE_CODE) *self.ctx->ptr)))
3573
state->ptr = self.ctx->ptr;
3574
/*** Prepare for Pseudo-Recursion ***/
3575
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
3576
nextctx->last_ctx_pos = self.ctx_pos;
3577
nextctx->jump = JUMP_BRANCH;
3578
nextctx->pattern = self.ctx->pattern + 1;
3579
self.ctx_pos = self.alloc_pos;
3583
/*** Restore after Pseudo-Recursion ***/
3585
if (self.ctx->u.rep)
3586
SRE_MATCH_MARK_POP_DISCARD(state, self.ctx->lastmark);
3587
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3588
/* TODO: Return if error */
3589
SRE_MATCH_RETURN_SUCCESS(&self);
3590
/* TODO: Go to exit */
3592
if (self.ctx->u.rep)
3593
SRE_MATCH_MARK_POP_KEEP(state, self.ctx->lastmark);
3594
SRE_MATCH_LASTMARK_RESTORE(&self, state);
3596
if (self.ctx->u.rep)
3597
SRE_MATCH_MARK_POP_DISCARD(state, self.ctx->lastmark);
3598
SRE_MATCH_RETURN_FAILURE(&self);
3599
/* TODO: Go to exit */
3601
case SRE_OP_REPEAT_ONE:
3602
/* match repeated sequence (maximizing regexp) */
3604
/* this operator only works if the repeated item is
3605
exactly one character wide, and we're not already
3606
collecting backtracking points. for other cases,
3607
use the MAX_REPEAT operator */
3609
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
3611
TRACE(("|%p|%p|REPEAT_ONE %d %d\n", self.ctx->pattern, self.ctx->ptr,
3612
self.ctx->pattern[1], self.ctx->pattern[2]));
3614
if (self.ctx->ptr + self.ctx->pattern[1] > self.end)
3615
SRE_MATCH_RETURN_FAILURE(&self); /* cannot match */
3616
/* TODO: Go to exit */
3618
state->ptr = self.ctx->ptr;
3620
self.ret = SRE_COUNT(state, self.ctx->pattern+3, self.ctx->pattern[2]);
3621
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3622
/* TODO: Return if error */
3623
SRE_MATCH_DATA_LOOKUP_AT(state, &self.ctx, self.ctx_pos);
3624
self.ctx->count = self.ret;
3625
self.ctx->ptr += self.ctx->count;
3627
/* when we arrive here, count contains the number of
3628
matches, and self.ctx->ptr points to the tail of the target
3629
string. check if the rest of the pattern matches,
3630
and backtrack if not. */
3632
if (self.ctx->count < (Py_ssize_t) self.ctx->pattern[1])
3633
SRE_MATCH_RETURN_FAILURE(&self);
3634
/* TODO: Go to exit */
3636
if (self.ctx->pattern[self.ctx->pattern[0]] == SRE_OP_SUCCESS) {
3637
/* tail is empty. we're finished */
3638
state->ptr = self.ctx->ptr;
3639
SRE_MATCH_RETURN_SUCCESS(&self);
3640
/* TODO: Go to exit */
3643
SRE_MATCH_LASTMARK_SAVE(&self, state);
3645
if (self.ctx->pattern[self.ctx->pattern[0]] == SRE_OP_LITERAL) {
3646
/* tail starts with a literal. skip positions where
3647
the rest of the pattern cannot possibly match */
3648
self.ctx->u.chr = self.ctx->pattern[self.ctx->pattern[0]+1];
3650
while (self.ctx->count >= (Py_ssize_t) self.ctx->pattern[1] &&
3651
(self.ctx->ptr >= self.end || *self.ctx->ptr != self.ctx->u.chr)) {
3655
if (self.ctx->count < (Py_ssize_t) self.ctx->pattern[1])
3657
state->ptr = self.ctx->ptr;
3658
/*** Prepare for Pseudo-Recursion ***/
3659
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
3660
nextctx->last_ctx_pos = self.ctx_pos;
3661
nextctx->jump = JUMP_REPEAT_ONE_1;
3662
nextctx->pattern = self.ctx->pattern +
3663
self.ctx->pattern[0];
3664
self.ctx_pos = self.alloc_pos;
3668
/*** Restore after Pseudo-Recursion ***/
3670
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3671
/* TODO: Return if error */
3672
SRE_MATCH_RETURN_SUCCESS(&self);
3673
/* TODO: Go to exit */
3676
SRE_MATCH_LASTMARK_RESTORE(&self, state);
3684
while (self.ctx->count >= (Py_ssize_t) self.ctx->pattern[1]) {
3685
state->ptr = self.ctx->ptr;
3686
/*** Prepare for Pseudo-Recursion ***/
3687
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
3688
nextctx->last_ctx_pos = self.ctx_pos;
3689
nextctx->jump = JUMP_REPEAT_ONE_2;
3690
nextctx->pattern = self.ctx->pattern +
3691
self.ctx->pattern[0];
3692
self.ctx_pos = self.alloc_pos;
3696
/*** Restore after Pseudo-Recursion ***/
3698
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3699
/* TODO: Return if error */
3700
SRE_MATCH_RETURN_SUCCESS(&self);
3701
/* TODO: Go to exit */
3705
SRE_MATCH_LASTMARK_RESTORE(&self, state);
3708
SRE_MATCH_RETURN_FAILURE(&self);
3709
/* TODO: Go to exit */
3711
case SRE_OP_MIN_REPEAT_ONE:
3712
/* match repeated sequence (minimizing regexp) */
3714
/* this operator only works if the repeated item is
3715
exactly one character wide, and we're not already
3716
collecting backtracking points. for other cases,
3717
use the MIN_REPEAT operator */
3719
/* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
3721
TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", self.ctx->pattern, self.ctx->ptr,
3722
self.ctx->pattern[1], self.ctx->pattern[2]));
3724
if (self.ctx->ptr + self.ctx->pattern[1] > self.end)
3725
SRE_MATCH_RETURN_FAILURE(&self); /* cannot match */
3726
/* TODO: Go to exit */
3728
state->ptr = self.ctx->ptr;
3730
if (self.ctx->pattern[1] == 0)
3731
self.ctx->count = 0;
3733
/* count using pattern min as the maximum */
3734
self.ret = SRE_COUNT(state, self.ctx->pattern+3, self.ctx->pattern[1]);
3735
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3736
/* TODO: Return if error */
3737
SRE_MATCH_DATA_LOOKUP_AT(state, &self.ctx, self.ctx_pos);
3738
if (self.ret < (Py_ssize_t) self.ctx->pattern[1])
3739
/* didn't match minimum number of times */
3740
SRE_MATCH_RETURN_FAILURE(&self);
3741
/* TODO: Go to exit */
3742
/* advance past minimum matches of repeat */
3743
self.ctx->count = self.ret;
3744
self.ctx->ptr += self.ctx->count;
3747
if (self.ctx->pattern[self.ctx->pattern[0]] == SRE_OP_SUCCESS) {
3748
/* tail is empty. we're finished */
3749
state->ptr = self.ctx->ptr;
3750
SRE_MATCH_RETURN_SUCCESS(&self);
3751
/* TODO: Go to exit */
3755
SRE_MATCH_LASTMARK_SAVE(&self, state);
3756
while ((Py_ssize_t)self.ctx->pattern[2] == 65535
3757
|| self.ctx->count <= (Py_ssize_t)self.ctx->pattern[2]) {
3758
state->ptr = self.ctx->ptr;
3759
/*** Prepare for Pseudo-Recursion ***/
3760
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
3761
nextctx->last_ctx_pos = self.ctx_pos;
3762
nextctx->jump = JUMP_MIN_REPEAT_ONE;
3763
nextctx->pattern = self.ctx->pattern +
3764
self.ctx->pattern[0];
3765
self.ctx_pos = self.alloc_pos;
3768
jump_min_repeat_one:
3769
/*** Restore after Pseudo-Recursion ***/
3771
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3772
/* TODO: Return if error */
3773
SRE_MATCH_RETURN_SUCCESS(&self);
3774
/* TODO: Go to exit */
3776
state->ptr = self.ctx->ptr;
3777
self.ret = SRE_COUNT(state, self.ctx->pattern+3, 1);
3778
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3779
/* TODO: Return if error */
3780
SRE_MATCH_DATA_LOOKUP_AT(state, &self.ctx, self.ctx_pos);
3783
assert(self.ret == 1);
3786
SRE_MATCH_LASTMARK_RESTORE(&self, state);
3789
SRE_MATCH_RETURN_FAILURE(&self);
3790
/* TODO: Go to exit */
3793
/* create repeat context. all the hard work is done
3794
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
3795
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
3796
TRACE(("|%p|%p|REPEAT %d %d\n", self.ctx->pattern, self.ctx->ptr,
3797
self.ctx->pattern[1], self.ctx->pattern[2]));
3799
/* install new repeat context */
3800
self.ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*self.ctx->u.rep));
3801
if (!self.ctx->u.rep) {
3803
SRE_MATCH_RETURN_FAILURE(&self);
3804
/* TODO: Go to exit */
3806
self.ctx->u.rep->count = -1;
3807
self.ctx->u.rep->pattern = self.ctx->pattern;
3808
self.ctx->u.rep->prev = state->repeat;
3809
self.ctx->u.rep->last_ptr = NULL;
3810
state->repeat = self.ctx->u.rep;
3812
state->ptr = self.ctx->ptr;
3813
/*** Prepare for Pseudo-Recursion ***/
3814
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
3815
nextctx->last_ctx_pos = self.ctx_pos;
3816
nextctx->jump = JUMP_REPEAT;
3817
nextctx->pattern = self.ctx->pattern +
3818
self.ctx->pattern[0];
3819
self.ctx_pos = self.alloc_pos;
3823
/*** Restore after Pseudo-Recursion ***/
3824
state->repeat = self.ctx->u.rep->prev;
3825
PyObject_FREE(self.ctx->u.rep);
3828
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3829
/* TODO: Return if error */
3830
SRE_MATCH_RETURN_SUCCESS(&self);
3831
/* TODO: Go to exit */
3833
SRE_MATCH_RETURN_FAILURE(&self);
3834
/* TODO: Go to exit */
3836
case SRE_OP_MAX_UNTIL:
3837
/* maximizing repeat */
3838
/* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
3840
/* FIXME: we probably need to deal with zero-width
3841
matches in here... */
3843
self.ctx->u.rep = state->repeat;
3844
if (!self.ctx->u.rep)
3845
SRE_MATCH_RETURN_ERROR(&self, SRE_ERROR_STATE);
3846
/* TODO: when this is called, should return right away! */
3848
state->ptr = self.ctx->ptr;
3850
self.ctx->count = self.ctx->u.rep->count+1;
3852
TRACE(("|%p|%p|MAX_UNTIL %d\n", self.ctx->pattern,
3853
self.ctx->ptr, self.ctx->count));
3855
if (self.ctx->count < self.ctx->u.rep->pattern[1]) {
3856
/* not enough matches */
3857
self.ctx->u.rep->count = self.ctx->count;
3858
/*** Prepare for Pseudo-Recursion ***/
3859
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
3860
nextctx->last_ctx_pos = self.ctx_pos;
3861
nextctx->jump = JUMP_MAX_UNTIL_1;
3862
nextctx->pattern = self.ctx->u.rep->pattern + 3;
3863
self.ctx_pos = self.alloc_pos;
3867
/*** Restore after Pseudo-Recursion ***/
3869
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3870
/* TODO: Return if error */
3871
SRE_MATCH_RETURN_SUCCESS(&self);
3872
/* TODO: Go to exit */
3874
self.ctx->u.rep->count = self.ctx->count-1;
3875
state->ptr = self.ctx->ptr;
3876
SRE_MATCH_RETURN_FAILURE(&self);
3877
/* TODO: Go to exit */
3880
if ((self.ctx->count < self.ctx->u.rep->pattern[2] ||
3881
self.ctx->u.rep->pattern[2] == 65535) &&
3882
state->ptr != self.ctx->u.rep->last_ptr) {
3883
/* we may have enough matches, but if we can
3884
match another item, do so */
3885
self.ctx->u.rep->count = self.ctx->count;
3886
SRE_MATCH_LASTMARK_SAVE(&self, state);
3887
SRE_MATCH_MARK_PUSH(&self, state, self.ctx->lastmark);
3888
/* zero-width match protection */
3889
SRE_MATCH_DATA_PUSH(&self, state, &self.ctx->u.rep->last_ptr);
3890
self.ctx->u.rep->last_ptr = state->ptr;
3891
/*** Prepare for Pseudo-Recursion ***/
3892
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
3893
nextctx->last_ctx_pos = self.ctx_pos;
3894
nextctx->jump = JUMP_MAX_UNTIL_2;
3895
nextctx->pattern = self.ctx->u.rep->pattern + 3;
3896
self.ctx_pos = self.alloc_pos;
3900
/*** Restore after Pseudo-Recursion ***/
3901
SRE_MATCH_DATA_POP(state, &self.ctx->u.rep->last_ptr);
3903
SRE_MATCH_MARK_POP_DISCARD(state, self.ctx->lastmark);
3904
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3905
/* TODO: Return if error */
3906
SRE_MATCH_RETURN_SUCCESS(&self);
3907
/* TODO: Go to exit */
3909
SRE_MATCH_MARK_POP(state, self.ctx->lastmark);
3910
SRE_MATCH_LASTMARK_RESTORE(&self, state);
3911
self.ctx->u.rep->count = self.ctx->count-1;
3912
state->ptr = self.ctx->ptr;
3915
/* cannot match more repeated items here. make sure the tail matches */
3917
state->repeat = self.ctx->u.rep->prev;
3918
/*** Prepare for Pseudo-Recursion ***/
3919
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
3920
nextctx->last_ctx_pos = self.ctx_pos;
3921
nextctx->jump = JUMP_MAX_UNTIL_3;
3922
nextctx->pattern = self.ctx->pattern;
3923
self.ctx_pos = self.alloc_pos;
3927
/*** Restore after Pseudo-Recursion ***/
3928
SRE_MATCH_RETURN_ON_SUCCESS(&self, self.ret);
3929
/* TODO: Return if error; else Go to exit */
3930
state->repeat = self.ctx->u.rep;
3931
state->ptr = self.ctx->ptr;
3932
SRE_MATCH_RETURN_FAILURE(&self);
3933
/* TODO: Go to exit */
3935
case SRE_OP_MIN_UNTIL:
3936
/* minimizing repeat */
3937
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
3939
self.ctx->u.rep = state->repeat;
3940
if (!self.ctx->u.rep)
3941
SRE_MATCH_RETURN_ERROR(&self, SRE_ERROR_STATE);
3942
/* TODO: when this is called, should return right away! */
3944
state->ptr = self.ctx->ptr;
3946
self.ctx->count = self.ctx->u.rep->count+1;
3948
TRACE(("|%p|%p|MIN_UNTIL %d %p\n", self.ctx->pattern,
3949
self.ctx->ptr, self.ctx->count, self.ctx->u.rep->pattern));
3951
if (self.ctx->count < self.ctx->u.rep->pattern[1]) {
3952
/* not enough matches */
3953
self.ctx->u.rep->count = self.ctx->count;
3954
/*** Prepare for Pseudo-Recursion ***/
3955
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
3956
nextctx->last_ctx_pos = self.ctx_pos;
3957
nextctx->jump = JUMP_MIN_UNTIL_1;
3958
nextctx->pattern = self.ctx->u.rep->pattern + 3;
3959
self.ctx_pos = self.alloc_pos;
3963
/*** Restore after Pseudo-Recursion ***/
3965
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3966
/* TODO: Return if error */
3967
SRE_MATCH_RETURN_SUCCESS(&self);
3968
/* TODO: Go to exit */
3970
self.ctx->u.rep->count = self.ctx->count-1;
3971
state->ptr = self.ctx->ptr;
3972
SRE_MATCH_RETURN_FAILURE(&self);
3973
/* TODO: Go to exit */
3976
SRE_MATCH_LASTMARK_SAVE(&self, state);
3978
/* see if the tail matches */
3979
state->repeat = self.ctx->u.rep->prev;
3980
/*** Prepare for Pseudo-Recursion ***/
3981
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
3982
nextctx->last_ctx_pos = self.ctx_pos;
3983
nextctx->jump = JUMP_MIN_UNTIL_2;
3984
nextctx->pattern = self.ctx->pattern;
3985
self.ctx_pos = self.alloc_pos;
3989
/*** Restore after Pseudo-Recursion ***/
3991
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
3992
/* TODO: Return if error */
3993
SRE_MATCH_RETURN_SUCCESS(&self);
3994
/* TODO: Go to exit */
3997
state->repeat = self.ctx->u.rep;
3998
state->ptr = self.ctx->ptr;
4000
SRE_MATCH_LASTMARK_RESTORE(&self, state);
4002
if (self.ctx->count >= self.ctx->u.rep->pattern[2]
4003
&& self.ctx->u.rep->pattern[2] != 65535)
4004
SRE_MATCH_RETURN_FAILURE(&self);
4005
/* TODO: Go to exit */
4007
self.ctx->u.rep->count = self.ctx->count;
4008
/*** Prepare for Pseudo-Recursion ***/
4009
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
4010
nextctx->last_ctx_pos = self.ctx_pos;
4011
nextctx->jump = JUMP_MIN_UNTIL_3;
4012
nextctx->pattern = self.ctx->u.rep->pattern + 3;
4013
self.ctx_pos = self.alloc_pos;
4017
/*** Restore after Pseudo-Recursion ***/
4019
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
4020
/* TODO: Return if error */
4021
SRE_MATCH_RETURN_SUCCESS(&self);
4022
/* TODO: Go to exit */
4024
self.ctx->u.rep->count = self.ctx->count-1;
4025
state->ptr = self.ctx->ptr;
4026
SRE_MATCH_RETURN_FAILURE(&self);
4027
/* TODO: Go to exit */
4029
case SRE_OP_GROUPREF:
4030
/* match backreference */
4031
TRACE(("|%p|%p|GROUPREF %d\n", self.ctx->pattern,
4032
self.ctx->ptr, self.ctx->pattern[0]));
4033
i = self.ctx->pattern[0];
4035
Py_ssize_t groupref = i+i;
4036
if (groupref >= state->lastmark) {
4037
SRE_MATCH_RETURN_FAILURE(&self);
4038
/* TODO: Go to exit */
4040
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
4041
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
4042
if (!p || !e || e < p)
4043
SRE_MATCH_RETURN_FAILURE(&self);
4044
/* TODO: Go to exit */
4046
if (self.ctx->ptr >= self.end || *self.ctx->ptr != *p)
4047
SRE_MATCH_RETURN_FAILURE(&self);
4048
/* TODO: Go to exit */
4049
p++; self.ctx->ptr++;
4053
self.ctx->pattern++;
4056
case SRE_OP_GROUPREF_IGNORE:
4057
/* match backreference */
4058
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", self.ctx->pattern,
4059
self.ctx->ptr, self.ctx->pattern[0]));
4060
i = self.ctx->pattern[0];
4062
Py_ssize_t groupref = i+i;
4063
if (groupref >= state->lastmark) {
4064
SRE_MATCH_RETURN_FAILURE(&self);
4065
/* TODO: Go to exit */
4067
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
4068
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
4069
if (!p || !e || e < p)
4070
SRE_MATCH_RETURN_FAILURE(&self);
4071
/* TODO: Go to exit */
4073
if (self.ctx->ptr >= self.end ||
4074
state->lower(*self.ctx->ptr) != state->lower(*p))
4075
SRE_MATCH_RETURN_FAILURE(&self);
4076
/* TODO: Go to exit */
4077
p++; self.ctx->ptr++;
4081
self.ctx->pattern++;
4084
case SRE_OP_GROUPREF_EXISTS:
4085
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", self.ctx->pattern,
4086
self.ctx->ptr, self.ctx->pattern[0]));
4087
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
4088
i = self.ctx->pattern[0];
4090
Py_ssize_t groupref = i+i;
4091
if (groupref >= state->lastmark) {
4092
self.ctx->pattern += self.ctx->pattern[1];
4095
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
4096
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
4097
if (!p || !e || e < p) {
4098
self.ctx->pattern += self.ctx->pattern[1];
4103
self.ctx->pattern += 2;
4107
/* assert subpattern */
4108
/* <ASSERT> <skip> <back> <pattern> */
4109
TRACE(("|%p|%p|ASSERT %d\n", self.ctx->pattern,
4110
self.ctx->ptr, self.ctx->pattern[1]));
4111
state->ptr = self.ctx->ptr - self.ctx->pattern[1];
4112
if (state->ptr < state->beginning)
4113
SRE_MATCH_RETURN_FAILURE(&self);
4114
/* TODO: Go to exit */
4115
/*** Prepare for Pseudo-Recursion ***/
4116
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
4117
nextctx->last_ctx_pos = self.ctx_pos;
4118
nextctx->jump = JUMP_ASSERT;
4119
nextctx->pattern = self.ctx->pattern + 2;
4120
self.ctx_pos = self.alloc_pos;
4124
/*** Restore after Pseudo-Recursion ***/
4125
SRE_MATCH_RETURN_ON_FAILURE(&self, self.ret);
4126
/* TODO: Return if error; else Go to exit */
4127
self.ctx->pattern += self.ctx->pattern[0];
4130
case SRE_OP_ASSERT_NOT:
4131
/* assert not subpattern */
4132
/* <ASSERT_NOT> <skip> <back> <pattern> */
4133
TRACE(("|%p|%p|ASSERT_NOT %d\n", self.ctx->pattern,
4134
self.ctx->ptr, self.ctx->pattern[1]));
4135
state->ptr = self.ctx->ptr - self.ctx->pattern[1];
4136
if (state->ptr >= state->beginning) {
4137
/*** Prepare for Pseudo-Recursion ***/
4138
SRE_MATCH_DATA_ALLOC(&self, state, &nextctx);
4139
nextctx->last_ctx_pos = self.ctx_pos;
4140
nextctx->jump = JUMP_ASSERT_NOT;
4141
nextctx->pattern = self.ctx->pattern + 2;
4142
self.ctx_pos = self.alloc_pos;
4146
/*** Restore after Pseudo-Recursion ***/
4148
SRE_MATCH_RETURN_ON_ERROR(&self, self.ret);
4149
/* TODO: Return if error */
4150
SRE_MATCH_RETURN_FAILURE(&self);
4151
/* TODO: Go to exit */
4154
self.ctx->pattern += self.ctx->pattern[0];
4157
case SRE_OP_FAILURE:
4158
if (!SRE_MATCH_ON_FAILURE(&self, state)) {
4159
/* TODO: Check for ret < 0 and if so return */
4165
TRACE(("|%p|%p|UNKNOWN %d\n", self.ctx->pattern, self.ctx->ptr,
4166
self.ctx->pattern[-1]));
4167
SRE_MATCH_RETURN_ERROR(&self, SRE_ERROR_ILLEGAL);
4168
/* TODO: when this is called, should return right away! */
4173
self.ctx_pos = self.ctx->last_ctx_pos;
4174
self.jump = self.ctx->jump;
4175
SRE_MATCH_DATA_POP_DISCARD(state);
4176
if (self.ctx_pos == -1)
4178
SRE_MATCH_DATA_LOOKUP_AT(state, &self.ctx, self.ctx_pos);
4180
switch (self.jump) {
4181
case JUMP_MAX_UNTIL_2:
4182
TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", self.ctx->pattern, self.ctx->ptr));
4183
goto jump_max_until_2;
4184
case JUMP_MAX_UNTIL_3:
4185
TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", self.ctx->pattern, self.ctx->ptr));
4186
goto jump_max_until_3;
4187
case JUMP_MIN_UNTIL_2:
4188
TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", self.ctx->pattern, self.ctx->ptr));
4189
goto jump_min_until_2;
4190
case JUMP_MIN_UNTIL_3:
4191
TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", self.ctx->pattern, self.ctx->ptr));
4192
goto jump_min_until_3;
4194
TRACE(("|%p|%p|JUMP_BRANCH\n", self.ctx->pattern, self.ctx->ptr));
4196
case JUMP_MAX_UNTIL_1:
4197
TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", self.ctx->pattern, self.ctx->ptr));
4198
goto jump_max_until_1;
4199
case JUMP_MIN_UNTIL_1:
4200
TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", self.ctx->pattern, self.ctx->ptr));
4201
goto jump_min_until_1;
4203
TRACE(("|%p|%p|JUMP_REPEAT\n", self.ctx->pattern, self.ctx->ptr));
4205
case JUMP_REPEAT_ONE_1:
4206
TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", self.ctx->pattern, self.ctx->ptr));
4207
goto jump_repeat_one_1;
4208
case JUMP_REPEAT_ONE_2:
4209
TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", self.ctx->pattern, self.ctx->ptr));
4210
goto jump_repeat_one_2;
4211
case JUMP_MIN_REPEAT_ONE:
4212
TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", self.ctx->pattern, self.ctx->ptr));
4213
goto jump_min_repeat_one;
4215
TRACE(("|%p|%p|JUMP_ASSERT\n", self.ctx->pattern, self.ctx->ptr));
4217
case JUMP_ASSERT_NOT:
4218
TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", self.ctx->pattern, self.ctx->ptr));
4219
goto jump_assert_not;
4221
TRACE(("|%p|%p|RETURN %d\n", self.ctx->pattern, self.ctx->ptr, self.ret));
4225
return self.ret; /* should never get here */
4228
#endif /* !REMOVE_SRE_MATCH_MACROS */
4230
3718
SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
4232
3720
SRE_CHAR* ptr = (SRE_CHAR *)state->start;