333
457
/* the following section is compiled twice, with different character
337
SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
339
/* check if pointer is at given position */
341
Py_ssize_t thisp, thatp;
345
case SRE_AT_BEGINNING:
346
case SRE_AT_BEGINNING_STRING:
347
return ((void*) ptr == state->beginning);
349
case SRE_AT_BEGINNING_LINE:
350
return ((void*) ptr == state->beginning ||
351
SRE_IS_LINEBREAK((int) ptr[-1]));
354
return (((void*) (ptr+1) == state->end &&
355
SRE_IS_LINEBREAK((int) ptr[0])) ||
356
((void*) ptr == state->end));
358
case SRE_AT_END_LINE:
359
return ((void*) ptr == state->end ||
360
SRE_IS_LINEBREAK((int) ptr[0]));
362
case SRE_AT_END_STRING:
363
return ((void*) ptr == state->end);
365
case SRE_AT_BOUNDARY:
366
if (state->beginning == state->end)
368
thatp = ((void*) ptr > state->beginning) ?
369
SRE_IS_WORD((int) ptr[-1]) : 0;
370
thisp = ((void*) ptr < state->end) ?
371
SRE_IS_WORD((int) ptr[0]) : 0;
372
return thisp != thatp;
374
case SRE_AT_NON_BOUNDARY:
375
if (state->beginning == state->end)
377
thatp = ((void*) ptr > state->beginning) ?
378
SRE_IS_WORD((int) ptr[-1]) : 0;
379
thisp = ((void*) ptr < state->end) ?
380
SRE_IS_WORD((int) ptr[0]) : 0;
381
return thisp == thatp;
383
case SRE_AT_LOC_BOUNDARY:
384
if (state->beginning == state->end)
386
thatp = ((void*) ptr > state->beginning) ?
387
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
388
thisp = ((void*) ptr < state->end) ?
389
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
390
return thisp != thatp;
392
case SRE_AT_LOC_NON_BOUNDARY:
393
if (state->beginning == state->end)
395
thatp = ((void*) ptr > state->beginning) ?
396
SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
397
thisp = ((void*) ptr < state->end) ?
398
SRE_LOC_IS_WORD((int) ptr[0]) : 0;
399
return thisp == thatp;
401
#if defined(HAVE_UNICODE)
402
case SRE_AT_UNI_BOUNDARY:
403
if (state->beginning == state->end)
405
thatp = ((void*) ptr > state->beginning) ?
406
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
407
thisp = ((void*) ptr < state->end) ?
408
SRE_UNI_IS_WORD((int) ptr[0]) : 0;
409
return thisp != thatp;
411
case SRE_AT_UNI_NON_BOUNDARY:
412
if (state->beginning == state->end)
414
thatp = ((void*) ptr > state->beginning) ?
415
SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
416
thisp = ((void*) ptr < state->end) ?
417
SRE_UNI_IS_WORD((int) ptr[0]) : 0;
418
return thisp == thatp;
460
LOCAL(int) SRE_IN_RANGE(SRE_CODE ch, SRE_CODE lower, SRE_CODE upper) {
461
return lower <= ch && ch <= upper;
427
SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
429
/* check if character is a member of the given set */
464
LOCAL(int) SRE_IN(SRE_CODE* charset, SRE_CODE ch) {
465
// Check if character is a member of the given set.
466
SRE_CODE* charset_end = charset + charset[0];
471
switch (*charset++) {
472
case SRE_OP_BIGCHARSET:
473
// <BIGCHARSET> <set>
474
if (in_bigcharset(charset, ch))
476
charset = skip_bigcharset(charset);
480
if (in_charset(charset, ch))
482
charset = skip_charset(charset);
486
if (SRE_IS_DIGIT(ch))
439
489
case SRE_OP_LITERAL:
440
/* <LITERAL> <code> */
446
case SRE_OP_CATEGORY:
447
/* <CATEGORY> <code> */
448
if (sre_category(set[0], (int) ch))
454
if (sizeof(SRE_CODE) == 2) {
455
/* <CHARSET> <bitmap> (16 bits per code word) */
456
if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
461
/* <CHARSET> <bitmap> (32 bits per code word) */
462
if (ch < 256 && (set[ch >> 5] & (1 << (ch & 31))))
491
if (ch == charset[0])
495
case SRE_OP_LOC_NOT_WORD:
497
if (! SRE_LOC_IS_WORD(ch))
500
case SRE_OP_LOC_WORD:
502
if (SRE_LOC_IS_WORD(ch))
505
case SRE_OP_NOT_DIGIT:
507
if (!SRE_IS_DIGIT(ch))
510
case SRE_OP_NOT_LITERAL:
511
// <NOT_LITERAL> <code>
512
if (ch != charset[0])
516
case SRE_OP_NOT_WHITESPACE:
518
if (!SRE_IS_WHITESPACE(ch))
521
case SRE_OP_NOT_WORD:
523
if (! SRE_IS_WORD(ch))
468
526
case SRE_OP_RANGE:
469
/* <RANGE> <lower> <upper> */
470
if (set[0] <= ch && ch <= set[1])
479
case SRE_OP_BIGCHARSET:
480
/* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
482
Py_ssize_t count, block;
485
if (sizeof(SRE_CODE) == 2) {
486
block = ((unsigned char*)set)[ch >> 8];
488
if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
493
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
494
* warnings when c's type supports only numbers < N+1 */
496
block = ((unsigned char*)set)[ch >> 8];
501
(set[block*8 + ((ch & 255)>>5)] & (1 << (ch & 31))))
527
// <RANGE> <lower> <upper>
528
if (SRE_IN_RANGE(ch, charset[0], charset[1]))
532
case SRE_OP_UNI_DIGIT:
534
if (SRE_UNI_IS_DIGIT(ch))
537
case SRE_OP_UNI_NOT_DIGIT:
539
if (!SRE_UNI_IS_DIGIT(ch))
542
case SRE_OP_UNI_NOT_WHITESPACE:
543
// <UNI_NOT_WHITESPACE>
544
if (! SRE_UNI_IS_WHITESPACE(ch))
547
case SRE_OP_UNI_NOT_WORD:
549
if (! SRE_UNI_IS_WORD(ch))
552
case SRE_OP_UNI_WHITESPACE:
554
if (SRE_UNI_IS_WHITESPACE(ch))
557
case SRE_OP_UNI_WORD:
559
if (SRE_UNI_IS_WORD(ch))
562
case SRE_OP_WHITESPACE:
564
if (SRE_IS_WHITESPACE(ch))
509
573
/* internal error -- there's not much we can do about it
510
574
here, so let's just pretend it didn't match... */
516
LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
519
SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
522
SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
523
SRE_CHAR* end = (SRE_CHAR *)state->end;
527
if (maxcount < end - ptr && maxcount != 65535)
528
end = ptr + maxcount;
530
switch (pattern[0]) {
534
TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
535
while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
540
/* repeated dot wildcard. */
541
TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
542
while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
547
/* repeated dot wildcard. skip to the end of the target
548
string, and backtrack from there */
549
TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
554
/* repeated literal */
556
TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
557
while (ptr < end && (SRE_CODE) *ptr == chr)
561
case SRE_OP_LITERAL_IGNORE:
562
/* repeated literal */
564
TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
565
while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
569
case SRE_OP_NOT_LITERAL:
570
/* repeated non-literal */
572
TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
573
while (ptr < end && (SRE_CODE) *ptr != chr)
577
case SRE_OP_NOT_LITERAL_IGNORE:
578
/* repeated non-literal */
580
TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
581
while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
586
/* repeated single character pattern */
587
TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
588
while ((SRE_CHAR*) state->ptr < end) {
589
i = SRE_MATCH(state, pattern);
577
} while (charset < charset_end);
582
typedef struct SRE_CONTEXT {
584
SRE_CHAR* text_start;
586
SRE_CHAR* final_linebreak;
587
SRE_CODE* pattern_ptr;
589
SRE_CHAR** repeat_start;
592
SRE_BACKTRACK_CHUNK* backtrack_chunk;
593
SRE_SAVED_MARKS_CHUNK* saved_marks_chunk;
596
LOCAL(int) SRE_CLEANUP(SRE_CONTEXT* context, SRE_STATE* state, int result) {
597
while (context->backtrack_chunk->previous != NULL) {
598
SRE_BACKTRACK_CHUNK* previous = context->backtrack_chunk->previous;
599
PyMem_FREE(context->backtrack_chunk);
600
context->backtrack_chunk = previous;
602
context->backtrack_chunk->count = 0;
604
while (context->saved_marks_chunk->previous != NULL) {
605
SRE_SAVED_MARKS_CHUNK* previous = context->saved_marks_chunk->previous;
606
PyMem_FREE(context->saved_marks_chunk);
607
context->saved_marks_chunk = previous;
609
context->saved_marks_chunk->count = 0;
611
state->backtrack_chunk = context->backtrack_chunk;
612
state->saved_marks_chunk = context->saved_marks_chunk;
617
LOCAL(int) SRE_SAVE_BACKTRACK(SRE_CONTEXT* context, int op, SRE_CODE* pattern_ptr, int index) {
618
SRE_BACKTRACK_ITEM* backtrack_item;
620
if (context->backtrack_chunk->count >= SRE_BACKTRACK_CHUNK_SIZE) {
621
SRE_BACKTRACK_CHUNK* new_backtrack_chunk = (SRE_BACKTRACK_CHUNK*)PyMem_MALLOC(sizeof(SRE_BACKTRACK_CHUNK));
622
if (new_backtrack_chunk == NULL)
623
return SRE_ERROR_MEMORY;
625
new_backtrack_chunk->previous = context->backtrack_chunk;
626
new_backtrack_chunk->count = 0;
627
context->backtrack_chunk = new_backtrack_chunk;
630
backtrack_item = &context->backtrack_chunk->items[context->backtrack_chunk->count++];
631
backtrack_item->op = op;
632
backtrack_item->pattern_ptr = pattern_ptr;
633
backtrack_item->text_ptr = context->text_ptr;
634
backtrack_item->index = index;
635
backtrack_item->repeat_counter = context->repeat_counter[index];
636
backtrack_item->repeat_start = context->repeat_start[index];
641
LOCAL(void) SRE_DISCARD_BACKTRACK(SRE_CONTEXT* context) {
642
SRE_BACKTRACK_ITEM* backtrack_item = &context->backtrack_chunk->items[--context->backtrack_chunk->count];
643
if (backtrack_item->index >= 0) {
644
context->repeat_counter[backtrack_item->index] = backtrack_item->repeat_counter;
645
context->repeat_start[backtrack_item->index] = backtrack_item->repeat_start;
648
if (context->backtrack_chunk->count == 0 && context->backtrack_chunk->previous != NULL) {
649
SRE_BACKTRACK_CHUNK* previous = context->backtrack_chunk->previous;
650
PyMem_FREE(context->backtrack_chunk);
651
context->backtrack_chunk = previous;
655
LOCAL(int) SRE_SAVE_MARKS(SRE_CONTEXT* context) {
656
if (context->saved_marks_chunk->count + context->mark_count > SRE_SAVED_MARKS_CHUNK_SIZE) {
657
SRE_SAVED_MARKS_CHUNK* new_marks_chunk = (SRE_SAVED_MARKS_CHUNK*)PyMem_MALLOC(sizeof(SRE_SAVED_MARKS_CHUNK));
658
if (new_marks_chunk == NULL)
659
return SRE_ERROR_MEMORY;
661
new_marks_chunk->previous = context->saved_marks_chunk;
662
new_marks_chunk->count = 0;
663
context->saved_marks_chunk = new_marks_chunk;
666
memmove(&context->saved_marks_chunk->marks[context->saved_marks_chunk->count], context->mark, context->mark_count * sizeof(SRE_CHAR*));
667
context->saved_marks_chunk->count += context->mark_count;
672
LOCAL(void) SRE_RESTORE_MARKS(SRE_CONTEXT* context) {
673
context->saved_marks_chunk->count -= context->mark_count;
674
memmove(context->mark, &context->saved_marks_chunk->marks[context->saved_marks_chunk->count], context->mark_count * sizeof(SRE_CHAR*));
676
if (context->saved_marks_chunk->count == 0 && context->saved_marks_chunk->previous != NULL) {
677
SRE_SAVED_MARKS_CHUNK* previous = context->saved_marks_chunk->previous;
678
PyMem_FREE(context->saved_marks_chunk);
679
context->saved_marks_chunk = previous;
683
LOCAL(void) SRE_DISCARD_SAVED_MARKS(SRE_CONTEXT* context) {
684
context->saved_marks_chunk->count -= context->mark_count;
686
if (context->saved_marks_chunk->count == 0 && context->saved_marks_chunk->previous != NULL) {
687
SRE_SAVED_MARKS_CHUNK* previous = context->saved_marks_chunk->previous;
688
PyMem_FREE(context->saved_marks_chunk);
689
context->saved_marks_chunk = previous;
693
LOCAL(void) SRE_DISCARD_UNTIL_OP(SRE_CONTEXT* context, int op) {
695
SRE_BACKTRACK_ITEM* backtrack_item = &context->backtrack_chunk->items[context->backtrack_chunk->count - 1];
696
if (backtrack_item->op == op)
699
switch(backtrack_item->op) {
701
case SRE_OP_END_REPEAT_MAX:
702
case SRE_OP_END_REPEAT_MAX_REV:
703
case SRE_OP_END_REPEAT_MIN:
704
case SRE_OP_END_REPEAT_MIN_REV:
705
case SRE_OP_END_REPEAT_POSS:
706
case SRE_OP_END_REPEAT_POSS_REV:
707
case SRE_OP_REPEAT_MAX:
708
case SRE_OP_REPEAT_MAX_REV:
709
case SRE_OP_REPEAT_MIN:
710
case SRE_OP_REPEAT_MIN_REV:
711
case SRE_OP_REPEAT_POSS:
712
case SRE_OP_REPEAT_POSS_REV:
713
SRE_DISCARD_SAVED_MARKS(context);
595
TRACE(("|%p|%p|COUNT %d\n", pattern, ptr,
596
(SRE_CHAR*) state->ptr - ptr));
597
return (SRE_CHAR*) state->ptr - ptr;
600
TRACE(("|%p|%p|COUNT %d\n", pattern, ptr, ptr - (SRE_CHAR*) state->ptr));
601
return ptr - (SRE_CHAR*) state->ptr;
604
#if 0 /* not used in this release */
606
SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
608
/* check if an SRE_OP_INFO block matches at the current position.
609
returns the number of SRE_CODE objects to skip if successful, 0
612
SRE_CHAR* end = state->end;
613
SRE_CHAR* ptr = state->ptr;
616
/* check minimal length */
617
if (pattern[3] && (end - ptr) < pattern[3])
620
/* check known prefix */
621
if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
622
/* <length> <skip> <prefix data> <overlap data> */
623
for (i = 0; i < pattern[5]; i++)
624
if ((SRE_CODE) ptr[i] != pattern[7 + i])
626
return pattern[0] + 2 * pattern[6];
632
/* The macros below should be used to protect recursive SRE_MATCH()
633
* calls that *failed* and do *not* return immediately (IOW, those
634
* that will backtrack). Explaining:
636
* - Recursive SRE_MATCH() returned true: that's usually a success
637
* (besides atypical cases like ASSERT_NOT), therefore there's no
638
* reason to restore lastmark;
640
* - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
641
* is returning to the caller: If the current SRE_MATCH() is the
642
* top function of the recursion, returning false will be a matching
643
* failure, and it doesn't matter where lastmark is pointing to.
644
* If it's *not* the top function, it will be a recursive SRE_MATCH()
645
* failure by itself, and the calling SRE_MATCH() will have to deal
646
* with the failure by the same rules explained here (it will restore
647
* lastmark by itself if necessary);
649
* - Recursive SRE_MATCH() returned false, and will continue the
650
* outside 'for' loop: must be protected when breaking, since the next
651
* OP could potentially depend on lastmark;
653
* - Recursive SRE_MATCH() returned false, and will be called again
654
* inside a local for/while loop: must be protected between each
655
* loop iteration, since the recursive SRE_MATCH() could do anything,
656
* and could potentially depend on lastmark.
658
* For more information, check the discussion at SF patch #712900.
660
#define LASTMARK_SAVE() \
662
ctx->lastmark = state->lastmark; \
663
ctx->lastindex = state->lastindex; \
665
#define LASTMARK_RESTORE() \
667
state->lastmark = ctx->lastmark; \
668
state->lastindex = ctx->lastindex; \
671
#define RETURN_ERROR(i) do { return i; } while(0)
672
#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
673
#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
675
#define RETURN_ON_ERROR(i) \
676
do { if (i < 0) RETURN_ERROR(i); } while (0)
677
#define RETURN_ON_SUCCESS(i) \
678
do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
679
#define RETURN_ON_FAILURE(i) \
680
do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
684
#define DATA_STACK_ALLOC(state, type, ptr) \
686
alloc_pos = state->data_stack_base; \
687
TRACE(("allocating %s in %d (%d)\n", \
688
SFY(type), alloc_pos, sizeof(type))); \
689
if (state->data_stack_size < alloc_pos+sizeof(type)) { \
690
int j = data_stack_grow(state, sizeof(type)); \
691
if (j < 0) return j; \
693
DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
695
ptr = (type*)(state->data_stack+alloc_pos); \
696
state->data_stack_base += sizeof(type); \
699
#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
701
TRACE(("looking up %s at %d\n", SFY(type), pos)); \
702
ptr = (type*)(state->data_stack+pos); \
705
#define DATA_STACK_PUSH(state, data, size) \
707
TRACE(("copy data in %p to %d (%d)\n", \
708
data, state->data_stack_base, size)); \
709
if (state->data_stack_size < state->data_stack_base+size) { \
710
int j = data_stack_grow(state, size); \
711
if (j < 0) return j; \
713
DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
715
memcpy(state->data_stack+state->data_stack_base, data, size); \
716
state->data_stack_base += size; \
719
#define DATA_STACK_POP(state, data, size, discard) \
721
TRACE(("copy data to %p from %d (%d)\n", \
722
data, state->data_stack_base-size, size)); \
723
memcpy(data, state->data_stack+state->data_stack_base-size, size); \
725
state->data_stack_base -= size; \
728
#define DATA_STACK_POP_DISCARD(state, size) \
730
TRACE(("discard data from %d (%d)\n", \
731
state->data_stack_base-size, size)); \
732
state->data_stack_base -= size; \
735
#define DATA_PUSH(x) \
736
DATA_STACK_PUSH(state, (x), sizeof(*(x)))
737
#define DATA_POP(x) \
738
DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
739
#define DATA_POP_DISCARD(x) \
740
DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
741
#define DATA_ALLOC(t,p) \
742
DATA_STACK_ALLOC(state, t, p)
743
#define DATA_LOOKUP_AT(t,p,pos) \
744
DATA_STACK_LOOKUP_AT(state,t,p,pos)
746
#define MARK_PUSH(lastmark) \
747
do if (lastmark > 0) { \
748
i = lastmark; /* ctx->lastmark may change if reallocated */ \
749
DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
751
#define MARK_POP(lastmark) \
752
do if (lastmark > 0) { \
753
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
755
#define MARK_POP_KEEP(lastmark) \
756
do if (lastmark > 0) { \
757
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
759
#define MARK_POP_DISCARD(lastmark) \
760
do if (lastmark > 0) { \
761
DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
765
#define JUMP_MAX_UNTIL_1 1
766
#define JUMP_MAX_UNTIL_2 2
767
#define JUMP_MAX_UNTIL_3 3
768
#define JUMP_MIN_UNTIL_1 4
769
#define JUMP_MIN_UNTIL_2 5
770
#define JUMP_MIN_UNTIL_3 6
771
#define JUMP_REPEAT 7
772
#define JUMP_REPEAT_ONE_1 8
773
#define JUMP_REPEAT_ONE_2 9
774
#define JUMP_MIN_REPEAT_ONE 10
775
#define JUMP_BRANCH 11
776
#define JUMP_ASSERT 12
777
#define JUMP_ASSERT_NOT 13
778
#define JUMP_POSS_REPEAT_1 14
779
#define JUMP_POSS_REPEAT_2 15
780
#define JUMP_ATOMIC_GROUP 16
781
#define JUMP__COUNT 17
783
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
784
DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
785
nextctx->last_ctx_pos = ctx_pos; \
786
nextctx->jump = jumpvalue; \
787
nextctx->pattern = nextpattern; \
788
ctx_pos = alloc_pos; \
792
while (0) /* gcc doesn't like labels at end of scopes */ \
795
Py_ssize_t last_ctx_pos;
801
Py_ssize_t lastindex;
716
SRE_DISCARD_BACKTRACK(context);
720
LOCAL(int) SRE_AT_BOUNDARY(SRE_CONTEXT* context) {
721
int before = context->text_ptr > context->text_start && SRE_IS_WORD(context->text_ptr[-1]);
722
int after = context->text_ptr < context->text_end && SRE_IS_WORD(context->text_ptr[0]);
723
return before != after;
726
LOCAL(int) SRE_LOC_AT_BOUNDARY(SRE_CONTEXT* context) {
727
int before = context->text_ptr > context->text_start && SRE_LOC_IS_WORD(context->text_ptr[-1]);
728
int after = context->text_ptr < context->text_end && SRE_LOC_IS_WORD(context->text_ptr[0]);
729
return before != after;
732
LOCAL(int) SRE_UNI_AT_BOUNDARY(SRE_CONTEXT* context) {
733
int before = context->text_ptr > context->text_start && SRE_UNI_IS_WORD(context->text_ptr[-1]);
734
int after = context->text_ptr < context->text_end && SRE_UNI_IS_WORD(context->text_ptr[0]);
735
return before != after;
738
LOCAL(int) SRE_LOOK_AHEAD_ONE(SRE_CONTEXT* context, SRE_STATE* state, SRE_CODE* look_literal) {
739
switch (look_literal[0]) {
741
return context->text_ptr[0] == (SRE_CHAR)look_literal[1];
742
case SRE_OP_LITERAL_IGNORE:
743
return state->lower(context->text_ptr[0]) == (SRE_CHAR)look_literal[1];
744
case SRE_OP_LITERAL_STRING:
745
return context->text_ptr[0] == (SRE_CHAR)look_literal[2];
746
case SRE_OP_LITERAL_STRING_IGNORE:
747
return state->lower(context->text_ptr[0]) == (SRE_CHAR)look_literal[2];
748
case SRE_OP_BOUNDARY:
749
return SRE_AT_BOUNDARY(context);
750
case SRE_OP_LOC_BOUNDARY:
751
return SRE_LOC_AT_BOUNDARY(context);
752
case SRE_OP_UNI_BOUNDARY:
753
return SRE_UNI_AT_BOUNDARY(context);
759
LOCAL(int) SRE_LOOK_AHEAD_ONE_REV(SRE_CONTEXT* context, SRE_STATE* state, SRE_CODE* look_literal) {
760
switch (look_literal[0]) {
762
return context->text_ptr[-1] == (SRE_CHAR)look_literal[1];
763
case SRE_OP_LITERAL_IGNORE:
764
return state->lower(context->text_ptr[-1]) == (SRE_CHAR)look_literal[1];
765
case SRE_OP_LITERAL_STRING:
766
return context->text_ptr[-1] == (SRE_CHAR)look_literal[2];
767
case SRE_OP_LITERAL_STRING_IGNORE:
768
return state->lower(context->text_ptr[-1]) == (SRE_CHAR)look_literal[2];
769
case SRE_OP_BOUNDARY:
770
return SRE_AT_BOUNDARY(context);
771
case SRE_OP_LOC_BOUNDARY:
772
return SRE_LOC_AT_BOUNDARY(context);
773
case SRE_OP_UNI_BOUNDARY:
774
return SRE_UNI_AT_BOUNDARY(context);
780
LOCAL(void) SRE_LOOK_AHEAD_MANY(SRE_CONTEXT* context, SRE_CHAR* limit_ptr, SRE_STATE* state, SRE_CODE* look_literal) {
781
switch (look_literal[0]) {
783
while (context->text_ptr >= limit_ptr && context->text_ptr[0] != (SRE_CHAR)look_literal[1])
786
case SRE_OP_LITERAL_IGNORE:
787
while (context->text_ptr >= limit_ptr && state->lower(context->text_ptr[0]) != (SRE_CHAR)look_literal[1])
790
case SRE_OP_LITERAL_STRING:
791
while (context->text_ptr >= limit_ptr && context->text_ptr[0] != (SRE_CHAR)look_literal[2])
794
case SRE_OP_LITERAL_STRING_IGNORE:
795
while (context->text_ptr >= limit_ptr && state->lower(context->text_ptr[0]) != (SRE_CHAR)look_literal[2])
798
case SRE_OP_BOUNDARY:
799
while (context->text_ptr >= limit_ptr && !SRE_AT_BOUNDARY(context))
802
case SRE_OP_LOC_BOUNDARY:
803
while (context->text_ptr >= limit_ptr && !SRE_LOC_AT_BOUNDARY(context))
806
case SRE_OP_UNI_BOUNDARY:
807
while (context->text_ptr >= limit_ptr && !SRE_UNI_AT_BOUNDARY(context))
813
LOCAL(void) SRE_LOOK_AHEAD_MANY_REV(SRE_CONTEXT* context, SRE_CHAR* limit_ptr, SRE_STATE* state, SRE_CODE* look_literal) {
814
switch (look_literal[0]) {
816
while (context->text_ptr <= limit_ptr && context->text_ptr[-1] != (SRE_CHAR)look_literal[1])
819
case SRE_OP_LITERAL_IGNORE:
820
while (context->text_ptr <= limit_ptr && state->lower(context->text_ptr[-1]) != (SRE_CHAR)look_literal[1])
823
case SRE_OP_LITERAL_STRING:
824
while (context->text_ptr <= limit_ptr && context->text_ptr[-1] != (SRE_CHAR)look_literal[2])
827
case SRE_OP_LITERAL_STRING_IGNORE:
828
while (context->text_ptr <= limit_ptr && state->lower(context->text_ptr[-1]) != (SRE_CHAR)look_literal[2])
831
case SRE_OP_BOUNDARY:
832
while (context->text_ptr <= limit_ptr && !SRE_AT_BOUNDARY(context))
835
case SRE_OP_LOC_BOUNDARY:
836
while (context->text_ptr <= limit_ptr && !SRE_LOC_AT_BOUNDARY(context))
839
case SRE_OP_UNI_BOUNDARY:
840
while (context->text_ptr <= limit_ptr && !SRE_UNI_AT_BOUNDARY(context))
808
846
/* check if string matches the given pattern. returns <0 for
809
847
error, 0 for failure, and 1 for success */
810
848
LOCAL(Py_ssize_t)
811
SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
813
SRE_CHAR* end = (SRE_CHAR *)state->end;
814
Py_ssize_t alloc_pos, ctx_pos = -1;
815
Py_ssize_t i, ret = 0;
817
unsigned int sigcount=0;
819
SRE_MATCH_CONTEXT* ctx;
820
SRE_MATCH_CONTEXT* nextctx;
822
TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
824
DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
825
ctx->last_ctx_pos = -1;
826
ctx->jump = JUMP_NONE;
827
ctx->pattern = pattern;
832
ctx->ptr = (SRE_CHAR *)state->ptr;
834
if (ctx->pattern[0] == SRE_OP_INFO) {
835
/* optimization info block */
836
/* <INFO> <1=skip> <2=flags> <3=min> ... */
837
if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
838
TRACE(("reject (got %d chars, need %d)\n",
839
(end - ctx->ptr), ctx->pattern[3]));
842
ctx->pattern += ctx->pattern[1] + 1;
849
SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern_ptr) {
851
unsigned int sigcount = 0;
854
context.text_ptr = state->ptr;
855
context.text_start = (SRE_CHAR *)state->beginning;
856
context.text_end = (SRE_CHAR *)state->end;
857
context.pattern_ptr = pattern_ptr;
858
context.repeat_counter = state->repeat_counter;
859
context.repeat_start = (SRE_CHAR**)state->repeat_start;
860
context.backtrack_chunk = state->backtrack_chunk;
861
context.saved_marks_chunk = state->saved_marks_chunk;
862
context.mark = (SRE_CHAR**)state->mark;
863
context.mark_count = state->mark_count;
865
// Point to the final newline if it's the final character.
866
context.final_linebreak = context.text_start < context.text_end && SRE_IS_LINEBREAK(context.text_end[-1]) ? context.text_end - 1 : NULL;
868
TRACE(("|%p|%p|ENTER\n", context.pattern_ptr, context.text_ptr));
869
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_FAILURE, NULL, -1);
871
return SRE_CLEANUP(&context, state, result);
872
memset(context.mark, 0, context.mark_count * sizeof(SRE_CHAR*));
847
877
if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
848
RETURN_ERROR(SRE_ERROR_INTERRUPTED);
850
switch (*ctx->pattern++) {
855
TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
856
ctx->ptr, ctx->pattern[0]));
859
state->lastindex = i/2 + 1;
860
if (i > state->lastmark) {
861
/* state->lastmark is the highest valid index in the
862
state->mark array. If it is increased by more than 1,
863
the intervening marks must be set to NULL to signal
864
that these marks have not been encountered. */
865
Py_ssize_t j = state->lastmark + 1;
867
state->mark[j++] = NULL;
870
state->mark[i] = ctx->ptr;
875
/* match literal string */
876
/* <LITERAL> <code> */
877
TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
878
ctx->ptr, *ctx->pattern));
879
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
885
case SRE_OP_NOT_LITERAL:
886
/* match anything that is not literal character */
887
/* <NOT_LITERAL> <code> */
888
TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
889
ctx->ptr, *ctx->pattern));
890
if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
898
TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
899
state->ptr = ctx->ptr;
903
/* match at given position */
905
TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
906
if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
911
case SRE_OP_CATEGORY:
912
/* match at given category */
913
/* <CATEGORY> <code> */
914
TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
915
ctx->ptr, *ctx->pattern));
916
if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
878
return SRE_CLEANUP(&context, state, SRE_ERROR_INTERRUPTED);
880
switch (*context.pattern_ptr++) {
923
/* match anything (except a newline) */
925
TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
926
if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
882
// Match anything (except a newline).
884
TRACE(("|%p|%p|ANY\n", context.pattern_ptr, context.text_ptr));
885
if (context.text_ptr >= context.text_end || SRE_IS_LINEBREAK(context.text_ptr[0]))
931
889
case SRE_OP_ANY_ALL:
934
TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
892
TRACE(("|%p|%p|ANY_ALL\n", context.pattern_ptr, context.text_ptr));
893
if (context.text_ptr >= context.text_end)
897
case SRE_OP_ANY_ALL_REV:
900
TRACE(("|%p|%p|ANY_ALL_REV\n", context.pattern_ptr, context.text_ptr));
901
if (context.text_ptr <= context.text_start)
906
// Match anything (except a newline).
908
TRACE(("|%p|%p|ANY_REV\n", context.pattern_ptr, context.text_ptr));
909
if (context.text_ptr <= context.text_start || SRE_IS_LINEBREAK(context.text_ptr[-1]))
914
// Assert subpattern.
915
// <ASSERT> <skip to end> ... <END_ASSERT>
916
TRACE(("|%p|%p|ASSERT\n", context.pattern_ptr, context.text_ptr));
917
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_ASSERT, context.pattern_ptr, -1);
919
return SRE_CLEANUP(&context, state, result);
920
result = SRE_SAVE_MARKS(&context);
922
return SRE_CLEANUP(&context, state, result);
923
context.pattern_ptr++;
925
case SRE_OP_ASSERT_NOT:
926
// Assert not subpattern.
927
// <ASSERT_NOT> <skip to end> ... <END_ASSERT_NOT>
928
TRACE(("|%p|%p|ASSERT_NOT\n", context.pattern_ptr, context.text_ptr));
929
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_ASSERT_NOT, context.pattern_ptr, -1);
931
return SRE_CLEANUP(&context, state, result);
932
result = SRE_SAVE_MARKS(&context);
934
return SRE_CLEANUP(&context, state, result);
935
context.pattern_ptr++;
938
// Atomic subpattern.
939
// <ATOMIC> ... <END_ATOMIC>
940
TRACE(("|%p|%p|ATOMIC\n", context.pattern_ptr, context.text_ptr));
941
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_ATOMIC, NULL, -1);
943
return SRE_CLEANUP(&context, state, result);
944
result = SRE_SAVE_MARKS(&context);
946
return SRE_CLEANUP(&context, state, result);
948
case SRE_OP_BIGCHARSET:
949
// Match character in charset.
950
// <BIGCHARSET> <charset>
951
TRACE(("|%p|%p|BIGCHARSET\n", context.pattern_ptr, context.text_ptr));
952
if (context.text_ptr >= context.text_end || !in_bigcharset(context.pattern_ptr, context.text_ptr[0]))
954
context.pattern_ptr = skip_bigcharset(context.pattern_ptr);
957
case SRE_OP_BIGCHARSET_IGNORE:
958
// Match character in charset, ignoring case.
959
// <BIGCHARSET_IGNORE> <charset>
960
TRACE(("|%p|%p|BIGCHARSET\n", context.pattern_ptr, context.text_ptr));
961
if (context.text_ptr >= context.text_end || !in_bigcharset(context.pattern_ptr, state->lower(context.text_ptr[0])))
963
context.pattern_ptr = skip_bigcharset(context.pattern_ptr);
966
case SRE_OP_BIGCHARSET_IGNORE_REV:
967
// Match character in charset, ignoring case.
968
// <BIGCHARSET_IGNORE_REV> <charset>
969
TRACE(("|%p|%p|BIGCHARSET_REV\n", context.pattern_ptr, context.text_ptr));
970
if (context.text_ptr <= context.text_start || !in_bigcharset(context.pattern_ptr, state->lower(context.text_ptr[-1])))
972
context.pattern_ptr = skip_bigcharset(context.pattern_ptr);
975
case SRE_OP_BIGCHARSET_REV:
976
// Match character in charset.
977
// <BIGCHARSET_REV> <charset>
978
TRACE(("|%p|%p|BIGCHARSET_REV\n", context.pattern_ptr, context.text_ptr));
979
if (context.text_ptr <= context.text_start || !in_bigcharset(context.pattern_ptr, context.text_ptr[-1]))
981
context.pattern_ptr = skip_bigcharset(context.pattern_ptr);
984
case SRE_OP_BOUNDARY:
985
// Boundary between word and non-word.
987
TRACE(("|%p|%p|BOUNDARY\n", context.pattern_ptr, context.text_ptr));
988
if (! SRE_AT_BOUNDARY(&context))
993
// <BRANCH> <skip to next> ... <JUMP> <skip to end> <skip to next> ... <JUMP> <skip to end> 0
994
TRACE(("|%p|%p|BRANCH\n", context.pattern_ptr, context.text_ptr));
995
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_BRANCH, context.pattern_ptr, -1);
997
return SRE_CLEANUP(&context, state, result);
998
result = SRE_SAVE_MARKS(&context);
1000
return SRE_CLEANUP(&context, state, result);
1001
context.pattern_ptr++;
1003
case SRE_OP_CHARSET:
1004
// Match character in charset.
1005
// <CHARSET> <charset>
1006
TRACE(("|%p|%p|CHARSET\n", context.pattern_ptr, context.text_ptr));
1007
if (context.text_ptr >= context.text_end || !in_charset(context.pattern_ptr, context.text_ptr[0]))
1009
context.pattern_ptr = skip_charset(context.pattern_ptr);
1012
case SRE_OP_CHARSET_IGNORE:
1013
// Match character in charset, ignoring case.
1014
// <CHARSET_IGNORE> <charset>
1015
TRACE(("|%p|%p|CHARSET\n", context.pattern_ptr, context.text_ptr));
1016
if (context.text_ptr >= context.text_end || !in_charset(context.pattern_ptr, state->lower(context.text_ptr[0])))
1018
context.pattern_ptr = skip_charset(context.pattern_ptr);
1021
case SRE_OP_CHARSET_IGNORE_REV:
1022
// Match character in charset, ignoring case.
1023
// <CHARSET_IGNORE_REV> <charset>
1024
TRACE(("|%p|%p|CHARSET_REV\n", context.pattern_ptr, context.text_ptr));
1025
if (context.text_ptr <= context.text_start || !in_charset(context.pattern_ptr, state->lower(context.text_ptr[-1])))
1027
context.pattern_ptr = skip_charset(context.pattern_ptr);
1030
case SRE_OP_CHARSET_REV:
1031
// Match character in charset.
1032
// <CHARSET_REV> <charset>
1033
TRACE(("|%p|%p|CHARSET_REV\n", context.pattern_ptr, context.text_ptr));
1034
if (context.text_ptr <= context.text_start || !in_charset(context.pattern_ptr, context.text_ptr[-1]))
1036
context.pattern_ptr = skip_charset(context.pattern_ptr);
1042
TRACE(("|%p|%p|DIGIT\n", context.pattern_ptr, context.text_ptr));
1043
if (context.text_ptr >= context.text_end || !SRE_IS_DIGIT(context.text_ptr[0]))
1047
case SRE_OP_DIGIT_REV:
1050
TRACE(("|%p|%p|DIGIT_REV\n", context.pattern_ptr, context.text_ptr));
1051
if (context.text_ptr <= context.text_start || !SRE_IS_DIGIT(context.text_ptr[-1]))
1055
case SRE_OP_END_ASSERT:
1057
// Assert subpattern.
1058
// <ASSERT> <skip to end> ... <END_ASSERT>
1059
SRE_BACKTRACK_ITEM* backtrack_item;
1060
TRACE(("|%p|%p|END_ASSERT\n", context.pattern_ptr, context.text_ptr));
1061
SRE_DISCARD_UNTIL_OP(&context, SRE_OP_ASSERT);
1062
backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
1063
context.text_ptr = backtrack_item->text_ptr;
1064
SRE_DISCARD_BACKTRACK(&context);
1065
SRE_RESTORE_MARKS(&context);
1068
case SRE_OP_END_ASSERT_NOT:
1070
// Assert not subpattern.
1071
// <ASSERT_NOT> <skip to end> ... <END_ASSERT_NOT>
1072
TRACE(("|%p|%p|END_ASSERT_NOT\n", context.pattern_ptr, context.text_ptr));
1073
SRE_DISCARD_UNTIL_OP(&context, SRE_OP_ASSERT_NOT);
1074
SRE_DISCARD_BACKTRACK(&context);
1075
SRE_RESTORE_MARKS(&context);
1078
case SRE_OP_END_ATOMIC:
1080
// Atomic subpattern.
1081
// <ATOMIC> <skip to end> ... <END_ATOMIC>
1082
SRE_DISCARD_UNTIL_OP(&context, SRE_OP_ATOMIC);
1083
SRE_DISCARD_BACKTRACK(&context);
1084
SRE_DISCARD_SAVED_MARKS(&context);
1087
case SRE_OP_END_OF_LINE:
1090
TRACE(("|%p|%p|END_OF_LINE\n", context.pattern_ptr, context.text_ptr));
1091
if (context.text_ptr < context.text_end && !SRE_IS_LINEBREAK(context.text_ptr[0]))
1094
case SRE_OP_END_OF_STRING:
1097
TRACE(("|%p|%p|END_OF_STRING\n", context.pattern_ptr, context.text_ptr));
1098
if (context.text_ptr < context.text_end)
1101
case SRE_OP_END_OF_STRING_2:
1102
// End of string or final line.
1103
// <END_OF_STRING_2>
1104
TRACE(("|%p|%p|END_OF_STRING_2\n", context.pattern_ptr, context.text_ptr));
1105
if (context.text_ptr < context.text_end && context.text_ptr != context.final_linebreak)
1108
case SRE_OP_END_REPEAT_MAX:
1110
// End of greedy repeat.
1111
// <REPEAT_MAX> <skip to end> <index> <min> <max> ... <END_REPEAT_MAX> <skip to start>
1112
SRE_CODE* repeat_ptr = context.pattern_ptr - context.pattern_ptr[0];
1113
SRE_CODE* end_repeat_ptr = context.pattern_ptr;
1114
int index = repeat_ptr[1];
1115
int repeat_min = repeat_ptr[2];
1116
int repeat_max = repeat_ptr[3];
1117
SRE_CODE* body = repeat_ptr + 4;
1118
SRE_CODE* tail = end_repeat_ptr + 1;
1119
Py_ssize_t limit = context.text_end - context.text_ptr;
1120
int curr_repeats = ++context.repeat_counter[index];
1121
TRACE(("|%p|%p|END_REPEAT_MAX\n", context.pattern_ptr, context.text_ptr));
1122
if (context.text_ptr == context.repeat_start[index])
1123
context.pattern_ptr = tail;
1124
else if (curr_repeats < repeat_min) {
1125
if (repeat_min - curr_repeats > limit)
1127
context.pattern_ptr = body;
1128
context.repeat_start[index] = context.text_ptr;
1130
repeat_max = sre_repeat_limit(repeat_max, curr_repeats + limit);
1131
if (curr_repeats < repeat_max && limit > 0) {
1132
SRE_CODE* look_literal = tail;
1133
while (look_literal[0] == SRE_OP_MARK)
1135
// Look at what follows to avoid unnecessary backtracking.
1136
if (SRE_LOOK_AHEAD_ONE(&context, state, look_literal)) {
1137
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MAX, end_repeat_ptr, -1);
1139
return SRE_CLEANUP(&context, state, result);
1140
result = SRE_SAVE_MARKS(&context);
1142
return SRE_CLEANUP(&context, state, result);
1144
context.pattern_ptr = body;
1145
context.repeat_start[index] = context.text_ptr;
1147
context.pattern_ptr = tail;
1151
case SRE_OP_END_REPEAT_MAX_REV:
1153
// End of greedy repeat.
1154
// <REPEAT_MAX_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_MAX_REV> <skip to start>
1155
SRE_CODE* repeat_ptr = context.pattern_ptr - context.pattern_ptr[0];
1156
SRE_CODE* end_repeat_ptr = context.pattern_ptr;
1157
int index = repeat_ptr[1];
1158
int repeat_min = repeat_ptr[2];
1159
int repeat_max = repeat_ptr[3];
1160
SRE_CODE* body = repeat_ptr + 4;
1161
SRE_CODE* tail = end_repeat_ptr + 1;
1162
Py_ssize_t limit = context.text_ptr - context.text_start;
1163
int curr_repeats = ++context.repeat_counter[index];
1164
TRACE(("|%p|%p|END_REPEAT_MAX_REV\n", context.pattern_ptr, context.text_ptr));
1165
if (context.text_ptr == context.repeat_start[index])
1166
context.pattern_ptr = tail;
1167
else if (curr_repeats < repeat_min) {
1168
if (repeat_min - curr_repeats > limit)
1170
context.pattern_ptr = body;
1171
context.repeat_start[index] = context.text_ptr;
1173
repeat_max = sre_repeat_limit(repeat_max, curr_repeats + limit);
1174
if (curr_repeats < repeat_max && limit > 0) {
1175
SRE_CODE* look_literal = tail;
1176
while (look_literal[0] == SRE_OP_MARK)
1178
// Look at what follows to avoid unnecessary backtracking.
1179
if (SRE_LOOK_AHEAD_ONE_REV(&context, state, look_literal)) {
1180
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MAX_REV, end_repeat_ptr, -1);
1182
return SRE_CLEANUP(&context, state, result);
1183
result = SRE_SAVE_MARKS(&context);
1185
return SRE_CLEANUP(&context, state, result);
1187
context.pattern_ptr = body;
1188
context.repeat_start[index] = context.text_ptr;
1190
context.pattern_ptr = tail;
1194
case SRE_OP_END_REPEAT_MIN:
1196
// End of lazy repeat.
1197
// <REPEAT_MIN> <skip to end> <index> <min> <max> ... <END_REPEAT_MIN> <skip to start>
1198
SRE_CODE* repeat_ptr = context.pattern_ptr - context.pattern_ptr[0];
1199
SRE_CODE* end_repeat_ptr = context.pattern_ptr;
1200
int index = repeat_ptr[1];
1201
int repeat_min = repeat_ptr[2];
1202
int repeat_max = repeat_ptr[3];
1203
SRE_CODE* body = repeat_ptr + 4;
1204
SRE_CODE* tail = end_repeat_ptr + 1;
1205
Py_ssize_t limit = context.text_end - context.text_ptr;
1206
int curr_repeats = ++context.repeat_counter[index];
1207
TRACE(("|%p|%p|END_REPEAT_MIN\n", context.pattern_ptr, context.text_ptr));
1208
if (context.text_ptr == context.repeat_start[index])
1209
context.pattern_ptr = tail;
1210
else if (curr_repeats < repeat_min) {
1211
if (repeat_min - curr_repeats > limit)
1213
SRE_RESTORE_MARKS(&context);
1214
result = SRE_SAVE_MARKS(&context);
1216
return SRE_CLEANUP(&context, state, result);
1217
context.pattern_ptr = body;
1218
context.repeat_start[index] = context.text_ptr;
1222
SRE_CODE* look_literal = tail;
1223
while (look_literal[0] == SRE_OP_MARK)
1225
// Look at what follows to avoid unnecessary backtracking.
1226
match = SRE_LOOK_AHEAD_ONE(&context, state, look_literal);
1229
repeat_max = sre_repeat_limit(repeat_max, curr_repeats + limit);
1230
if (curr_repeats < repeat_max) {
1232
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
1233
backtrack_item->text_ptr = context.text_ptr;
1234
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MIN, end_repeat_ptr, -1);
1236
return SRE_CLEANUP(&context, state, result);
1237
result = SRE_SAVE_MARKS(&context);
1239
return SRE_CLEANUP(&context, state, result);
1240
context.pattern_ptr = tail;
1242
context.pattern_ptr = body;
1243
context.repeat_start[index] = context.text_ptr;
1247
context.pattern_ptr = tail;
1254
case SRE_OP_END_REPEAT_MIN_REV:
1256
// End of lazy repeat.
1257
// <REPEAT_MIN_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_MIN_REV> <skip to start>
1258
SRE_CODE* repeat_ptr = context.pattern_ptr - context.pattern_ptr[0];
1259
SRE_CODE* end_repeat_ptr = context.pattern_ptr;
1260
int index = repeat_ptr[1];
1261
int repeat_min = repeat_ptr[2];
1262
int repeat_max = repeat_ptr[3];
1263
SRE_CODE* body = repeat_ptr + 4;
1264
SRE_CODE* tail = end_repeat_ptr + 1;
1265
Py_ssize_t limit = context.text_ptr - context.text_start;
1266
int curr_repeats = ++context.repeat_counter[index];
1267
TRACE(("|%p|%p|END_REPEAT_MIN_REV\n", context.pattern_ptr, context.text_ptr));
1268
if (context.text_ptr == context.repeat_start[index])
1269
context.pattern_ptr = tail;
1270
else if (curr_repeats < repeat_min) {
1271
if (repeat_min - curr_repeats > limit)
1273
SRE_RESTORE_MARKS(&context);
1274
result = SRE_SAVE_MARKS(&context);
1276
return SRE_CLEANUP(&context, state, result);
1277
context.pattern_ptr = body;
1278
context.repeat_start[index] = context.text_ptr;
1282
SRE_CODE* look_literal = tail;
1283
while (look_literal[0] == SRE_OP_MARK)
1285
// Look at what follows to avoid unnecessary backtracking.
1286
match = SRE_LOOK_AHEAD_ONE_REV(&context, state, look_literal);
1289
repeat_max = sre_repeat_limit(repeat_max, curr_repeats + limit);
1290
if (curr_repeats < repeat_max) {
1292
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
1293
backtrack_item->text_ptr = context.text_ptr;
1294
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MIN, end_repeat_ptr, -1);
1296
return SRE_CLEANUP(&context, state, result);
1297
result = SRE_SAVE_MARKS(&context);
1299
return SRE_CLEANUP(&context, state, result);
1300
context.pattern_ptr = tail;
1302
context.pattern_ptr = body;
1303
context.repeat_start[index] = context.text_ptr;
1307
context.pattern_ptr = tail;
1314
case SRE_OP_END_REPEAT_POSS:
1316
// End of possessive repeat.
1317
// <REPEAT_POSS> <skip to end> <index> <min> <max> ... <END_REPEAT_POSS> <skip to start>
1318
SRE_CODE* repeat_ptr = context.pattern_ptr - context.pattern_ptr[0];
1319
SRE_CODE* end_repeat_ptr = context.pattern_ptr;
1320
int index = repeat_ptr[1];
1321
int repeat_min = repeat_ptr[2];
1322
int repeat_max = repeat_ptr[3];
1323
SRE_CODE* body = repeat_ptr + 4;
1324
SRE_CODE* tail = end_repeat_ptr + 1;
1325
Py_ssize_t limit = context.text_end - context.text_ptr;
1326
int curr_repeats = ++context.repeat_counter[index];
1327
TRACE(("|%p|%p|END_REPEAT_POSS\n", context.pattern_ptr, context.text_ptr));
1328
SRE_DISCARD_UNTIL_OP(&context, SRE_OP_REPEAT_POSS);
1329
if (context.repeat_start[index] == context.text_ptr)
1330
context.pattern_ptr = tail;
1331
else if (curr_repeats < repeat_min) {
1332
if (repeat_min - curr_repeats > limit)
1334
context.pattern_ptr = body;
1335
context.repeat_start[index] = context.text_ptr;
1337
repeat_max = sre_repeat_limit(repeat_max, curr_repeats + limit);
1338
if (curr_repeats < repeat_max && limit > 0) {
1339
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_POSS, end_repeat_ptr, -1);
1341
return SRE_CLEANUP(&context, state, result);
1342
result = SRE_SAVE_MARKS(&context);
1344
return SRE_CLEANUP(&context, state, result);
1345
context.pattern_ptr = body;
1346
context.repeat_start[index] = context.text_ptr;
1348
context.pattern_ptr = tail;
1352
case SRE_OP_END_REPEAT_POSS_REV:
1354
// End of possessive repeat.
1355
// <REPEAT_POSS_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_POSS_REV> <skip to start>
1356
SRE_CODE* repeat_ptr = context.pattern_ptr - context.pattern_ptr[0];
1357
SRE_CODE* end_repeat_ptr = context.pattern_ptr;
1358
int index = repeat_ptr[1];
1359
int repeat_min = repeat_ptr[2];
1360
int repeat_max = repeat_ptr[3];
1361
SRE_CODE* body = repeat_ptr + 4;
1362
SRE_CODE* tail = end_repeat_ptr + 1;
1363
Py_ssize_t limit = context.text_ptr - context.text_start;
1364
int curr_repeats = ++context.repeat_counter[index];
1365
TRACE(("|%p|%p|END_REPEAT_POSS_REV\n", context.pattern_ptr, context.text_ptr));
1366
SRE_DISCARD_UNTIL_OP(&context, SRE_OP_REPEAT_POSS_REV);
1367
if (context.repeat_start[index] == context.text_ptr)
1368
context.pattern_ptr = tail;
1369
else if (curr_repeats < repeat_min) {
1370
if (repeat_min - curr_repeats > limit)
1372
context.pattern_ptr = body;
1373
context.repeat_start[index] = context.text_ptr;
1375
repeat_max = sre_repeat_limit(repeat_max, curr_repeats + limit);
1376
if (curr_repeats < repeat_max && limit > 0) {
1377
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_POSS_REV, end_repeat_ptr, -1);
1379
return SRE_CLEANUP(&context, state, result);
1380
result = SRE_SAVE_MARKS(&context);
1382
return SRE_CLEANUP(&context, state, result);
1383
context.pattern_ptr = body;
1384
context.repeat_start[index] = context.text_ptr;
1386
context.pattern_ptr = tail;
1390
case SRE_OP_GROUPREF:
1392
// Match backreference.
1393
// <GROUPREF> <group>
1395
SRE_CHAR* group_start;
1396
SRE_CHAR* group_end;
1399
TRACE(("|%p|%p|GROUPREF %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1400
group = context.pattern_ptr[0]; // Zero-based index. Note that externally group 0 is the entire matched string.
1401
group_start = context.mark[group * 2];
1402
group_end = context.mark[group * 2 + 1];
1403
if (group_start == NULL || group_end == NULL)
1405
length = group_end - group_start;
1406
if (length > context.text_end - context.text_ptr)
1409
while (i < length) {
1410
if (context.text_ptr[i] != group_start[i])
1414
context.pattern_ptr++;
1415
context.text_ptr += length;
1418
case SRE_OP_GROUPREF_EXISTS:
1420
// Whether backreference exists.
1421
// <GROUPREF_EXISTS> <group> <skip> code_yes <JUMP> <skip> code_no
1423
SRE_CHAR* group_start;
1424
SRE_CHAR* group_end;
1425
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1426
group = context.pattern_ptr[0]; // Zero-based index. Note that externally group 0 is the entire matched string.
1427
group_start = context.mark[group * 2];
1428
group_end = context.mark[group * 2 + 1];
1429
if (group_start == NULL || group_end == NULL)
1430
context.pattern_ptr += context.pattern_ptr[1];
1432
context.pattern_ptr += 2;
1435
case SRE_OP_GROUPREF_IGNORE:
1437
// Match backreference, ignoring case.
1438
// <GROUPREF_IGNORE> <group>
1440
SRE_CHAR* group_start;
1441
SRE_CHAR* group_end;
1444
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1445
group = context.pattern_ptr[0]; // Zero-based index. Note that externally group 0 is the entire matched string.
1446
group_start = context.mark[group * 2];
1447
group_end = context.mark[group * 2 + 1];
1448
if (group_start == NULL || group_end == NULL)
1450
length = group_end - group_start;
1451
if (length > context.text_end - context.text_ptr)
1454
while (i < length) {
1455
if (state->lower(context.text_ptr[i]) != state->lower(group_start[i]))
1459
context.pattern_ptr++;
1460
context.text_ptr += length;
1463
case SRE_OP_GROUPREF_IGNORE_REV:
1465
// Match backreference, ignoring case.
1466
// <GROUPREF_IGNORE_REV> <group>
1468
SRE_CHAR* group_start;
1469
SRE_CHAR* group_end;
1472
TRACE(("|%p|%p|GROUPREF_IGNORE_REV %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1473
group = context.pattern_ptr[0]; // Zero-based index. Note that externally group 0 is the entire matched string.
1474
group_start = context.mark[group * 2];
1475
group_end = context.mark[group * 2 + 1];
1476
if (group_start == NULL || group_end == NULL)
1478
length = group_end - group_start;
1479
if (length > context.text_ptr - context.text_start)
1481
context.text_ptr -= length;
1483
while (i < length) {
1484
if (state->lower(context.text_ptr[i]) != state->lower(group_start[i]))
1488
context.pattern_ptr++;
1491
case SRE_OP_GROUPREF_REV:
1493
// Match backreference.
1494
// <GROUPREF_REV> <group>
1496
SRE_CHAR* group_start;
1497
SRE_CHAR* group_end;
1500
TRACE(("|%p|%p|GROUPREF_REV %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1501
group = context.pattern_ptr[0]; // Zero-based index. Note that externally group 0 is the entire matched string.
1502
group_start = context.mark[group * 2];
1503
group_end = context.mark[group * 2 + 1];
1504
if (group_start == NULL || group_end == NULL)
1506
length = group_end - group_start;
1507
if (length > context.text_ptr - context.text_start)
1509
context.text_ptr -= length;
1511
while (i < length) {
1512
if (context.text_ptr[i] != group_start[i])
1516
context.pattern_ptr++;
941
/* match set member (or non_member) */
942
/* <IN> <skip> <set> */
943
TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
944
if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
946
ctx->pattern += ctx->pattern[0];
950
case SRE_OP_LITERAL_IGNORE:
951
TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
952
ctx->pattern, ctx->ptr, ctx->pattern[0]));
953
if (ctx->ptr >= end ||
954
state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
960
case SRE_OP_NOT_LITERAL_IGNORE:
961
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
962
ctx->pattern, ctx->ptr, *ctx->pattern));
963
if (ctx->ptr >= end ||
964
state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
1520
// Match set member.
1522
TRACE(("|%p|%p|IN\n", context.pattern_ptr, context.text_ptr));
1523
if (context.text_ptr >= context.text_end || !SRE_IN(context.pattern_ptr, context.text_ptr[0]))
1525
context.pattern_ptr += context.pattern_ptr[0];
970
1528
case SRE_OP_IN_IGNORE:
971
TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
973
|| !SRE_CHARSET(ctx->pattern+1,
974
(SRE_CODE)state->lower(*ctx->ptr)))
976
ctx->pattern += ctx->pattern[0];
1529
// Match set member, ignoring case.
1530
// <IN_IGNORE> <set>
1531
TRACE(("|%p|%p|IN_IGNORE\n", context.pattern_ptr, context.text_ptr));
1532
if (context.text_ptr >= context.text_end || !SRE_IN(context.pattern_ptr, state->lower(context.text_ptr[0])))
1534
context.pattern_ptr += context.pattern_ptr[0];
1537
case SRE_OP_IN_IGNORE_REV:
1538
// Match set member, ignoring case.
1539
// <IN_IGNORE_REV> <set>
1540
TRACE(("|%p|%p|IN_IGNORE_REV\n", context.pattern_ptr, context.text_ptr));
1541
if (context.text_ptr <= context.text_start || !SRE_IN(context.pattern_ptr, state->lower(context.text_ptr[-1])))
1543
context.pattern_ptr += context.pattern_ptr[0];
1547
// Match set member.
1549
TRACE(("|%p|%p|IN_REV\n", context.pattern_ptr, context.text_ptr));
1550
if (context.text_ptr <= context.text_start || !SRE_IN(context.pattern_ptr, context.text_ptr[-1]))
1552
context.pattern_ptr += context.pattern_ptr[0];
980
1555
case SRE_OP_JUMP:
983
/* <JUMP> <offset> */
984
TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
985
ctx->ptr, ctx->pattern[0]));
986
ctx->pattern += ctx->pattern[0];
991
/* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
992
TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
994
ctx->u.rep = state->repeat;
996
MARK_PUSH(ctx->lastmark);
997
for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
998
if (ctx->pattern[1] == SRE_OP_LITERAL &&
1000
(SRE_CODE) *ctx->ptr != ctx->pattern[2]))
1002
if (ctx->pattern[1] == SRE_OP_IN &&
1004
!SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
1006
state->ptr = ctx->ptr;
1007
DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
1010
MARK_POP_DISCARD(ctx->lastmark);
1011
RETURN_ON_ERROR(ret);
1015
MARK_POP_KEEP(ctx->lastmark);
1019
MARK_POP_DISCARD(ctx->lastmark);
1022
case SRE_OP_REPEAT_ONE:
1023
/* match repeated sequence (maximizing regexp) */
1025
/* this operator only works if the repeated item is
1026
exactly one character wide, and we're not already
1027
collecting backtracking points. for other cases,
1028
use the MAX_REPEAT operator */
1030
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1032
TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1033
ctx->pattern[1], ctx->pattern[2]));
1035
if (ctx->ptr + ctx->pattern[1] > end)
1036
RETURN_FAILURE; /* cannot match */
1038
state->ptr = ctx->ptr;
1040
ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1041
RETURN_ON_ERROR(ret);
1042
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1044
ctx->ptr += ctx->count;
1046
/* when we arrive here, count contains the number of
1047
matches, and ctx->ptr points to the tail of the target
1048
string. check if the rest of the pattern matches,
1049
and backtrack if not. */
1051
if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1054
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1055
/* tail is empty. we're finished */
1056
state->ptr = ctx->ptr;
1062
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
1063
/* tail starts with a literal. skip positions where
1064
the rest of the pattern cannot possibly match */
1065
ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
1067
while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
1068
(ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1072
if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1074
state->ptr = ctx->ptr;
1075
DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1076
ctx->pattern+ctx->pattern[0]);
1078
RETURN_ON_ERROR(ret);
1090
while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
1091
state->ptr = ctx->ptr;
1092
DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1093
ctx->pattern+ctx->pattern[0]);
1095
RETURN_ON_ERROR(ret);
1105
case SRE_OP_MIN_REPEAT_ONE:
1106
/* match repeated sequence (minimizing regexp) */
1108
/* this operator only works if the repeated item is
1109
exactly one character wide, and we're not already
1110
collecting backtracking points. for other cases,
1111
use the MIN_REPEAT operator */
1113
/* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1115
TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1116
ctx->pattern[1], ctx->pattern[2]));
1118
if (ctx->ptr + ctx->pattern[1] > end)
1119
RETURN_FAILURE; /* cannot match */
1121
state->ptr = ctx->ptr;
1123
if (ctx->pattern[1] == 0)
1126
/* count using pattern min as the maximum */
1127
ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1128
RETURN_ON_ERROR(ret);
1129
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1130
if (ret < (Py_ssize_t) ctx->pattern[1])
1131
/* didn't match minimum number of times */
1133
/* advance past minimum matches of repeat */
1135
ctx->ptr += ctx->count;
1138
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1139
/* tail is empty. we're finished */
1140
state->ptr = ctx->ptr;
1146
while ((Py_ssize_t)ctx->pattern[2] == 65535
1147
|| ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
1148
state->ptr = ctx->ptr;
1149
DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1150
ctx->pattern+ctx->pattern[0]);
1152
RETURN_ON_ERROR(ret);
1155
state->ptr = ctx->ptr;
1156
ret = SRE_COUNT(state, ctx->pattern+3, 1);
1157
RETURN_ON_ERROR(ret);
1158
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1169
case SRE_OP_POSSESSIVE_ONE:
1170
/* match repeated sequence (maximizing regexp) without
1173
/* this operator only works if the repeated item is
1174
exactly one character wide, and we're not already
1175
collecting backtracking points. for other cases,
1176
use the MAX_REPEAT operator */
1178
/* <POSSESSIVE_ONE> <skip> <1=min> <2=max> item <SUCCESS>
1181
TRACE(("|%p|%p|POSSESSIVE_ONE %d %d\n", ctx->pattern,
1182
ctx->ptr, ctx->pattern[1], ctx->pattern[2]));
1184
if (ctx->ptr + ctx->pattern[1] > end) {
1185
RETURN_FAILURE; /* cannot match */
1188
state->ptr = ctx->ptr;
1190
ret = SRE_COUNT(state, ctx->pattern + 3, ctx->pattern[2]);
1191
RETURN_ON_ERROR(ret);
1192
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1194
ctx->ptr += ctx->count;
1196
/* when we arrive here, count contains the number of
1197
matches, and ctx->ptr points to the tail of the target
1198
string. check if the rest of the pattern matches,
1201
/* Test for not enough repetitions in match */
1202
if (ctx->count < (Py_ssize_t) ctx->pattern[1]) {
1206
/* Update the pattern to point to the next op code */
1207
ctx->pattern += ctx->pattern[0];
1209
/* Let the tail be evaluated separately and consider this
1210
match successful. */
1211
if (*ctx->pattern == SRE_OP_SUCCESS) {
1212
/* tail is empty. we're finished */
1213
state->ptr = ctx->ptr;
1217
/* Attempt to match the rest of the string */
1221
/* create repeat context. all the hard work is done
1222
by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1223
/* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1224
TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1225
ctx->pattern[1], ctx->pattern[2]));
1227
/* install new repeat context */
1228
ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
1233
ctx->u.rep->count = -1;
1234
ctx->u.rep->pattern = ctx->pattern;
1235
ctx->u.rep->prev = state->repeat;
1236
ctx->u.rep->last_ptr = NULL;
1237
state->repeat = ctx->u.rep;
1239
state->ptr = ctx->ptr;
1240
DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
1241
state->repeat = ctx->u.rep->prev;
1242
PyObject_FREE(ctx->u.rep);
1245
RETURN_ON_ERROR(ret);
1250
case SRE_OP_MAX_UNTIL:
1251
/* maximizing repeat */
1252
/* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1254
/* FIXME: we probably need to deal with zero-width
1255
matches in here... */
1257
ctx->u.rep = state->repeat;
1259
RETURN_ERROR(SRE_ERROR_STATE);
1261
state->ptr = ctx->ptr;
1263
ctx->count = ctx->u.rep->count+1;
1265
TRACE(("|%p|%p|MAX_UNTIL %d\n", ctx->pattern,
1266
ctx->ptr, ctx->count));
1268
if (ctx->count < ctx->u.rep->pattern[1]) {
1269
/* not enough matches */
1270
ctx->u.rep->count = ctx->count;
1271
DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1272
ctx->u.rep->pattern+3);
1274
RETURN_ON_ERROR(ret);
1277
ctx->u.rep->count = ctx->count-1;
1278
state->ptr = ctx->ptr;
1282
if ((ctx->count < ctx->u.rep->pattern[2] ||
1283
ctx->u.rep->pattern[2] == 65535) &&
1284
state->ptr != ctx->u.rep->last_ptr) {
1285
/* we may have enough matches, but if we can
1286
match another item, do so */
1287
ctx->u.rep->count = ctx->count;
1289
MARK_PUSH(ctx->lastmark);
1290
/* zero-width match protection */
1291
DATA_PUSH(&ctx->u.rep->last_ptr);
1292
ctx->u.rep->last_ptr = state->ptr;
1293
DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1294
ctx->u.rep->pattern+3);
1295
DATA_POP(&ctx->u.rep->last_ptr);
1297
MARK_POP_DISCARD(ctx->lastmark);
1298
RETURN_ON_ERROR(ret);
1301
MARK_POP(ctx->lastmark);
1303
ctx->u.rep->count = ctx->count-1;
1304
state->ptr = ctx->ptr;
1307
/* cannot match more repeated items here. make sure the
1309
state->repeat = ctx->u.rep->prev;
1310
DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
1311
RETURN_ON_SUCCESS(ret);
1312
state->repeat = ctx->u.rep;
1313
state->ptr = ctx->ptr;
1316
case SRE_OP_MIN_UNTIL:
1317
/* minimizing repeat */
1318
/* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1320
ctx->u.rep = state->repeat;
1322
RETURN_ERROR(SRE_ERROR_STATE);
1324
state->ptr = ctx->ptr;
1326
ctx->count = ctx->u.rep->count+1;
1328
TRACE(("|%p|%p|MIN_UNTIL %d %p\n", ctx->pattern,
1329
ctx->ptr, ctx->count, ctx->u.rep->pattern));
1331
if (ctx->count < ctx->u.rep->pattern[1]) {
1332
/* not enough matches */
1333
ctx->u.rep->count = ctx->count;
1334
DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1335
ctx->u.rep->pattern+3);
1337
RETURN_ON_ERROR(ret);
1340
ctx->u.rep->count = ctx->count-1;
1341
state->ptr = ctx->ptr;
1347
/* see if the tail matches */
1348
state->repeat = ctx->u.rep->prev;
1349
DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1351
RETURN_ON_ERROR(ret);
1355
state->repeat = ctx->u.rep;
1356
state->ptr = ctx->ptr;
1360
if (ctx->count >= ctx->u.rep->pattern[2]
1361
&& ctx->u.rep->pattern[2] != 65535)
1364
ctx->u.rep->count = ctx->count;
1365
DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1366
ctx->u.rep->pattern+3);
1368
RETURN_ON_ERROR(ret);
1371
ctx->u.rep->count = ctx->count-1;
1372
state->ptr = ctx->ptr;
1375
case SRE_OP_POSSESSIVE_REPEAT:
1376
/* create possessive repeat contexts. */
1377
/* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern
1379
TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", ctx->pattern,
1380
ctx->ptr, ctx->pattern[1], ctx->pattern[2]));
1382
/* Set the global Input pointer to this context's Input
1384
state->ptr = ctx->ptr;
1386
/* Initialize Count to 0 */
1389
/* Check for minimum required matches. */
1390
while (ctx->count < (int)ctx->pattern[1]) {
1391
/* not enough matches */
1392
DO_JUMP(JUMP_POSS_REPEAT_1, jump_poss_repeat_1,
1395
RETURN_ON_ERROR(ret);
1399
state->ptr = ctx->ptr;
1404
/* Clear the context's Input stream pointer so that it
1405
doesn't match the global state so that the while loop can
1409
/* Keep trying to parse the <pattern> sub-pattern until the
1410
end is reached, creating a new context each time. */
1411
while ((ctx->count < (int)ctx->pattern[2] ||
1412
(int)ctx->pattern[2] == 65535) &&
1413
state->ptr != ctx->ptr) {
1414
/* Save the Capture Group Marker state into the current
1415
Context and back up the current highest number
1416
Capture Group marker. */
1418
MARK_PUSH(ctx->lastmark);
1420
/* zero-width match protection */
1421
/* Set the context's Input Stream pointer to be the
1422
current Input Stream pointer from the global
1423
state. When the loop reaches the next iteration,
1424
the context will then store the last known good
1425
position with the global state holding the Input
1426
Input Stream position that has been updated with
1427
the most recent match. Thus, if state's Input
1428
stream remains the same as the one stored in the
1429
current Context, we know we have successfully
1430
matched an empty string and that all subsequent
1431
matches will also be the empty string until the
1432
maximum number of matches are counted, and because
1433
of this, we could immediately stop at that point and
1434
consider this match successful. */
1435
ctx->ptr = state->ptr;
1437
/* We have not reached the maximum number of matches, so try to
1439
DO_JUMP(JUMP_POSS_REPEAT_2, jump_poss_repeat_2,
1442
/* Check to see if the last attempted match
1445
/* Drop the saved highest number Capture Group
1446
marker saved above and use the newly updated
1448
MARK_POP_DISCARD(ctx->lastmark);
1449
RETURN_ON_ERROR(ret);
1451
/* Success, increment the count. */
1454
/* Last attempted match failed. */
1456
/* Restore the previously saved highest number
1457
Capture Group marker since the last iteration
1458
did not match, then restore that to the global
1460
MARK_POP(ctx->lastmark);
1463
/* We have sufficient matches, so exit loop. */
1469
/* Jump to end of pattern indicated by skip. */
1470
ctx->pattern += ctx->pattern[0];
1471
ctx->ptr = state->ptr;
1474
case SRE_OP_ATOMIC_GROUP:
1475
/* Atomic Group Sub Pattern */
1476
/* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */
1477
TRACE(("|%p|%p|ATOMIC_GROUP\n", ctx->pattern, ctx->ptr));
1479
/* Set the global Input pointer to this context's Input
1481
state->ptr = ctx->ptr;
1483
/* Evaluate the Atomic Group in a new context, terminating
1484
when the end of the group, represented by a SUCCESS op
1485
code, is reached. */
1486
/* Group Pattern begins at an offset of 1 code. */
1487
DO_JUMP(JUMP_ATOMIC_GROUP, jump_atomic_group,
1490
/* Test Exit Condition */
1491
RETURN_ON_ERROR(ret);
1494
/* Atomic Group failed to Match. */
1495
state->ptr = ctx->ptr;
1500
/* Jump to end of pattern indicated by skip. */
1501
ctx->pattern += ctx->pattern[0];
1502
ctx->ptr = state->ptr;
1505
case SRE_OP_GROUPREF:
1506
/* match backreference */
1507
/* trace arguments: pattern position, text position, group index */
TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, ctx->ptr, ctx->pattern[0]));
1508
i = ctx->pattern[0];
1510
Py_ssize_t groupref = i+i;
1511
if (groupref >= state->lastmark) {
1514
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1515
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1516
if (!p || !e || e < p)
1519
if (ctx->ptr >= end || *ctx->ptr != *p)
1528
case SRE_OP_GROUPREF_IGNORE:
1529
/* match backreference */
1530
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1531
ctx->ptr, ctx->pattern[0]));
1532
i = ctx->pattern[0];
1534
Py_ssize_t groupref = i+i;
1535
if (groupref >= state->lastmark) {
1538
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1539
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1540
if (!p || !e || e < p)
1543
if (ctx->ptr >= end ||
1544
state->lower(*ctx->ptr) != state->lower(*p))
1553
case SRE_OP_GROUPREF_EXISTS:
1554
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1555
ctx->ptr, ctx->pattern[0]));
1556
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1557
i = ctx->pattern[0];
1559
Py_ssize_t groupref = i+i;
1560
if (groupref >= state->lastmark) {
1561
ctx->pattern += ctx->pattern[1];
1564
SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1565
SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1566
if (!p || !e || e < p) {
1567
ctx->pattern += ctx->pattern[1];
1576
/* assert subpattern */
1577
/* <ASSERT> <skip> <back> <pattern> */
1578
TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1579
ctx->ptr, ctx->pattern[1]));
1580
state->ptr = ctx->ptr - ctx->pattern[1];
1581
if (state->ptr < state->beginning)
1583
DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1584
RETURN_ON_FAILURE(ret);
1585
ctx->pattern += ctx->pattern[0];
1588
case SRE_OP_ASSERT_NOT:
1589
/* assert not subpattern */
1590
/* <ASSERT_NOT> <skip> <back> <pattern> */
1591
TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1592
ctx->ptr, ctx->pattern[1]));
1593
state->ptr = ctx->ptr - ctx->pattern[1];
1594
if (state->ptr >= state->beginning) {
1595
DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1597
RETURN_ON_ERROR(ret);
1601
ctx->pattern += ctx->pattern[0];
1604
case SRE_OP_FAILURE:
1605
/* immediate failure */
1606
TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1610
TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1612
RETURN_ERROR(SRE_ERROR_ILLEGAL);
1617
ctx_pos = ctx->last_ctx_pos;
1619
DATA_POP_DISCARD(ctx);
1622
DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1625
case JUMP_MAX_UNTIL_2:
1626
TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1627
goto jump_max_until_2;
1628
case JUMP_MAX_UNTIL_3:
1629
TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1630
goto jump_max_until_3;
1631
case JUMP_MIN_UNTIL_2:
1632
TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1633
goto jump_min_until_2;
1634
case JUMP_MIN_UNTIL_3:
1635
TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1636
goto jump_min_until_3;
1638
TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1640
case JUMP_MAX_UNTIL_1:
1641
TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1642
goto jump_max_until_1;
1643
case JUMP_MIN_UNTIL_1:
1644
TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1645
goto jump_min_until_1;
1646
case JUMP_POSS_REPEAT_1:
1647
TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", ctx->pattern, ctx->ptr));
1648
goto jump_poss_repeat_1;
1649
case JUMP_POSS_REPEAT_2:
1650
TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", ctx->pattern, ctx->ptr));
1651
goto jump_poss_repeat_2;
1653
TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1655
case JUMP_REPEAT_ONE_1:
1656
TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1657
goto jump_repeat_one_1;
1658
case JUMP_REPEAT_ONE_2:
1659
TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1660
goto jump_repeat_one_2;
1661
case JUMP_MIN_REPEAT_ONE:
1662
TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1663
goto jump_min_repeat_one;
1664
case JUMP_ATOMIC_GROUP:
1665
TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", ctx->pattern, ctx->ptr));
1666
goto jump_atomic_group;
1668
TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1670
case JUMP_ASSERT_NOT:
1671
TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1672
goto jump_assert_not;
1674
TRACE(("|%p|%p|RETURN %d\n", ctx->pattern, ctx->ptr, ret));
1678
return ret; /* should never get here */
1558
TRACE(("|%p|%p|JUMP %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1559
context.pattern_ptr += context.pattern_ptr[0];
1561
case SRE_OP_LITERAL:
1562
// Match literal character.
1564
TRACE(("|%p|%p|LITERAL %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1565
if (context.text_ptr >= context.text_end || (SRE_CODE) context.text_ptr[0] != context.pattern_ptr[0])
1567
context.pattern_ptr++;
1570
case SRE_OP_LITERAL_IGNORE:
1571
// Match literal character, ignoring case.
1572
// <LITERAL_IGNORE> <code>
1573
TRACE(("|%p|%p|LITERAL_IGNORE %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1574
if (context.text_ptr >= context.text_end || state->lower(context.text_ptr[0]) != context.pattern_ptr[0])
1576
context.pattern_ptr++;
1579
case SRE_OP_LITERAL_IGNORE_REV:
1580
// Match literal character, ignoring case.
1581
// <LITERAL_IGNORE_REV> <code>
1582
TRACE(("|%p|%p|LITERAL_IGNORE_REV %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1583
if (context.text_ptr <= context.text_start || state->lower(context.text_ptr[-1]) != context.pattern_ptr[0])
1585
context.pattern_ptr++;
1588
case SRE_OP_LITERAL_REV:
1589
// Match literal character.
1590
// <LITERAL_REV> <code>
1591
TRACE(("|%p|%p|LITERAL_REV %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1592
if (context.text_ptr <= context.text_start || (SRE_CODE) context.text_ptr[-1] != context.pattern_ptr[0])
1594
context.pattern_ptr++;
1597
case SRE_OP_LITERAL_STRING:
1599
// Match literal string.
1600
// <LITERAL_STRING> <length> ...
1603
TRACE(("|%p|%p|LITERAL_STRING %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1604
length = *context.pattern_ptr++;
1605
if (context.text_ptr + length > context.text_end)
1609
if ((SRE_CODE) context.text_ptr[i] != context.pattern_ptr[i])
1614
context.pattern_ptr += length;
1615
context.text_ptr += length;
1618
case SRE_OP_LITERAL_STRING_IGNORE:
1620
// Match literal string, ignoring case.
1621
// <LITERAL_STRING_IGNORE> <length> ...
1624
TRACE(("|%p|%p|LITERAL_STRING_IGNORE %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1625
length = *context.pattern_ptr++;
1626
if (length > context.text_end - context.text_ptr)
1630
if (state->lower(context.text_ptr[i]) != context.pattern_ptr[i])
1635
context.pattern_ptr += length;
1636
context.text_ptr += length;
1639
case SRE_OP_LITERAL_STRING_IGNORE_REV:
1641
// Match literal string, ignoring case.
1642
// <LITERAL_STRING_IGNORE_REV> <length> ...
1645
TRACE(("|%p|%p|LITERAL_STRING_IGNORE_REV %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1646
length = *context.pattern_ptr++;
1647
if (length > context.text_ptr - context.text_start)
1649
context.text_ptr -= length;
1652
if (state->lower(context.text_ptr[i]) != context.pattern_ptr[i])
1657
context.pattern_ptr += length;
1660
case SRE_OP_LITERAL_STRING_REV:
1662
// Match literal string.
1663
// <LITERAL_STRING_REV> <length> ...
1666
TRACE(("|%p|%p|LITERAL_STRING_REV %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1667
length = *context.pattern_ptr++;
1668
if (context.text_ptr - length < context.text_start)
1670
context.text_ptr -= length;
1673
if ((SRE_CODE) context.text_ptr[i] != context.pattern_ptr[i])
1678
context.pattern_ptr += length;
1681
case SRE_OP_LOC_BOUNDARY:
1683
// Boundary between word and non-word.
1685
int before = context.text_ptr > context.text_start && SRE_LOC_IS_WORD(context.text_ptr[-1]);
1686
int after = context.text_ptr < context.text_end && SRE_LOC_IS_WORD(context.text_ptr[0]);
1687
TRACE(("|%p|%p|LOC_BOUNDARY\n", context.pattern_ptr, context.text_ptr));
1688
if (before == after)
1692
case SRE_OP_LOC_NOT_BOUNDARY:
1694
// Not boundary between word and non-word.
1695
// <LOC_NOT_BOUNDARY>
1696
int before = context.text_ptr > context.text_start && SRE_LOC_IS_WORD(context.text_ptr[-1]);
1697
int after = context.text_ptr < context.text_end && SRE_LOC_IS_WORD(context.text_ptr[0]);
1698
TRACE(("|%p|%p|LOC_NOT_BOUNDARY\n", context.pattern_ptr, context.text_ptr));
1699
if (before != after)
1703
case SRE_OP_LOC_NOT_WORD:
1706
TRACE(("|%p|%p|LOC_NOT_WORD\n", context.pattern_ptr, context.text_ptr));
1707
if (context.text_ptr >= context.text_end || SRE_LOC_IS_WORD(context.text_ptr[0]))
1711
case SRE_OP_LOC_NOT_WORD_REV:
1713
// <LOC_NOT_WORD_REV>
1714
TRACE(("|%p|%p|LOC_NOT_WORD_REV\n", context.pattern_ptr, context.text_ptr));
1715
if (context.text_ptr <= context.text_start || SRE_LOC_IS_WORD(context.text_ptr[-1]))
1719
case SRE_OP_LOC_WORD:
1722
TRACE(("|%p|%p|LOC_WORD\n", context.pattern_ptr, context.text_ptr));
1723
if (context.text_ptr >= context.text_end || !SRE_LOC_IS_WORD(context.text_ptr[0]))
1727
case SRE_OP_LOC_WORD_REV:
1730
TRACE(("|%p|%p|LOC_WORD_REV\n", context.pattern_ptr, context.text_ptr));
1731
if (context.text_ptr <= context.text_start || !SRE_LOC_IS_WORD(context.text_ptr[-1]))
1739
TRACE(("|%p|%p|MARK %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1740
context.mark[context.pattern_ptr[0]] = context.text_ptr;
1741
context.pattern_ptr++;
1744
case SRE_OP_NOT_BIGCHARSET:
1745
// Match character not in charset.
1746
// <NOT_BIGCHARSET> <charset>
1747
TRACE(("|%p|%p|NOT_BIGCHARSET\n", context.pattern_ptr, context.text_ptr));
1748
if (context.text_ptr >= context.text_end || in_bigcharset(context.pattern_ptr, context.text_ptr[0]))
1750
context.pattern_ptr = skip_bigcharset(context.pattern_ptr);
1753
case SRE_OP_NOT_BIGCHARSET_IGNORE:
1754
// Match character not in charset, ignoring case.
1755
// <NOT_BIGCHARSET_IGNORE> <charset>
1756
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE\n", context.pattern_ptr, context.text_ptr));
1757
if (context.text_ptr >= context.text_end || in_bigcharset(context.pattern_ptr, state->lower(context.text_ptr[0])))
1759
context.pattern_ptr = skip_bigcharset(context.pattern_ptr);
1762
case SRE_OP_NOT_BIGCHARSET_IGNORE_REV:
1763
// Match character not in charset, ignoring case.
1764
// <NOT_BIGCHARSET_IGNORE_REV> <charset>
1765
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE_REV\n", context.pattern_ptr, context.text_ptr));
1766
if (context.text_ptr <= context.text_start || in_bigcharset(context.pattern_ptr, state->lower(context.text_ptr[-1])))
1768
context.pattern_ptr = skip_bigcharset(context.pattern_ptr);
1771
case SRE_OP_NOT_BIGCHARSET_REV:
1772
// Match character not in charset.
1773
// <NOT_BIGCHARSET_REV> <charset>
1774
TRACE(("|%p|%p|NOT_BIGCHARSET_REV\n", context.pattern_ptr, context.text_ptr));
1775
if (context.text_ptr <= context.text_start || in_bigcharset(context.pattern_ptr, context.text_ptr[-1]))
1777
context.pattern_ptr = skip_bigcharset(context.pattern_ptr);
1780
case SRE_OP_NOT_BOUNDARY:
1782
// Not boundary between word and non-word.
1784
int before = context.text_ptr > context.text_start && SRE_IS_WORD(context.text_ptr[-1]);
1785
int after = context.text_ptr < context.text_end && SRE_IS_WORD(context.text_ptr[0]);
1786
TRACE(("|%p|%p|NOT_BOUNDARY\n", context.pattern_ptr, context.text_ptr));
1787
if (before != after)
1791
case SRE_OP_NOT_CHARSET:
1793
// Match character not in charset.
1794
// <NOT_CHARSET> <charset>
1795
TRACE(("|%p|%p|NOT_CHARSET\n", context.pattern_ptr, context.text_ptr));
1796
if (context.text_ptr >= context.text_end || in_charset(context.pattern_ptr, context.text_ptr[0]))
1798
context.pattern_ptr = skip_charset(context.pattern_ptr);
1802
case SRE_OP_NOT_CHARSET_IGNORE:
1804
// Match character not in charset, ignoring case.
1805
// <NOT_CHARSET_IGNORE> <charset>
1806
TRACE(("|%p|%p|NOT_CHARSET_IGNORE\n", context.pattern_ptr, context.text_ptr));
1807
if (context.text_ptr >= context.text_end || in_charset(context.pattern_ptr, state->lower(context.text_ptr[0])))
1809
context.pattern_ptr = skip_charset(context.pattern_ptr);
1813
case SRE_OP_NOT_CHARSET_IGNORE_REV:
1815
// Match character not in charset, ignoring case.
1816
// <NOT_CHARSET_IGNORE_REV> <charset>
1817
TRACE(("|%p|%p|NOT_CHARSET_IGNORE_REV\n", context.pattern_ptr, context.text_ptr));
1818
if (context.text_ptr <= context.text_start || in_charset(context.pattern_ptr, state->lower(context.text_ptr[-1])))
1820
context.pattern_ptr = skip_charset(context.pattern_ptr);
1824
case SRE_OP_NOT_CHARSET_REV:
1826
// Match character not in charset.
1827
// <NOT_CHARSET_REV> <charset>
1828
TRACE(("|%p|%p|NOT_CHARSET_REV\n", context.pattern_ptr, context.text_ptr));
1829
if (context.text_ptr <= context.text_start || in_charset(context.pattern_ptr, context.text_ptr[-1]))
1831
context.pattern_ptr = skip_charset(context.pattern_ptr);
1835
case SRE_OP_NOT_DIGIT:
1838
TRACE(("|%p|%p|NOT_DIGIT\n", context.pattern_ptr, context.text_ptr));
1839
if (context.text_ptr >= context.text_end || SRE_IS_DIGIT(context.text_ptr[0]))
1843
case SRE_OP_NOT_DIGIT_REV:
1846
TRACE(("|%p|%p|NOT_DIGIT_REV\n", context.pattern_ptr, context.text_ptr));
1847
if (context.text_ptr <= context.text_start || SRE_IS_DIGIT(context.text_ptr[-1]))
1852
// Match not set member.
1854
TRACE(("|%p|%p|NOT_IN\n", context.pattern_ptr, context.text_ptr));
1855
if (context.text_ptr >= context.text_end || SRE_IN(context.pattern_ptr, context.text_ptr[0]))
1857
context.pattern_ptr += context.pattern_ptr[0];
1860
case SRE_OP_NOT_IN_IGNORE:
1861
// Match not set member, ignoring case.
1862
// <NOT_IN_IGNORE> <set>
1863
TRACE(("|%p|%p|NOT_IN_IGNORE\n", context.pattern_ptr, context.text_ptr));
1864
if (context.text_ptr >= context.text_end || SRE_IN(context.pattern_ptr, state->lower(context.text_ptr[0])))
1866
context.pattern_ptr += context.pattern_ptr[0];
1869
case SRE_OP_NOT_IN_IGNORE_REV:
1870
// Match not set member, ignoring case.
1871
// <NOT_IN_IGNORE_REV> <set>
1872
TRACE(("|%p|%p|NOT_IN_IGNORE_REV\n", context.pattern_ptr, context.text_ptr));
1873
if (context.text_ptr <= context.text_start || SRE_IN(context.pattern_ptr, state->lower(context.text_ptr[-1])))
1875
context.pattern_ptr += context.pattern_ptr[0];
1878
case SRE_OP_NOT_IN_REV:
1879
// Match not set member.
1880
// <NOT_IN_REV> <set>
1881
TRACE(("|%p|%p|NOT_IN_REV\n", context.pattern_ptr, context.text_ptr));
1882
if (context.text_ptr <= context.text_start || SRE_IN(context.pattern_ptr, context.text_ptr[-1]))
1884
context.pattern_ptr += context.pattern_ptr[0];
1887
case SRE_OP_NOT_LITERAL:
1888
// Match a character that is not this character.
1889
// <NOT_LITERAL> <code>
1890
TRACE(("|%p|%p|NOT_LITERAL %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1891
if (context.text_ptr >= context.text_end || (SRE_CODE) context.text_ptr[0] == context.pattern_ptr[0])
1893
context.pattern_ptr++;
1896
case SRE_OP_NOT_LITERAL_IGNORE:
1897
// Match a character that is not this character, ignoring case.
1898
// <NOT_LITERAL_IGNORE> <code>
1899
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1900
if (context.text_ptr >= context.text_end || state->lower(context.text_ptr[0]) == context.pattern_ptr[0])
1902
context.pattern_ptr++;
1905
case SRE_OP_NOT_LITERAL_IGNORE_REV:
1906
// Match a character that is not this character, ignoring case.
1907
// <NOT_LITERAL_IGNORE_REV> <code>
1908
TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1909
if (context.text_ptr <= context.text_start || state->lower(context.text_ptr[-1]) == context.pattern_ptr[0])
1911
context.pattern_ptr++;
1914
case SRE_OP_NOT_LITERAL_REV:
1915
// Match a character that is not this character.
1916
// <NOT_LITERAL_REV> <code>
1917
TRACE(("|%p|%p|NOT_LITERAL %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1918
if (context.text_ptr <= context.text_start || (SRE_CODE) context.text_ptr[-1] == context.pattern_ptr[0])
1920
context.pattern_ptr++;
1923
case SRE_OP_NOT_RANGE:
1924
// Match not range character.
1925
// <NOT_RANGE> <lower> <upper>
1926
TRACE(("|%p|%p|NOT_RANGE %d %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1927
if (context.text_ptr >= context.text_end || SRE_IN_RANGE(context.text_ptr[0], context.pattern_ptr[0], context.pattern_ptr[1]))
1929
context.pattern_ptr += 2;
1932
case SRE_OP_NOT_RANGE_IGNORE:
1933
// Match not range character, ignoring case.
1934
// <NOT_RANGE_IGNORE> <lower> <upper>
1935
TRACE(("|%p|%p|NOT_RANGE_IGNORE %d %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1936
if (context.text_ptr >= context.text_end || SRE_IN_RANGE(state->lower(context.text_ptr[0]), context.pattern_ptr[0], context.pattern_ptr[1]))
1938
context.pattern_ptr += 2;
1941
case SRE_OP_NOT_RANGE_IGNORE_REV:
1942
// Match not range character, ignoring case.
1943
// <NOT_RANGE_IGNORE_REV> <lower> <upper>
1944
TRACE(("|%p|%p|NOT_RANGE_IGNORE %d %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1945
if (context.text_ptr <= context.text_start || SRE_IN_RANGE(state->lower(context.text_ptr[-1]), context.pattern_ptr[0], context.pattern_ptr[1]))
1947
context.pattern_ptr += 2;
1950
case SRE_OP_NOT_RANGE_REV:
1951
// Match not range character.
1952
// <NOT_RANGE_REV> <lower> <upper>
1953
TRACE(("|%p|%p|NOT_RANGE %d %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1954
if (context.text_ptr <= context.text_start || SRE_IN_RANGE(context.text_ptr[-1], context.pattern_ptr[0], context.pattern_ptr[1]))
1956
context.pattern_ptr += 2;
1959
case SRE_OP_NOT_WHITESPACE:
1962
TRACE(("|%p|%p|NOT_WHITESPACE\n", context.pattern_ptr, context.text_ptr));
1963
if (context.text_ptr >= context.text_end || SRE_IS_WHITESPACE(context.text_ptr[0]))
1967
case SRE_OP_NOT_WHITESPACE_REV:
1969
// <NOT_WHITESPACE_REV>
1970
TRACE(("|%p|%p|NOT_WHITESPACE_REV\n", context.pattern_ptr, context.text_ptr));
1971
if (context.text_ptr <= context.text_start || SRE_IS_WHITESPACE(context.text_ptr[-1]))
1975
case SRE_OP_NOT_WORD:
1978
TRACE(("|%p|%p|NOT_WORD\n", context.pattern_ptr, context.text_ptr));
1979
if (context.text_ptr >= context.text_end || SRE_IS_WORD(context.text_ptr[0]))
1983
case SRE_OP_NOT_WORD_REV:
1986
TRACE(("|%p|%p|NOT_WORD_REV\n", context.pattern_ptr, context.text_ptr));
1987
if (context.text_ptr <= context.text_start || SRE_IS_WORD(context.text_ptr[-1]))
1992
// Match range character.
1993
// <RANGE> <lower> <upper>
1994
TRACE(("|%p|%p|RANGE %d %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
1995
if (context.text_ptr >= context.text_end || !SRE_IN_RANGE(context.text_ptr[0], context.pattern_ptr[0], context.pattern_ptr[1]))
1997
context.pattern_ptr += 2;
2000
case SRE_OP_RANGE_IGNORE:
2001
// Match range character, ignoring case.
2002
// <RANGE_IGNORE> <lower> <upper>
2003
TRACE(("|%p|%p|RANGE_IGNORE %d %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
2004
if (context.text_ptr >= context.text_end || !SRE_IN_RANGE(state->lower(context.text_ptr[0]), context.pattern_ptr[0], context.pattern_ptr[1]))
2006
context.pattern_ptr += 2;
2009
case SRE_OP_RANGE_IGNORE_REV:
2010
// Match range character, ignoring case.
2011
// <RANGE_IGNORE_REV> <lower> <upper>
2012
TRACE(("|%p|%p|RANGE_IGNORE %d %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
2013
if (context.text_ptr <= context.text_start || !SRE_IN_RANGE(state->lower(context.text_ptr[-1]), context.pattern_ptr[0], context.pattern_ptr[1]))
2015
context.pattern_ptr += 2;
2018
case SRE_OP_RANGE_REV:
2019
// Match range character.
2020
// <RANGE_REV> <lower> <upper>
2021
TRACE(("|%p|%p|RANGE %d %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[0]));
2022
if (context.text_ptr <= context.text_start || !SRE_IN_RANGE(context.text_ptr[-1], context.pattern_ptr[0], context.pattern_ptr[1]))
2024
context.pattern_ptr += 2;
2027
case SRE_OP_REPEAT_MAX:
2030
// <REPEAT_MAX> <skip to end> <index> <min> <max> ... <END_REPEAT_MAX> <skip to start>
2031
SRE_CODE* repeat_ptr = context.pattern_ptr;
2032
SRE_CODE* end_repeat_ptr = context.pattern_ptr + context.pattern_ptr[0];
2033
SRE_CODE* tail = end_repeat_ptr + 1;
2034
int index = repeat_ptr[1];
2035
int repeat_min = repeat_ptr[2];
2036
int repeat_max = repeat_ptr[3];
2037
SRE_CODE* body = repeat_ptr + 4;
2038
Py_ssize_t limit = context.text_end - context.text_ptr;
2039
TRACE(("|%p|%p|REPEAT_MAX %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
2040
if (repeat_min > limit)
2042
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_MAX, repeat_ptr, index);
2044
return SRE_CLEANUP(&context, state, result);
2045
result = SRE_SAVE_MARKS(&context);
2047
return SRE_CLEANUP(&context, state, result);
2048
context.repeat_counter[index] = 0;
2049
if (repeat_min == 0) {
2051
// Look at what follows to avoid unnecessary backtracking.
2053
SRE_CODE* look_literal = tail;
2054
while (look_literal[0] == SRE_OP_MARK)
2056
match = SRE_LOOK_AHEAD_ONE(&context, state, look_literal);
2060
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MAX, end_repeat_ptr, -1);
2062
return SRE_CLEANUP(&context, state, result);
2063
result = SRE_SAVE_MARKS(&context);
2065
return SRE_CLEANUP(&context, state, result);
2068
context.pattern_ptr = body;
2069
context.repeat_start[index] = context.text_ptr;
2072
case SRE_OP_REPEAT_MAX_REV:
2075
// <REPEAT_MAX_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_MAX_REV> <skip to start>
2076
SRE_CODE* repeat_ptr = context.pattern_ptr;
2077
SRE_CODE* end_repeat_ptr = context.pattern_ptr + context.pattern_ptr[0];
2078
SRE_CODE* tail = end_repeat_ptr + 1;
2079
int index = repeat_ptr[1];
2080
int repeat_min = repeat_ptr[2];
2081
int repeat_max = repeat_ptr[3];
2082
SRE_CODE* body = repeat_ptr + 4;
2083
Py_ssize_t limit = context.text_ptr - context.text_start;
2084
TRACE(("|%p|%p|REPEAT_MAX_REV %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
2085
if (repeat_min > limit)
2087
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_MAX_REV, repeat_ptr, index);
2089
return SRE_CLEANUP(&context, state, result);
2090
result = SRE_SAVE_MARKS(&context);
2092
return SRE_CLEANUP(&context, state, result);
2093
context.repeat_counter[index] = 0;
2094
if (repeat_min == 0) {
2096
// Look at what follows to avoid unnecessary backtracking.
2098
SRE_CODE* look_literal = tail;
2099
while (look_literal[0] == SRE_OP_MARK)
2101
match = SRE_LOOK_AHEAD_ONE_REV(&context, state, look_literal);
2105
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MAX_REV, end_repeat_ptr, -1);
2107
return SRE_CLEANUP(&context, state, result);
2108
result = SRE_SAVE_MARKS(&context);
2110
return SRE_CLEANUP(&context, state, result);
2113
context.pattern_ptr = body;
2114
context.repeat_start[index] = context.text_ptr;
2117
case SRE_OP_REPEAT_MIN:
2120
// <REPEAT_MIN> <skip to end> <index> <min> <max> ... <END_REPEAT_MIN> <skip to start>
2121
SRE_CODE* repeat_ptr = context.pattern_ptr;
2122
SRE_CODE* end_repeat_ptr = context.pattern_ptr + context.pattern_ptr[0];
2123
SRE_CODE* tail = end_repeat_ptr + 1;
2124
int index = repeat_ptr[1];
2125
int repeat_min = repeat_ptr[2];
2126
int repeat_max = repeat_ptr[3];
2127
SRE_CODE* body = repeat_ptr + 4;
2128
Py_ssize_t limit = context.text_end - context.text_ptr;
2129
TRACE(("|%p|%p|REPEAT_MIN %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
2130
if (repeat_min > limit)
2132
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_MIN, repeat_ptr, index);
2134
return SRE_CLEANUP(&context, state, result);
2135
result = SRE_SAVE_MARKS(&context);
2137
return SRE_CLEANUP(&context, state, result);
2138
context.repeat_counter[index] = 0;
2139
if (repeat_min == 0) {
2142
SRE_CODE* look_literal = tail;
2143
while (look_literal[0] == SRE_OP_MARK)
2145
// Look at what follows to avoid unnecessary backtracking.
2146
match = SRE_LOOK_AHEAD_ONE(&context, state, look_literal);
2150
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MIN, end_repeat_ptr, -1);
2152
return SRE_CLEANUP(&context, state, result);
2153
result = SRE_SAVE_MARKS(&context);
2155
return SRE_CLEANUP(&context, state, result);
2156
context.pattern_ptr = tail;
2158
context.pattern_ptr = body;
2159
context.repeat_start[index] = context.text_ptr;
2162
context.pattern_ptr = body;
2163
context.repeat_start[index] = context.text_ptr;
2167
case SRE_OP_REPEAT_MIN_REV:
2170
// <REPEAT_MIN_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_MIN_REV> <skip to start>
2171
SRE_CODE* repeat_ptr = context.pattern_ptr;
2172
SRE_CODE* end_repeat_ptr = context.pattern_ptr + context.pattern_ptr[0];
2173
SRE_CODE* tail = end_repeat_ptr + 1;
2174
int index = repeat_ptr[1];
2175
int repeat_min = repeat_ptr[2];
2176
int repeat_max = repeat_ptr[3];
2177
SRE_CODE* body = repeat_ptr + 4;
2178
Py_ssize_t limit = context.text_ptr - context.text_start;
2179
TRACE(("|%p|%p|REPEAT_MIN_REV %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
2180
if (repeat_min > limit)
2182
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_MIN_REV, repeat_ptr, index);
2184
return SRE_CLEANUP(&context, state, result);
2185
result = SRE_SAVE_MARKS(&context);
2187
return SRE_CLEANUP(&context, state, result);
2188
context.repeat_counter[index] = 0;
2189
if (repeat_min == 0) {
2192
SRE_CODE* look_literal = tail;
2193
while (look_literal[0] == SRE_OP_MARK)
2195
// Look at what follows to avoid unnecessary backtracking.
2196
match = SRE_LOOK_AHEAD_ONE_REV(&context, state, look_literal);
2200
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_MIN_REV, end_repeat_ptr, -1);
2202
return SRE_CLEANUP(&context, state, result);
2203
result = SRE_SAVE_MARKS(&context);
2205
return SRE_CLEANUP(&context, state, result);
2206
context.pattern_ptr = tail;
2208
context.pattern_ptr = body;
2209
context.repeat_start[index] = context.text_ptr;
2212
context.pattern_ptr = body;
2213
context.repeat_start[index] = context.text_ptr;
2217
case SRE_OP_REPEAT_ONE_MAX:
2220
// <REPEAT_ONE_MAX> <skip to end> <index> <min> <max> ...
2221
SRE_CODE* repeat_ptr = context.pattern_ptr;
2222
int index = context.pattern_ptr[1];
2223
int repeat_min = context.pattern_ptr[2];
2224
int repeat_max = context.pattern_ptr[3];
2225
SRE_CODE* body = context.pattern_ptr + 4;
2226
SRE_CODE* tail = context.pattern_ptr + context.pattern_ptr[0];
2227
Py_ssize_t limit = context.text_end - context.text_ptr;
2228
SRE_CODE* look_literal = tail;
2229
SRE_CHAR* start_ptr = context.text_ptr;
2231
TRACE(("|%p|%p|REPEAT_ONE_MAX %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
2232
if (repeat_min > limit)
2234
repeat_max = sre_repeat_limit(repeat_max, limit);
2235
end_ptr = start_ptr + repeat_max;
2236
// Match up to the maximum.
2240
TRACE(("|%p|%p|ANY\n", body, context.text_ptr));
2241
while (context.text_ptr < end_ptr && !SRE_IS_LINEBREAK(context.text_ptr[0]))
2244
case SRE_OP_ANY_ALL:
2246
TRACE(("|%p|%p|ANY_ALL\n", body, context.text_ptr));
2247
context.text_ptr = end_ptr;
2249
case SRE_OP_BIGCHARSET:
2250
// <BIGCHARSET> <charset>
2251
TRACE(("|%p|%p|BIGCHARSET\n", body, context.text_ptr));
2252
while (context.text_ptr < end_ptr && in_bigcharset(body + 1, context.text_ptr[0]))
2255
case SRE_OP_BIGCHARSET_IGNORE:
2256
// <BIGCHARSET_IGNORE> <charset>
2257
TRACE(("|%p|%p|BIGCHARSET_IGNORE\n", body, context.text_ptr));
2258
while (context.text_ptr < end_ptr && in_bigcharset(body + 1, state->lower(context.text_ptr[0])))
2260
case SRE_OP_CHARSET:
2261
// <CHARSET> <charset>
2262
TRACE(("|%p|%p|CHARSET\n", body, context.text_ptr));
2263
while (context.text_ptr < end_ptr && in_charset(body + 1, context.text_ptr[0]))
2266
case SRE_OP_CHARSET_IGNORE:
2267
// <CHARSET_IGNORE> <charset>
2268
TRACE(("|%p|%p|CHARSET_IGNORE\n", body, context.text_ptr));
2269
while (context.text_ptr < end_ptr && in_charset(body + 1, state->lower(context.text_ptr[0])))
2274
TRACE(("|%p|%p|DIGIT\n", body, context.text_ptr));
2275
while (context.text_ptr < end_ptr && SRE_IS_DIGIT(context.text_ptr[0]))
2280
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
2281
while (context.text_ptr < end_ptr && SRE_IN(body + 1, context.text_ptr[0]))
2284
case SRE_OP_IN_IGNORE:
2285
// <IN_IGNORE> <set>
2286
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
2287
while (context.text_ptr < end_ptr && SRE_IN(body + 1, state->lower(context.text_ptr[0])))
2290
case SRE_OP_LITERAL:
2292
TRACE(("|%p|%p|LITERAL %d\n", body, context.text_ptr, body[1]));
2293
while (context.text_ptr < end_ptr && context.text_ptr[0] == (SRE_CHAR)body[1])
2296
case SRE_OP_LITERAL_IGNORE:
2297
// <LITERAL_IGNORE> <code>
2298
TRACE(("|%p|%p|LITERAL %d\n", body, context.text_ptr, body[1]));
2299
while (context.text_ptr < end_ptr && state->lower(context.text_ptr[0]) == (SRE_CHAR)body[1])
2302
case SRE_OP_LOC_NOT_WORD:
2304
TRACE(("|%p|%p|LOC_NOT_WORD\n", body, context.text_ptr));
2305
while (context.text_ptr < end_ptr && !SRE_LOC_IS_WORD(context.text_ptr[0]))
2308
case SRE_OP_LOC_WORD:
2310
TRACE(("|%p|%p|LOC_WORD\n", body, context.text_ptr));
2311
while (context.text_ptr < end_ptr && SRE_LOC_IS_WORD(context.text_ptr[0]))
2314
case SRE_OP_NOT_BIGCHARSET:
2315
// <NOT_BIGCHARSET> <charset>
2316
TRACE(("|%p|%p|NOT_BIGCHARSET\n", body, context.text_ptr));
2317
while (context.text_ptr < end_ptr && !in_bigcharset(body + 1, context.text_ptr[0]))
2320
case SRE_OP_NOT_BIGCHARSET_IGNORE:
2321
// <NOT_BIGCHARSET_IGNORE> <charset>
2322
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE\n", body, context.text_ptr));
2323
while (context.text_ptr < end_ptr && !in_bigcharset(body + 1, state->lower(context.text_ptr[0])))
2326
case SRE_OP_NOT_CHARSET:
2327
// <NOT_CHARSET> <charset>
2328
TRACE(("|%p|%p|NOT_CHARSET\n", body, context.text_ptr));
2329
while (context.text_ptr < end_ptr && !in_charset(body + 1, context.text_ptr[0]))
2332
case SRE_OP_NOT_CHARSET_IGNORE:
2333
// <NOT_CHARSET_IGNORE> <charset>
2334
TRACE(("|%p|%p|NOT_CHARSET_IGNORE\n", body, context.text_ptr));
2335
while (context.text_ptr < end_ptr && !in_charset(body + 1, state->lower(context.text_ptr[0])))
2338
case SRE_OP_NOT_DIGIT:
2340
TRACE(("|%p|%p|NOT_DIGIT\n", body, context.text_ptr));
2341
while (context.text_ptr < end_ptr && !SRE_IS_DIGIT(context.text_ptr[0]))
2346
TRACE(("|%p|%p|NOT_IN\n", body, context.text_ptr));
2347
while (context.text_ptr < end_ptr && !SRE_IN(body + 1, context.text_ptr[0]))
2350
case SRE_OP_NOT_IN_IGNORE:
2351
// <NOT_IN_IGNORE> <set>
2352
TRACE(("|%p|%p|NOT_IN\n", body, context.text_ptr));
2353
while (context.text_ptr < end_ptr && !SRE_IN(body + 1, state->lower(context.text_ptr[0])))
2356
case SRE_OP_NOT_LITERAL:
2357
// <NOT_LITERAL> <code>
2358
TRACE(("|%p|%p|NOT_LITERAL %d\n", body, context.text_ptr, body[1]));
2359
while (context.text_ptr < end_ptr && context.text_ptr[0] != (SRE_CHAR)body[1])
2362
case SRE_OP_NOT_LITERAL_IGNORE:
2363
// <NOT_LITERAL_IGNORE> <code>
2364
TRACE(("|%p|%p|NOT_LITERAL %d\n", body, context.text_ptr, body[1]));
2365
while (context.text_ptr < end_ptr && state->lower(context.text_ptr[0]) != (SRE_CHAR)body[1])
2368
case SRE_OP_NOT_RANGE:
2369
// <NOT_RANGE> <lower> <upper>
2370
TRACE(("|%p|%p|NOT_RANGE %d %d\n", body, context.text_ptr, body[1], body[2]));
2371
while (context.text_ptr < end_ptr && !SRE_IN_RANGE(context.text_ptr[0], body[1], body[2]))
2374
case SRE_OP_NOT_RANGE_IGNORE:
2375
// <NOT_RANGE_IGNORE> <lower> <upper>
2376
TRACE(("|%p|%p|NOT_RANGE_IGNORE %d %d\n", body, context.text_ptr, body[1], body[2]));
2377
while (context.text_ptr < end_ptr && !SRE_IN_RANGE(state->lower(context.text_ptr[0]), body[1], body[2]))
2380
case SRE_OP_NOT_WHITESPACE:
2382
TRACE(("|%p|%p|NOT_WHITESPACE\n", body, context.text_ptr));
2383
while (context.text_ptr < end_ptr && !SRE_IS_WHITESPACE(context.text_ptr[0]))
2386
case SRE_OP_NOT_WORD:
2388
TRACE(("|%p|%p|NOT_WORD\n", body, context.text_ptr));
2389
while (context.text_ptr < end_ptr && !SRE_IS_WORD(context.text_ptr[0]))
2393
// <RANGE> <lower> <upper>
2394
TRACE(("|%p|%p|RANGE %d %d\n", body, context.text_ptr, body[1], body[2]));
2395
while (context.text_ptr < end_ptr && SRE_IN_RANGE(context.text_ptr[0], body[1], body[2]))
2398
case SRE_OP_RANGE_IGNORE:
2399
// <RANGE_IGNORE> <lower> <upper>
2400
TRACE(("|%p|%p|RANGE_IGNORE %d %d\n", body, context.text_ptr, body[1], body[2]));
2401
while (context.text_ptr < end_ptr && SRE_IN_RANGE(state->lower(context.text_ptr[0]), body[1], body[2]))
2404
case SRE_OP_UNI_DIGIT:
2406
TRACE(("|%p|%p|UNI_DIGIT\n", body, context.text_ptr));
2407
while (context.text_ptr < end_ptr && SRE_UNI_IS_DIGIT(context.text_ptr[0]))
2410
case SRE_OP_UNI_NOT_DIGIT:
2412
TRACE(("|%p|%p|UNI_NOT_DIGIT\n", body, context.text_ptr));
2413
while (context.text_ptr < end_ptr && !SRE_UNI_IS_DIGIT(context.text_ptr[0]))
2416
case SRE_OP_UNI_NOT_WHITESPACE:
2417
// <UNI_NOT_WHITESPACE>
2418
TRACE(("|%p|%p|UNI_NOT_WHITESPACE\n", body, context.text_ptr));
2419
while (context.text_ptr < end_ptr && !SRE_UNI_IS_WHITESPACE(context.text_ptr[0]))
2422
case SRE_OP_UNI_NOT_WORD:
2424
TRACE(("|%p|%p|UNI_NOT_WORD\n", body, context.text_ptr));
2425
while (context.text_ptr < end_ptr && !SRE_UNI_IS_WORD(context.text_ptr[0]))
2428
case SRE_OP_UNI_WHITESPACE:
2430
TRACE(("|%p|%p|UNI_WHITESPACE\n", body, context.text_ptr));
2431
while (context.text_ptr < end_ptr && SRE_UNI_IS_WHITESPACE(context.text_ptr[0]))
2434
case SRE_OP_UNI_WORD:
2436
TRACE(("|%p|%p|UNI_WORD\n", body, context.text_ptr));
2437
while (context.text_ptr < end_ptr && SRE_UNI_IS_WORD(context.text_ptr[0]))
2440
case SRE_OP_WHITESPACE:
2442
TRACE(("|%p|%p|WHITESPACE\n", body, context.text_ptr));
2443
while (context.text_ptr < end_ptr && SRE_IS_WHITESPACE(context.text_ptr[0]))
2448
TRACE(("|%p|%p|WORD\n", body, context.text_ptr));
2449
while (context.text_ptr < end_ptr && SRE_IS_WORD(context.text_ptr[0]))
2453
start_ptr += repeat_min;
2454
// Look at what follows to avoid unnecessary backtracking.
2455
while (look_literal[0] == SRE_OP_MARK)
2457
SRE_LOOK_AHEAD_MANY(&context, start_ptr, state, look_literal);
2458
// Matched at least the minimum?
2459
if (context.text_ptr < start_ptr)
2461
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_ONE_MAX, repeat_ptr, index);
2463
return SRE_CLEANUP(&context, state, result);
2464
context.repeat_counter[index] = repeat_min + (context.text_ptr - start_ptr);
2465
context.pattern_ptr = tail;
2468
case SRE_OP_REPEAT_ONE_MAX_REV:
2471
// <REPEAT_ONE_MAX_REV> <skip to end> <index> <min> <max> ...
2472
SRE_CODE* repeat_ptr = context.pattern_ptr;
2473
int index = context.pattern_ptr[1];
2474
int repeat_min = context.pattern_ptr[2];
2475
int repeat_max = context.pattern_ptr[3];
2476
SRE_CODE* body = context.pattern_ptr + 4;
2477
SRE_CODE* tail = context.pattern_ptr + context.pattern_ptr[0];
2478
Py_ssize_t limit = context.text_ptr - context.text_start;
2479
SRE_CODE* look_literal = tail;
2480
SRE_CHAR* start_ptr = context.text_ptr;
2482
TRACE(("|%p|%p|REPEAT_ONE_MAX_REV %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
2483
if (repeat_min > limit)
2485
repeat_max = sre_repeat_limit(repeat_max, limit);
2486
end_ptr = start_ptr - repeat_max;
2487
// Match up to the maximum.
2489
case SRE_OP_ANY_REV:
2491
TRACE(("|%p|%p|ANY_REV\n", body, context.text_ptr));
2492
while (context.text_ptr > end_ptr && !SRE_IS_LINEBREAK(context.text_ptr[-1]))
2495
case SRE_OP_ANY_ALL_REV:
2497
TRACE(("|%p|%p|ANY_ALL_REV\n", body, context.text_ptr));
2498
context.text_ptr = end_ptr;
2500
case SRE_OP_BIGCHARSET_REV:
2501
// <BIGCHARSET_REV> <charset>
2502
TRACE(("|%p|%p|BIGCHARSET_REV\n", body, context.text_ptr));
2503
while (context.text_ptr > end_ptr && in_bigcharset(body + 1, context.text_ptr[-1]))
2506
case SRE_OP_BIGCHARSET_IGNORE_REV:
2507
// <BIGCHARSET_IGNORE_REV> <charset>
2508
TRACE(("|%p|%p|BIGCHARSET_IGNORE_REV\n", body, context.text_ptr));
2509
while (context.text_ptr > end_ptr && in_bigcharset(body + 1, state->lower(context.text_ptr[-1])))
2512
case SRE_OP_CHARSET_REV:
2513
// <CHARSET_REV> <charset>
2514
TRACE(("|%p|%p|CHARSET_REV\n", body, context.text_ptr));
2515
while (context.text_ptr > end_ptr && in_charset(body + 1, context.text_ptr[-1]))
2518
case SRE_OP_CHARSET_IGNORE_REV:
2519
// <CHARSET_IGNORE_REV> <charset>
2520
TRACE(("|%p|%p|CHARSET_IGNORE_REV\n", body, context.text_ptr));
2521
while (context.text_ptr > end_ptr && in_charset(body + 1, state->lower(context.text_ptr[-1])))
2524
case SRE_OP_DIGIT_REV:
2526
TRACE(("|%p|%p|DIGIT_REV\n", body, context.text_ptr));
2527
while (context.text_ptr > end_ptr && SRE_IS_DIGIT(context.text_ptr[-1]))
2532
TRACE(("|%p|%p|IN_REV\n", body, context.text_ptr));
2533
while (context.text_ptr > end_ptr && SRE_IN(body + 1, context.text_ptr[-1]))
2536
case SRE_OP_IN_IGNORE_REV:
2537
// <IN_IGNORE_REV> <set>
2538
TRACE(("|%p|%p|IN_REV\n", body, context.text_ptr));
2539
while (context.text_ptr > end_ptr && SRE_IN(body + 1, state->lower(context.text_ptr[-1])))
2542
case SRE_OP_LITERAL_REV:
2543
// <LITERAL_REV> <code>
2544
TRACE(("|%p|%p|LITERAL_REV %d\n", body, context.text_ptr, body[1]));
2545
while (context.text_ptr > end_ptr && context.text_ptr[-1] == (SRE_CHAR)body[1])
2548
case SRE_OP_LITERAL_IGNORE_REV:
2549
// <LITERAL_IGNORE_REV> <code>
2550
TRACE(("|%p|%p|LITERAL_REV %d\n", body, context.text_ptr, body[1]));
2551
while (context.text_ptr > end_ptr && state->lower(context.text_ptr[-1]) == (SRE_CHAR)body[1])
2554
case SRE_OP_LOC_NOT_WORD_REV:
2555
// <LOC_NOT_WORD_REV>
2556
TRACE(("|%p|%p|LOC_NOT_WORD_REV\n", body, context.text_ptr));
2557
while (context.text_ptr > end_ptr && !SRE_LOC_IS_WORD(context.text_ptr[-1]))
2560
case SRE_OP_LOC_WORD_REV:
2562
TRACE(("|%p|%p|LOC_WORD_REV\n", body, context.text_ptr));
2563
while (context.text_ptr > end_ptr && SRE_LOC_IS_WORD(context.text_ptr[-1]))
2566
case SRE_OP_NOT_BIGCHARSET_REV:
2567
// <NOT_BIGCHARSET_REV> <charset>
2568
TRACE(("|%p|%p|NOT_BIGCHARSET_REV\n", body, context.text_ptr));
2569
while (context.text_ptr > end_ptr && !in_bigcharset(body + 1, context.text_ptr[-1]))
2572
case SRE_OP_NOT_BIGCHARSET_IGNORE_REV:
2573
// <NOT_BIGCHARSET_IGNORE_REV> <charset>
2574
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE_REV\n", body, context.text_ptr));
2575
while (context.text_ptr > end_ptr && !in_bigcharset(body + 1, state->lower(context.text_ptr[-1])))
2578
case SRE_OP_NOT_CHARSET_REV:
2579
// <NOT_CHARSET_REV> <charset>
2580
TRACE(("|%p|%p|NOT_CHARSET_REV\n", body, context.text_ptr));
2581
while (context.text_ptr > end_ptr && !in_charset(body + 1, context.text_ptr[-1]))
2584
case SRE_OP_NOT_CHARSET_IGNORE_REV:
2585
// <NOT_CHARSET_IGNORE_REV> <charset>
2586
TRACE(("|%p|%p|NOT_CHARSET_IGNORE_REV\n", body, context.text_ptr));
2587
while (context.text_ptr > end_ptr && !in_charset(body + 1, state->lower(context.text_ptr[-1])))
2590
case SRE_OP_NOT_DIGIT_REV:
2592
TRACE(("|%p|%p|NOT_DIGIT_REV\n", body, context.text_ptr));
2593
while (context.text_ptr > end_ptr && !SRE_IS_DIGIT(context.text_ptr[-1]))
2596
case SRE_OP_NOT_IN_REV:
2597
// <NOT_IN_REV> <set>
2598
TRACE(("|%p|%p|NOT_IN_REV\n", body, context.text_ptr));
2599
while (context.text_ptr > end_ptr && !SRE_IN(body + 1, context.text_ptr[-1]))
2602
case SRE_OP_NOT_IN_IGNORE_REV:
2603
// <NOT_IN_IGNORE_REV> <set>
2604
TRACE(("|%p|%p|NOT_IN_REV\n", body, context.text_ptr));
2605
while (context.text_ptr > end_ptr && !SRE_IN(body + 1, state->lower(context.text_ptr[-1])))
2608
case SRE_OP_NOT_LITERAL_REV:
2609
// <NOT_LITERAL_REV> <code>
2610
TRACE(("|%p|%p|NOT_LITERAL_REV %d\n", body, context.text_ptr, body[1]));
2611
while (context.text_ptr > end_ptr && context.text_ptr[-1] != (SRE_CHAR)body[1])
2614
case SRE_OP_NOT_LITERAL_IGNORE_REV:
2615
// <NOT_LITERAL_IGNORE_REV> <code>
2616
TRACE(("|%p|%p|NOT_LITERAL_REV %d\n", body, context.text_ptr, body[1]));
2617
while (context.text_ptr > end_ptr && state->lower(context.text_ptr[-1]) != (SRE_CHAR)body[1])
2620
case SRE_OP_NOT_RANGE_REV:
2621
// <NOT_RANGE_REV> <lower> <upper>
2622
TRACE(("|%p|%p|NOT_RANGE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
2623
while (context.text_ptr > end_ptr && !SRE_IN_RANGE(context.text_ptr[-1], body[1], body[2]))
2626
case SRE_OP_NOT_RANGE_IGNORE_REV:
2627
// <NOT_RANGE_IGNORE_REV> <lower> <upper>
2628
TRACE(("|%p|%p|NOT_RANGE_IGNORE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
2629
while (context.text_ptr > end_ptr && !SRE_IN_RANGE(state->lower(context.text_ptr[-1]), body[1], body[2]))
2632
case SRE_OP_NOT_WHITESPACE_REV:
2633
// <NOT_WHITESPACE_REV>
2634
TRACE(("|%p|%p|NOT_WHITESPACE_REV\n", body, context.text_ptr));
2635
while (context.text_ptr > end_ptr && !SRE_IS_WHITESPACE(context.text_ptr[-1]))
2638
case SRE_OP_NOT_WORD_REV:
2640
TRACE(("|%p|%p|NOT_WORD_REV\n", body, context.text_ptr));
2641
while (context.text_ptr > end_ptr && !SRE_IS_WORD(context.text_ptr[-1]))
2644
case SRE_OP_RANGE_REV:
2645
// <RANGE_REV> <lower> <upper>
2646
TRACE(("|%p|%p|RANGE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
2647
while (context.text_ptr > end_ptr && SRE_IN_RANGE(context.text_ptr[-1], body[1], body[2]))
2650
case SRE_OP_RANGE_IGNORE_REV:
2651
// <RANGE_IGNORE_REV> <lower> <upper>
2652
TRACE(("|%p|%p|RANGE_IGNORE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
2653
while (context.text_ptr > end_ptr && SRE_IN_RANGE(state->lower(context.text_ptr[-1]), body[1], body[2]))
2656
case SRE_OP_UNI_DIGIT_REV:
2658
TRACE(("|%p|%p|UNI_DIGIT_REV\n", body, context.text_ptr));
2659
while (context.text_ptr > end_ptr && SRE_UNI_IS_DIGIT(context.text_ptr[-1]))
2662
case SRE_OP_UNI_NOT_DIGIT_REV:
2663
// <UNI_NOT_DIGIT_REV>
2664
TRACE(("|%p|%p|UNI_NOT_DIGIT_REV\n", body, context.text_ptr));
2665
while (context.text_ptr > end_ptr && !SRE_UNI_IS_DIGIT(context.text_ptr[-1]))
2668
case SRE_OP_UNI_NOT_WHITESPACE_REV:
2669
// <UNI_NOT_WHITESPACE_REV>
2670
TRACE(("|%p|%p|UNI_NOT_WHITESPACE_REV\n", body, context.text_ptr));
2671
while (context.text_ptr > end_ptr && !SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]))
2674
case SRE_OP_UNI_NOT_WORD_REV:
2675
// <UNI_NOT_WORD_REV>
2676
TRACE(("|%p|%p|UNI_NOT_WORD_REV\n", body, context.text_ptr));
2677
while (context.text_ptr > end_ptr && !SRE_UNI_IS_WORD(context.text_ptr[-1]))
2680
case SRE_OP_UNI_WHITESPACE_REV:
2681
// <UNI_WHITESPACE_REV>
2682
TRACE(("|%p|%p|UNI_WHITESPACE_REV\n", body, context.text_ptr));
2683
while (context.text_ptr > end_ptr && SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]))
2686
case SRE_OP_UNI_WORD_REV:
2688
TRACE(("|%p|%p|UNI_WORD_REV\n", body, context.text_ptr));
2689
while (context.text_ptr > end_ptr && SRE_UNI_IS_WORD(context.text_ptr[-1]))
2692
case SRE_OP_WHITESPACE_REV:
2694
TRACE(("|%p|%p|WHITESPACE_REV\n", body, context.text_ptr));
2695
while (context.text_ptr > end_ptr && SRE_IS_WHITESPACE(context.text_ptr[-1]))
2698
case SRE_OP_WORD_REV:
2700
TRACE(("|%p|%p|WORD_REV\n", body, context.text_ptr));
2701
while (context.text_ptr > end_ptr && SRE_IS_WORD(context.text_ptr[-1]))
2705
start_ptr -= repeat_min;
2706
// Look at what follows to avoid unnecessary backtracking.
2707
while (look_literal[0] == SRE_OP_MARK)
2709
SRE_LOOK_AHEAD_MANY_REV(&context, start_ptr, state, look_literal);
2710
// Matched at least the minimum?
2711
if (context.text_ptr > start_ptr)
2713
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_ONE_MAX_REV, repeat_ptr, index);
2715
return SRE_CLEANUP(&context, state, result);
2716
context.repeat_counter[index] = repeat_min + (start_ptr - context.text_ptr);
2717
context.pattern_ptr = tail;
2720
case SRE_OP_REPEAT_ONE_MIN:
2723
// <REPEAT_ONE_MIN> <skip to end> <index> <min> <max> ...
2724
SRE_CODE* repeat_ptr = context.pattern_ptr;
2725
int index = context.pattern_ptr[1];
2726
int repeat_min = context.pattern_ptr[2];
2727
int repeat_max = context.pattern_ptr[3];
2728
SRE_CODE* body = context.pattern_ptr + 4;
2729
SRE_CODE* tail = context.pattern_ptr + context.pattern_ptr[0];
2730
Py_ssize_t limit = context.text_end - context.text_ptr;
2731
SRE_CODE* look_literal = tail;
2732
SRE_CHAR* start_ptr = context.text_ptr;
2735
TRACE(("|%p|%p|REPEAT_ONE_MIN %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
2736
if (repeat_min > limit)
2738
repeat_max = sre_repeat_limit(repeat_max, limit);
2739
end_ptr = start_ptr + repeat_min;
2740
// First match the minimum.
2744
TRACE(("|%p|%p|ANY\n", body, context.text_ptr));
2745
while (context.text_ptr < end_ptr && !SRE_IS_LINEBREAK(context.text_ptr[0]))
2748
case SRE_OP_ANY_ALL:
2750
TRACE(("|%p|%p|ANY_ALL\n", body, context.text_ptr));
2751
context.text_ptr = end_ptr;
2753
case SRE_OP_BIGCHARSET:
2754
// <BIGCHARSET> <charset>
2755
TRACE(("|%p|%p|BIGCHARSET\n", body, context.text_ptr));
2756
while (context.text_ptr < end_ptr && in_bigcharset(body + 1, context.text_ptr[0]))
2759
case SRE_OP_BIGCHARSET_IGNORE:
2760
// <BIGCHARSET_IGNORE> <charset>
2761
TRACE(("|%p|%p|BIGCHARSET_IGNORE\n", body, context.text_ptr));
2762
while (context.text_ptr < end_ptr && in_bigcharset(body + 1, state->lower(context.text_ptr[0])))
2765
case SRE_OP_CHARSET:
2766
// <CHARSET> <charset>
2767
TRACE(("|%p|%p|CHARSET\n", body, context.text_ptr));
2768
while (context.text_ptr < end_ptr && in_charset(body + 1, context.text_ptr[0]))
2771
case SRE_OP_CHARSET_IGNORE:
2772
// <CHARSET_IGNORE> <charset>
2773
TRACE(("|%p|%p|CHARSET_IGNORE\n", body, context.text_ptr));
2774
while (context.text_ptr < end_ptr && in_charset(body + 1, state->lower(context.text_ptr[0])))
2779
TRACE(("|%p|%p|DIGIT\n", body, context.text_ptr));
2780
while (context.text_ptr < end_ptr && SRE_IS_DIGIT(context.text_ptr[0]))
2785
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
2786
while (context.text_ptr < end_ptr && SRE_IN(body + 1, context.text_ptr[0]))
2789
case SRE_OP_IN_IGNORE:
2790
// <IN_IGNORE> <set>
2791
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
2792
while (context.text_ptr < end_ptr && SRE_IN(body + 1, state->lower(context.text_ptr[0])))
2795
case SRE_OP_LITERAL:
2797
TRACE(("|%p|%p|LITERAL %d\n", body, context.text_ptr, body[1]));
2798
while (context.text_ptr < end_ptr && context.text_ptr[0] == (SRE_CHAR)body[1])
2801
case SRE_OP_LITERAL_IGNORE:
2802
// <LITERAL_IGNORE> <code>
2803
TRACE(("|%p|%p|LITERAL %d\n", body, context.text_ptr, body[1]));
2804
while (context.text_ptr < end_ptr && state->lower(context.text_ptr[0]) == (SRE_CHAR)body[1])
2807
case SRE_OP_LOC_NOT_WORD:
2809
TRACE(("|%p|%p|LOC_NOT_WORD\n", body, context.text_ptr));
2810
while (context.text_ptr < end_ptr && !SRE_LOC_IS_WORD(context.text_ptr[0]))
2813
case SRE_OP_LOC_WORD:
2815
TRACE(("|%p|%p|LOC_WORD\n", body, context.text_ptr));
2816
while (context.text_ptr < end_ptr && SRE_LOC_IS_WORD(context.text_ptr[0]))
2819
case SRE_OP_NOT_BIGCHARSET:
2820
// <NOT_BIGCHARSET> <charset>
2821
TRACE(("|%p|%p|NOT_BIGCHARSET\n", body, context.text_ptr));
2822
while (context.text_ptr < end_ptr && !in_bigcharset(body + 1, context.text_ptr[0]))
2825
case SRE_OP_NOT_BIGCHARSET_IGNORE:
2826
// <NOT_BIGCHARSET_IGNORE> <charset>
2827
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE\n", body, context.text_ptr));
2828
while (context.text_ptr < end_ptr && !in_bigcharset(body + 1, state->lower(context.text_ptr[0])))
2831
case SRE_OP_NOT_CHARSET:
2832
// <NOT_CHARSET> <charset>
2833
TRACE(("|%p|%p|NOT_CHARSET\n", body, context.text_ptr));
2834
while (context.text_ptr < end_ptr && !in_charset(body + 1, context.text_ptr[0]))
2837
case SRE_OP_NOT_CHARSET_IGNORE:
2838
// <NOT_CHARSET_IGNORE> <charset>
2839
TRACE(("|%p|%p|NOT_CHARSET_IGNORE\n", body, context.text_ptr));
2840
while (context.text_ptr < end_ptr && !in_charset(body + 1, state->lower(context.text_ptr[0])))
2843
case SRE_OP_NOT_DIGIT:
2845
TRACE(("|%p|%p|NOT_DIGIT\n", body, context.text_ptr));
2846
while (context.text_ptr < end_ptr && !SRE_IS_DIGIT(context.text_ptr[0]))
2851
TRACE(("|%p|%p|NOT_IN\n", body, context.text_ptr));
2852
while (context.text_ptr < end_ptr && !SRE_IN(body + 1, context.text_ptr[0]))
2855
case SRE_OP_NOT_IN_IGNORE:
2856
// <NOT_IN_IGNORE> <set>
2857
TRACE(("|%p|%p|NOT_IN\n", body, context.text_ptr));
2858
while (context.text_ptr < end_ptr && !SRE_IN(body + 1, state->lower(context.text_ptr[0])))
2861
case SRE_OP_NOT_LITERAL:
2862
// <NOT_LITERAL> <code>
2863
TRACE(("|%p|%p|NOT_LITERAL %d\n", body, context.text_ptr, body[1]));
2864
while (context.text_ptr < end_ptr && context.text_ptr[0] != (SRE_CHAR)body[1])
2867
case SRE_OP_NOT_LITERAL_IGNORE:
2868
// <NOT_LITERAL_IGNORE> <code>
2869
TRACE(("|%p|%p|NOT_LITERAL %d\n", body, context.text_ptr, body[1]));
2870
while (context.text_ptr < end_ptr && state->lower(context.text_ptr[0]) != (SRE_CHAR)body[1])
2873
case SRE_OP_NOT_RANGE:
2874
// <NOT_RANGE> <lower> <upper>
2875
TRACE(("|%p|%p|NOT_RANGE %d %d\n", body, context.text_ptr, body[1], body[2]));
2876
while (context.text_ptr < end_ptr && !SRE_IN_RANGE(context.text_ptr[0], body[1], body[2]))
2879
case SRE_OP_NOT_RANGE_IGNORE:
2880
// <NOT_RANGE_IGNORE> <lower> <upper>
2881
TRACE(("|%p|%p|NOT_RANGE_IGNORE %d %d\n", body, context.text_ptr, body[1], body[2]));
2882
while (context.text_ptr < end_ptr && !SRE_IN_RANGE(state->lower(context.text_ptr[0]), body[1], body[2]))
2885
case SRE_OP_NOT_WHITESPACE:
2887
TRACE(("|%p|%p|NOT_WHITESPACE\n", body, context.text_ptr));
2888
while (context.text_ptr < end_ptr && !SRE_IS_WHITESPACE(context.text_ptr[0]))
2891
case SRE_OP_NOT_WORD:
2893
TRACE(("|%p|%p|NOT_WORD\n", body, context.text_ptr));
2894
while (context.text_ptr < end_ptr && !SRE_IS_WORD(context.text_ptr[0]))
2898
// <RANGE> <lower> <upper>
2899
TRACE(("|%p|%p|RANGE %d %d\n", body, context.text_ptr, body[1], body[2]));
2900
while (context.text_ptr < end_ptr && SRE_IN_RANGE(context.text_ptr[0], body[1], body[2]))
2903
case SRE_OP_RANGE_IGNORE:
2904
// <RANGE_IGNORE> <lower> <upper>
2905
TRACE(("|%p|%p|RANGE_IGNORE %d %d\n", body, context.text_ptr, body[1], body[2]));
2906
while (context.text_ptr < end_ptr && SRE_IN_RANGE(state->lower(context.text_ptr[0]), body[1], body[2]))
2909
case SRE_OP_UNI_DIGIT:
2911
TRACE(("|%p|%p|UNI_DIGIT\n", body, context.text_ptr));
2912
while (context.text_ptr < end_ptr && SRE_UNI_IS_DIGIT(context.text_ptr[0]))
2915
case SRE_OP_UNI_NOT_DIGIT:
2917
TRACE(("|%p|%p|UNI_NOT_DIGIT\n", body, context.text_ptr));
2918
while (context.text_ptr < end_ptr && !SRE_UNI_IS_DIGIT(context.text_ptr[0]))
2921
case SRE_OP_UNI_NOT_WHITESPACE:
2922
// <UNI_NOT_WHITESPACE>
2923
TRACE(("|%p|%p|UNI_NOT_WHITESPACE\n", body, context.text_ptr));
2924
while (context.text_ptr < end_ptr && !SRE_UNI_IS_WHITESPACE(context.text_ptr[0]))
2927
case SRE_OP_UNI_NOT_WORD:
2929
TRACE(("|%p|%p|UNI_NOT_WORD\n", body, context.text_ptr));
2930
while (context.text_ptr < end_ptr && !SRE_UNI_IS_WORD(context.text_ptr[0]))
2933
case SRE_OP_UNI_WHITESPACE:
2935
TRACE(("|%p|%p|UNI_WHITESPACE\n", body, context.text_ptr));
2936
while (context.text_ptr < end_ptr && SRE_UNI_IS_WHITESPACE(context.text_ptr[0]))
2939
case SRE_OP_UNI_WORD:
2941
TRACE(("|%p|%p|UNI_WORD\n", body, context.text_ptr));
2942
while (context.text_ptr < end_ptr && SRE_UNI_IS_WORD(context.text_ptr[0]))
2945
case SRE_OP_WHITESPACE:
2947
TRACE(("|%p|%p|WHITESPACE\n", body, context.text_ptr));
2948
while (context.text_ptr < end_ptr && SRE_IS_WHITESPACE(context.text_ptr[0]))
2953
TRACE(("|%p|%p|WORD\n", body, context.text_ptr));
2954
while (context.text_ptr < end_ptr && SRE_IS_WORD(context.text_ptr[0]))
2958
// Matched at least the minimum?
2959
if (context.text_ptr < end_ptr)
2961
// Now try up to the maximum.
2962
end_ptr = start_ptr + repeat_max;
2963
while (look_literal[0] == SRE_OP_MARK)
2966
// Look at what follows to avoid unnecessary backtracking.
2967
if (SRE_LOOK_AHEAD_ONE(&context, state, look_literal)) {
2968
if (context.text_ptr < end_ptr) {
2969
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_ONE_MIN, repeat_ptr, index);
2971
return SRE_CLEANUP(&context, state, result);
2973
context.repeat_counter[index] = context.text_ptr - start_ptr;
2974
context.pattern_ptr = tail;
2978
if (context.text_ptr >= end_ptr)
2983
TRACE(("|%p|%p|ANY\n", body, context.text_ptr));
2984
match = !SRE_IS_LINEBREAK(context.text_ptr[0]);
2986
case SRE_OP_ANY_ALL:
2988
TRACE(("|%p|%p|ANY_ALL\n", body, context.text_ptr));
2991
case SRE_OP_BIGCHARSET:
2992
// <BIGCHARSET> <charset>
2993
TRACE(("|%p|%p|BIGCHARSET\n", body, context.text_ptr));
2994
match = in_bigcharset(body + 1, context.text_ptr[0]);
2996
case SRE_OP_BIGCHARSET_IGNORE:
2997
// <BIGCHARSET_IGNORE> <charset>
2998
TRACE(("|%p|%p|BIGCHARSET_IGNORE\n", body, context.text_ptr));
2999
match = in_bigcharset(body + 1, state->lower(context.text_ptr[0]));
3001
case SRE_OP_CHARSET:
3002
// <CHARSET> <charset>
3003
TRACE(("|%p|%p|CHARSET\n", body, context.text_ptr));
3004
match = in_charset(body + 1, context.text_ptr[0]);
3006
case SRE_OP_CHARSET_IGNORE:
3007
// <CHARSET_IGNORE> <charset>
3008
TRACE(("|%p|%p|CHARSET_IGNORE\n", body, context.text_ptr));
3009
match = in_charset(body + 1, state->lower(context.text_ptr[0]));
3013
TRACE(("|%p|%p|DIGIT\n", body, context.text_ptr));
3014
match = SRE_IS_DIGIT(context.text_ptr[0]);
3018
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
3019
match = SRE_IN(body + 1, context.text_ptr[0]);
3021
case SRE_OP_IN_IGNORE:
3022
// <IN_IGNORE> <set>
3023
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
3024
match = SRE_IN(body + 1, state->lower(context.text_ptr[0]));
3026
case SRE_OP_LITERAL:
3028
TRACE(("|%p|%p|LITERAL %d\n", body, context.text_ptr, body[1]));
3029
match = context.text_ptr[0] == (SRE_CHAR)body[1];
3031
case SRE_OP_LITERAL_IGNORE:
3032
// <LITERAL_IGNORE> <code>
3033
TRACE(("|%p|%p|LITERAL %d\n", body, context.text_ptr, body[1]));
3034
match = state->lower(context.text_ptr[0]) == (SRE_CHAR)body[1];
3036
case SRE_OP_LOC_NOT_WORD:
3038
TRACE(("|%p|%p|LOC_NOT_WORD\n", body, context.text_ptr));
3039
match = !SRE_LOC_IS_WORD(context.text_ptr[0]);
3041
case SRE_OP_LOC_WORD:
3043
TRACE(("|%p|%p|LOC_WORD\n", body, context.text_ptr));
3044
match = SRE_LOC_IS_WORD(context.text_ptr[0]);
3046
case SRE_OP_NOT_BIGCHARSET:
3047
// <NOT_BIGCHARSET> <charset>
3048
TRACE(("|%p|%p|NOT_BIGCHARSET\n", body, context.text_ptr));
3049
match = !in_bigcharset(body + 1, context.text_ptr[0]);
3051
case SRE_OP_NOT_BIGCHARSET_IGNORE:
3052
// <NOT_BIGCHARSET_IGNORE> <charset>
3053
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE\n", body, context.text_ptr));
3054
match = !in_bigcharset(body + 1, state->lower(context.text_ptr[0]));
3056
case SRE_OP_NOT_CHARSET:
3057
// <NOT_CHARSET> <charset>
3058
TRACE(("|%p|%p|NOT_CHARSET\n", body, context.text_ptr));
3059
match = !in_charset(body + 1, context.text_ptr[0]);
3061
case SRE_OP_NOT_CHARSET_IGNORE:
3062
// <NOT_CHARSET_IGNORE> <charset>
3063
TRACE(("|%p|%p|NOT_CHARSET_IGNORE\n", body, context.text_ptr));
3064
match = !in_charset(body + 1, state->lower(context.text_ptr[0]));
3066
case SRE_OP_NOT_DIGIT:
3068
TRACE(("|%p|%p|NOT_DIGIT\n", body, context.text_ptr));
3069
match = !SRE_IS_DIGIT(context.text_ptr[0]);
3073
TRACE(("|%p|%p|NOT_IN\n", body, context.text_ptr));
3074
match = !SRE_IN(body + 1, context.text_ptr[0]);
3076
case SRE_OP_NOT_IN_IGNORE:
3077
// <NOT_IN_IGNORE> <set>
3078
TRACE(("|%p|%p|NOT_IN\n", body, context.text_ptr));
3079
match = !SRE_IN(body + 1, state->lower(context.text_ptr[0]));
3081
case SRE_OP_NOT_LITERAL:
3082
// <NOT_LITERAL> <code>
3083
TRACE(("|%p|%p|NOT_LITERAL %d\n", body, context.text_ptr, body[1]));
3084
match = context.text_ptr[0] != (SRE_CHAR)body[1];
3086
case SRE_OP_NOT_LITERAL_IGNORE:
3087
// <NOT_LITERAL_IGNORE> <code>
3088
TRACE(("|%p|%p|NOT_LITERAL %d\n", body, context.text_ptr, body[1]));
3089
match = state->lower(context.text_ptr[0]) != (SRE_CHAR)body[1];
3091
case SRE_OP_NOT_RANGE:
3092
// <NOT_RANGE> <lower> <upper>
3093
TRACE(("|%p|%p|NOT_RANGE %d %d\n", body, context.text_ptr, body[1], body[2]));
3094
match = !SRE_IN_RANGE(context.text_ptr[0], body[1], body[2]);
3096
case SRE_OP_NOT_RANGE_IGNORE:
3097
// <NOT_RANGE_IGNORE> <lower> <upper>
3098
TRACE(("|%p|%p|NOT_RANGE_IGNORE %d %d\n", body, context.text_ptr, body[1], body[2]));
3099
match = !SRE_IN_RANGE(state->lower(context.text_ptr[0]), body[1], body[2]);
3101
case SRE_OP_NOT_WHITESPACE:
3103
TRACE(("|%p|%p|NOT_WHITESPACE\n", body, context.text_ptr));
3104
match = !SRE_IS_WHITESPACE(context.text_ptr[0]);
3106
case SRE_OP_NOT_WORD:
3108
TRACE(("|%p|%p|NOT_WORD\n", body, context.text_ptr));
3109
match = !SRE_IS_WORD(context.text_ptr[0]);
3112
// <RANGE> <lower> <upper>
3113
TRACE(("|%p|%p|RANGE %d %d\n", body, context.text_ptr, body[1], body[2]));
3114
match = SRE_IN_RANGE(context.text_ptr[0], body[1], body[2]);
3116
case SRE_OP_RANGE_IGNORE:
3117
// <RANGE_IGNORE> <lower> <upper>
3118
TRACE(("|%p|%p|RANGE_IGNORE %d %d\n", body, context.text_ptr, body[1], body[2]));
3119
match = SRE_IN_RANGE(state->lower(context.text_ptr[0]), body[1], body[2]);
3121
case SRE_OP_UNI_DIGIT:
3123
TRACE(("|%p|%p|UNI_DIGIT\n", body, context.text_ptr));
3124
match = SRE_UNI_IS_DIGIT(context.text_ptr[0]);
3126
case SRE_OP_UNI_NOT_DIGIT:
3128
TRACE(("|%p|%p|UNI_NOT_DIGIT\n", body, context.text_ptr));
3129
match = !SRE_UNI_IS_DIGIT(context.text_ptr[0]);
3131
case SRE_OP_UNI_NOT_WHITESPACE:
3132
// <UNI_NOT_WHITESPACE>
3133
TRACE(("|%p|%p|UNI_NOT_WHITESPACE\n", body, context.text_ptr));
3134
match = !SRE_UNI_IS_WHITESPACE(context.text_ptr[0]);
3136
case SRE_OP_UNI_NOT_WORD:
3138
TRACE(("|%p|%p|UNI_NOT_WORD\n", body, context.text_ptr));
3139
match = !SRE_UNI_IS_WORD(context.text_ptr[0]);
3141
case SRE_OP_UNI_WHITESPACE:
3143
TRACE(("|%p|%p|UNI_WHITESPACE\n", body, context.text_ptr));
3144
match = SRE_UNI_IS_WHITESPACE(context.text_ptr[0]);
3146
case SRE_OP_UNI_WORD:
3148
TRACE(("|%p|%p|UNI_WORD\n", body, context.text_ptr));
3149
match = SRE_UNI_IS_WORD(context.text_ptr[0]);
3151
case SRE_OP_WHITESPACE:
3153
TRACE(("|%p|%p|WHITESPACE\n", body, context.text_ptr));
3154
match = SRE_IS_WHITESPACE(context.text_ptr[0]);
3158
TRACE(("|%p|%p|WORD\n", body, context.text_ptr));
3159
match = SRE_IS_WORD(context.text_ptr[0]);
3167
case SRE_OP_REPEAT_ONE_MIN_REV:
3170
// <REPEAT_ONE_MIN_REV> <skip to end> <index> <min> <max> ...
3171
SRE_CODE* repeat_ptr = context.pattern_ptr;
3172
int index = context.pattern_ptr[1];
3173
int repeat_min = context.pattern_ptr[2];
3174
int repeat_max = context.pattern_ptr[3];
3175
SRE_CODE* body = context.pattern_ptr + 4;
3176
SRE_CODE* tail = context.pattern_ptr + context.pattern_ptr[0];
3177
Py_ssize_t limit = context.text_start - context.text_ptr;
3178
SRE_CODE* look_literal = tail;
3179
SRE_CHAR* start_ptr = context.text_ptr;
3182
TRACE(("|%p|%p|REPEAT_ONE_MIN_REV %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
3183
if (repeat_min > limit)
3185
repeat_max = sre_repeat_limit(repeat_max, limit);
3186
end_ptr = start_ptr - repeat_min;
3187
// First match the minimum.
3189
case SRE_OP_ANY_REV:
3191
TRACE(("|%p|%p|ANY_REV\n", body, context.text_ptr));
3192
while (context.text_ptr > end_ptr && !SRE_IS_LINEBREAK(context.text_ptr[-1]))
3195
case SRE_OP_ANY_ALL_REV:
3197
TRACE(("|%p|%p|ANY_ALL_REV\n", body, context.text_ptr));
3198
context.text_ptr = end_ptr;
3200
case SRE_OP_BIGCHARSET_REV:
3201
// <BIGCHARSET_REV> <charset>
3202
TRACE(("|%p|%p|BIGCHARSET_REV\n", body, context.text_ptr));
3203
while (context.text_ptr > end_ptr && in_bigcharset(body + 1, context.text_ptr[-1]))
3206
case SRE_OP_BIGCHARSET_IGNORE_REV:
3207
// <BIGCHARSET_IGNORE_REV> <charset>
3208
TRACE(("|%p|%p|BIGCHARSET_IGNORE_REV\n", body, context.text_ptr));
3209
while (context.text_ptr > end_ptr && in_bigcharset(body + 1, state->lower(context.text_ptr[-1])))
3212
case SRE_OP_CHARSET_REV:
3213
// <CHARSET_REV> <charset>
3214
TRACE(("|%p|%p|CHARSET_REV\n", body, context.text_ptr));
3215
while (context.text_ptr > end_ptr && in_charset(body + 1, context.text_ptr[-1]))
3218
case SRE_OP_CHARSET_IGNORE_REV:
3219
// <CHARSET_IGNORE_REV> <charset>
3220
TRACE(("|%p|%p|CHARSET_IGNORE_REV\n", body, context.text_ptr));
3221
while (context.text_ptr > end_ptr && in_charset(body + 1, state->lower(context.text_ptr[-1])))
3224
case SRE_OP_DIGIT_REV:
3226
TRACE(("|%p|%p|DIGIT_REV\n", body, context.text_ptr));
3227
while (context.text_ptr > end_ptr && SRE_IS_DIGIT(context.text_ptr[-1]))
3232
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
3233
while (context.text_ptr > end_ptr && SRE_IN(body + 1, context.text_ptr[-1]))
3236
case SRE_OP_IN_IGNORE_REV:
3237
// <IN_IGNORE_REV> <set>
3238
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
3239
while (context.text_ptr > end_ptr && SRE_IN(body + 1, state->lower(context.text_ptr[-1])))
3242
case SRE_OP_LITERAL_REV:
3243
// <LITERAL_REV> <code>
3244
TRACE(("|%p|%p|LITERAL_REV %d\n", body, context.text_ptr, body[1]));
3245
while (context.text_ptr > end_ptr && context.text_ptr[-1] == (SRE_CHAR)body[1])
3248
case SRE_OP_LITERAL_IGNORE_REV:
3249
// <LITERAL_IGNORE_REV> <code>
3250
TRACE(("|%p|%p|LITERAL_REV %d\n", body, context.text_ptr, body[1]));
3251
while (context.text_ptr > end_ptr && state->lower(context.text_ptr[-1]) == (SRE_CHAR)body[1])
3254
case SRE_OP_LOC_NOT_WORD_REV:
3255
// <LOC_NOT_WORD_REV>
3256
TRACE(("|%p|%p|LOC_NOT_WORD_REV\n", body, context.text_ptr));
3257
while (context.text_ptr > end_ptr && !SRE_LOC_IS_WORD(context.text_ptr[-1]))
3260
case SRE_OP_LOC_WORD_REV:
3262
TRACE(("|%p|%p|LOC_WORD_REV\n", body, context.text_ptr));
3263
while (context.text_ptr > end_ptr && SRE_LOC_IS_WORD(context.text_ptr[-1]))
3266
case SRE_OP_NOT_BIGCHARSET_REV:
3267
// <NOT_BIGCHARSET_REV> <charset>
3268
TRACE(("|%p|%p|NOT_BIGCHARSET_REV\n", body, context.text_ptr));
3269
while (context.text_ptr > end_ptr && !in_bigcharset(body + 1, context.text_ptr[-1]))
3272
case SRE_OP_NOT_BIGCHARSET_IGNORE_REV:
3273
// <NOT_BIGCHARSET_IGNORE_REV> <charset>
3274
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE_REV\n", body, context.text_ptr));
3275
while (context.text_ptr > end_ptr && !in_bigcharset(body + 1, state->lower(context.text_ptr[-1])))
3278
case SRE_OP_NOT_CHARSET_REV:
3279
// <NOT_CHARSET_REV> <charset>
3280
TRACE(("|%p|%p|NOT_CHARSET_REV\n", body, context.text_ptr));
3281
while (context.text_ptr > end_ptr && !in_charset(body + 1, context.text_ptr[-1]))
3284
case SRE_OP_NOT_CHARSET_IGNORE_REV:
3285
// <NOT_CHARSET_IGNORE_REV> <charset>
3286
TRACE(("|%p|%p|NOT_CHARSET_IGNORE_REV\n", body, context.text_ptr));
3287
while (context.text_ptr > end_ptr && !in_charset(body + 1, state->lower(context.text_ptr[-1])))
3290
case SRE_OP_NOT_DIGIT_REV:
3292
TRACE(("|%p|%p|NOT_DIGIT_REV\n", body, context.text_ptr));
3293
while (context.text_ptr > end_ptr && !SRE_IS_DIGIT(context.text_ptr[-1]))
3296
case SRE_OP_NOT_IN_REV:
3297
// <NOT_IN_REV> <set>
3298
TRACE(("|%p|%p|NOT_IN_REV\n", body, context.text_ptr));
3299
while (context.text_ptr > end_ptr && !SRE_IN(body + 1, context.text_ptr[-1]))
3302
case SRE_OP_NOT_IN_IGNORE_REV:
3303
// <NOT_IN_IGNORE_REV> <set>
3304
TRACE(("|%p|%p|NOT_IN_REV\n", body, context.text_ptr));
3305
while (context.text_ptr > end_ptr && !SRE_IN(body + 1, state->lower(context.text_ptr[-1])))
3308
case SRE_OP_NOT_LITERAL_REV:
3309
// <NOT_LITERAL_REV> <code>
3310
TRACE(("|%p|%p|NOT_LITERAL_REV %d\n", body, context.text_ptr, body[1]));
3311
while (context.text_ptr > end_ptr && context.text_ptr[-1] != (SRE_CHAR)body[1])
3314
case SRE_OP_NOT_LITERAL_IGNORE_REV:
3315
// <NOT_LITERAL_IGNORE_REV> <code>
3316
TRACE(("|%p|%p|NOT_LITERAL_REV %d\n", body, context.text_ptr, body[1]));
3317
while (context.text_ptr > end_ptr && state->lower(context.text_ptr[-1]) != (SRE_CHAR)body[1])
3320
case SRE_OP_NOT_RANGE_REV:
3321
// <NOT_RANGE_REV> <lower> <upper>
3322
TRACE(("|%p|%p|NOT_RANGE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
3323
while (context.text_ptr > end_ptr && !SRE_IN_RANGE(context.text_ptr[-1], body[1], body[2]))
3326
case SRE_OP_NOT_RANGE_IGNORE_REV:
3327
// <NOT_RANGE_IGNORE_REV> <lower> <upper>
3328
TRACE(("|%p|%p|NOT_RANGE_IGNORE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
3329
while (context.text_ptr > end_ptr && !SRE_IN_RANGE(state->lower(context.text_ptr[-1]), body[1], body[2]))
3332
case SRE_OP_NOT_WHITESPACE_REV:
3333
// <NOT_WHITESPACE_REV>
3334
TRACE(("|%p|%p|NOT_WHITESPACE_REV\n", body, context.text_ptr));
3335
while (context.text_ptr > end_ptr && !SRE_IS_WHITESPACE(context.text_ptr[-1]))
3338
case SRE_OP_NOT_WORD_REV:
3340
TRACE(("|%p|%p|NOT_WORD_REV\n", body, context.text_ptr));
3341
while (context.text_ptr > end_ptr && !SRE_IS_WORD(context.text_ptr[-1]))
3344
case SRE_OP_RANGE_REV:
3345
// <RANGE_REV> <lower> <upper>
3346
TRACE(("|%p|%p|RANGE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
3347
while (context.text_ptr > end_ptr && SRE_IN_RANGE(context.text_ptr[-1], body[1], body[2]))
3350
case SRE_OP_RANGE_IGNORE_REV:
3351
// <RANGE_IGNORE_REV> <lower> <upper>
3352
TRACE(("|%p|%p|RANGE_IGNORE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
3353
while (context.text_ptr > end_ptr && SRE_IN_RANGE(state->lower(context.text_ptr[-1]), body[1], body[2]))
3356
case SRE_OP_UNI_DIGIT_REV:
3358
TRACE(("|%p|%p|UNI_DIGIT_REV\n", body, context.text_ptr));
3359
while (context.text_ptr > end_ptr && SRE_UNI_IS_DIGIT(context.text_ptr[-1]))
3362
case SRE_OP_UNI_NOT_DIGIT_REV:
3363
// <UNI_NOT_DIGIT_REV>
3364
TRACE(("|%p|%p|UNI_NOT_DIGIT_REV\n", body, context.text_ptr));
3365
while (context.text_ptr > end_ptr && !SRE_UNI_IS_DIGIT(context.text_ptr[-1]))
3368
case SRE_OP_UNI_NOT_WHITESPACE_REV:
3369
// <UNI_NOT_WHITESPACE_REV>
3370
TRACE(("|%p|%p|UNI_NOT_WHITESPACE_REV\n", body, context.text_ptr));
3371
while (context.text_ptr > end_ptr && !SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]))
3374
case SRE_OP_UNI_NOT_WORD_REV:
3375
// <UNI_NOT_WORD_REV>
3376
TRACE(("|%p|%p|UNI_NOT_WORD_REV\n", body, context.text_ptr));
3377
while (context.text_ptr > end_ptr && !SRE_UNI_IS_WORD(context.text_ptr[-1]))
3380
case SRE_OP_UNI_WHITESPACE_REV:
3381
// <UNI_WHITESPACE_REV>
3382
TRACE(("|%p|%p|UNI_WHITESPACE_REV\n", body, context.text_ptr));
3383
while (context.text_ptr > end_ptr && SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]))
3386
case SRE_OP_UNI_WORD_REV:
3388
TRACE(("|%p|%p|UNI_WORD_REV\n", body, context.text_ptr));
3389
while (context.text_ptr > end_ptr && SRE_UNI_IS_WORD(context.text_ptr[-1]))
3392
case SRE_OP_WHITESPACE_REV:
3394
TRACE(("|%p|%p|WHITESPACE_REV\n", body, context.text_ptr));
3395
while (context.text_ptr > end_ptr && SRE_IS_WHITESPACE(context.text_ptr[-1]))
3398
case SRE_OP_WORD_REV:
3400
TRACE(("|%p|%p|WORD_REV\n", body, context.text_ptr));
3401
while (context.text_ptr > end_ptr && SRE_IS_WORD(context.text_ptr[-1]))
3405
// Matched at least the minimum?
3406
if (context.text_ptr > end_ptr)
3408
// Now try up to the maximum.
3409
end_ptr = start_ptr - repeat_max;
3410
while (look_literal[0] == SRE_OP_MARK)
3413
// Look at what follows to avoid unnecessary backtracking.
3414
if (SRE_LOOK_AHEAD_ONE_REV(&context, state, look_literal)) {
3415
if (context.text_ptr > end_ptr) {
3416
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_ONE_MIN_REV, repeat_ptr, index);
3418
return SRE_CLEANUP(&context, state, result);
3420
context.repeat_counter[index] = start_ptr - context.text_ptr;
3421
context.pattern_ptr = tail;
3425
if (context.text_ptr <= end_ptr)
3428
case SRE_OP_ANY_REV:
3430
TRACE(("|%p|%p|ANY_REV\n", body, context.text_ptr));
3431
match = !SRE_IS_LINEBREAK(context.text_ptr[-1]);
3433
case SRE_OP_ANY_ALL_REV:
3435
TRACE(("|%p|%p|ANY_ALL_REV\n", body, context.text_ptr));
3438
case SRE_OP_BIGCHARSET_REV:
3439
// <BIGCHARSET_REV> <charset>
3440
TRACE(("|%p|%p|BIGCHARSET_REV\n", body, context.text_ptr));
3441
match = in_bigcharset(body + 1, context.text_ptr[-1]);
3443
case SRE_OP_BIGCHARSET_IGNORE_REV:
3444
// <BIGCHARSET_IGNORE_REV> <charset>
3445
TRACE(("|%p|%p|BIGCHARSET_IGNORE_REV\n", body, context.text_ptr));
3446
match = in_bigcharset(body + 1, state->lower(context.text_ptr[-1]));
3448
case SRE_OP_CHARSET_REV:
3449
// <CHARSET_REV> <charset>
3450
TRACE(("|%p|%p|CHARSET_REV\n", body, context.text_ptr));
3451
match = in_charset(body + 1, context.text_ptr[-1]);
3453
case SRE_OP_CHARSET_IGNORE_REV:
3454
// <CHARSET_IGNORE_REV> <charset>
3455
TRACE(("|%p|%p|CHARSET_IGNORE_REV\n", body, context.text_ptr));
3456
match = in_charset(body + 1, state->lower(context.text_ptr[-1]));
3458
case SRE_OP_DIGIT_REV:
3460
TRACE(("|%p|%p|DIGIT_REV\n", body, context.text_ptr));
3461
match = SRE_IS_DIGIT(context.text_ptr[-1]);
3465
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
3466
match = SRE_IN(body + 1, context.text_ptr[-1]);
3468
case SRE_OP_IN_IGNORE_REV:
3469
// <IN_IGNORE_REV> <set>
3470
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
3471
match = SRE_IN(body + 1, state->lower(context.text_ptr[-1]));
3473
case SRE_OP_LITERAL_REV:
3474
// <LITERAL_REV> <code>
3475
TRACE(("|%p|%p|LITERAL_REV %d\n", body, context.text_ptr, body[1]));
3476
match = context.text_ptr[-1] == (SRE_CHAR)body[1];
3478
case SRE_OP_LITERAL_IGNORE_REV:
3479
// <LITERAL_IGNORE_REV> <code>
3480
TRACE(("|%p|%p|LITERAL_REV %d\n", body, context.text_ptr, body[1]));
3481
match = state->lower(context.text_ptr[-1]) == (SRE_CHAR)body[1];
3483
case SRE_OP_LOC_NOT_WORD_REV:
3484
// <LOC_NOT_WORD_REV>
3485
TRACE(("|%p|%p|LOC_NOT_WORD_REV\n", body, context.text_ptr));
3486
match = !SRE_LOC_IS_WORD(context.text_ptr[-1]);
3488
case SRE_OP_LOC_WORD_REV:
3490
TRACE(("|%p|%p|LOC_WORD_REV\n", body, context.text_ptr));
3491
match = SRE_LOC_IS_WORD(context.text_ptr[-1]);
3493
case SRE_OP_NOT_BIGCHARSET_REV:
3494
// <NOT_BIGCHARSET_REV> <charset>
3495
TRACE(("|%p|%p|NOT_BIGCHARSET_REV\n", body, context.text_ptr));
3496
match = !in_bigcharset(body + 1, context.text_ptr[-1]);
3498
case SRE_OP_NOT_BIGCHARSET_IGNORE_REV:
3499
// <NOT_BIGCHARSET_IGNORE_REV> <charset>
3500
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE_REV\n", body, context.text_ptr));
3501
match = !in_bigcharset(body + 1, state->lower(context.text_ptr[-1]));
3503
case SRE_OP_NOT_CHARSET_REV:
3504
// <NOT_CHARSET_REV> <charset>
3505
TRACE(("|%p|%p|NOT_CHARSET_REV\n", body, context.text_ptr));
3506
match = !in_charset(body + 1, context.text_ptr[-1]);
3508
case SRE_OP_NOT_CHARSET_IGNORE_REV:
3509
// <NOT_CHARSET_IGNORE_REV> <charset>
3510
TRACE(("|%p|%p|NOT_CHARSET_IGNORE_REV\n", body, context.text_ptr));
3511
match = !in_charset(body + 1, state->lower(context.text_ptr[-1]));
3513
case SRE_OP_NOT_DIGIT_REV:
3515
TRACE(("|%p|%p|NOT_DIGIT_REV\n", body, context.text_ptr));
3516
match = !SRE_IS_DIGIT(context.text_ptr[-1]);
3518
case SRE_OP_NOT_IN_REV:
3519
// <NOT_IN_REV> <set>
3520
TRACE(("|%p|%p|NOT_IN_REV\n", body, context.text_ptr));
3521
match = !SRE_IN(body + 1, context.text_ptr[-1]);
3523
case SRE_OP_NOT_IN_IGNORE_REV:
3524
// <NOT_IN_IGNORE_REV> <set>
3525
TRACE(("|%p|%p|NOT_IN_REV\n", body, context.text_ptr));
3526
match = !SRE_IN(body + 1, state->lower(context.text_ptr[-1]));
3528
case SRE_OP_NOT_LITERAL_REV:
3529
// <NOT_LITERAL_REV> <code>
3530
TRACE(("|%p|%p|NOT_LITERAL_REV %d\n", body, context.text_ptr, body[1]));
3531
match = context.text_ptr[-1] != (SRE_CHAR)body[1];
3533
case SRE_OP_NOT_LITERAL_IGNORE_REV:
3534
// <NOT_LITERAL_IGNORE_REV> <code>
3535
TRACE(("|%p|%p|NOT_LITERAL_REV %d\n", body, context.text_ptr, body[1]));
3536
match = state->lower(context.text_ptr[-1]) != (SRE_CHAR)body[1];
3538
case SRE_OP_NOT_RANGE_REV:
3539
// <NOT_RANGE_REV> <lower> <upper>
3540
TRACE(("|%p|%p|NOT_RANGE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
3541
match = !SRE_IN_RANGE(context.text_ptr[-1], body[1], body[2]);
3543
case SRE_OP_NOT_RANGE_IGNORE_REV:
3544
// <NOT_RANGE_IGNORE_REV> <lower> <upper>
3545
TRACE(("|%p|%p|NOT_RANGE_IGNORE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
3546
match = !SRE_IN_RANGE(state->lower(context.text_ptr[-1]), body[1], body[2]);
3548
case SRE_OP_NOT_WHITESPACE_REV:
3549
// <NOT_WHITESPACE_REV>
3550
TRACE(("|%p|%p|NOT_WHITESPACE_REV\n", body, context.text_ptr));
3551
match = !SRE_IS_WHITESPACE(context.text_ptr[-1]);
3553
case SRE_OP_NOT_WORD_REV:
3555
TRACE(("|%p|%p|NOT_WORD_REV\n", body, context.text_ptr));
3556
match = !SRE_IS_WORD(context.text_ptr[-1]);
3558
case SRE_OP_RANGE_REV:
3559
// <RANGE_REV> <lower> <upper>
3560
TRACE(("|%p|%p|RANGE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
3561
match = SRE_IN_RANGE(context.text_ptr[-1], body[1], body[2]);
3563
case SRE_OP_RANGE_IGNORE_REV:
3564
// <RANGE_IGNORE_REV> <lower> <upper>
3565
TRACE(("|%p|%p|RANGE_IGNORE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
3566
match = SRE_IN_RANGE(state->lower(context.text_ptr[-1]), body[1], body[2]);
3568
case SRE_OP_UNI_DIGIT_REV:
3570
TRACE(("|%p|%p|UNI_DIGIT_REV\n", body, context.text_ptr));
3571
match = SRE_UNI_IS_DIGIT(context.text_ptr[-1]);
3573
case SRE_OP_UNI_NOT_DIGIT_REV:
3574
// <UNI_NOT_DIGIT_REV>
3575
TRACE(("|%p|%p|UNI_NOT_DIGIT_REV\n", body, context.text_ptr));
3576
match = !SRE_UNI_IS_DIGIT(context.text_ptr[-1]);
3578
case SRE_OP_UNI_NOT_WHITESPACE_REV:
3579
// <UNI_NOT_WHITESPACE_REV>
3580
TRACE(("|%p|%p|UNI_NOT_WHITESPACE_REV\n", body, context.text_ptr));
3581
match = !SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]);
3583
case SRE_OP_UNI_NOT_WORD_REV:
3584
// <UNI_NOT_WORD_REV>
3585
TRACE(("|%p|%p|UNI_NOT_WORD_REV\n", body, context.text_ptr));
3586
match = !SRE_UNI_IS_WORD(context.text_ptr[-1]);
3588
case SRE_OP_UNI_WHITESPACE_REV:
3589
// <UNI_WHITESPACE_REV>
3590
TRACE(("|%p|%p|UNI_WHITESPACE_REV\n", body, context.text_ptr));
3591
match = SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]);
3593
case SRE_OP_UNI_WORD_REV:
3595
TRACE(("|%p|%p|UNI_WORD_REV\n", body, context.text_ptr));
3596
match = SRE_UNI_IS_WORD(context.text_ptr[-1]);
3598
case SRE_OP_WHITESPACE_REV:
3600
TRACE(("|%p|%p|WHITESPACE_REV\n", body, context.text_ptr));
3601
match = SRE_IS_WHITESPACE(context.text_ptr[-1]);
3603
case SRE_OP_WORD_REV:
3605
TRACE(("|%p|%p|WORD_REV\n", body, context.text_ptr));
3606
match = SRE_IS_WORD(context.text_ptr[-1]);
3614
case SRE_OP_REPEAT_ONE_POSS:
3616
// Possessive repeat
3617
// <REPEAT_ONE_POSS> <skip to end> <index> <min> <max> ...
3618
int index = context.pattern_ptr[1];
3619
int repeat_min = context.pattern_ptr[2];
3620
int repeat_max = context.pattern_ptr[3];
3621
SRE_CODE* body = context.pattern_ptr + 4;
3622
SRE_CODE* tail = context.pattern_ptr + context.pattern_ptr[0];
3623
Py_ssize_t limit = context.text_end - context.text_ptr;
3624
SRE_CHAR* start_ptr = context.text_ptr;
3626
TRACE(("|%p|%p|REPEAT_ONE_POSS %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
3627
if (repeat_min > limit)
3629
repeat_max = sre_repeat_limit(repeat_max, limit);
3630
end_ptr = start_ptr + repeat_max;
3631
// Match up to the maximum.
3635
TRACE(("|%p|%p|ANY\n", body, context.text_ptr));
3636
while (context.text_ptr < end_ptr && !SRE_IS_LINEBREAK(context.text_ptr[0]))
3639
case SRE_OP_ANY_ALL:
3641
TRACE(("|%p|%p|ANY_ALL\n", body, context.text_ptr));
3642
context.text_ptr = end_ptr;
3644
case SRE_OP_BIGCHARSET:
3645
// <BIGCHARSET> <charset>
3646
TRACE(("|%p|%p|BIGCHARSET\n", body, context.text_ptr));
3647
while (context.text_ptr < end_ptr && in_bigcharset(body + 1, context.text_ptr[0]))
3650
case SRE_OP_BIGCHARSET_IGNORE:
3651
// <BIGCHARSET_IGNORE> <charset>
3652
TRACE(("|%p|%p|BIGCHARSET_IGNORE\n", body, context.text_ptr));
3653
while (context.text_ptr < end_ptr && in_bigcharset(body + 1, state->lower(context.text_ptr[0])))
3656
case SRE_OP_CHARSET:
3657
// <CHARSET> <charset>
3658
TRACE(("|%p|%p|CHARSET\n", body, context.text_ptr));
3659
while (context.text_ptr < end_ptr && in_charset(body + 1, context.text_ptr[0]))
3662
case SRE_OP_CHARSET_IGNORE:
3663
// <CHARSET_IGNORE> <charset>
3664
TRACE(("|%p|%p|CHARSET_IGNORE\n", body, context.text_ptr));
3665
while (context.text_ptr < end_ptr && in_charset(body + 1, state->lower(context.text_ptr[0])))
3670
TRACE(("|%p|%p|DIGIT\n", body, context.text_ptr));
3671
while (context.text_ptr < end_ptr && SRE_IS_DIGIT(context.text_ptr[0]))
3676
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
3677
while (context.text_ptr < end_ptr && SRE_IN(body + 1, context.text_ptr[0]))
3680
case SRE_OP_IN_IGNORE:
3681
// <IN_IGNORE> <set>
3682
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
3683
while (context.text_ptr < end_ptr && SRE_IN(body + 1, state->lower(context.text_ptr[0])))
3686
case SRE_OP_LITERAL:
3688
TRACE(("|%p|%p|LITERAL %d\n", body, context.text_ptr, body[1]));
3689
while (context.text_ptr < end_ptr && context.text_ptr[0] == (SRE_CHAR)body[1])
3692
case SRE_OP_LITERAL_IGNORE:
3693
// <LITERAL_IGNORE> <code>
3694
TRACE(("|%p|%p|LITERAL %d\n", body, context.text_ptr, body[1]));
3695
while (context.text_ptr < end_ptr && state->lower(context.text_ptr[0]) == (SRE_CHAR)body[1])
3698
case SRE_OP_LOC_NOT_WORD:
3700
TRACE(("|%p|%p|LOC_NOT_WORD\n", body, context.text_ptr));
3701
while (context.text_ptr < end_ptr && !SRE_LOC_IS_WORD(context.text_ptr[0]))
3704
case SRE_OP_LOC_WORD:
3706
TRACE(("|%p|%p|LOC_WORD\n", body, context.text_ptr));
3707
while (context.text_ptr < end_ptr && SRE_LOC_IS_WORD(context.text_ptr[0]))
3710
case SRE_OP_NOT_BIGCHARSET:
3711
// <NOT_BIGCHARSET> <charset>
3712
TRACE(("|%p|%p|NOT_BIGCHARSET\n", body, context.text_ptr));
3713
while (context.text_ptr < end_ptr && !in_bigcharset(body + 1, context.text_ptr[0]))
3716
case SRE_OP_NOT_BIGCHARSET_IGNORE:
3717
// <NOT_BIGCHARSET_IGNORE> <charset>
3718
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE\n", body, context.text_ptr));
3719
while (context.text_ptr < end_ptr && !in_bigcharset(body + 1, state->lower(context.text_ptr[0])))
3722
case SRE_OP_NOT_CHARSET:
3723
// <NOT_CHARSET> <charset>
3724
TRACE(("|%p|%p|NOT_CHARSET\n", body, context.text_ptr));
3725
while (context.text_ptr < end_ptr && !in_charset(body + 1, context.text_ptr[0]))
3728
case SRE_OP_NOT_CHARSET_IGNORE:
3729
// <NOT_CHARSET_IGNORE> <charset>
3730
TRACE(("|%p|%p|NOT_CHARSET_IGNORE\n", body, context.text_ptr));
3731
while (context.text_ptr < end_ptr && !in_charset(body + 1, state->lower(context.text_ptr[0])))
3734
case SRE_OP_NOT_DIGIT:
3736
TRACE(("|%p|%p|NOT_DIGIT\n", body, context.text_ptr));
3737
while (context.text_ptr < end_ptr && !SRE_IS_DIGIT(context.text_ptr[0]))
3742
TRACE(("|%p|%p|NOT_IN\n", body, context.text_ptr));
3743
while (context.text_ptr < end_ptr && !SRE_IN(body + 1, context.text_ptr[0]))
3746
case SRE_OP_NOT_IN_IGNORE:
3747
// <NOT_IN_IGNORE> <set>
3748
TRACE(("|%p|%p|NOT_IN\n", body, context.text_ptr));
3749
while (context.text_ptr < end_ptr && !SRE_IN(body + 1, state->lower(context.text_ptr[0])))
3752
case SRE_OP_NOT_LITERAL:
3753
// <NOT_LITERAL> <code>
3754
// Trace the literal operand (body[1]), which is the value the scan loop compares against.
TRACE(("|%p|%p|NOT_LITERAL %d\n", body, context.text_ptr, body[1]));
3755
while (context.text_ptr < end_ptr && context.text_ptr[0] != (SRE_CHAR)body[1])
3758
case SRE_OP_NOT_LITERAL_IGNORE:
3759
// <NOT_LITERAL_IGNORE> <code>
3760
// Trace the literal operand (body[1]), which is the value the case-folded scan loop compares against.
TRACE(("|%p|%p|NOT_LITERAL %d\n", body, context.text_ptr, body[1]));
3761
while (context.text_ptr < end_ptr && state->lower(context.text_ptr[0]) != (SRE_CHAR)body[1])
3764
case SRE_OP_NOT_RANGE:
3765
// <NOT_RANGE> <lower> <upper>
3766
TRACE(("|%p|%p|NOT_RANGE %d %d\n", body, context.text_ptr, body[1], body[2]));
3767
while (context.text_ptr < end_ptr && !SRE_IN_RANGE(context.text_ptr[0], body[1], body[2]))
3770
case SRE_OP_NOT_RANGE_IGNORE:
3771
// <NOT_RANGE_IGNORE> <lower> <upper>
3772
TRACE(("|%p|%p|NOT_RANGE_IGNORE %d %d\n", body, context.text_ptr, body[1], body[2]));
3773
while (context.text_ptr < end_ptr && !SRE_IN_RANGE(state->lower(context.text_ptr[0]), body[1], body[2]))
3776
case SRE_OP_NOT_WHITESPACE:
3778
TRACE(("|%p|%p|NOT_WHITESPACE\n", body, context.text_ptr));
3779
while (context.text_ptr < end_ptr && !SRE_IS_WHITESPACE(context.text_ptr[0]))
3782
case SRE_OP_NOT_WORD:
3784
TRACE(("|%p|%p|NOT_WORD\n", body, context.text_ptr));
3785
while (context.text_ptr < end_ptr && !SRE_IS_WORD(context.text_ptr[0]))
3789
// <RANGE> <lower> <upper>
3790
TRACE(("|%p|%p|RANGE %d %d\n", body, context.text_ptr, body[1], body[2]));
3791
while (context.text_ptr < end_ptr && SRE_IN_RANGE(context.text_ptr[0], body[1], body[2]))
3794
case SRE_OP_RANGE_IGNORE:
3795
// <RANGE_IGNORE> <lower> <upper>
3796
TRACE(("|%p|%p|RANGE_IGNORE %d %d\n", body, context.text_ptr, body[1], body[2]));
3797
while (context.text_ptr < end_ptr && SRE_IN_RANGE(state->lower(context.text_ptr[0]), body[1], body[2]))
3800
case SRE_OP_UNI_DIGIT:
3802
TRACE(("|%p|%p|UNI_DIGIT\n", body, context.text_ptr));
3803
while (context.text_ptr < end_ptr && SRE_UNI_IS_DIGIT(context.text_ptr[0]))
3806
case SRE_OP_UNI_NOT_DIGIT:
3808
TRACE(("|%p|%p|UNI_NOT_DIGIT\n", body, context.text_ptr));
3809
while (context.text_ptr < end_ptr && !SRE_UNI_IS_DIGIT(context.text_ptr[0]))
3812
case SRE_OP_UNI_NOT_WHITESPACE:
3813
// <UNI_NOT_WHITESPACE>
3814
TRACE(("|%p|%p|UNI_NOT_WHITESPACE\n", body, context.text_ptr));
3815
while (context.text_ptr < end_ptr && !SRE_UNI_IS_WHITESPACE(context.text_ptr[0]))
3818
case SRE_OP_UNI_NOT_WORD:
3820
TRACE(("|%p|%p|UNI_NOT_WORD\n", body, context.text_ptr));
3821
while (context.text_ptr < end_ptr && !SRE_UNI_IS_WORD(context.text_ptr[0]))
3824
case SRE_OP_UNI_WHITESPACE:
3826
TRACE(("|%p|%p|UNI_WHITESPACE\n", body, context.text_ptr));
3827
while (context.text_ptr < end_ptr && SRE_UNI_IS_WHITESPACE(context.text_ptr[0]))
3830
case SRE_OP_UNI_WORD:
3832
TRACE(("|%p|%p|UNI_WORD\n", body, context.text_ptr));
3833
while (context.text_ptr < end_ptr && SRE_UNI_IS_WORD(context.text_ptr[0]))
3836
case SRE_OP_WHITESPACE:
3838
TRACE(("|%p|%p|WHITESPACE\n", body, context.text_ptr));
3839
while (context.text_ptr < end_ptr && SRE_IS_WHITESPACE(context.text_ptr[0]))
3844
TRACE(("|%p|%p|WORD\n", body, context.text_ptr));
3845
while (context.text_ptr < end_ptr && SRE_IS_WORD(context.text_ptr[0]))
3849
// Matched at least the minimum?
3850
if (context.text_ptr < start_ptr + repeat_min)
3852
context.pattern_ptr = tail;
3855
case SRE_OP_REPEAT_ONE_POSS_REV:
3857
// Possessive repeat
3858
// <REPEAT_ONE_POSS_REV> <skip to end> <index> <min> <max> ...
3859
int index = context.pattern_ptr[1];
3860
int repeat_min = context.pattern_ptr[2];
3861
int repeat_max = context.pattern_ptr[3];
3862
SRE_CODE* body = context.pattern_ptr + 4;
3863
SRE_CODE* tail = context.pattern_ptr + context.pattern_ptr[0];
3864
Py_ssize_t limit = context.text_ptr - context.text_start;
3865
SRE_CHAR* start_ptr = context.text_ptr;
3867
TRACE(("|%p|%p|REPEAT_ONE_POSS_REV %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
3868
if (repeat_min > limit)
3870
repeat_max = sre_repeat_limit(repeat_max, limit);
3871
end_ptr = start_ptr - repeat_max;
3872
// Now match up to the maximum.
3874
case SRE_OP_ANY_REV:
3876
TRACE(("|%p|%p|ANY_REV\n", body, context.text_ptr));
3877
while (context.text_ptr > end_ptr && !SRE_IS_LINEBREAK(context.text_ptr[-1]))
3880
case SRE_OP_ANY_ALL_REV:
3882
TRACE(("|%p|%p|ANY_ALL_REV\n", body, context.text_ptr));
3883
context.text_ptr = end_ptr;
3885
case SRE_OP_BIGCHARSET_REV:
3886
// <BIGCHARSET_REV> <charset>
3887
// Label the trace with the opcode this case handles (SRE_OP_BIGCHARSET_REV).
TRACE(("|%p|%p|BIGCHARSET_REV\n", body, context.text_ptr));
3888
while (context.text_ptr > end_ptr && in_bigcharset(body + 1, context.text_ptr[-1]))
3891
case SRE_OP_BIGCHARSET_IGNORE_REV:
3892
// <BIGCHARSET_IGNORE_REV> <charset>
3893
TRACE(("|%p|%p|BIGCHARSET_IGNORE_REV\n", body, context.text_ptr));
3894
while (context.text_ptr > end_ptr && in_bigcharset(body + 1, state->lower(context.text_ptr[-1])))
3897
case SRE_OP_CHARSET_REV:
3898
// <CHARSET_REV> <charset>
3899
TRACE(("|%p|%p|CHARSET_REV\n", body, context.text_ptr));
3900
while (context.text_ptr > end_ptr && in_charset(body + 1, context.text_ptr[-1]))
3903
case SRE_OP_CHARSET_IGNORE_REV:
3904
// <CHARSET_IGNORE_REV> <charset>
3905
TRACE(("|%p|%p|CHARSET_IGNORE_REV\n", body, context.text_ptr));
3906
while (context.text_ptr > end_ptr && in_charset(body + 1, state->lower(context.text_ptr[-1])))
3909
case SRE_OP_DIGIT_REV:
3911
TRACE(("|%p|%p|DIGIT_REV\n", body, context.text_ptr));
3912
while (context.text_ptr > end_ptr && SRE_IS_DIGIT(context.text_ptr[-1]))
3917
TRACE(("|%p|%p|IN_REV\n", body, context.text_ptr));
3918
while (context.text_ptr > end_ptr && SRE_IN(body + 1, context.text_ptr[-1]))
3921
case SRE_OP_IN_IGNORE_REV:
3922
// <IN_IGNORE_REV> <set>
3923
TRACE(("|%p|%p|IN_REV\n", body, context.text_ptr));
3924
while (context.text_ptr > end_ptr && SRE_IN(body + 1, state->lower(context.text_ptr[-1])))
3927
case SRE_OP_LITERAL_REV:
3928
// <LITERAL_REV> <code>
3929
TRACE(("|%p|%p|LITERAL_REV %d\n", body, context.text_ptr, body[1]));
3930
while (context.text_ptr > end_ptr && context.text_ptr[-1] == (SRE_CHAR)body[1])
3933
case SRE_OP_LITERAL_IGNORE_REV:
3934
// <LITERAL_IGNORE_REV> <code>
3935
TRACE(("|%p|%p|LITERAL_REV %d\n", body, context.text_ptr, body[1]));
3936
while (context.text_ptr > end_ptr && state->lower(context.text_ptr[-1]) == (SRE_CHAR)body[1])
3939
case SRE_OP_LOC_NOT_WORD_REV:
3940
// <LOC_NOT_WORD_REV>
3941
TRACE(("|%p|%p|LOC_NOT_WORD_REV\n", body, context.text_ptr));
3942
while (context.text_ptr > end_ptr && !SRE_LOC_IS_WORD(context.text_ptr[-1]))
3945
case SRE_OP_LOC_WORD_REV:
3947
TRACE(("|%p|%p|LOC_WORD_REV\n", body, context.text_ptr));
3948
while (context.text_ptr > end_ptr && SRE_LOC_IS_WORD(context.text_ptr[-1]))
3951
case SRE_OP_NOT_BIGCHARSET_REV:
3952
// <NOT_BIGCHARSET_REV> <charset>
3953
TRACE(("|%p|%p|NOT_BIGCHARSET_REV\n", body, context.text_ptr));
3954
while (context.text_ptr > end_ptr && !in_bigcharset(body + 1, context.text_ptr[-1]))
3957
case SRE_OP_NOT_BIGCHARSET_IGNORE_REV:
3958
// <NOT_BIGCHARSET_IGNORE_REV> <charset>
3959
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE_REV\n", body, context.text_ptr));
3960
while (context.text_ptr > end_ptr && !in_bigcharset(body + 1, state->lower(context.text_ptr[-1])))
3963
case SRE_OP_NOT_CHARSET_REV:
3964
// <NOT_CHARSET_REV> <charset>
3965
TRACE(("|%p|%p|NOT_CHARSET_REV\n", body, context.text_ptr));
3966
while (context.text_ptr > end_ptr && !in_charset(body + 1, context.text_ptr[-1]))
3969
case SRE_OP_NOT_CHARSET_IGNORE_REV:
3970
// <NOT_CHARSET_IGNORE_REV> <charset>
3971
TRACE(("|%p|%p|NOT_CHARSET_IGNORE_REV\n", body, context.text_ptr));
3972
while (context.text_ptr > end_ptr && !in_charset(body + 1, state->lower(context.text_ptr[-1])))
3975
case SRE_OP_NOT_DIGIT_REV:
3977
TRACE(("|%p|%p|NOT_DIGIT_REV\n", body, context.text_ptr));
3978
while (context.text_ptr > end_ptr && !SRE_IS_DIGIT(context.text_ptr[-1]))
3981
case SRE_OP_NOT_IN_REV:
3982
// <NOT_IN_REV> <set>
3983
TRACE(("|%p|%p|NOT_IN_REV\n", body, context.text_ptr));
3984
while (context.text_ptr > end_ptr && !SRE_IN(body + 1, context.text_ptr[-1]))
3987
case SRE_OP_NOT_IN_IGNORE_REV:
3988
// <NOT_IN_IGNORE_REV> <set>
3989
TRACE(("|%p|%p|NOT_IN_REV\n", body, context.text_ptr));
3990
while (context.text_ptr > end_ptr && !SRE_IN(body + 1, state->lower(context.text_ptr[-1])))
3993
case SRE_OP_NOT_LITERAL_REV:
3994
// <NOT_LITERAL_REV> <code>
3995
TRACE(("|%p|%p|NOT_LITERAL_REV %d\n", body, context.text_ptr, body[1]));
3996
while (context.text_ptr > end_ptr && context.text_ptr[-1] != (SRE_CHAR)body[1])
3999
case SRE_OP_NOT_LITERAL_IGNORE_REV:
4000
// <NOT_LITERAL_IGNORE_REV> <code>
4001
TRACE(("|%p|%p|NOT_LITERAL_REV %d\n", body, context.text_ptr, body[1]));
4002
while (context.text_ptr > end_ptr && state->lower(context.text_ptr[-1]) != (SRE_CHAR)body[1])
4005
case SRE_OP_NOT_RANGE_REV:
4006
// <NOT_RANGE_REV> <lower> <upper>
4007
TRACE(("|%p|%p|NOT_RANGE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
4008
while (context.text_ptr > end_ptr && !SRE_IN_RANGE(context.text_ptr[-1], body[1], body[2]))
4011
case SRE_OP_NOT_RANGE_IGNORE_REV:
4012
// <NOT_RANGE_IGNORE_REV> <lower> <upper>
4013
TRACE(("|%p|%p|NOT_RANGE_IGNORE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
4014
while (context.text_ptr > end_ptr && !SRE_IN_RANGE(state->lower(context.text_ptr[-1]), body[1], body[2]))
4017
case SRE_OP_NOT_WHITESPACE_REV:
4018
// <NOT_WHITESPACE_REV>
4019
TRACE(("|%p|%p|NOT_WHITESPACE_REV\n", body, context.text_ptr));
4020
while (context.text_ptr > end_ptr && !SRE_IS_WHITESPACE(context.text_ptr[-1]))
4023
case SRE_OP_NOT_WORD_REV:
4025
TRACE(("|%p|%p|NOT_WORD_REV\n", body, context.text_ptr));
4026
while (context.text_ptr > end_ptr && !SRE_IS_WORD(context.text_ptr[-1]))
4029
case SRE_OP_RANGE_REV:
4030
// <RANGE_REV> <lower> <upper>
4031
TRACE(("|%p|%p|RANGE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
4032
while (context.text_ptr > end_ptr && SRE_IN_RANGE(context.text_ptr[-1], body[1], body[2]))
4035
case SRE_OP_RANGE_IGNORE_REV:
4036
// <RANGE_IGNORE_REV> <lower> <upper>
4037
TRACE(("|%p|%p|RANGE_IGNORE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
4038
while (context.text_ptr > end_ptr && SRE_IN_RANGE(state->lower(context.text_ptr[-1]), body[1], body[2]))
4041
case SRE_OP_UNI_DIGIT_REV:
4043
TRACE(("|%p|%p|UNI_DIGIT_REV\n", body, context.text_ptr));
4044
while (context.text_ptr > end_ptr && SRE_UNI_IS_DIGIT(context.text_ptr[-1]))
4047
case SRE_OP_UNI_NOT_DIGIT_REV:
4048
// <UNI_NOT_DIGIT_REV>
4049
TRACE(("|%p|%p|UNI_NOT_DIGIT_REV\n", body, context.text_ptr));
4050
while (context.text_ptr > end_ptr && !SRE_UNI_IS_DIGIT(context.text_ptr[-1]))
4053
case SRE_OP_UNI_NOT_WHITESPACE_REV:
4054
// <UNI_NOT_WHITESPACE_REV>
4055
TRACE(("|%p|%p|UNI_NOT_WHITESPACE_REV\n", body, context.text_ptr));
4056
while (context.text_ptr > end_ptr && !SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]))
4059
case SRE_OP_UNI_NOT_WORD_REV:
4060
// <UNI_NOT_WORD_REV>
4061
TRACE(("|%p|%p|UNI_NOT_WORD_REV\n", body, context.text_ptr));
4062
while (context.text_ptr > end_ptr && !SRE_UNI_IS_WORD(context.text_ptr[-1]))
4065
case SRE_OP_UNI_WHITESPACE_REV:
4066
// <UNI_WHITESPACE_REV>
4067
TRACE(("|%p|%p|UNI_WHITESPACE_REV\n", body, context.text_ptr));
4068
while (context.text_ptr > end_ptr && SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]))
4071
case SRE_OP_UNI_WORD_REV:
4073
TRACE(("|%p|%p|UNI_WORD_REV\n", body, context.text_ptr));
4074
while (context.text_ptr > end_ptr && SRE_UNI_IS_WORD(context.text_ptr[-1]))
4077
case SRE_OP_WHITESPACE_REV:
4079
TRACE(("|%p|%p|WHITESPACE_REV\n", body, context.text_ptr));
4080
while (context.text_ptr > end_ptr && SRE_IS_WHITESPACE(context.text_ptr[-1]))
4083
case SRE_OP_WORD_REV:
4085
TRACE(("|%p|%p|WORD_REV\n", body, context.text_ptr));
4086
while (context.text_ptr > end_ptr && SRE_IS_WORD(context.text_ptr[-1]))
4090
// Matched at least the minimum?
4091
// Reverse scan: the loops above run while text_ptr > end_ptr, with end_ptr = start_ptr - repeat_max,
// so the minimum has been matched only once text_ptr is at or below start_ptr - repeat_min.
if (context.text_ptr > start_ptr - repeat_min)
4093
context.pattern_ptr = tail;
4096
case SRE_OP_REPEAT_POSS:
4098
// Possessive repeat.
4099
// <REPEAT_POSS> <skip to end> <index> <min> <max> ... <END_REPEAT_POSS> <skip to start>
4100
SRE_CODE* repeat_ptr = context.pattern_ptr;
4101
SRE_CODE* end_repeat_ptr = context.pattern_ptr + context.pattern_ptr[0];
4102
SRE_CODE* tail = end_repeat_ptr + 1;
4103
int index = repeat_ptr[1];
4104
int repeat_min = repeat_ptr[2];
4105
int repeat_max = repeat_ptr[3];
4106
SRE_CODE* body = repeat_ptr + 4;
4107
Py_ssize_t limit = context.text_end - context.text_ptr;
4108
TRACE(("|%p|%p|REPEAT_POSS %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
4109
if (repeat_min > limit)
4111
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_POSS, repeat_ptr, index);
4113
return SRE_CLEANUP(&context, state, result);
4114
result = SRE_SAVE_MARKS(&context);
4116
return SRE_CLEANUP(&context, state, result);
4117
context.repeat_counter[index] = 0;
4118
if (repeat_min == 0) {
4119
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_POSS, end_repeat_ptr, -1);
4121
return SRE_CLEANUP(&context, state, result);
4122
result = SRE_SAVE_MARKS(&context);
4124
return SRE_CLEANUP(&context, state, result);
4126
context.pattern_ptr = body;
4127
context.repeat_start[index] = context.text_ptr;
4130
case SRE_OP_REPEAT_POSS_REV:
4132
// Possessive repeat.
4133
// <REPEAT_POSS_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_POSS_REV> <skip to start>
4134
SRE_CODE* repeat_ptr = context.pattern_ptr;
4135
SRE_CODE* end_repeat_ptr = context.pattern_ptr + context.pattern_ptr[0];
4136
SRE_CODE* tail = end_repeat_ptr + 1;
4137
int index = repeat_ptr[1];
4138
int repeat_min = repeat_ptr[2];
4139
int repeat_max = repeat_ptr[3];
4140
SRE_CODE* body = repeat_ptr + 4;
4141
Py_ssize_t limit = context.text_ptr - context.text_start;
4142
TRACE(("|%p|%p|REPEAT_POSS_REV %d %d\n", context.pattern_ptr, context.text_ptr, repeat_min, repeat_max));
4143
if (repeat_min > limit)
4145
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_REPEAT_POSS_REV, repeat_ptr, index);
4147
return SRE_CLEANUP(&context, state, result);
4148
result = SRE_SAVE_MARKS(&context);
4150
return SRE_CLEANUP(&context, state, result);
4151
context.repeat_counter[index] = 0;
4152
if (repeat_min == 0) {
4153
result = SRE_SAVE_BACKTRACK(&context, SRE_OP_END_REPEAT_POSS_REV, end_repeat_ptr, -1);
4155
return SRE_CLEANUP(&context, state, result);
4156
result = SRE_SAVE_MARKS(&context);
4158
return SRE_CLEANUP(&context, state, result);
4160
context.pattern_ptr = body;
4161
context.repeat_start[index] = context.text_ptr;
4164
case SRE_OP_START_OF_LINE:
4167
TRACE(("|%p|%p|START_OF_LINE\n", context.pattern_ptr, context.text_ptr));
4168
if (context.text_ptr > context.text_start && !SRE_IS_LINEBREAK(context.text_ptr[-1]))
4171
case SRE_OP_START_OF_STRING:
4173
// <START_OF_STRING>
4174
TRACE(("|%p|%p|START_OF_STRING\n", context.pattern_ptr, context.text_ptr));
4175
if (context.text_ptr > context.text_start)
4178
case SRE_OP_SUCCESS:
4182
SRE_CHAR* end_ptr = NULL;
4183
TRACE(("|%p|%p|SUCCESS\n", context.pattern_ptr, context.text_ptr));
4184
// Find the mark which matched the furthest to the right.
4185
for (m = 1; m < context.mark_count; m += 2) {
4186
if (context.mark[m - 1] != NULL && context.mark[m] != NULL) {
4187
state->lastmark = m;
4188
if (end_ptr < context.mark[m]) {
4189
state->lastindex = 1 + m / 2;
4190
end_ptr = context.mark[m];
4194
state->ptr = context.text_ptr;
4195
return SRE_CLEANUP(&context, state, 1);
4197
case SRE_OP_UNI_BOUNDARY:
4199
// Boundary between word and non-word.
4201
int before = context.text_ptr > context.text_start && SRE_UNI_IS_WORD(context.text_ptr[-1]);
4202
int after = context.text_ptr < context.text_end && SRE_UNI_IS_WORD(context.text_ptr[0]);
4203
TRACE(("|%p|%p|UNI_BOUNDARY\n", context.pattern_ptr, context.text_ptr));
4204
if (before == after)
4208
case SRE_OP_UNI_DIGIT:
4211
TRACE(("|%p|%p|UNI_DIGIT\n", context.pattern_ptr, context.text_ptr));
4212
if (context.text_ptr >= context.text_end || !SRE_UNI_IS_DIGIT(context.text_ptr[0]))
4216
case SRE_OP_UNI_DIGIT_REV:
4219
TRACE(("|%p|%p|UNI_DIGIT_REV\n", context.pattern_ptr, context.text_ptr));
4220
if (context.text_ptr <= context.text_start || !SRE_UNI_IS_DIGIT(context.text_ptr[-1]))
4224
case SRE_OP_UNI_NOT_BOUNDARY:
4226
// Not boundary between word and non-word.
4227
// <UNI_NOT_BOUNDARY>
4228
int before = context.text_ptr > context.text_start && SRE_UNI_IS_WORD(context.text_ptr[-1]);
4229
int after = context.text_ptr < context.text_end && SRE_UNI_IS_WORD(context.text_ptr[0]);
4230
TRACE(("|%p|%p|UNI_NOT_BOUNDARY\n", context.pattern_ptr, context.text_ptr));
4231
if (before != after)
4235
case SRE_OP_UNI_NOT_DIGIT:
4238
TRACE(("|%p|%p|UNI_NOT_DIGIT\n", context.pattern_ptr, context.text_ptr));
4239
if (context.text_ptr >= context.text_end || SRE_UNI_IS_DIGIT(context.text_ptr[0]))
4243
case SRE_OP_UNI_NOT_DIGIT_REV:
4245
// <UNI_NOT_DIGIT_REV>
4246
TRACE(("|%p|%p|UNI_NOT_DIGIT_REV\n", context.pattern_ptr, context.text_ptr));
4247
if (context.text_ptr <= context.text_start || SRE_UNI_IS_DIGIT(context.text_ptr[-1]))
4251
case SRE_OP_UNI_NOT_WHITESPACE:
4253
// <UNI_NOT_WHITESPACE>
4254
TRACE(("|%p|%p|UNI_NOT_WHITESPACE\n", context.pattern_ptr, context.text_ptr));
4255
if (context.text_ptr >= context.text_end || SRE_UNI_IS_WHITESPACE(context.text_ptr[0]))
4259
case SRE_OP_UNI_NOT_WHITESPACE_REV:
4261
// <UNI_NOT_WHITESPACE_REV>
4262
TRACE(("|%p|%p|UNI_NOT_WHITESPACE_REV\n", context.pattern_ptr, context.text_ptr));
4263
if (context.text_ptr <= context.text_start || SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]))
4267
case SRE_OP_UNI_NOT_WORD:
4270
TRACE(("|%p|%p|UNI_NOT_WORD\n", context.pattern_ptr, context.text_ptr));
4271
if (context.text_ptr >= context.text_end || SRE_UNI_IS_WORD(context.text_ptr[0]))
4275
case SRE_OP_UNI_NOT_WORD_REV:
4277
// <UNI_NOT_WORD_REV>
4278
TRACE(("|%p|%p|UNI_NOT_WORD_REV\n", context.pattern_ptr, context.text_ptr));
4279
if (context.text_ptr <= context.text_start || SRE_UNI_IS_WORD(context.text_ptr[-1]))
4283
case SRE_OP_UNI_WHITESPACE:
4286
TRACE(("|%p|%p|UNI_WHITESPACE\n", context.pattern_ptr, context.text_ptr));
4287
if (context.text_ptr >= context.text_end || !SRE_UNI_IS_WHITESPACE(context.text_ptr[0]))
4291
case SRE_OP_UNI_WHITESPACE_REV:
4293
// <UNI_WHITESPACE_REV>
4294
TRACE(("|%p|%p|UNI_WHITESPACE_REV\n", context.pattern_ptr, context.text_ptr));
4295
if (context.text_ptr <= context.text_start || !SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]))
4299
case SRE_OP_UNI_WORD:
4302
TRACE(("|%p|%p|UNI_WORD\n", context.pattern_ptr, context.text_ptr));
4303
if (context.text_ptr >= context.text_end || !SRE_UNI_IS_WORD(context.text_ptr[0]))
4307
case SRE_OP_UNI_WORD_REV:
4310
TRACE(("|%p|%p|UNI_WORD_REV\n", context.pattern_ptr, context.text_ptr));
4311
if (context.text_ptr <= context.text_start || !SRE_UNI_IS_WORD(context.text_ptr[-1]))
4315
case SRE_OP_WHITESPACE:
4318
TRACE(("|%p|%p|WHITESPACE\n", context.pattern_ptr, context.text_ptr));
4319
if (context.text_ptr >= context.text_end || !SRE_IS_WHITESPACE(context.text_ptr[0]))
4323
case SRE_OP_WHITESPACE_REV:
4326
TRACE(("|%p|%p|WHITESPACE_REV\n", context.pattern_ptr, context.text_ptr));
4327
if (context.text_ptr <= context.text_start || !SRE_IS_WHITESPACE(context.text_ptr[-1]))
4334
TRACE(("|%p|%p|WORD\n", context.pattern_ptr, context.text_ptr));
4335
if (context.text_ptr >= context.text_end || !SRE_IS_WORD(context.text_ptr[0]))
4339
case SRE_OP_WORD_REV:
4342
TRACE(("|%p|%p|WORD_REV\n", context.pattern_ptr, context.text_ptr));
4343
if (context.text_ptr <= context.text_start || !SRE_IS_WORD(context.text_ptr[-1]))
4348
TRACE(("|%p|%p|UNKNOWN %d\n", context.pattern_ptr, context.text_ptr, context.pattern_ptr[-1]));
4349
return SRE_CLEANUP(&context, state, SRE_ERROR_ILLEGAL);
4354
TRACE(("|%p|%p|BACKTRACK ", context.pattern_ptr, context.text_ptr));
4355
switch(context.backtrack_chunk->items[context.backtrack_chunk->count - 1].op) {
4357
// Assert subpattern.
4358
// <ASSERT> <skip to end> ... <END_ASSERT>
4359
TRACE(("ASSERT\n"));
4360
SRE_RESTORE_MARKS(&context);
4361
SRE_DISCARD_BACKTRACK(&context);
4363
case SRE_OP_ASSERT_NOT:
4365
// Assert not subpattern.
4366
// <ASSERT_NOT> <skip to end> ... <END_ASSERT_NOT>
4367
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4368
TRACE(("ASSERT_NOT\n"));
4369
context.pattern_ptr = backtrack_item->pattern_ptr;
4370
context.text_ptr = backtrack_item->text_ptr;
4371
SRE_RESTORE_MARKS(&context);
4372
SRE_DISCARD_BACKTRACK(&context);
4373
context.pattern_ptr += context.pattern_ptr[0];
4377
// Atomic subpattern.
4378
// <ATOMIC> <skip to end> ... <END_ATOMIC>
4379
TRACE(("ATOMIC\n"));
4380
SRE_RESTORE_MARKS(&context);
4381
SRE_DISCARD_BACKTRACK(&context);
4386
// <BRANCH> <skip to next> ... <JUMP> <skip to end> <skip to next> ... <JUMP> <skip to end> 0
4387
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4388
TRACE(("BRANCH\n"));
4389
context.pattern_ptr = backtrack_item->pattern_ptr;
4390
context.pattern_ptr += context.pattern_ptr[0];
4391
SRE_RESTORE_MARKS(&context);
4392
if (context.pattern_ptr[0] == 0) {
4393
SRE_DISCARD_BACKTRACK(&context);
4396
backtrack_item->pattern_ptr = context.pattern_ptr;
4397
result = SRE_SAVE_MARKS(&context);
4399
return SRE_CLEANUP(&context, state, result);
4400
context.text_ptr = backtrack_item->text_ptr;
4401
context.pattern_ptr++;
4404
case SRE_OP_END_REPEAT_MAX:
4406
// End of greedy repeat.
4407
// <REPEAT_MAX> <skip to end> <index> <min> <max> ... <END_REPEAT_MAX> <skip to start>
4408
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4409
SRE_CODE* end_repeat_ptr = backtrack_item->pattern_ptr;
4410
SRE_CODE* tail = end_repeat_ptr + 1;
4411
TRACE(("END_REPEAT_MAX\n"));
4412
context.text_ptr = backtrack_item->text_ptr;
4413
SRE_RESTORE_MARKS(&context);
4414
SRE_DISCARD_BACKTRACK(&context);
4415
context.pattern_ptr = tail;
4418
case SRE_OP_END_REPEAT_MAX_REV:
4420
// End of greedy repeat.
4421
// <REPEAT_MAX_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_MAX_REV> <skip to start>
4422
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4423
SRE_CODE* end_repeat_ptr = backtrack_item->pattern_ptr;
4424
SRE_CODE* tail = end_repeat_ptr + 1;
4425
TRACE(("END_REPEAT_MAX_REV\n"));
4426
context.text_ptr = backtrack_item->text_ptr;
4427
SRE_RESTORE_MARKS(&context);
4428
SRE_DISCARD_BACKTRACK(&context);
4429
context.pattern_ptr = tail;
4432
case SRE_OP_END_REPEAT_MIN:
4434
// End of lazy repeat.
4435
// <REPEAT_MIN> <skip to end> <index> <min> <max> ... <END_REPEAT_MIN> <skip to start>
4436
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4437
SRE_CODE* end_repeat_ptr = backtrack_item->pattern_ptr;
4438
SRE_CODE* repeat_ptr = end_repeat_ptr - end_repeat_ptr[0];
4439
int index = repeat_ptr[1];
4440
int repeat_max = repeat_ptr[3];
4441
SRE_CODE* body = repeat_ptr + 4;
4442
SRE_CODE* tail = end_repeat_ptr + 1;
4443
Py_ssize_t limit = context.text_end - context.text_ptr;
4444
TRACE(("END_REPEAT_MIN\n"));
4445
context.text_ptr = backtrack_item->text_ptr;
4446
SRE_RESTORE_MARKS(&context);
4447
SRE_DISCARD_BACKTRACK(&context);
4450
context.pattern_ptr = body;
4453
case SRE_OP_END_REPEAT_MIN_REV:
4455
// End of lazy repeat.
4456
// <REPEAT_MIN_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_MIN_REV> <skip to start>
4457
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4458
SRE_CODE* end_repeat_ptr = backtrack_item->pattern_ptr;
4459
SRE_CODE* repeat_ptr = end_repeat_ptr - end_repeat_ptr[0];
4460
int index = repeat_ptr[1];
4461
int repeat_max = repeat_ptr[3];
4462
SRE_CODE* body = repeat_ptr + 4;
4463
SRE_CODE* tail = end_repeat_ptr + 1;
4464
Py_ssize_t limit = context.text_ptr - context.text_start;
4465
TRACE(("END_REPEAT_MIN_REV\n"));
4466
context.text_ptr = backtrack_item->text_ptr;
4467
SRE_RESTORE_MARKS(&context);
4468
SRE_DISCARD_BACKTRACK(&context);
4471
context.pattern_ptr = body;
4474
case SRE_OP_END_REPEAT_POSS:
4476
// End of possessive repeat.
4477
// <REPEAT_POSS> <skip to end> <index> <min> <max> ... <END_REPEAT_POSS> <skip to start>
4478
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4479
SRE_CODE* end_repeat_ptr = backtrack_item->pattern_ptr;
4480
SRE_CODE* repeat_ptr = end_repeat_ptr - end_repeat_ptr[0];
4481
SRE_CODE* tail = end_repeat_ptr + 1;
4482
TRACE(("END_REPEAT_POSS\n"));
4483
context.text_ptr = backtrack_item->text_ptr;
4484
SRE_RESTORE_MARKS(&context);
4485
SRE_DISCARD_BACKTRACK(&context);
4486
context.pattern_ptr = tail;
4489
case SRE_OP_END_REPEAT_POSS_REV:
4491
// End of possessive repeat.
4492
// <REPEAT_POSS_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_POSS_REV> <skip to start>
4493
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4494
SRE_CODE* end_repeat_ptr = backtrack_item->pattern_ptr;
4495
SRE_CODE* repeat_ptr = end_repeat_ptr - end_repeat_ptr[0];
4496
SRE_CODE* tail = end_repeat_ptr + 1;
4497
TRACE(("END_REPEAT_POSS_REV\n"));
4498
context.text_ptr = backtrack_item->text_ptr;
4499
SRE_RESTORE_MARKS(&context);
4500
SRE_DISCARD_BACKTRACK(&context);
4501
context.pattern_ptr = tail;
4504
case SRE_OP_FAILURE:
4506
return SRE_CLEANUP(&context, state, 0);
4507
case SRE_OP_REPEAT_MAX:
4510
// <REPEAT_MAX> <skip to end> <index> <min> <max> ... <END_REPEAT_MAX> <skip to start>
4511
TRACE(("REPEAT_MAX\n"));
4512
SRE_RESTORE_MARKS(&context);
4513
SRE_DISCARD_BACKTRACK(&context);
4516
case SRE_OP_REPEAT_MAX_REV:
4519
// <REPEAT_MAX_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_MAX_REV> <skip to start>
4520
TRACE(("REPEAT_MAX\n"));
4521
SRE_RESTORE_MARKS(&context);
4522
SRE_DISCARD_BACKTRACK(&context);
4525
case SRE_OP_REPEAT_MIN:
4528
// <REPEAT_MIN> <skip to end> <index> <min> <max> ... <END_REPEAT_MIN> <skip to start>
4529
TRACE(("REPEAT_MIN\n"));
4530
SRE_RESTORE_MARKS(&context);
4531
SRE_DISCARD_BACKTRACK(&context);
4534
case SRE_OP_REPEAT_MIN_REV:
4537
// <REPEAT_MIN_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_MIN_REV> <skip to start>
4538
TRACE(("REPEAT_MIN\n"));
4539
SRE_RESTORE_MARKS(&context);
4540
SRE_DISCARD_BACKTRACK(&context);
4543
case SRE_OP_REPEAT_ONE_MAX:
4546
// <REPEAT_ONE_MAX> <skip to end> <index> <min> <max> ...
4547
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4548
SRE_CODE* repeat_ptr = backtrack_item->pattern_ptr;
4549
SRE_CODE* tail = repeat_ptr + repeat_ptr[0];
4550
int index = repeat_ptr[1];
4551
int repeat_min = repeat_ptr[2];
4552
SRE_CHAR* start_ptr;
4553
SRE_CODE* look_literal = tail;
4554
context.text_ptr = backtrack_item->text_ptr;
4555
start_ptr = context.text_ptr - context.repeat_counter[index] + repeat_min;
4557
// Look at what follows to avoid unnecessary backtracking.
4558
while (look_literal[0] == SRE_OP_MARK)
4560
SRE_LOOK_AHEAD_MANY(&context, start_ptr, state, look_literal);
4561
// Matched at least the minimum?
4562
if (context.text_ptr < start_ptr) {
4563
SRE_DISCARD_BACKTRACK(&context);
4566
backtrack_item->text_ptr = context.text_ptr;
4567
context.repeat_counter[index] = repeat_min + (context.text_ptr - start_ptr);
4568
context.pattern_ptr = tail;
4571
case SRE_OP_REPEAT_ONE_MAX_REV:
4574
// <REPEAT_ONE_MAX_REV> <skip to end> <index> <min> <max> ...
4575
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4576
SRE_CODE* repeat_ptr = backtrack_item->pattern_ptr;
4577
SRE_CODE* tail = repeat_ptr + repeat_ptr[0];
4578
int index = repeat_ptr[1];
4579
int repeat_min = repeat_ptr[2];
4580
SRE_CHAR* start_ptr;
4581
SRE_CODE* look_literal = tail;
4582
context.text_ptr = backtrack_item->text_ptr;
4583
start_ptr = context.text_ptr + context.repeat_counter[index] - repeat_min;
4585
// Look at what follows to avoid unnecessary backtracking.
4586
while (look_literal[0] == SRE_OP_MARK)
4588
SRE_LOOK_AHEAD_MANY_REV(&context, start_ptr, state, look_literal);
4589
// Matched at least the minimum?
4590
if (context.text_ptr > start_ptr) {
4591
SRE_DISCARD_BACKTRACK(&context);
4594
backtrack_item->text_ptr = context.text_ptr;
4595
context.repeat_counter[index] = repeat_min + (start_ptr - context.text_ptr);
4596
context.pattern_ptr = tail;
4599
case SRE_OP_REPEAT_ONE_MIN:
4602
// <REPEAT_ONE_MIN> <skip to end> <index> <min> <max> ...
4603
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4604
SRE_CODE* repeat_ptr = backtrack_item->pattern_ptr;
4605
int index = repeat_ptr[1];
4606
int repeat_max = repeat_ptr[3];
4607
SRE_CODE* body = repeat_ptr + 4;
4608
SRE_CODE* tail = repeat_ptr + repeat_ptr[0];
4609
SRE_CODE* look_literal = tail;
4610
SRE_CHAR* start_ptr;
4613
Py_ssize_t limit = context.text_end - context.text_ptr;
4614
context.text_ptr = backtrack_item->text_ptr;
4615
if (repeat_max > context.repeat_counter[index] + limit || repeat_max == UNLIMITED_REPEATS)
4616
repeat_max = context.repeat_counter[index] + limit;
4617
// Now match up to the maximum.
4618
start_ptr = context.text_ptr - context.repeat_counter[index];
4619
end_ptr = start_ptr + repeat_max;
4620
if (context.text_ptr >= end_ptr) {
4621
SRE_DISCARD_BACKTRACK(&context);
4624
while (look_literal[0] == SRE_OP_MARK)
4630
TRACE(("|%p|%p|ANY\n", body, context.text_ptr));
4631
match = !SRE_IS_LINEBREAK(context.text_ptr[0]);
4633
case SRE_OP_ANY_ALL:
4635
TRACE(("|%p|%p|ANY_ALL\n", body, context.text_ptr));
4638
case SRE_OP_BIGCHARSET:
4639
// <BIGCHARSET> <charset>
4640
TRACE(("|%p|%p|BIGCHARSET\n", body, context.text_ptr));
4641
match = in_bigcharset(body + 1, context.text_ptr[0]);
4643
case SRE_OP_BIGCHARSET_IGNORE:
4644
// <BIGCHARSET_IGNORE> <charset>
4645
TRACE(("|%p|%p|BIGCHARSET_IGNORE\n", body, context.text_ptr));
4646
match = in_bigcharset(body + 1, state->lower(context.text_ptr[0]));
4648
case SRE_OP_CHARSET:
4649
// <CHARSET> <charset>
4650
TRACE(("|%p|%p|CHARSET\n", body, context.text_ptr));
4651
match = in_charset(body + 1, context.text_ptr[0]);
4653
case SRE_OP_CHARSET_IGNORE:
4654
// <CHARSET_IGNORE> <charset>
4655
TRACE(("|%p|%p|CHARSET_IGNORE\n", body, context.text_ptr));
4656
match = in_charset(body + 1, state->lower(context.text_ptr[0]));
4660
TRACE(("|%p|%p|DIGIT\n", body, context.text_ptr));
4661
match = SRE_IS_DIGIT(context.text_ptr[0]);
4665
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
4666
match = SRE_IN(body + 1, context.text_ptr[0]);
4668
case SRE_OP_IN_IGNORE:
4669
// <IN_IGNORE> <set>
4670
TRACE(("|%p|%p|IN\n", body, context.text_ptr));
4671
match = SRE_IN(body + 1, state->lower(context.text_ptr[0]));
4673
case SRE_OP_LITERAL:
4675
TRACE(("|%p|%p|LITERAL %d\n", body, context.text_ptr, body[1]));
4676
match = context.text_ptr[0] == (SRE_CHAR)body[1];
4678
case SRE_OP_LITERAL_IGNORE:
4679
// <LITERAL_IGNORE> <code>
4680
TRACE(("|%p|%p|LITERAL %d\n", body, context.text_ptr, body[1]));
4681
match = state->lower(context.text_ptr[0]) == (SRE_CHAR)body[1];
4683
case SRE_OP_LOC_NOT_WORD:
4685
TRACE(("|%p|%p|LOC_NOT_WORD\n", body, context.text_ptr));
4686
match = !SRE_LOC_IS_WORD(context.text_ptr[0]);
4688
case SRE_OP_LOC_WORD:
4690
TRACE(("|%p|%p|LOC_WORD\n", body, context.text_ptr));
4691
match = SRE_LOC_IS_WORD(context.text_ptr[0]);
4693
case SRE_OP_NOT_BIGCHARSET:
4694
// <NOT_BIGCHARSET> <charset>
4695
TRACE(("|%p|%p|NOT_BIGCHARSET\n", body, context.text_ptr));
4696
match = !in_bigcharset(body + 1, context.text_ptr[0]);
4698
case SRE_OP_NOT_BIGCHARSET_IGNORE:
4699
// <NOT_BIGCHARSET_IGNORE> <charset>
4700
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE\n", body, context.text_ptr));
4701
match = !in_bigcharset(body + 1, state->lower(context.text_ptr[0]));
4703
case SRE_OP_NOT_CHARSET:
4704
// <NOT_CHARSET> <charset>
4705
TRACE(("|%p|%p|NOT_CHARSET\n", body, context.text_ptr));
4706
match = !in_charset(body + 1, context.text_ptr[0]);
4708
case SRE_OP_NOT_CHARSET_IGNORE:
4709
// <NOT_CHARSET_IGNORE> <charset>
4710
TRACE(("|%p|%p|NOT_CHARSET_IGNORE\n", body, context.text_ptr));
4711
match = !in_charset(body + 1, state->lower(context.text_ptr[0]));
4713
case SRE_OP_NOT_DIGIT:
4715
TRACE(("|%p|%p|NOT_DIGIT\n", body, context.text_ptr));
4716
match = !SRE_IS_DIGIT(context.text_ptr[0]);
4720
TRACE(("|%p|%p|NOT_IN\n", body, context.text_ptr));
4721
match = !SRE_IN(body + 1, context.text_ptr[0]);
4723
case SRE_OP_NOT_IN_IGNORE:
4724
// <NOT_IN_IGNORE> <set>
4725
TRACE(("|%p|%p|NOT_IN\n", body, context.text_ptr));
4726
match = !SRE_IN(body + 1, state->lower(context.text_ptr[0]));
4728
case SRE_OP_NOT_LITERAL:
4729
// <NOT_LITERAL> <code>
4730
TRACE(("|%p|%p|NOT_LITERAL %d\n", body, context.text_ptr, body[1]));
4731
match = context.text_ptr[0] != (SRE_CHAR)body[1];
4733
case SRE_OP_NOT_LITERAL_IGNORE:
4734
// <NOT_LITERAL_IGNORE> <code>
4735
TRACE(("|%p|%p|NOT_LITERAL %d\n", body, context.text_ptr, body[1]));
4736
match = state->lower(context.text_ptr[0]) != (SRE_CHAR)body[1];
4738
case SRE_OP_NOT_RANGE:
4739
// <NOT_RANGE> <lower> <upper>
4740
TRACE(("|%p|%p|NOT_RANGE %d %d\n", body, context.text_ptr, body[1], body[2]));
4741
match = !SRE_IN_RANGE(context.text_ptr[0], body[1], body[2]);
4743
case SRE_OP_NOT_RANGE_IGNORE:
4744
// <NOT_RANGE_IGNORE> <lower> <upper>
4745
TRACE(("|%p|%p|NOT_RANGE_IGNORE %d %d\n", body, context.text_ptr, body[1], body[2]));
4746
match = !SRE_IN_RANGE(state->lower(context.text_ptr[0]), body[1], body[2]);
4748
case SRE_OP_NOT_WHITESPACE:
4750
TRACE(("|%p|%p|NOT_WHITESPACE\n", body, context.text_ptr));
4751
match = !SRE_IS_WHITESPACE(context.text_ptr[0]);
4753
case SRE_OP_NOT_WORD:
4755
TRACE(("|%p|%p|NOT_WORD\n", body, context.text_ptr));
4756
match = !SRE_IS_WORD(context.text_ptr[0]);
4759
// <RANGE> <lower> <upper>
4760
TRACE(("|%p|%p|RANGE %d %d\n", body, context.text_ptr, body[1], body[2]));
4761
match = SRE_IN_RANGE(context.text_ptr[0], body[1], body[2]);
4763
case SRE_OP_RANGE_IGNORE:
4764
// <RANGE_IGNORE> <lower> <upper>
4765
TRACE(("|%p|%p|RANGE_IGNORE %d %d\n", body, context.text_ptr, body[1], body[2]));
4766
match = SRE_IN_RANGE(state->lower(context.text_ptr[0]), body[1], body[2]);
4768
case SRE_OP_UNI_DIGIT:
4770
TRACE(("|%p|%p|UNI_DIGIT\n", body, context.text_ptr));
4771
match = SRE_UNI_IS_DIGIT(context.text_ptr[0]);
4773
case SRE_OP_UNI_NOT_DIGIT:
4775
TRACE(("|%p|%p|UNI_NOT_DIGIT\n", body, context.text_ptr));
4776
match = !SRE_UNI_IS_DIGIT(context.text_ptr[0]);
4778
case SRE_OP_UNI_NOT_WHITESPACE:
4779
// <UNI_NOT_WHITESPACE>
4780
TRACE(("|%p|%p|UNI_NOT_WHITESPACE\n", body, context.text_ptr));
4781
match = !SRE_UNI_IS_WHITESPACE(context.text_ptr[0]);
4783
case SRE_OP_UNI_NOT_WORD:
4785
TRACE(("|%p|%p|UNI_NOT_WORD\n", body, context.text_ptr));
4786
match = !SRE_UNI_IS_WORD(context.text_ptr[0]);
4788
case SRE_OP_UNI_WHITESPACE:
4790
TRACE(("|%p|%p|UNI_WHITESPACE\n", body, context.text_ptr));
4791
match = SRE_UNI_IS_WHITESPACE(context.text_ptr[0]);
4793
case SRE_OP_UNI_WORD:
4795
TRACE(("|%p|%p|UNI_WORD\n", body, context.text_ptr));
4796
match = SRE_UNI_IS_WORD(context.text_ptr[0]);
4798
case SRE_OP_WHITESPACE:
4800
TRACE(("|%p|%p|WHITESPACE\n", body, context.text_ptr));
4801
match = SRE_IS_WHITESPACE(context.text_ptr[0]);
4805
TRACE(("|%p|%p|WORD\n", body, context.text_ptr));
4806
match = SRE_IS_WORD(context.text_ptr[0]);
4810
SRE_DISCARD_BACKTRACK(&context);
4813
// The character does match.
4815
// Look at what follows to avoid unnecessary backtracking.
4816
if (SRE_LOOK_AHEAD_ONE(&context, state, look_literal)) {
4817
backtrack_item->text_ptr = context.text_ptr;
4818
context.repeat_counter[index] = context.text_ptr - start_ptr;
4819
context.pattern_ptr = tail;
4823
if (context.text_ptr >= end_ptr) {
4824
SRE_DISCARD_BACKTRACK(&context);
4827
goto next_min_backtrack;
4829
case SRE_OP_REPEAT_ONE_MIN_REV:
4832
// <REPEAT_ONE_MIN_REV> <skip to end> <index> <min> <max> ...
4833
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
4834
SRE_CODE* repeat_ptr = backtrack_item->pattern_ptr;
4835
int index = repeat_ptr[1];
4836
int repeat_max = repeat_ptr[3];
4837
SRE_CODE* body = repeat_ptr + 4;
4838
SRE_CODE* tail = repeat_ptr + repeat_ptr[0];
4839
SRE_CODE* look_literal = tail;
4840
SRE_CHAR* start_ptr;
4843
Py_ssize_t limit = context.text_ptr - context.text_start;
4844
context.text_ptr = backtrack_item->text_ptr;
4845
if (repeat_max > context.repeat_counter[index] + limit || repeat_max == UNLIMITED_REPEATS)
4846
repeat_max = context.repeat_counter[index] + limit;
4847
// Now match up to the maximum.
4848
start_ptr = context.text_ptr + context.repeat_counter[index];
4849
end_ptr = start_ptr - repeat_max;
4850
if (context.text_ptr <= end_ptr) {
4851
SRE_DISCARD_BACKTRACK(&context);
4854
while (look_literal[0] == SRE_OP_MARK)
4856
next_min_backtrack_rev:
4858
case SRE_OP_ANY_REV:
4860
TRACE(("|%p|%p|ANY_REV\n", body, context.text_ptr));
4861
match = !SRE_IS_LINEBREAK(context.text_ptr[-1]);
4863
case SRE_OP_ANY_ALL_REV:
4865
TRACE(("|%p|%p|ANY_ALL_REV\n", body, context.text_ptr));
4868
case SRE_OP_BIGCHARSET_REV:
4869
// <BIGCHARSET_REV> <charset>
4870
TRACE(("|%p|%p|BIGCHARSET_REV\n", body, context.text_ptr));
4871
match = in_bigcharset(body + 1, context.text_ptr[-1]);
4873
case SRE_OP_BIGCHARSET_IGNORE_REV:
4874
// <BIGCHARSET_IGNORE_REV> <charset>
4875
TRACE(("|%p|%p|BIGCHARSET_IGNORE_REV\n", body, context.text_ptr));
4876
match = in_bigcharset(body + 1, state->lower(context.text_ptr[-1]));
4878
case SRE_OP_CHARSET_REV:
4879
// <CHARSET_REV> <charset>
4880
TRACE(("|%p|%p|CHARSET_REV\n", body, context.text_ptr));
4881
match = in_charset(body + 1, context.text_ptr[-1]);
4883
case SRE_OP_CHARSET_IGNORE_REV:
4884
// <CHARSET_IGNORE_REV> <charset>
4885
TRACE(("|%p|%p|CHARSET_IGNORE_REV\n", body, context.text_ptr));
4886
match = in_charset(body + 1, state->lower(context.text_ptr[-1]));
4888
case SRE_OP_DIGIT_REV:
4890
TRACE(("|%p|%p|DIGIT_REV\n", body, context.text_ptr));
4891
match = SRE_IS_DIGIT(context.text_ptr[-1]);
4895
TRACE(("|%p|%p|IN_REV\n", body, context.text_ptr));
4896
match = SRE_IN(body, context.text_ptr[-1]);
4898
case SRE_OP_IN_IGNORE_REV:
4899
// <IN_IGNORE_REV> <set>
4900
TRACE(("|%p|%p|IN_REV\n", body, context.text_ptr));
4901
match = SRE_IN(body + 1, state->lower(context.text_ptr[-1]));
4903
case SRE_OP_LITERAL_REV:
4904
// <LITERAL_REV> <code>
4905
TRACE(("|%p|%p|LITERAL_REV %d\n", body, context.text_ptr, body[1]));
4906
match = context.text_ptr[-1] == (SRE_CHAR)body[1];
4908
case SRE_OP_LITERAL_IGNORE_REV:
4909
// <LITERAL_IGNORE_REV> <code>
4910
TRACE(("|%p|%p|LITERAL_REV %d\n", body, context.text_ptr, body[1]));
4911
match = state->lower(context.text_ptr[-1]) == (SRE_CHAR)body[1];
4913
case SRE_OP_LOC_NOT_WORD_REV:
4914
// <LOC_NOT_WORD_REV>
4915
TRACE(("|%p|%p|LOC_NOT_WORD_REV\n", body, context.text_ptr));
4916
match = !SRE_LOC_IS_WORD(context.text_ptr[-1]);
4918
case SRE_OP_LOC_WORD_REV:
4920
TRACE(("|%p|%p|LOC_WORD_REV\n", body, context.text_ptr));
4921
match = SRE_LOC_IS_WORD(context.text_ptr[-1]);
4923
case SRE_OP_NOT_BIGCHARSET_REV:
4924
// <NOT_BIGCHARSET_REV> <charset>
4925
TRACE(("|%p|%p|NOT_BIGCHARSET_REV\n", body, context.text_ptr));
4926
match = !in_bigcharset(body + 1, context.text_ptr[-1]);
4928
case SRE_OP_NOT_BIGCHARSET_IGNORE_REV:
4929
// <NOT_BIGCHARSET_IGNORE_REV> <charset>
4930
TRACE(("|%p|%p|NOT_BIGCHARSET_IGNORE_REV\n", body, context.text_ptr));
4931
match = !in_bigcharset(body + 1, state->lower(context.text_ptr[-1]));
4933
case SRE_OP_NOT_CHARSET_REV:
4934
// <NOT_CHARSET_REV> <charset>
4935
TRACE(("|%p|%p|NOT_CHARSET_REV\n", body, context.text_ptr));
4936
match = !in_charset(body + 1, context.text_ptr[-1]);
4938
case SRE_OP_NOT_CHARSET_IGNORE_REV:
4939
// <NOT_CHARSET_IGNORE_REV> <charset>
4940
TRACE(("|%p|%p|NOT_CHARSET_IGNORE_REV\n", body, context.text_ptr));
4941
match = !in_charset(body + 1, state->lower(context.text_ptr[-1]));
4943
case SRE_OP_NOT_DIGIT_REV:
4945
TRACE(("|%p|%p|NOT_DIGIT_REV\n", body, context.text_ptr));
4946
match = !SRE_IS_DIGIT(context.text_ptr[-1]);
4948
case SRE_OP_NOT_IN_REV:
4949
// <NOT_IN_REV> <set>
4950
TRACE(("|%p|%p|NOT_IN_REV\n", body, context.text_ptr));
4951
match = !SRE_IN(body + 1, context.text_ptr[-1]);
4953
case SRE_OP_NOT_IN_IGNORE_REV:
4954
// <NOT_IN_IGNORE_REV> <set>
4955
TRACE(("|%p|%p|NOT_IN_REV\n", body, context.text_ptr));
4956
match = !SRE_IN(body + 1, state->lower(context.text_ptr[-1]));
4958
case SRE_OP_NOT_LITERAL_REV:
4959
// <NOT_LITERAL_REV> <code>
4960
TRACE(("|%p|%p|NOT_LITERAL_REV %d\n", body, context.text_ptr, body[1]));
4961
match = context.text_ptr[-1] != (SRE_CHAR)body[1];
4963
case SRE_OP_NOT_LITERAL_IGNORE_REV:
4964
// <NOT_LITERAL_IGNORE_REV> <code>
4965
TRACE(("|%p|%p|NOT_LITERAL_REV %d\n", body, context.text_ptr, body[1]));
4966
match = state->lower(context.text_ptr[-1]) != (SRE_CHAR)body[1];
4968
case SRE_OP_NOT_RANGE_REV:
4969
// <NOT_RANGE_REV> <lower> <upper>
4970
TRACE(("|%p|%p|NOT_RANGE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
4971
match = !SRE_IN_RANGE(context.text_ptr[-1], body[1], body[2]);
4973
case SRE_OP_NOT_RANGE_IGNORE_REV:
4974
// <NOT_RANGE_IGNORE_REV> <lower> <upper>
4975
TRACE(("|%p|%p|NOT_RANGE_IGNORE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
4976
match = !SRE_IN_RANGE(state->lower(context.text_ptr[-1]), body[1], body[2]);
4978
case SRE_OP_NOT_WHITESPACE_REV:
4979
// <NOT_WHITESPACE_REV>
4980
TRACE(("|%p|%p|NOT_WHITESPACE_REV\n", body, context.text_ptr));
4981
match = !SRE_IS_WHITESPACE(context.text_ptr[-1]);
4983
case SRE_OP_NOT_WORD_REV:
4985
TRACE(("|%p|%p|NOT_WORD_REV\n", body, context.text_ptr));
4986
match = !SRE_IS_WORD(context.text_ptr[-1]);
4988
case SRE_OP_RANGE_REV:
4989
// <RANGE_REV> <lower> <upper>
4990
TRACE(("|%p|%p|RANGE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
4991
match = SRE_IN_RANGE(context.text_ptr[-1], body[1], body[2]);
4993
case SRE_OP_RANGE_IGNORE_REV:
4994
// <RANGE_IGNORE_REV> <lower> <upper>
4995
TRACE(("|%p|%p|RANGE_IGNORE_REV %d %d\n", body, context.text_ptr, body[1], body[2]));
4996
match = SRE_IN_RANGE(state->lower(context.text_ptr[-1]), body[1], body[2]);
4998
case SRE_OP_UNI_DIGIT_REV:
5000
TRACE(("|%p|%p|UNI_DIGIT_REV\n", body, context.text_ptr));
5001
match = SRE_UNI_IS_DIGIT(context.text_ptr[-1]);
5003
case SRE_OP_UNI_NOT_DIGIT_REV:
5004
// <UNI_NOT_DIGIT_REV>
5005
TRACE(("|%p|%p|UNI_NOT_DIGIT_REV\n", body, context.text_ptr));
5006
match = !SRE_UNI_IS_DIGIT(context.text_ptr[-1]);
5008
case SRE_OP_UNI_NOT_WHITESPACE_REV:
5009
// <UNI_NOT_WHITESPACE_REV>
5010
TRACE(("|%p|%p|UNI_NOT_WHITESPACE_REV\n", body, context.text_ptr));
5011
match = !SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]);
5013
case SRE_OP_UNI_NOT_WORD_REV:
5014
// <UNI_NOT_WORD_REV>
5015
TRACE(("|%p|%p|UNI_NOT_WORD_REV\n", body, context.text_ptr));
5016
match = !SRE_UNI_IS_WORD(context.text_ptr[-1]);
5018
case SRE_OP_UNI_WHITESPACE_REV:
5019
// <UNI_WHITESPACE_REV>
5020
TRACE(("|%p|%p|UNI_WHITESPACE_REV\n", body, context.text_ptr));
5021
match = SRE_UNI_IS_WHITESPACE(context.text_ptr[-1]);
5023
case SRE_OP_UNI_WORD_REV:
5025
TRACE(("|%p|%p|UNI_WORD_REV\n", body, context.text_ptr));
5026
match = SRE_UNI_IS_WORD(context.text_ptr[-1]);
5028
case SRE_OP_WHITESPACE_REV:
5030
TRACE(("|%p|%p|WHITESPACE_REV\n", body, context.text_ptr));
5031
match = SRE_IS_WHITESPACE(context.text_ptr[-1]);
5033
case SRE_OP_WORD_REV:
5035
TRACE(("|%p|%p|WORD_REV\n", body, context.text_ptr));
5036
match = SRE_IS_WORD(context.text_ptr[-1]);
5040
SRE_DISCARD_BACKTRACK(&context);
5043
// The character does match.
5045
// Look at what follows to avoid unnecessary backtracking.
5046
if (SRE_LOOK_AHEAD_ONE_REV(&context, state, look_literal)) {
5047
backtrack_item->text_ptr = context.text_ptr;
5048
context.repeat_counter[index] = start_ptr - context.text_ptr;
5049
context.pattern_ptr = tail;
5053
if (context.text_ptr <= end_ptr) {
5054
SRE_DISCARD_BACKTRACK(&context);
5057
goto next_min_backtrack_rev;
5059
case SRE_OP_REPEAT_POSS:
5061
// Possessive repeat.
5062
// <REPEAT_POSS> <skip to end> <index> <min> <max> ... <END_REPEAT_POSS> <skip to start>
5063
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
5064
TRACE(("REPEAT_POSS\n"));
5065
SRE_RESTORE_MARKS(&context);
5066
SRE_DISCARD_BACKTRACK(&context);
5069
case SRE_OP_REPEAT_POSS_REV:
5071
// Possessive repeat.
5072
// <REPEAT_POSS_REV> <skip to end> <index> <min> <max> ... <END_REPEAT_POSS_REV> <skip to start>
5073
SRE_BACKTRACK_ITEM* backtrack_item = &context.backtrack_chunk->items[context.backtrack_chunk->count - 1];
5074
TRACE(("REPEAT_POSS\n"));
5075
SRE_RESTORE_MARKS(&context);
5076
SRE_DISCARD_BACKTRACK(&context);
5080
TRACE(("UNKNOWN %d\n", backtrack_chunk->items[backtrack_chunk->count - 1].op));
5081
return SRE_CLEANUP(&context, state, SRE_ERROR_ILLEGAL);
5087
// Classify the opcode at the start of a look-ahead sequence.
//
// look_ahead: pointer to pattern code; only look_ahead[0] (the opcode) is
//             examined by the dispatch visible here.
// Returns an SRE_CODE*; the visible call site in SRE_SEARCH compares the
// result against NULL and, when it is non-NULL, uses it in place of its own
// look_ahead pointer.
//
// NOTE(review): the bodies of the case groups below (and the default path)
// are not visible in this excerpt, so which pointer each group returns must
// be confirmed against the full source.
LOCAL(SRE_CODE*) SRE_LOOK_LITERAL(SRE_CODE* look_ahead) {
5089
switch (look_ahead[0]) {
5090
// Word-boundary opcodes (plain, locale and Unicode variants) share a label group.
case SRE_OP_BOUNDARY:
5091
case SRE_OP_LOC_BOUNDARY:
5092
case SRE_OP_UNI_BOUNDARY:
5095
// Literal opcodes (single character or string, with or without the IGNORE variant).
case SRE_OP_LITERAL:
5096
case SRE_OP_LITERAL_IGNORE:
5097
case SRE_OP_LITERAL_STRING:
5098
case SRE_OP_LITERAL_STRING_IGNORE:
1681
5106
LOCAL(Py_ssize_t)
1682
5107
SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1684
SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1685
SRE_CHAR* end = (SRE_CHAR *)state->end;
5109
SRE_CODE* look_ahead;
5110
SRE_CODE* look_literal;
5111
SRE_CONTEXT context;
1686
5112
Py_ssize_t status = 0;
1687
Py_ssize_t prefix_len = 0;
1688
Py_ssize_t prefix_skip = 0;
1689
SRE_CODE* prefix = NULL;
1690
SRE_CODE* charset = NULL;
1691
SRE_CODE* overlap = NULL;
1694
if (pattern[0] == SRE_OP_INFO) {
1695
/* optimization info block */
1696
/* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
1700
if (pattern[3] > 1) {
1701
/* adjust end point (but make sure we leave at least one
1702
character in there, so literal search will work) */
1703
end -= pattern[3]-1;
1708
if (flags & SRE_INFO_PREFIX) {
1709
/* pattern starts with a known prefix */
1710
/* <length> <skip> <prefix data> <overlap data> */
1711
prefix_len = pattern[5];
1712
prefix_skip = pattern[6];
1713
prefix = pattern + 7;
1714
overlap = prefix + prefix_len - 1;
1715
} else if (flags & SRE_INFO_CHARSET)
1716
/* pattern starts with a character from a known set */
1718
charset = pattern + 5;
1720
pattern += 1 + pattern[1];
1723
TRACE(("prefix = %p %d %d\n", prefix, prefix_len, prefix_skip));
1724
TRACE(("charset = %p\n", charset));
1726
#if defined(USE_FAST_SEARCH)
1727
if (prefix_len > 1) {
1728
/* pattern starts with a known prefix. use the overlap
1729
table to skip forward as fast as we possibly can */
1731
end = (SRE_CHAR *)state->end;
1734
if ((SRE_CODE) ptr[0] != prefix[i]) {
1740
if (++i == prefix_len) {
1741
/* found a potential match */
1742
TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1743
state->start = ptr + 1 - prefix_len;
1744
state->ptr = ptr + 1 - prefix_len + prefix_skip;
1745
if (flags & SRE_INFO_LITERAL)
1746
return 1; /* we got all of it */
1747
status = SRE_MATCH(state, pattern + 2*prefix_skip);
1750
/* close but no cigar -- try again */
1762
if (pattern[0] == SRE_OP_LITERAL) {
1763
/* pattern starts with a literal character. this is used
1764
for short prefixes, and if fast search is disabled */
1765
SRE_CODE chr = pattern[1];
1766
end = (SRE_CHAR *)state->end;
1768
while (ptr < end && (SRE_CODE) ptr[0] != chr)
1772
TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1775
if (flags & SRE_INFO_LITERAL)
1776
return 1; /* we got all of it */
1777
status = SRE_MATCH(state, pattern + 2);
1781
} else if (charset) {
1782
/* pattern starts with a character from a known set */
1783
end = (SRE_CHAR *)state->end;
1785
while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
1789
TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1792
status = SRE_MATCH(state, pattern);
1799
while (ptr <= end) {
1800
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1801
state->start = state->ptr = ptr++;
1802
status = SRE_MATCH(state, pattern);
5114
look_ahead = pattern;
5115
while (look_ahead[0] == SRE_OP_MARK)
5118
look_literal = SRE_LOOK_LITERAL(look_ahead);
5119
if (look_literal != NULL)
5120
look_ahead = look_literal;
5122
context.text_start = (SRE_CHAR *)state->beginning;
5123
context.text_end = (SRE_CHAR *)state->end;
5124
context.text_ptr = (SRE_CHAR *)state->start;
5126
while (context.text_ptr <= context.text_end) {
5127
TRACE(("|%p|%p|SEARCH\n", pattern, context.text_ptr));
5128
if (SRE_LOOK_AHEAD_ONE(&context, state, look_ahead)) {
5129
state->start = state->ptr = context.text_ptr;
5130
status = SRE_MATCH(state, state->pattern_code);