698
727
position and return MATCH_SKIP. Otherwise, pass back the return code
701
if (rrc == MATCH_SKIP_ARG &&
702
strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
730
else if (rrc == MATCH_SKIP_ARG &&
731
STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
704
733
md->start_match_ptr = eptr;
705
734
RRETURN(MATCH_SKIP);
708
if (md->mark == NULL) md->mark = markptr;
712
MRRETURN(MATCH_NOMATCH);
739
RRETURN(MATCH_NOMATCH);
714
741
/* COMMIT overrides PRUNE, SKIP, and THEN */
717
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718
ims, eptrb, flags, RM52);
744
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
719
746
if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
720
747
rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
721
748
rrc != MATCH_THEN)
723
MRRETURN(MATCH_COMMIT);
750
RRETURN(MATCH_COMMIT);
725
752
/* PRUNE overrides THEN */
728
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729
ims, eptrb, flags, RM51);
755
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
730
757
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
731
MRRETURN(MATCH_PRUNE);
758
RRETURN(MATCH_PRUNE);
733
760
case OP_PRUNE_ARG:
734
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
735
ims, eptrb, flags, RM56);
761
md->nomatch_mark = ecode + 2;
762
md->mark = NULL; /* In case previously set by assertion */
763
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
765
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
766
md->mark == NULL) md->mark = ecode + 2;
736
767
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
737
md->mark = ecode + 2;
738
768
RRETURN(MATCH_PRUNE);
740
770
/* SKIP overrides PRUNE and THEN */
743
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
744
ims, eptrb, flags, RM53);
773
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
745
775
if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
747
777
md->start_match_ptr = eptr; /* Pass back current position */
748
MRRETURN(MATCH_SKIP);
780
/* Note that, for Perl compatibility, SKIP with an argument does NOT set
781
nomatch_mark. There is a flag that disables this opcode when re-matching a
782
pattern that ended with a SKIP for which there was not a matching MARK. */
750
784
case OP_SKIP_ARG:
751
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752
ims, eptrb, flags, RM57);
785
if (md->ignore_skip_arg)
787
ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
790
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
753
792
if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
756
795
/* Pass back the current skip name by overloading md->start_match_ptr and
757
796
returning the special MATCH_SKIP_ARG return code. This will either be
758
caught by a matching MARK, or get to the top, where it is treated the same
797
caught by a matching MARK, or get to the top, where it causes a rematch
798
with the md->ignore_skip_arg flag set. */
761
800
md->start_match_ptr = ecode + 2;
762
801
RRETURN(MATCH_SKIP_ARG);
764
/* For THEN (and THEN_ARG) we pass back the address of the bracket or
765
the alt that is at the start of the current branch. This makes it possible
766
to skip back past alternatives that precede the THEN within the current
803
/* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
804
the branch in which it occurs can be determined. Overload the start of
805
match pointer to do this. */
770
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
771
ims, eptrb, flags, RM54);
808
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
772
810
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
773
md->start_match_ptr = ecode - GET(ecode, 1);
774
MRRETURN(MATCH_THEN);
811
md->start_match_ptr = ecode;
776
814
case OP_THEN_ARG:
777
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
778
offset_top, md, ims, eptrb, flags, RM58);
815
md->nomatch_mark = ecode + 2;
816
md->mark = NULL; /* In case previously set by assertion */
817
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
819
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
820
md->mark == NULL) md->mark = ecode + 2;
779
821
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
780
md->start_match_ptr = ecode - GET(ecode, 1);
781
md->mark = ecode + LINK_SIZE + 2;
822
md->start_match_ptr = ecode;
782
823
RRETURN(MATCH_THEN);
784
/* Handle a capturing bracket. If there is space in the offset vector, save
785
the current subject position in the working slot at the top of the vector.
786
We mustn't change the current values of the data slot, because they may be
787
set from a previous iteration of this group, and be referred to by a
788
reference inside the group.
790
If the bracket fails to match, we need to restore this value and also the
791
values of the final offsets, in case they were set by a previous iteration
825
/* Handle an atomic group that does not contain any capturing parentheses.
826
This can be handled like an assertion. Prior to 8.13, all atomic groups
827
were handled this way. In 8.13, the code was changed as below for ONCE, so
828
that backups pass through the group and thereby reset captured values.
829
However, this uses a lot more stack, so in 8.20, atomic groups that do not
830
contain any captures generate OP_ONCE_NC, which can be handled in the old,
831
less stack intensive way.
833
Check the alternative branches in turn - the matching won't pass the KET
834
for this kind of subpattern. If any one branch matches, we carry on as at
835
the end of a normal bracket, leaving the subject pointer, but resetting
836
the start-of-match value in case it was changed by \K. */
841
save_mark = md->mark;
844
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
845
if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
847
mstart = md->start_match_ptr;
850
if (rrc == MATCH_THEN)
852
next = ecode + GET(ecode,1);
853
if (md->start_match_ptr < next &&
854
(*ecode == OP_ALT || *next == OP_ALT))
858
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
859
ecode += GET(ecode,1);
860
md->mark = save_mark;
862
while (*ecode == OP_ALT);
864
/* If we hit the end of the group (which could be repeated), fail */
866
if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
868
/* Continue as from after the group, updating the offsets high water
869
mark, since extracts may have been taken. */
871
do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
873
offset_top = md->end_offset_top;
874
eptr = md->end_match_ptr;
876
/* For a non-repeating ket, just continue at this level. This also
877
happens for a repeating ket if no characters were matched in the group.
878
This is the forcible breaking of infinite loops as implemented in Perl
881
if (*ecode == OP_KET || eptr == saved_eptr)
883
ecode += 1+LINK_SIZE;
887
/* The repeating kets try the rest of the pattern or restart from the
888
preceding bracket, in the appropriate order. The second "call" of match()
889
uses tail recursion, to avoid using another stack frame. */
891
if (*ecode == OP_KETRMIN)
893
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
894
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
898
else /* OP_KETRMAX */
900
md->match_function_type = MATCH_CBEGROUP;
901
RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
902
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
903
ecode += 1 + LINK_SIZE;
906
/* Control never gets here */
908
/* Handle a capturing bracket, other than those that are possessive with an
909
unlimited repeat. If there is space in the offset vector, save the current
910
subject position in the working slot at the top of the vector. We mustn't
911
change the current values of the data slot, because they may be set from a
912
previous iteration of this group, and be referred to by a reference inside
913
the group. A failure to match might occur after the group has succeeded,
914
if something later on doesn't match. For this reason, we need to restore
915
the working value and also the values of the final offsets, in case they
916
were set by a previous iteration of the same bracket.
794
918
If there isn't enough space in the offset vector, treat this as if it were
795
919
a non-capturing bracket. Don't worry about setting the flag for the error
813
937
save_offset2 = md->offset_vector[offset+1];
814
938
save_offset3 = md->offset_vector[md->offset_end - number];
815
939
save_capture_last = md->capture_last;
940
save_mark = md->mark;
817
942
DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
818
943
md->offset_vector[md->offset_end - number] =
819
944
(int)(eptr - md->start_subject);
821
flags = (op == OP_SCBRA)? match_cbegroup : 0;
824
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
825
ims, eptrb, flags, RM1);
826
if (rrc != MATCH_NOMATCH &&
827
(rrc != MATCH_THEN || md->start_match_ptr != ecode))
948
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
949
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
951
if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
953
/* If we backed up to a THEN, check whether it is within the current
954
branch by comparing the address of the THEN that is passed back with
955
the end of the branch. If it is within the current branch, and the
956
branch is one of two or more alternatives (it either starts or ends
957
with OP_ALT), we have reached the limit of THEN's action, so convert
958
the return code to NOMATCH, which will cause normal backtracking to
959
happen from now on. Otherwise, THEN is passed back to an outer
960
alternative. This implements Perl's treatment of parenthesized groups,
961
where a group not containing | does not affect the current alternative,
962
that is, (X) is NOT the same as (X|(*F)). */
964
if (rrc == MATCH_THEN)
966
next = ecode + GET(ecode,1);
967
if (md->start_match_ptr < next &&
968
(*ecode == OP_ALT || *next == OP_ALT))
972
/* Anything other than NOMATCH is passed back. */
974
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
829
975
md->capture_last = save_capture_last;
830
976
ecode += GET(ecode, 1);
977
md->mark = save_mark;
978
if (*ecode != OP_ALT) break;
832
while (*ecode == OP_ALT);
834
981
DPRINTF(("bracket %d failed\n", number));
836
982
md->offset_vector[offset] = save_offset1;
837
983
md->offset_vector[offset+1] = save_offset2;
838
984
md->offset_vector[md->offset_end - number] = save_offset3;
840
if (rrc != MATCH_THEN) md->mark = markptr;
841
RRETURN(MATCH_NOMATCH);
986
/* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
844
991
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat
852
999
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
853
1000
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
855
/* Non-capturing bracket. Loop for all the alternatives. When we get to the
856
final alternative within the brackets, we would return the result of a
857
recursive call to match() whatever happened. We can reduce stack usage by
858
turning this into a tail recursion, except in the case when match_cbegroup
1002
/* Non-capturing or atomic group, except for possessive with unlimited
1003
repeat and ONCE group with no captures. Loop for all the alternatives.
1005
When we get to the final alternative within the brackets, we used to return
1006
the result of a recursive call to match() whatever happened so it was
1007
possible to reduce stack usage by turning this into a tail recursion,
1008
except in the case of a possibly empty group. However, now that there is
1009
the possibility of (*THEN) occurring in the final alternative, this
1010
optimization is no longer always possible.
1012
We can optimize if we know there are no (*THEN)s in the pattern; at present
1013
this is the best that can be done.
1015
MATCH_ONCE is returned when the end of an atomic group is successfully
1016
reached, but subsequent matching fails. It passes back up the tree (causing
1017
captured values to be reset) until the original atomic group level is
1018
reached. This is tested by comparing md->once_target with the start of the
1019
group. At this point, the return is converted into MATCH_NOMATCH so that
1020
previous backup points can be taken. */
863
1025
DPRINTF(("start non-capturing bracket\n"));
864
flags = (op >= OP_SBRA)? match_cbegroup : 0;
867
if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
869
if (flags == 0) /* Not a possibly empty group */
871
ecode += _pcre_OP_lengths[*ecode];
872
DPRINTF(("bracket 0 tail recursion\n"));
876
/* Possibly empty group; can't use tail recursion. */
878
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
880
if (rrc == MATCH_NOMATCH) md->mark = markptr;
884
/* For non-final alternatives, continue the loop for a NOMATCH result;
887
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
889
if (rrc != MATCH_NOMATCH &&
890
(rrc != MATCH_THEN || md->start_match_ptr != ecode))
892
ecode += GET(ecode, 1);
1029
if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP;
1031
/* If this is not a possibly empty group, and there are no (*THEN)s in
1032
the pattern, and this is the final alternative, optimize as described
1035
else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1037
ecode += PRIV(OP_lengths)[*ecode];
1041
/* In all other cases, we have to make another call to match(). */
1043
save_mark = md->mark;
1044
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1047
/* See comment in the code for capturing groups above about handling
1050
if (rrc == MATCH_THEN)
1052
next = ecode + GET(ecode,1);
1053
if (md->start_match_ptr < next &&
1054
(*ecode == OP_ALT || *next == OP_ALT))
1055
rrc = MATCH_NOMATCH;
1058
if (rrc != MATCH_NOMATCH)
1060
if (rrc == MATCH_ONCE)
1062
const pcre_uchar *scode = ecode;
1063
if (*scode != OP_ONCE) /* If not at start, find it */
1065
while (*scode == OP_ALT) scode += GET(scode, 1);
1066
scode -= GET(scode, 1);
1068
if (md->once_target == scode) rrc = MATCH_NOMATCH;
1072
ecode += GET(ecode, 1);
1073
md->mark = save_mark;
1074
if (*ecode != OP_ALT) break;
1077
RRETURN(MATCH_NOMATCH);
1079
/* Handle possessive capturing brackets with an unlimited repeat. We come
1080
here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1081
handled similarly to the normal case above. However, the matching is
1082
different. The end of these brackets will always be OP_KETRPOS, which
1083
returns MATCH_KETRPOS without going further in the pattern. By this means
1084
we can handle the group by iteration rather than recursion, thereby
1085
reducing the amount of stack needed. */
1092
number = GET2(ecode, 1+LINK_SIZE);
1093
offset = number << 1;
1096
printf("start possessive bracket %d\n", number);
1098
pchars(eptr, 16, TRUE, md);
1102
if (offset < md->offset_max)
1104
matched_once = FALSE;
1105
code_offset = (int)(ecode - md->start_code);
1107
save_offset1 = md->offset_vector[offset];
1108
save_offset2 = md->offset_vector[offset+1];
1109
save_offset3 = md->offset_vector[md->offset_end - number];
1110
save_capture_last = md->capture_last;
1112
DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1114
/* Each time round the loop, save the current subject position for use
1115
when the group matches. For MATCH_MATCH, the group has matched, so we
1116
restart it with a new subject starting position, remembering that we had
1117
at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1118
usual. If we haven't matched any alternatives in any iteration, check to
1119
see if a previous iteration matched. If so, the group has matched;
1120
continue from afterwards. Otherwise it has failed; restore the previous
1121
capture values before returning NOMATCH. */
1125
md->offset_vector[md->offset_end - number] =
1126
(int)(eptr - md->start_subject);
1127
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1128
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1130
if (rrc == MATCH_KETRPOS)
1132
offset_top = md->end_offset_top;
1133
eptr = md->end_match_ptr;
1134
ecode = md->start_code + code_offset;
1135
save_capture_last = md->capture_last;
1136
matched_once = TRUE;
1140
/* See comment in the code for capturing groups above about handling
1143
if (rrc == MATCH_THEN)
1145
next = ecode + GET(ecode,1);
1146
if (md->start_match_ptr < next &&
1147
(*ecode == OP_ALT || *next == OP_ALT))
1148
rrc = MATCH_NOMATCH;
1151
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1152
md->capture_last = save_capture_last;
1153
ecode += GET(ecode, 1);
1154
if (*ecode != OP_ALT) break;
1159
md->offset_vector[offset] = save_offset1;
1160
md->offset_vector[offset+1] = save_offset2;
1161
md->offset_vector[md->offset_end - number] = save_offset3;
1164
if (allow_zero || matched_once)
1166
ecode += 1 + LINK_SIZE;
1170
RRETURN(MATCH_NOMATCH);
1173
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1174
as a non-capturing bracket. */
1176
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
1177
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
1179
DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1181
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
1182
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
1184
/* Non-capturing possessive bracket with unlimited repeat. We come here
1185
from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1186
without the capturing complication. It is written out separately for speed
1193
POSSESSIVE_NON_CAPTURE:
1194
matched_once = FALSE;
1195
code_offset = (int)(ecode - md->start_code);
1199
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1200
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1202
if (rrc == MATCH_KETRPOS)
1204
offset_top = md->end_offset_top;
1205
eptr = md->end_match_ptr;
1206
ecode = md->start_code + code_offset;
1207
matched_once = TRUE;
1211
/* See comment in the code for capturing groups above about handling
1214
if (rrc == MATCH_THEN)
1216
next = ecode + GET(ecode,1);
1217
if (md->start_match_ptr < next &&
1218
(*ecode == OP_ALT || *next == OP_ALT))
1219
rrc = MATCH_NOMATCH;
1222
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1223
ecode += GET(ecode, 1);
1224
if (*ecode != OP_ALT) break;
1227
if (matched_once || allow_zero)
1229
ecode += 1 + LINK_SIZE;
1232
RRETURN(MATCH_NOMATCH);
894
1234
/* Control never reaches here. */
896
1236
/* Conditional group: compilation checked that there are no more than
897
1237
two branches. If the condition is false, skipping the first branch takes us
898
1238
past the end if there is only one branch, but that's OK because that is
899
exactly what going to the ket would do. As there is only one branch to be
900
obeyed, we can use tail recursion to avoid using another stack frame. */
1239
exactly what going to the ket would do. */
904
codelink= GET(ecode, 1);
1243
codelink = GET(ecode, 1);
906
1245
/* Because of the way auto-callout works during compile, a callout item is
907
1246
inserted between OP_COND and an assertion condition. */
909
1248
if (ecode[LINK_SIZE+1] == OP_CALLOUT)
911
if (pcre_callout != NULL)
1250
if (PUBL(callout) != NULL)
913
pcre_callout_block cb;
914
cb.version = 1; /* Version 1 of the callout block */
1252
PUBL(callout_block) cb;
1253
cb.version = 2; /* Version 2 of the callout block */
915
1254
cb.callout_number = ecode[LINK_SIZE+2];
916
1255
cb.offset_vector = md->offset_vector;
1256
#ifdef COMPILE_PCRE8
917
1257
cb.subject = (PCRE_SPTR)md->start_subject;
1259
cb.subject = (PCRE_SPTR16)md->start_subject;
918
1261
cb.subject_length = (int)(md->end_subject - md->start_subject);
919
1262
cb.start_match = (int)(mstart - md->start_subject);
920
1263
cb.current_position = (int)(eptr - md->start_subject);
1188
1531
md->start_match_ptr = mstart; /* and the start (\K can modify) */
1190
1533
/* For some reason, the macros don't work properly if an expression is
1191
given as the argument to MRRETURN when the heap is in use. */
1534
given as the argument to RRETURN when the heap is in use. */
1193
1536
rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1196
/* Change option settings */
1201
DPRINTF(("ims set to %02lx\n", ims));
1204
1539
/* Assertion brackets. Check the alternative branches in turn - the
1205
1540
matching won't pass the KET for an assertion. If any one branch matches,
1206
1541
the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1207
1542
start of each branch to move the current point backwards, so the code at
1208
this level is identical to the lookahead case. */
1543
this level is identical to the lookahead case. When the assertion is part
1544
of a condition, we want to return immediately afterwards. The caller of
1545
this incarnation of the match() function will have set MATCH_CONDASSERT in
1546
md->match_function_type, and one of these opcodes will be the first opcode
1547
that is processed. We use a local variable that is preserved over calls to
1548
match() to remember this case. */
1210
1550
case OP_ASSERT:
1211
1551
case OP_ASSERTBACK:
1552
save_mark = md->mark;
1553
if (md->match_function_type == MATCH_CONDASSERT)
1556
md->match_function_type = 0;
1558
else condassert = FALSE;
1214
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1562
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1216
1563
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1218
1565
mstart = md->start_match_ptr; /* In case \K reset it */
1221
if (rrc != MATCH_NOMATCH &&
1222
(rrc != MATCH_THEN || md->start_match_ptr != ecode))
1569
/* PCRE does not allow THEN to escape beyond an assertion; it is treated
1572
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1224
1573
ecode += GET(ecode, 1);
1574
md->mark = save_mark;
1226
1576
while (*ecode == OP_ALT);
1227
if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1578
if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1229
1580
/* If checking an assertion for a condition, return MATCH_MATCH. */
1231
if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1582
if (condassert) RRETURN(MATCH_MATCH);
1233
1584
/* Continue from after the assertion, updating the offsets high water
1234
1585
mark, since extracts may have been taken during the assertion. */
1329
1695
offset data is the offset to the starting bracket from the start of the
1330
1696
whole pattern. (This is so that it works from duplicated subpatterns.)
1332
If there are any capturing brackets started but not finished, we have to
1333
save their starting points and reinstate them after the recursion. However,
1334
we don't know how many such there are (offset_top records the completed
1335
total) so we just have to save all the potential data. There may be up to
1336
65535 such values, which is too large to put on the stack, but using malloc
1337
for small numbers seems expensive. As a compromise, the stack is used when
1338
there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1339
is used. A problem is what to do if the malloc fails ... there is no way of
1340
returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
1341
values on the stack, and accept that the rest may be wrong.
1698
The state of the capturing groups is preserved over recursion, and
1699
re-instated afterwards. We don't know how many are started and not yet
1700
finished (offset_top records the completed total) so we just have to save
1701
all the potential data. There may be up to 65535 such values, which is too
1702
large to put on the stack, but using malloc for small numbers seems
1703
expensive. As a compromise, the stack is used when there are no more than
1704
REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1343
1706
There are also other values that have to be saved. We use a chained
1344
1707
sequence of blocks that actually live on the stack. Thanks to Robin Houston
1345
for the original version of this logic. */
1708
for the original version of this logic. It has, however, been hacked around
1709
a lot, so he is not to blame for the current way it works. */
1347
1711
case OP_RECURSE:
1349
1716
callpat = md->start_code + GET(ecode, 1);
1350
new_recursive.group_num = (callpat == md->start_code)? 0 :
1717
recno = (callpat == md->start_code)? 0 :
1351
1718
GET2(callpat, 1 + LINK_SIZE);
1720
/* Check for repeating a recursion without advancing the subject pointer.
1721
This should catch convoluted mutual recursions. (Some simple cases are
1722
caught at compile time.) */
1724
for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1725
if (recno == ri->group_num && eptr == ri->subject_position)
1726
RRETURN(PCRE_ERROR_RECURSELOOP);
1353
1728
/* Add to "recursing stack" */
1730
new_recursive.group_num = recno;
1731
new_recursive.subject_position = eptr;
1355
1732
new_recursive.prevrec = md->recursive;
1356
1733
md->recursive = &new_recursive;
1358
/* Find where to continue from afterwards */
1735
/* Where to continue from afterwards */
1360
1737
ecode += 1 + LINK_SIZE;
1361
new_recursive.after_call = ecode;
1363
/* Now save the offset data. */
1739
/* Now save the offset data */
1365
1741
new_recursive.saved_max = md->offset_end;
1366
1742
if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1370
1746
new_recursive.offset_save =
1371
(int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1747
(int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1372
1748
if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1375
1750
memcpy(new_recursive.offset_save, md->offset_vector,
1376
1751
new_recursive.saved_max * sizeof(int));
1377
new_recursive.save_offset_top = offset_top;
1379
/* OK, now we can do the recursion. For each top-level alternative we
1380
restore the offset and recursion data. */
1753
/* OK, now we can do the recursion. After processing each alternative,
1754
restore the offset data. If there were nested recursions, md->recursive
1755
might be changed, so reset it before looping. */
1382
1757
DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1383
flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1758
cbegroup = (*callpat >= OP_SBRA);
1386
RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1387
md, ims, eptrb, flags, RM6);
1761
if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1762
RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1764
memcpy(md->offset_vector, new_recursive.offset_save,
1765
new_recursive.saved_max * sizeof(int));
1766
md->recursive = new_recursive.prevrec;
1388
1767
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1390
1769
DPRINTF(("Recursion matched\n"));
1391
md->recursive = new_recursive.prevrec;
1392
1770
if (new_recursive.offset_save != stacksave)
1393
(pcre_free)(new_recursive.offset_save);
1394
MRRETURN(MATCH_MATCH);
1771
(PUBL(free))(new_recursive.offset_save);
1773
/* Set where we got to in the subject, and reset the start in case
1774
it was changed by \K. This *is* propagated back out of a recursion,
1775
for Perl compatibility. */
1777
eptr = md->end_match_ptr;
1778
mstart = md->start_match_ptr;
1779
goto RECURSION_MATCHED; /* Exit loop; end processing */
1396
else if (rrc != MATCH_NOMATCH &&
1397
(rrc != MATCH_THEN || md->start_match_ptr != ecode))
1782
/* PCRE does not allow THEN to escape beyond a recursion; it is treated
1785
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1399
1787
DPRINTF(("Recursion gave error %d\n", rrc));
1400
1788
if (new_recursive.offset_save != stacksave)
1401
(pcre_free)(new_recursive.offset_save);
1789
(PUBL(free))(new_recursive.offset_save);
1405
1793
md->recursive = &new_recursive;
1406
memcpy(md->offset_vector, new_recursive.offset_save,
1407
new_recursive.saved_max * sizeof(int));
1408
1794
callpat += GET(callpat, 1);
1410
1796
while (*callpat == OP_ALT);
1412
1798
DPRINTF(("Recursion didn't match\n"));
1413
1799
md->recursive = new_recursive.prevrec;
1414
1800
if (new_recursive.offset_save != stacksave)
1415
(pcre_free)(new_recursive.offset_save);
1416
MRRETURN(MATCH_NOMATCH);
1418
/* Control never reaches here */
1420
/* "Once" brackets are like assertion brackets except that after a match,
1421
the point in the subject string is not moved back. Thus there can never be
1422
a move back into the brackets. Friedl calls these "atomic" subpatterns.
1423
Check the alternative branches in turn - the matching won't pass the KET
1424
for this kind of subpattern. If any one branch matches, we carry on as at
1425
the end of a normal bracket, leaving the subject pointer, but resetting
1426
the start-of-match value in case it was changed by \K. */
1434
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1435
if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1437
mstart = md->start_match_ptr;
1440
if (rrc != MATCH_NOMATCH &&
1441
(rrc != MATCH_THEN || md->start_match_ptr != ecode))
1443
ecode += GET(ecode,1);
1445
while (*ecode == OP_ALT);
1447
/* If we hit the end of the group (which could be repeated), fail */
1449
if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1451
/* Continue as from after the assertion, updating the offsets high water
1452
mark, since extracts may have been taken. */
1454
do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1456
offset_top = md->end_offset_top;
1457
eptr = md->end_match_ptr;
1459
/* For a non-repeating ket, just continue at this level. This also
1460
happens for a repeating ket if no characters were matched in the group.
1461
This is the forcible breaking of infinite loops as implemented in Perl
1462
5.005. If there is an options reset, it will get obeyed in the normal
1463
course of events. */
1465
if (*ecode == OP_KET || eptr == saved_eptr)
1467
ecode += 1+LINK_SIZE;
1471
/* The repeating kets try the rest of the pattern or restart from the
1472
preceding bracket, in the appropriate order. The second "call" of match()
1473
uses tail recursion, to avoid using another stack frame. We need to reset
1474
any options that changed within the bracket before re-running it, so
1475
check the next opcode. */
1477
if (ecode[1+LINK_SIZE] == OP_OPT)
1479
ims = (ims & ~PCRE_IMS) | ecode[4];
1480
DPRINTF(("ims set to %02lx at group repeat\n", ims));
1483
if (*ecode == OP_KETRMIN)
1485
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1486
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1491
else /* OP_KETRMAX */
1493
RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1494
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1495
ecode += 1 + LINK_SIZE;
1499
/* Control never gets here */
1801
(PUBL(free))(new_recursive.offset_save);
1802
RRETURN(MATCH_NOMATCH);
1501
1808
/* An alternation is the end of a branch; scan along to find the end of the
1502
1809
bracketed group and go to there. */
1512
1819
optional ones preceded by BRAZERO or BRAMINZERO. */
1514
1821
case OP_BRAZERO:
1517
RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1518
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1519
do next += GET(next,1); while (*next == OP_ALT);
1520
ecode = next + 1 + LINK_SIZE;
1823
RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1824
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1825
do next += GET(next, 1); while (*next == OP_ALT);
1826
ecode = next + 1 + LINK_SIZE;
1524
1829
case OP_BRAMINZERO:
1527
do next += GET(next, 1); while (*next == OP_ALT);
1528
RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1529
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1831
do next += GET(next, 1); while (*next == OP_ALT);
1832
RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1833
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1534
1837
case OP_SKIPZERO:
1537
do next += GET(next,1); while (*next == OP_ALT);
1538
ecode = next + 1 + LINK_SIZE;
1839
do next += GET(next,1); while (*next == OP_ALT);
1840
ecode = next + 1 + LINK_SIZE;
1843
/* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1844
here; just jump to the group, with allow_zero set TRUE. */
1849
if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1850
goto POSSESSIVE_NON_CAPTURE;
1542
1852
/* End of a group, repeated or non-repeating. */
1545
1855
case OP_KETRMIN:
1546
1856
case OP_KETRMAX:
1547
1858
prev = ecode - GET(ecode, 1);
1549
1860
/* If this was a group that remembered the subject start, in order to break
1550
1861
infinite repeats of empty string matches, retrieve the subject start from
1551
1862
the chain. Otherwise, set it NULL. */
1553
if (*prev >= OP_SBRA)
1864
if (*prev >= OP_SBRA || *prev == OP_ONCE)
1555
1866
saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1556
1867
eptrb = eptrb->epb_prev; /* Backup to previous group */
1558
1869
else saved_eptr = NULL;
1560
/* If we are at the end of an assertion group or an atomic group, stop
1561
matching and return MATCH_MATCH, but record the current high water mark for
1562
use by positive assertions. We also need to record the match start in case
1563
it was changed by \K. */
1871
/* If we are at the end of an assertion group or a non-capturing atomic
1872
group, stop matching and return MATCH_MATCH, but record the current high
1873
water mark for use by positive assertions. We also need to record the match
1874
start in case it was changed by \K. */
1565
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1566
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1876
if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1877
*prev == OP_ONCE_NC)
1569
md->end_match_ptr = eptr; /* For ONCE */
1879
md->end_match_ptr = eptr; /* For ONCE_NC */
1570
1880
md->end_offset_top = offset_top;
1571
1881
md->start_match_ptr = mstart;
1572
MRRETURN(MATCH_MATCH);
1882
RRETURN(MATCH_MATCH); /* Sets md->mark */
1575
1885
/* For capturing groups we have to check the group number back at the start
1576
1886
and if necessary complete handling an extraction by setting the offsets and
1577
bumping the high water mark. Note that whole-pattern recursion is coded as
1578
a recurse into group 0, so it won't be picked up here. Instead, we catch it
1579
when the OP_END is reached. Other recursion is handled here. */
1887
bumping the high water mark. Whole-pattern recursion is coded as a recurse
1888
into group 0, so it won't be picked up here. Instead, we catch it when the
1889
OP_END is reached. Other recursion is handled here. We just have to record
1890
the current subject position and start match pointer and give a MATCH
1581
if (*prev == OP_CBRA || *prev == OP_SCBRA)
1893
if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1894
*prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1583
1896
number = GET2(prev, 1+LINK_SIZE);
1584
1897
offset = number << 1;
1904
/* Handle a recursively called group. */
1906
if (md->recursive != NULL && md->recursive->group_num == number)
1908
md->end_match_ptr = eptr;
1909
md->start_match_ptr = mstart;
1910
RRETURN(MATCH_MATCH);
1913
/* Deal with capturing */
1591
1915
md->capture_last = number;
1592
1916
if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1918
/* If offset is greater than offset_top, it means that we are
1919
"skipping" a capturing group, and that group's offsets must be marked
1920
unset. In earlier versions of PCRE, all the offsets were unset at the
1921
start of matching, but this doesn't work because atomic groups and
1922
assertions can cause a value to be set that should later be unset.
1923
Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1924
part of the atomic group, but this is not on the final matching path,
1925
so must be unset when 2 is set. (If there is no group 2, there is no
1926
problem, because offset_top will then be 2, indicating no capture.) */
1928
if (offset > offset_top)
1930
register int *iptr = md->offset_vector + offset_top;
1931
register int *iend = md->offset_vector + offset;
1932
while (iptr < iend) *iptr++ = -1;
1935
/* Now make the extraction */
1594
1937
md->offset_vector[offset] =
1595
1938
md->offset_vector[md->offset_end - number];
1596
1939
md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1597
1940
if (offset_top <= offset) offset_top = offset + 2;
1600
/* Handle a recursively called group. Restore the offsets
1601
appropriately and continue from after the call. */
1603
if (md->recursive != NULL && md->recursive->group_num == number)
1605
recursion_info *rec = md->recursive;
1606
DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1607
md->recursive = rec->prevrec;
1608
memcpy(md->offset_vector, rec->offset_save,
1609
rec->saved_max * sizeof(int));
1610
offset_top = rec->save_offset_top;
1611
ecode = rec->after_call;
1617
/* For both capturing and non-capturing groups, reset the value of the ims
1618
flags, in case they got changed during the group. */
1621
DPRINTF(("ims reset to %02lx\n", ims));
1623
/* For a non-repeating ket, just continue at this level. This also
1624
happens for a repeating ket if no characters were matched in the group.
1625
This is the forcible breaking of infinite loops as implemented in Perl
1626
5.005. If there is an options reset, it will get obeyed in the normal
1627
course of events. */
1944
/* For an ordinary non-repeating ket, just continue at this level. This
1945
also happens for a repeating ket if no characters were matched in the
1946
group. This is the forcible breaking of infinite loops as implemented in
1947
Perl 5.005. For a non-repeating atomic group that includes captures,
1948
establish a backup point by processing the rest of the pattern at a lower
1949
level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
1950
original OP_ONCE level, thereby bypassing intermediate backup points, but
1951
resetting any captures that happened along the way. */
1629
1953
if (*ecode == OP_KET || eptr == saved_eptr)
1631
ecode += 1 + LINK_SIZE;
1955
if (*prev == OP_ONCE)
1957
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
1958
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1959
md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
1960
RRETURN(MATCH_ONCE);
1962
ecode += 1 + LINK_SIZE; /* Carry on at this level */
1635
/* The repeating kets try the rest of the pattern or restart from the
1636
preceding bracket, in the appropriate order. In the second case, we can use
1637
tail recursion to avoid using another stack frame, unless we have an
1638
unlimited repeat of a group that can match an empty string. */
1640
flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1966
/* OP_KETRPOS is a possessive repeating ket. Remember the current position,
1967
and return the MATCH_KETRPOS. This makes it possible to do the repeats one
1968
at a time from the outer level, thus saving stack. */
1970
if (*ecode == OP_KETRPOS)
1972
md->end_match_ptr = eptr;
1973
md->end_offset_top = offset_top;
1974
RRETURN(MATCH_KETRPOS);
1977
/* The normal repeating kets try the rest of the pattern or restart from
1978
the preceding bracket, in the appropriate order. In the second case, we can
1979
use tail recursion to avoid using another stack frame, unless we have an
1980
atomic group or an unlimited repeat of a group that can match an empty
1642
1983
if (*ecode == OP_KETRMIN)
1644
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1985
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
1645
1986
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1646
if (flags != 0) /* Could match an empty string */
1648
RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1987
if (*prev == OP_ONCE)
1989
RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
1990
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1991
md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
1992
RRETURN(MATCH_ONCE);
1994
if (*prev >= OP_SBRA) /* Could match an empty string */
1996
md->match_function_type = MATCH_CBEGROUP;
1997
RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
1654
2003
else /* OP_KETRMAX */
1656
RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
2005
if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
2006
RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2007
if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
1657
2008
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2009
if (*prev == OP_ONCE)
2011
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2012
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2013
md->once_target = prev;
2014
RRETURN(MATCH_ONCE);
1658
2016
ecode += 1 + LINK_SIZE;
1660
2017
goto TAIL_RECURSE;
1662
2019
/* Control never gets here */
1664
/* Start of subject unless notbol, or after internal newline if multiline */
2021
/* Not multiline mode: start of subject assertion, unless notbol. */
1667
if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1668
if ((ims & PCRE_MULTILINE) != 0)
1670
if (eptr != md->start_subject &&
1671
(eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1672
MRRETURN(MATCH_NOMATCH);
1676
/* ... else fall through */
2024
if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1678
2026
/* Start of subject assertion */
1681
if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
2029
if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2033
/* Multiline mode: start of subject unless notbol, or after any newline. */
2036
if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2037
if (eptr != md->start_subject &&
2038
(eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2039
RRETURN(MATCH_NOMATCH);
1685
2043
/* Start of match assertion */
1688
if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
2046
if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2256
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2259
/* If the reference is unset, there are two possibilities:
2261
(a) In the default, Perl-compatible state, set the length to be longer
2262
than the amount of subject left; this ensures that every attempt at a
2263
match fails. We can't just fail here, because of the possibility of
2264
quantifiers with zero minima.
2266
(b) If the JavaScript compatibility flag is set, set the length to zero
2267
so that the back reference matches an empty string.
2269
Otherwise, set the length to the length of what was matched by the
2270
referenced subpattern. */
2272
if (offset >= offset_top || md->offset_vector[offset] < 0)
2273
length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2275
length = md->offset_vector[offset+1] - md->offset_vector[offset];
2277
/* Set up for repetition, or handle the non-repeated case */
2287
c = *ecode++ - OP_CRSTAR;
2288
minimize = (c & 1) != 0;
2289
min = rep_min[c]; /* Pick up values from tables; */
2290
max = rep_max[c]; /* zero for max => infinity */
2291
if (max == 0) max = INT_MAX;
2296
minimize = (*ecode == OP_CRMINRANGE);
2297
min = GET2(ecode, 1);
2298
max = GET2(ecode, 3);
2299
if (max == 0) max = INT_MAX;
2303
default: /* No repeat follows */
2304
if (!match_ref(offset, eptr, length, md, ims))
2307
MRRETURN(MATCH_NOMATCH);
2310
continue; /* With the main loop */
2313
/* If the length of the reference is zero, just continue with the
2316
if (length == 0) continue;
2318
/* First, ensure the minimum number of matches are present. We get back
2319
the length of the reference string explicitly rather than passing the
2320
address of eptr, so that eptr can be a register variable. */
2322
for (i = 1; i <= min; i++)
2324
if (!match_ref(offset, eptr, length, md, ims))
2327
MRRETURN(MATCH_NOMATCH);
2332
/* If min = max, continue at the same level without recursion.
2333
They are not both allowed to be zero. */
2335
if (min == max) continue;
2337
/* If minimizing, keep trying and advancing the pointer */
2341
for (fi = min;; fi++)
2343
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2344
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2345
if (fi >= max) MRRETURN(MATCH_NOMATCH);
2346
if (!match_ref(offset, eptr, length, md, ims))
2349
MRRETURN(MATCH_NOMATCH);
2353
/* Control never gets here */
2356
/* If maximizing, find the longest string and work backwards */
2361
for (i = min; i < max; i++)
2363
if (!match_ref(offset, eptr, length, md, ims))
2372
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2373
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2376
MRRETURN(MATCH_NOMATCH);
2613
caseless = op == OP_REFI;
2614
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2615
ecode += 1 + IMM2_SIZE;
2617
/* If the reference is unset, there are two possibilities:
2619
(a) In the default, Perl-compatible state, set the length negative;
2620
this ensures that every attempt at a match fails. We can't just fail
2621
here, because of the possibility of quantifiers with zero minima.
2623
(b) If the JavaScript compatibility flag is set, set the length to zero
2624
so that the back reference matches an empty string.
2626
Otherwise, set the length to the length of what was matched by the
2627
referenced subpattern. */
2629
if (offset >= offset_top || md->offset_vector[offset] < 0)
2630
length = (md->jscript_compat)? 0 : -1;
2632
length = md->offset_vector[offset+1] - md->offset_vector[offset];
2634
/* Set up for repetition, or handle the non-repeated case */
2644
c = *ecode++ - OP_CRSTAR;
2645
minimize = (c & 1) != 0;
2646
min = rep_min[c]; /* Pick up values from tables; */
2647
max = rep_max[c]; /* zero for max => infinity */
2648
if (max == 0) max = INT_MAX;
2653
minimize = (*ecode == OP_CRMINRANGE);
2654
min = GET2(ecode, 1);
2655
max = GET2(ecode, 1 + IMM2_SIZE);
2656
if (max == 0) max = INT_MAX;
2657
ecode += 1 + 2 * IMM2_SIZE;
2660
default: /* No repeat follows */
2661
if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2664
RRETURN(MATCH_NOMATCH);
2667
continue; /* With the main loop */
2670
/* Handle repeated back references. If the length of the reference is
2671
zero, just continue with the main loop. If the length is negative, it
2672
means the reference is unset in non-JavaScript-compatible mode. If the minimum is
2673
zero, we can continue at the same level without recursion. For any other
2674
minimum, carrying on will result in NOMATCH. */
2676
if (length == 0) continue;
2677
if (length < 0 && min == 0) continue;
2679
/* First, ensure the minimum number of matches are present. We get back
2680
the length of the reference string explicitly rather than passing the
2681
address of eptr, so that eptr can be a register variable. */
2683
for (i = 1; i <= min; i++)
2686
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2689
RRETURN(MATCH_NOMATCH);
2694
/* If min = max, continue at the same level without recursion.
2695
They are not both allowed to be zero. */
2697
if (min == max) continue;
2699
/* If minimizing, keep trying and advancing the pointer */
2703
for (fi = min;; fi++)
2706
RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2707
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2708
if (fi >= max) RRETURN(MATCH_NOMATCH);
2709
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2712
RRETURN(MATCH_NOMATCH);
2716
/* Control never gets here */
2719
/* If maximizing, find the longest string and work backwards */
2724
for (i = min; i < max; i++)
2727
if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2736
RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2737
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2740
RRETURN(MATCH_NOMATCH);
2379
2742
/* Control never gets here */
4243
4754
for (fi = min;; fi++)
4245
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4756
RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4246
4757
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4247
if (fi >= max) MRRETURN(MATCH_NOMATCH);
4758
if (fi >= max) RRETURN(MATCH_NOMATCH);
4248
4759
if (eptr >= md->end_subject)
4250
4761
SCHECK_PARTIAL();
4251
MRRETURN(MATCH_NOMATCH);
4762
RRETURN(MATCH_NOMATCH);
4253
4764
GETCHARINCTEST(c, eptr);
4254
if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4765
if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4256
4767
/* Control never gets here */
4259
4770
for (fi = min;; fi++)
4261
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4773
RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4262
4774
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4263
if (fi >= max) MRRETURN(MATCH_NOMATCH);
4775
if (fi >= max) RRETURN(MATCH_NOMATCH);
4264
4776
if (eptr >= md->end_subject)
4266
4778
SCHECK_PARTIAL();
4267
MRRETURN(MATCH_NOMATCH);
4779
RRETURN(MATCH_NOMATCH);
4269
4781
GETCHARINCTEST(c, eptr);
4270
prop_chartype = UCD_CHARTYPE(c);
4271
if ((prop_chartype == ucp_Lu ||
4272
prop_chartype == ucp_Ll ||
4273
prop_chartype == ucp_Lt) == prop_fail_result)
4274
MRRETURN(MATCH_NOMATCH);
4782
chartype = UCD_CHARTYPE(c);
4783
if ((chartype == ucp_Lu ||
4784
chartype == ucp_Ll ||
4785
chartype == ucp_Lt) == prop_fail_result)
4786
RRETURN(MATCH_NOMATCH);
4276
4788
/* Control never gets here */
4279
4791
for (fi = min;; fi++)
4281
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4793
RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4282
4794
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4283
if (fi >= max) MRRETURN(MATCH_NOMATCH);
4795
if (fi >= max) RRETURN(MATCH_NOMATCH);
4284
4796
if (eptr >= md->end_subject)
4286
4798
SCHECK_PARTIAL();
4287
MRRETURN(MATCH_NOMATCH);
4799
RRETURN(MATCH_NOMATCH);
4289
4801
GETCHARINCTEST(c, eptr);
4290
prop_category = UCD_CATEGORY(c);
4291
if ((prop_category == prop_value) == prop_fail_result)
4292
MRRETURN(MATCH_NOMATCH);
4802
if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4803
RRETURN(MATCH_NOMATCH);
4294
4805
/* Control never gets here */
4297
4808
for (fi = min;; fi++)
4299
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4810
RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4300
4811
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4301
if (fi >= max) MRRETURN(MATCH_NOMATCH);
4812
if (fi >= max) RRETURN(MATCH_NOMATCH);
4302
4813
if (eptr >= md->end_subject)
4304
4815
SCHECK_PARTIAL();
4305
MRRETURN(MATCH_NOMATCH);
4816
RRETURN(MATCH_NOMATCH);
4307
4818
GETCHARINCTEST(c, eptr);
4308
prop_chartype = UCD_CHARTYPE(c);
4309
if ((prop_chartype == prop_value) == prop_fail_result)
4310
MRRETURN(MATCH_NOMATCH);
4819
if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4820
RRETURN(MATCH_NOMATCH);
4312
4822
/* Control never gets here */
4315
4825
for (fi = min;; fi++)
4317
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4827
RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4318
4828
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4319
if (fi >= max) MRRETURN(MATCH_NOMATCH);
4829
if (fi >= max) RRETURN(MATCH_NOMATCH);
4320
4830
if (eptr >= md->end_subject)
4322
4832
SCHECK_PARTIAL();
4323
MRRETURN(MATCH_NOMATCH);
4833
RRETURN(MATCH_NOMATCH);
4325
4835
GETCHARINCTEST(c, eptr);
4326
prop_script = UCD_SCRIPT(c);
4327
if ((prop_script == prop_value) == prop_fail_result)
4328
MRRETURN(MATCH_NOMATCH);
4836
if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4837
RRETURN(MATCH_NOMATCH);
4330
4839
/* Control never gets here */
4333
4842
for (fi = min;; fi++)
4335
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4845
RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4336
4846
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4337
if (fi >= max) MRRETURN(MATCH_NOMATCH);
4847
if (fi >= max) RRETURN(MATCH_NOMATCH);
4338
4848
if (eptr >= md->end_subject)
4340
4850
SCHECK_PARTIAL();
4341
MRRETURN(MATCH_NOMATCH);
4851
RRETURN(MATCH_NOMATCH);
4343
4853
GETCHARINCTEST(c, eptr);
4344
prop_category = UCD_CATEGORY(c);
4345
if ((prop_category == ucp_L || prop_category == ucp_N)
4346
== prop_fail_result)
4347
MRRETURN(MATCH_NOMATCH);
4854
category = UCD_CATEGORY(c);
4855
if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4856
RRETURN(MATCH_NOMATCH);
4349
4858
/* Control never gets here */
4351
4860
case PT_SPACE: /* Perl space */
4352
4861
for (fi = min;; fi++)
4354
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4863
RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
4355
4864
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4356
if (fi >= max) MRRETURN(MATCH_NOMATCH);
4865
if (fi >= max) RRETURN(MATCH_NOMATCH);
4357
4866
if (eptr >= md->end_subject)
4359
4868
SCHECK_PARTIAL();
4360
MRRETURN(MATCH_NOMATCH);
4869
RRETURN(MATCH_NOMATCH);
4362
4871
GETCHARINCTEST(c, eptr);
4363
prop_category = UCD_CATEGORY(c);
4364
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4872
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4365
4873
c == CHAR_FF || c == CHAR_CR)
4366
4874
== prop_fail_result)
4367
MRRETURN(MATCH_NOMATCH);
4875
RRETURN(MATCH_NOMATCH);
4369
4877
/* Control never gets here */
4371
4879
case PT_PXSPACE: /* POSIX space */
4372
4880
for (fi = min;; fi++)
4374
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4882
RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
4375
4883
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4376
if (fi >= max) MRRETURN(MATCH_NOMATCH);
4884
if (fi >= max) RRETURN(MATCH_NOMATCH);
4377
4885
if (eptr >= md->end_subject)
4379
4887
SCHECK_PARTIAL();
4380
MRRETURN(MATCH_NOMATCH);
4888
RRETURN(MATCH_NOMATCH);
4382
4890
GETCHARINCTEST(c, eptr);
4383
prop_category = UCD_CATEGORY(c);
4384
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4891
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4385
4892
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4386
4893
== prop_fail_result)
4387
MRRETURN(MATCH_NOMATCH);
4894
RRETURN(MATCH_NOMATCH);
4389
4896
/* Control never gets here */
4392
4899
for (fi = min;; fi++)
4394
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4902
RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
4395
4903
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4396
if (fi >= max) MRRETURN(MATCH_NOMATCH);
4904
if (fi >= max) RRETURN(MATCH_NOMATCH);
4397
4905
if (eptr >= md->end_subject)
4399
4907
SCHECK_PARTIAL();
4400
MRRETURN(MATCH_NOMATCH);
4908
RRETURN(MATCH_NOMATCH);
4402
4910
GETCHARINCTEST(c, eptr);
4403
prop_category = UCD_CATEGORY(c);
4404
if ((prop_category == ucp_L ||
4405
prop_category == ucp_N ||
4911
category = UCD_CATEGORY(c);
4912
if ((category == ucp_L ||
4913
category == ucp_N ||
4406
4914
c == CHAR_UNDERSCORE)
4407
4915
== prop_fail_result)
4408
MRRETURN(MATCH_NOMATCH);
4916
RRETURN(MATCH_NOMATCH);
4410
4918
/* Control never gets here */
5594
6170
< -1 => some kind of unexpected problem
6173
#ifdef COMPILE_PCRE8
5597
6174
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5598
6175
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5599
6176
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5600
6177
int offsetcount)
6179
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6180
pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6181
PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
5602
int rc, resetcount, ocount;
5603
int first_byte = -1;
6185
int rc, ocount, arg_offset_max;
5607
unsigned long int ims;
5608
6187
BOOL using_temporary_offsets = FALSE;
5610
6189
BOOL startline;
5611
6190
BOOL firstline;
5612
BOOL first_byte_caseless = FALSE;
5613
BOOL req_byte_caseless = FALSE;
6192
BOOL has_first_char = FALSE;
6193
BOOL has_req_char = FALSE;
6194
pcre_uchar first_char = 0;
6195
pcre_uchar first_char2 = 0;
6196
pcre_uchar req_char = 0;
6197
pcre_uchar req_char2 = 0;
5615
6198
match_data match_block;
5616
6199
match_data *md = &match_block;
5617
const uschar *tables;
5618
const uschar *start_bits = NULL;
5619
USPTR start_match = (USPTR)subject + start_offset;
5621
USPTR start_partial = NULL;
5622
USPTR req_byte_ptr = start_match - 1;
6200
const pcre_uint8 *tables;
6201
const pcre_uint8 *start_bits = NULL;
6202
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6203
PCRE_PUCHAR end_subject;
6204
PCRE_PUCHAR start_partial = NULL;
6205
PCRE_PUCHAR req_char_ptr = start_match - 1;
5624
pcre_study_data internal_study;
5625
6207
const pcre_study_data *study;
5627
real_pcre internal_re;
5628
const real_pcre *external_re = (const real_pcre *)argument_re;
5629
const real_pcre *re = external_re;
6208
const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6210
/* Check for the special magic call that measures the size of the stack used
6211
per recursive call of match(). */
6213
if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6214
start_offset == -999)
6216
return -sizeof(heapframe);
6218
return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
5631
6221
/* Plausibility checks */
5633
6223
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
5634
if (re == NULL || subject == NULL ||
5635
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
6224
if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6225
return PCRE_ERROR_NULL;
5636
6226
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5637
6227
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5639
/* This information is for finding all the numbers associated with a given
5640
name, for condition testing. */
5642
md->name_table = (uschar *)re + re->name_table_offset;
6229
/* Check that the first field in the block is the magic number. If it is not,
6230
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6231
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6232
means that the pattern is likely compiled with different endianness. */
6234
if (re->magic_number != MAGIC_NUMBER)
6235
return re->magic_number == REVERSED_MAGIC_NUMBER?
6236
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6237
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6239
/* These two settings are used in the code for checking a UTF-8 string that
6240
follows immediately afterwards. Other values in the md block are used only
6241
during "normal" pcre_exec() processing, not when the JIT support is in use,
6242
so they are set up later. */
6244
/* PCRE_UTF16 has the same value as PCRE_UTF8. */
6245
utf = md->utf = (re->options & PCRE_UTF8) != 0;
6246
md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6247
((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6249
/* Check a UTF-8 string if required. Pass back the character offset and error
6250
code for an invalid string if a results vector is available. */
6253
if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6256
int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6259
if (offsetcount >= 2)
6261
offsets[0] = erroroffset;
6262
offsets[1] = errorcode;
6264
#ifdef COMPILE_PCRE16
6265
return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6266
PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6268
return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6269
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6273
/* Check that a start_offset points to the start of a UTF character. */
6274
if (start_offset > 0 && start_offset < length &&
6275
NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6276
return PCRE_ERROR_BADUTF8_OFFSET;
6280
/* If the pattern was successfully studied with JIT support, run the JIT
6281
executable instead of the rest of this function. Most options must be set at
6282
compile time for the JIT code to be usable. Fall back to the normal code path if
6283
an unsupported flag is set. In particular, JIT does not support partial
6287
if (extra_data != NULL
6288
&& (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
6289
&& extra_data->executable_jit != NULL
6290
&& (extra_data->flags & PCRE_EXTRA_TABLES) == 0
6291
&& (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6292
PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
6293
return PRIV(jit_exec)(re, extra_data->executable_jit,
6294
(const pcre_uchar *)subject, length, start_offset, options,
6295
((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6296
? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6299
/* Carry on with non-JIT matching. This information is for finding all the
6300
numbers associated with a given name, for condition testing. */
6302
md->name_table = (pcre_uchar *)re + re->name_table_offset;
5643
6303
md->name_count = re->name_count;
5644
6304
md->name_entry_size = re->name_entry_size;
6209
6858
if (using_temporary_offsets)
6211
if (offsetcount >= 4)
6860
if (arg_offset_max >= 4)
6213
6862
memcpy(offsets + 2, md->offset_vector + 2,
6214
(offsetcount - 2) * sizeof(int));
6863
(arg_offset_max - 2) * sizeof(int));
6215
6864
DPRINTF(("Copied offsets from temporary memory\n"));
6217
if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
6866
if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
6218
6867
DPRINTF(("Freeing temporary memory\n"));
6219
(pcre_free)(md->offset_vector);
6868
(PUBL(free))(md->offset_vector);
6222
/* Set the return code to the number of captured strings, or 0 if there are
6871
/* Set the return code to the number of captured strings, or 0 if there were
6223
6872
too many to fit into the vector. */
6225
rc = md->offset_overflow? 0 : md->end_offset_top/2;
6874
rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
6875
0 : md->end_offset_top/2;
6877
/* If there is space in the offset vector, set any unused pairs at the end of
6878
the pattern to -1 for backwards compatibility. It is documented that this
6879
happens. In earlier versions, the whole set of potential capturing offsets
6880
was set to -1 each time round the loop, but this is handled differently now.
6881
"Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
6882
those at the end that need unsetting here. We can't just unset them all at
6883
the start of the whole thing because they may get set in one branch that is
6884
not the final matching branch. */
6886
if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
6888
register int *iptr, *iend;
6889
int resetcount = 2 + re->top_bracket * 2;
6890
if (resetcount > offsetcount) resetcount = ocount;
6891
iptr = offsets + md->end_offset_top;
6892
iend = offsets + resetcount;
6893
while (iptr < iend) *iptr++ = -1;
6227
6896
/* If there is space, set up the whole thing as substring 0. The value of
6228
6897
md->start_match_ptr might be modified if \K was encountered on the success