1
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2
* vim: set sw=4 ts=8 et tw=78:
4
* ***** BEGIN LICENSE BLOCK *****
5
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
7
* The contents of this file are subject to the Mozilla Public License Version
8
* 1.1 (the "License"); you may not use this file except in compliance with
9
* the License. You may obtain a copy of the License at
10
* http://www.mozilla.org/MPL/
12
* Software distributed under the License is distributed on an "AS IS" basis,
13
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14
* for the specific language governing rights and limitations under the
17
* The Original Code is Mozilla Communicator client code, released
20
* The Initial Developer of the Original Code is
21
* Netscape Communications Corporation.
22
* Portions created by the Initial Developer are Copyright (C) 1998
23
* the Initial Developer. All Rights Reserved.
27
* Alternatively, the contents of this file may be used under the terms of
28
* either of the GNU General Public License Version 2 or later (the "GPL"),
29
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30
* in which case the provisions of the GPL or the LGPL are applicable instead
31
* of those above. If you wish to allow use of your version of this file only
32
* under the terms of either the GPL or the LGPL, and not to allow others to
33
* use your version of this file under the terms of the MPL, indicate your
34
* decision by deleting the provisions above and replace them with the notice
35
* and other provisions required by the GPL or the LGPL. If you do not delete
36
* the provisions above, a recipient may use your version of this file under
37
* the terms of any one of the MPL, the GPL or the LGPL.
39
* ***** END LICENSE BLOCK ***** */
42
* JS regular expressions, after Perl.
48
#include "jsarena.h" /* Added by JSIFY */
49
#include "jsutil.h" /* Added by JSIFY */
66
/* Note : contiguity of 'simple opcodes' is important for SimpleMatch() */
68
REOP_EMPTY = 0, /* match rest of input against rest of r.e. */
69
REOP_ALT = 1, /* alternative subexpressions in kid and next */
70
REOP_SIMPLE_START = 2, /* start of 'simple opcodes' */
71
REOP_BOL = 2, /* beginning of input (or line if multiline) */
72
REOP_EOL = 3, /* end of input (or line if multiline) */
73
REOP_WBDRY = 4, /* match "" at word boundary */
74
REOP_WNONBDRY = 5, /* match "" at word non-boundary */
75
REOP_DOT = 6, /* stands for any character */
76
REOP_DIGIT = 7, /* match a digit char: [0-9] */
77
REOP_NONDIGIT = 8, /* match a non-digit char: [^0-9] */
78
REOP_ALNUM = 9, /* match an alphanumeric char: [0-9a-z_A-Z] */
79
REOP_NONALNUM = 10, /* match a non-alphanumeric char: [^0-9a-z_A-Z] */
80
REOP_SPACE = 11, /* match a whitespace char */
81
REOP_NONSPACE = 12, /* match a non-whitespace char */
82
REOP_BACKREF = 13, /* back-reference (e.g., \1) to a parenthetical */
83
REOP_FLAT = 14, /* match a flat string */
84
REOP_FLAT1 = 15, /* match a single char */
85
REOP_FLATi = 16, /* case-independent REOP_FLAT */
86
REOP_FLAT1i = 17, /* case-independent REOP_FLAT1 */
87
REOP_UCFLAT1 = 18, /* single Unicode char */
88
REOP_UCFLAT1i = 19, /* case-independent REOP_UCFLAT1 */
89
REOP_UCFLAT = 20, /* flat Unicode string; len immediate counts chars */
90
REOP_UCFLATi = 21, /* case-independent REOP_UCFLAT */
91
REOP_CLASS = 22, /* character class with index */
92
REOP_NCLASS = 23, /* negated character class with index */
93
REOP_SIMPLE_END = 23, /* end of 'simple opcodes' */
94
REOP_QUANT = 25, /* quantified atom: atom{1,2} */
95
REOP_STAR = 26, /* zero or more occurrences of kid */
96
REOP_PLUS = 27, /* one or more occurrences of kid */
97
REOP_OPT = 28, /* optional subexpression in kid */
98
REOP_LPAREN = 29, /* left paren bytecode: kid is u.num'th sub-regexp */
99
REOP_RPAREN = 30, /* right paren bytecode */
100
REOP_JUMP = 31, /* for deoptimized closure loops */
101
REOP_DOTSTAR = 32, /* optimize .* to use a single opcode */
102
REOP_ANCHOR = 33, /* like .* but skips left context to unanchored r.e. */
103
REOP_EOLONLY = 34, /* $ not preceded by any pattern */
104
REOP_BACKREFi = 37, /* case-independent REOP_BACKREF */
105
REOP_LPARENNON = 41, /* non-capturing version of REOP_LPAREN */
106
REOP_ASSERT = 43, /* zero width positive lookahead assertion */
107
REOP_ASSERT_NOT = 44, /* zero width negative lookahead assertion */
108
REOP_ASSERTTEST = 45, /* sentinel at end of assertion child */
109
REOP_ASSERTNOTTEST = 46, /* sentinel at end of !assertion child */
110
REOP_MINIMALSTAR = 47, /* non-greedy version of * */
111
REOP_MINIMALPLUS = 48, /* non-greedy version of + */
112
REOP_MINIMALOPT = 49, /* non-greedy version of ? */
113
REOP_MINIMALQUANT = 50, /* non-greedy version of {} */
114
REOP_ENDCHILD = 51, /* sentinel at end of quantifier child */
115
REOP_REPEAT = 52, /* directs execution of greedy quantifier */
116
REOP_MINIMALREPEAT = 53, /* directs execution of non-greedy quantifier */
117
REOP_ALTPREREQ = 54, /* prerequisite for ALT, either of two chars */
118
REOP_ALTPREREQ2 = 55, /* prerequisite for ALT, a char or a class */
119
REOP_ENDALT = 56, /* end of final alternate */
120
REOP_CONCAT = 57, /* concatenation of terms (parse time only) */
125
#define REOP_IS_SIMPLE(op) ((unsigned)((op) - REOP_SIMPLE_START) < \
126
(unsigned)REOP_SIMPLE_END)
129
REOp op; /* r.e. op bytecode */
130
RENode *next; /* next in concatenation order */
131
void *kid; /* first operand */
133
void *kid2; /* second operand */
134
jsint num; /* could be a number */
135
size_t parenIndex; /* or a parenthesis index */
136
struct { /* or a quantifier range */
141
struct { /* or a character class */
143
size_t kidlen; /* length of string at kid, in jschars */
144
size_t index; /* index into class list */
145
uint16 bmsize; /* bitmap size, based on max char code */
148
struct { /* or a literal sequence */
149
jschar chr; /* of one character */
150
size_t length; /* or many (via the kid) */
153
RENode *kid2; /* second operand from ALT */
154
jschar ch1; /* match char for ALTPREREQ */
155
jschar ch2; /* ditto, or class index for ALTPREREQ2 */
160
#define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
161
((c >= 'a') && (c <= 'z')) )
162
#define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
163
(c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
165
#define CLASS_CACHE_SIZE 4
167
typedef struct CompilerState {
169
JSTokenStream *tokenStream; /* For reporting errors */
170
const jschar *cpbegin;
174
size_t classCount; /* number of [] encountered */
175
size_t treeDepth; /* maximum depth of parse tree */
176
size_t progLength; /* estimated bytecode length */
178
size_t classBitmapsMem; /* memory to hold all class bitmaps */
180
const jschar *start; /* small cache of class strings */
181
size_t length; /* since they're often the same */
183
} classCache[CLASS_CACHE_SIZE];
187
typedef struct EmitStateStackEntry {
188
jsbytecode *altHead; /* start of REOP_ALT* opcode */
189
jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
190
jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
191
jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
192
RENode *continueNode; /* original REOP_ALT* node being stacked */
193
jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
194
JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
195
avoid 16-bit unsigned offset overflow */
196
} EmitStateStackEntry;
199
* Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
200
* the getters and setters take the pc of the offset, not of the opcode before
204
#define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1]))
205
#define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
206
(pc)[1] = (jsbytecode) (arg))
208
#define OFFSET_LEN ARG_LEN
209
#define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1)
210
#define GET_OFFSET(pc) GET_ARG(pc)
213
* Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
214
* For sanity, we limit it to 2^24 bytes.
216
#define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry))
219
* The maximum memory that can be allocated for class bitmaps.
220
* For sanity, we limit it to 2^24 bytes.
222
#define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
225
* Functions to get size and write/read bytecode that represent small indexes
227
* Each byte in the code represent 7-bit chunk of the index. 8th bit when set
228
* indicates that the following byte brings more bits to the index. Otherwise
229
* this is the last byte in the index bytecode representing highest index bits.
232
GetCompactIndexWidth(size_t index)
236
for (width = 1; (index >>= 7) != 0; ++width) { }
241
WriteCompactIndex(jsbytecode *pc, size_t index)
245
while ((next = index >> 7) != 0) {
246
*pc++ = (jsbytecode)(index | 0x80);
249
*pc++ = (jsbytecode)index;
254
ReadCompactIndex(jsbytecode *pc, size_t *result)
259
if ((nextByte & 0x80) == 0) {
261
* Short-circuit the most common case when compact index <= 127.
266
*result = 0x7F & nextByte;
269
*result |= (nextByte & 0x7F) << shift;
271
} while ((nextByte & 0x80) != 0);
276
typedef struct RECapture {
277
ptrdiff_t index; /* start of contents, -1 for empty */
278
size_t length; /* length of capture */
281
typedef struct REMatchState {
283
RECapture parens[1]; /* first of 're->parenCount' captures,
284
allocated at end of this struct */
287
struct REBackTrackData;
289
typedef struct REProgState {
290
jsbytecode *continue_pc; /* current continuation data */
291
jsbytecode continue_op;
292
ptrdiff_t index; /* progress in text */
293
size_t parenSoFar; /* highest indexed paren started */
296
uintN min; /* current quantifier limits */
300
size_t top; /* backtrack stack state */
306
typedef struct REBackTrackData {
307
size_t sz; /* size of previous stack entry */
308
jsbytecode *backtrack_pc; /* where to backtrack to */
309
jsbytecode backtrack_op;
310
const jschar *cp; /* index in text of match at backtrack */
311
size_t parenIndex; /* start index of saved paren contents */
312
size_t parenCount; /* # of saved paren contents */
313
size_t saveStateStackTop; /* number of parent states */
314
/* saved parent states follow */
315
/* saved paren contents follow */
318
#define INITIAL_STATESTACK 100
319
#define INITIAL_BACKTRACK 8000
321
typedef struct REGlobalData {
323
JSRegExp *regexp; /* the RE in execution */
324
JSBool ok; /* runtime error (out_of_memory only?) */
325
size_t start; /* offset to start at */
326
ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
327
const jschar *cpbegin; /* text base address */
328
const jschar *cpend; /* text limit address */
330
REProgState *stateStack; /* stack of state of current parents */
331
size_t stateStackTop;
332
size_t stateStackLimit;
334
REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
335
REBackTrackData *backTrackSP;
336
size_t backTrackStackSize;
337
size_t cursz; /* size of current stack entry */
339
JSArenaPool pool; /* It's faster to use one malloc'd pool
340
than to malloc/free the three items
341
that are allocated from this pool */
345
* 1. If IgnoreCase is false, return ch.
346
* 2. Let u be ch converted to upper case as if by calling
347
* String.prototype.toUpperCase on the one-character string ch.
348
* 3. If u does not consist of a single character, return ch.
349
* 4. Let cu be u's character.
350
* 5. If ch's code point value is greater than or equal to decimal 128 and cu's
351
* code point value is less than decimal 128, then return ch.
357
jschar cu = JS_TOUPPER(ch);
358
if (ch >= 128 && cu < 128)
366
jschar cl = JS_TOLOWER(ch);
367
if (cl >= 128 && ch < 128)
372
/* Construct and initialize an RENode, returning NULL for out-of-memory */
374
NewRENode(CompilerState *state, REOp op)
380
JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
382
JS_ReportOutOfMemory(cx);
392
* Validates and converts hex ascii value.
395
isASCIIHexDigit(jschar c, uintN *digit)
406
if (cv >= 'a' && cv <= 'f') {
407
*digit = cv - 'a' + 10;
416
const jschar *errPos;
422
* Process the op against the two top operands, reducing them to a single
423
* operand in the penultimate slot. Update progLength and treeDepth.
426
ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
431
switch (opData->op) {
433
result = NewRENode(state, REOP_ALT);
436
result->kid = operandStack[operandSP - 2];
437
result->u.kid2 = operandStack[operandSP - 1];
438
operandStack[operandSP - 2] = result;
440
if (state->treeDepth == TREE_DEPTH_MAX) {
441
js_ReportCompileErrorNumber(state->context, state->tokenStream,
442
JSREPORT_TS | JSREPORT_ERROR,
443
JSMSG_REGEXP_TOO_COMPLEX);
449
* Look at both alternates to see if there's a FLAT or a CLASS at
450
* the start of each. If so, use a prerequisite match.
452
if (((RENode *) result->kid)->op == REOP_FLAT &&
453
((RENode *) result->u.kid2)->op == REOP_FLAT &&
454
(state->flags & JSREG_FOLD) == 0) {
455
result->op = REOP_ALTPREREQ;
456
result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
457
result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
458
/* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
459
JUMP, <end> ... ENDALT */
460
state->progLength += 13;
463
if (((RENode *) result->kid)->op == REOP_CLASS &&
464
((RENode *) result->kid)->u.ucclass.index < 256 &&
465
((RENode *) result->u.kid2)->op == REOP_FLAT &&
466
(state->flags & JSREG_FOLD) == 0) {
467
result->op = REOP_ALTPREREQ2;
468
result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
469
result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
470
/* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
471
JUMP, <end> ... ENDALT */
472
state->progLength += 13;
475
if (((RENode *) result->kid)->op == REOP_FLAT &&
476
((RENode *) result->u.kid2)->op == REOP_CLASS &&
477
((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
478
(state->flags & JSREG_FOLD) == 0) {
479
result->op = REOP_ALTPREREQ2;
480
result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
481
result->u.altprereq.ch2 =
482
((RENode *) result->u.kid2)->u.ucclass.index;
483
/* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
484
JUMP, <end> ... ENDALT */
485
state->progLength += 13;
488
/* ALT, <next>, ..., JUMP, <end> ... ENDALT */
489
state->progLength += 7;
494
result = operandStack[operandSP - 2];
496
result = result->next;
497
result->next = operandStack[operandSP - 1];
501
case REOP_ASSERT_NOT:
504
/* These should have been processed by a close paren. */
505
js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
506
JSREPORT_TS | JSREPORT_ERROR,
507
JSMSG_MISSING_PAREN, opData->errPos);
516
* Parser forward declarations.
518
static JSBool ParseTerm(CompilerState *state);
519
static JSBool ParseQuantifier(CompilerState *state);
520
static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
523
* Top-down regular expression grammar, based closely on Perl4.
525
* regexp: altern A regular expression is one or more
526
* altern '|' regexp alternatives separated by vertical bar.
528
#define INITIAL_STACK_SIZE 128
531
ParseRegExp(CompilerState *state)
535
REOpData *operatorStack;
536
RENode **operandStack;
539
JSBool result = JS_FALSE;
541
intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
542
intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
544
/* Watch out for empty regexp */
545
if (state->cp == state->cpend) {
546
state->result = NewRENode(state, REOP_EMPTY);
547
return (state->result != NULL);
550
operatorStack = (REOpData *)
551
JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
555
operandStack = (RENode **)
556
JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
561
parenIndex = state->parenCount;
562
if (state->cp == state->cpend) {
564
* If we are at the end of the regexp and we're short one or more
565
* operands, the regexp must have the form /x|/ or some such, with
566
* left parentheses making us short more than one operand.
568
if (operatorSP >= operandSP) {
569
operand = NewRENode(state, REOP_EMPTY);
575
switch (*state->cp) {
578
if (state->cp + 1 < state->cpend &&
580
(state->cp[1] == '=' ||
581
state->cp[1] == '!' ||
582
state->cp[1] == ':')) {
583
switch (state->cp[1]) {
586
/* ASSERT, <next>, ... ASSERTTEST */
587
state->progLength += 4;
590
op = REOP_ASSERT_NOT;
591
/* ASSERTNOT, <next>, ... ASSERTNOTTEST */
592
state->progLength += 4;
601
/* LPAREN, <index>, ... RPAREN, <index> */
603
+= 2 * (1 + GetCompactIndexWidth(parenIndex));
605
if (state->parenCount == 65535) {
606
js_ReportCompileErrorNumber(state->context,
610
JSMSG_TOO_MANY_PARENS);
618
* If there's no stacked open parenthesis, throw syntax error.
620
for (i = operatorSP - 1; ; i--) {
622
js_ReportCompileErrorNumber(state->context,
626
JSMSG_UNMATCHED_RIGHT_PAREN);
629
if (operatorStack[i].op == REOP_ASSERT ||
630
operatorStack[i].op == REOP_ASSERT_NOT ||
631
operatorStack[i].op == REOP_LPARENNON ||
632
operatorStack[i].op == REOP_LPAREN) {
639
/* Expected an operand before these, so make an empty one */
640
operand = NewRENode(state, REOP_EMPTY);
646
if (!ParseTerm(state))
648
operand = state->result;
650
if (operandSP == operandStackSize) {
651
operandStackSize += operandStackSize;
652
operandStack = (RENode **)
653
JS_realloc(state->context, operandStack,
654
sizeof(RENode *) * operandStackSize);
658
operandStack[operandSP++] = operand;
663
/* At the end; process remaining operators. */
665
if (state->cp == state->cpend) {
668
if (!ProcessOp(state, &operatorStack[operatorSP],
669
operandStack, operandSP))
673
JS_ASSERT(operandSP == 1);
674
state->result = operandStack[0];
679
switch (*state->cp) {
681
/* Process any stacked 'concat' operators */
684
operatorStack[operatorSP - 1].op == REOP_CONCAT) {
686
if (!ProcessOp(state, &operatorStack[operatorSP],
687
operandStack, operandSP)) {
697
* If there's no stacked open parenthesis, throw syntax error.
699
for (i = operatorSP - 1; ; i--) {
701
js_ReportCompileErrorNumber(state->context,
703
JSREPORT_TS | JSREPORT_ERROR,
704
JSMSG_UNMATCHED_RIGHT_PAREN);
707
if (operatorStack[i].op == REOP_ASSERT ||
708
operatorStack[i].op == REOP_ASSERT_NOT ||
709
operatorStack[i].op == REOP_LPARENNON ||
710
operatorStack[i].op == REOP_LPAREN) {
716
/* Process everything on the stack until the open parenthesis. */
718
JS_ASSERT(operatorSP);
720
switch (operatorStack[operatorSP].op) {
722
case REOP_ASSERT_NOT:
724
operand = NewRENode(state, operatorStack[operatorSP].op);
727
operand->u.parenIndex =
728
operatorStack[operatorSP].parenIndex;
729
JS_ASSERT(operandSP);
730
operand->kid = operandStack[operandSP - 1];
731
operandStack[operandSP - 1] = operand;
732
if (state->treeDepth == TREE_DEPTH_MAX) {
733
js_ReportCompileErrorNumber(state->context,
737
JSMSG_REGEXP_TOO_COMPLEX);
744
state->result = operandStack[operandSP - 1];
745
if (!ParseQuantifier(state))
747
operandStack[operandSP - 1] = state->result;
748
goto restartOperator;
750
if (!ProcessOp(state, &operatorStack[operatorSP],
751
operandStack, operandSP))
761
const jschar *errp = state->cp;
763
if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
765
* This didn't even scan correctly as a quantifier, so we should
779
js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
780
JSREPORT_TS | JSREPORT_ERROR,
781
JSMSG_BAD_QUANTIFIER, state->cp);
786
/* Anything else is the start of the next term. */
789
if (operatorSP == operatorStackSize) {
790
operatorStackSize += operatorStackSize;
791
operatorStack = (REOpData *)
792
JS_realloc(state->context, operatorStack,
793
sizeof(REOpData) * operatorStackSize);
797
operatorStack[operatorSP].op = op;
798
operatorStack[operatorSP].errPos = state->cp;
799
operatorStack[operatorSP++].parenIndex = parenIndex;
805
JS_free(state->context, operatorStack);
807
JS_free(state->context, operandStack);
812
* Hack two bits in CompilerState.flags, for use within FindParenCount to flag
813
* its being on the stack, and to propagate errors to its callers.
815
#define JSREG_FIND_PAREN_COUNT 0x8000
816
#define JSREG_FIND_PAREN_ERROR 0x4000
819
* Magic return value from FindParenCount and GetDecimalValue, to indicate
820
* overflow beyond GetDecimalValue's max parameter, or a computed maximum if
821
* its findMax parameter is non-null.
823
#define OVERFLOW_VALUE ((uintN)-1)
826
FindParenCount(CompilerState *state)
831
if (state->flags & JSREG_FIND_PAREN_COUNT)
832
return OVERFLOW_VALUE;
835
* Copy state into temp, flag it so we never report an invalid backref,
836
* and reset its members to parse the entire regexp. This is obviously
837
* suboptimal, but GetDecimalValue calls us only if a backref appears to
838
* refer to a forward parenthetical, which is rare.
841
temp.flags |= JSREG_FIND_PAREN_COUNT;
842
temp.cp = temp.cpbegin;
847
temp.classBitmapsMem = 0;
848
for (i = 0; i < CLASS_CACHE_SIZE; i++)
849
temp.classCache[i].start = NULL;
851
if (!ParseRegExp(&temp)) {
852
state->flags |= JSREG_FIND_PAREN_ERROR;
853
return OVERFLOW_VALUE;
855
return temp.parenCount;
859
* Extract and return a decimal value at state->cp. The initial character c
860
* has already been read. Return OVERFLOW_VALUE if the result exceeds max.
861
* Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
862
* state->flags to discover whether an error occurred under findMax.
865
GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
866
CompilerState *state)
868
uintN value = JS7_UNDEC(c);
869
JSBool overflow = (value > max && (!findMax || value > findMax(state)));
871
/* The following restriction allows simpler overflow checks. */
872
JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
873
while (state->cp < state->cpend) {
877
value = 10 * value + JS7_UNDEC(c);
878
if (!overflow && value > max && (!findMax || value > findMax(state)))
882
return overflow ? OVERFLOW_VALUE : value;
886
* Calculate the total size of the bitmap required for a class expression.
889
CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
893
JSBool inRange = JS_FALSE;
894
jschar c, rangeStart = 0;
895
uintN n, digit, nDigits, i;
897
target->u.ucclass.bmsize = 0;
898
target->u.ucclass.sense = JS_TRUE;
905
target->u.ucclass.sense = JS_FALSE;
934
if (src < end && RE_IS_LETTER(*src)) {
935
localMax = (jschar) (*src++ & 0x1F);
948
for (i = 0; (i < nDigits) && (src < end); i++) {
950
if (!isASCIIHexDigit(c, &digit)) {
952
* Back off to accepting the original
959
n = (n << 4) | digit;
965
JS_ReportErrorNumber(state->context,
966
js_GetErrorMessage, NULL,
967
JSMSG_BAD_CLASS_RANGE);
978
JS_ReportErrorNumber(state->context,
979
js_GetErrorMessage, NULL,
980
JSMSG_BAD_CLASS_RANGE);
983
target->u.ucclass.bmsize = 65535;
994
* This is a non-ECMA extension - decimal escapes (in this
995
* case, octal!) are supposed to be an error inside class
996
* ranges, but supported here for backwards compatibility.
1001
if ('0' <= c && c <= '7') {
1003
n = 8 * n + JS7_UNDEC(c);
1005
if ('0' <= c && c <= '7') {
1007
i = 8 * n + JS7_UNDEC(c);
1026
if (state->flags & JSREG_FOLD) {
1027
c = JS_MAX(upcase((jschar) localMax), downcase((jschar) localMax));
1032
if (rangeStart > localMax) {
1033
JS_ReportErrorNumber(state->context,
1034
js_GetErrorMessage, NULL,
1035
JSMSG_BAD_CLASS_RANGE);
1040
if (src < end - 1) {
1044
rangeStart = (jschar)localMax;
1052
target->u.ucclass.bmsize = max;
1057
* item: assertion An item is either an assertion or
1058
* quantatom a quantified atom.
1060
* assertion: '^' Assertions match beginning of string
1061
* (or line if the class static property
1062
* RegExp.multiline is true).
1063
* '$' End of string (or line if the class
1064
* static property RegExp.multiline is
1066
* '\b' Word boundary (between \w and \W).
1067
* '\B' Word non-boundary.
1069
* quantatom: atom An unquantified atom.
1070
* quantatom '{' n ',' m '}'
1071
* Atom must occur between n and m times.
1072
* quantatom '{' n ',' '}' Atom must occur at least n times.
1073
* quantatom '{' n '}' Atom must occur exactly n times.
1074
* quantatom '*' Zero or more times (same as {0,}).
1075
* quantatom '+' One or more times (same as {1,}).
1076
* quantatom '?' Zero or one time (same as {0,1}).
1078
* any of which can be optionally followed by '?' for ungreedy
1080
* atom: '(' regexp ')' A parenthesized regexp (what matched
1081
* can be addressed using a backreference,
1083
* '.' Matches any char except '\n'.
1084
* '[' classlist ']' A character class.
1085
* '[' '^' classlist ']' A negated character class.
1087
* '\n' Newline (Line Feed).
1088
* '\r' Carriage Return.
1089
* '\t' Horizontal Tab.
1090
* '\v' Vertical Tab.
1091
* '\d' A digit (same as [0-9]).
1093
* '\w' A word character, [0-9a-z_A-Z].
1094
* '\W' A non-word character.
1095
* '\s' A whitespace character, [ \b\f\n\r\t\v].
1096
* '\S' A non-whitespace character.
1097
* '\' n A backreference to the nth (n decimal
1098
* and positive) parenthesized expression.
1099
* '\' octal An octal escape sequence (octal must be
1100
* two or three digits long, unless it is
1101
* 0 for the null character).
1102
* '\x' hex A hex escape (hex must be two digits).
1103
* '\u' unicode A unicode escape (must be four digits).
1104
* '\c' ctrl A control character, ctrl is a letter.
1105
* '\' literalatomchar Any character except one of the above
1106
* that follow '\' in an atom.
1107
* otheratomchar Any character not first among the other
1108
* atom right-hand sides.
1111
ParseTerm(CompilerState *state)
1113
jschar c = *state->cp++;
1115
uintN num, tmp, n, i;
1116
const jschar *termStart;
1119
/* assertions and atoms */
1121
state->result = NewRENode(state, REOP_BOL);
1124
state->progLength++;
1127
state->result = NewRENode(state, REOP_EOL);
1130
state->progLength++;
1133
if (state->cp >= state->cpend) {
1134
/* a trailing '\' is an error */
1135
js_ReportCompileErrorNumber(state->context, state->tokenStream,
1136
JSREPORT_TS | JSREPORT_ERROR,
1137
JSMSG_TRAILING_SLASH);
1142
/* assertion escapes */
1144
state->result = NewRENode(state, REOP_WBDRY);
1147
state->progLength++;
1150
state->result = NewRENode(state, REOP_WNONBDRY);
1153
state->progLength++;
1155
/* Decimal escape */
1157
/* Give a strict warning. See also the note below. */
1158
if (!js_ReportCompileErrorNumber(state->context,
1163
JSMSG_INVALID_BACKREF)) {
1168
while (state->cp < state->cpend) {
1170
if (c < '0' || '7' < c)
1173
tmp = 8 * num + (uintN)JS7_UNDEC(c);
1180
state->result = NewRENode(state, REOP_FLAT);
1183
state->result->u.flat.chr = c;
1184
state->result->u.flat.length = 1;
1185
state->progLength += 3;
1196
termStart = state->cp - 1;
1197
num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1198
if (state->flags & JSREG_FIND_PAREN_ERROR)
1200
if (num == OVERFLOW_VALUE) {
1201
/* Give a strict mode warning. */
1202
if (!js_ReportCompileErrorNumber(state->context,
1208
? JSMSG_INVALID_BACKREF
1209
: JSMSG_BAD_BACKREF)) {
1214
* Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1215
* error here. However, for compatibility with IE, we treat the
1216
* whole backref as flat if the first character in it is not a
1217
* valid octal character, and as an octal escape otherwise.
1219
state->cp = termStart;
1221
/* Treat this as flat. termStart - 1 is the \. */
1226
/* Treat this as an octal escape. */
1229
JS_ASSERT(1 <= num && num <= 0x10000);
1230
state->result = NewRENode(state, REOP_BACKREF);
1233
state->result->u.parenIndex = num - 1;
1235
+= 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1237
/* Control escape */
1253
/* Control letter */
1255
if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1256
c = (jschar) (*state->cp++ & 0x1F);
1258
/* back off to accepting the original '\' as a literal */
1263
/* HexEscapeSequence */
1267
/* UnicodeEscapeSequence */
1272
for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1275
if (!isASCIIHexDigit(c, &digit)) {
1277
* Back off to accepting the original 'u' or 'x' as a
1284
n = (n << 4) | digit;
1288
/* Character class escapes */
1290
state->result = NewRENode(state, REOP_DIGIT);
1294
state->progLength++;
1297
state->result = NewRENode(state, REOP_NONDIGIT);
1300
state->result = NewRENode(state, REOP_SPACE);
1303
state->result = NewRENode(state, REOP_NONSPACE);
1306
state->result = NewRENode(state, REOP_ALNUM);
1309
state->result = NewRENode(state, REOP_NONALNUM);
1311
/* IdentityEscape */
1313
state->result = NewRENode(state, REOP_FLAT);
1316
state->result->u.flat.chr = c;
1317
state->result->u.flat.length = 1;
1318
state->result->kid = (void *) (state->cp - 1);
1319
state->progLength += 3;
1324
state->result = NewRENode(state, REOP_CLASS);
1327
termStart = state->cp;
1328
state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1330
if (state->cp == state->cpend) {
1331
js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
1332
JSREPORT_TS | JSREPORT_ERROR,
1333
JSMSG_UNTERM_CLASS, termStart);
1337
if (*state->cp == '\\') {
1339
if (state->cp != state->cpend)
1343
if (*state->cp == ']') {
1344
state->result->u.ucclass.kidlen = state->cp - termStart;
1349
for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1350
if (!state->classCache[i].start) {
1351
state->classCache[i].start = termStart;
1352
state->classCache[i].length = state->result->u.ucclass.kidlen;
1353
state->classCache[i].index = state->classCount;
1356
if (state->classCache[i].length ==
1357
state->result->u.ucclass.kidlen) {
1358
for (n = 0; ; n++) {
1359
if (n == state->classCache[i].length) {
1360
state->result->u.ucclass.index
1361
= state->classCache[i].index;
1364
if (state->classCache[i].start[n] != termStart[n])
1369
state->result->u.ucclass.index = state->classCount++;
1373
* Call CalculateBitmapSize now as we want any errors it finds
1374
* to be reported during the parse phase, not at execution.
1376
if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1379
* Update classBitmapsMem with number of bytes to hold bmsize bits,
1380
* which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1381
* or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1383
n = (state->result->u.ucclass.bmsize >> 3) + 1;
1384
if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1385
js_ReportCompileErrorNumber(state->context, state->tokenStream,
1386
JSREPORT_TS | JSREPORT_ERROR,
1387
JSMSG_REGEXP_TOO_COMPLEX);
1390
state->classBitmapsMem += n;
1391
/* CLASS, <index> */
1393
+= 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1397
state->result = NewRENode(state, REOP_DOT);
1402
const jschar *errp = state->cp--;
1405
err = ParseMinMaxQuantifier(state, JS_TRUE);
1416
js_ReportCompileErrorNumberUC(state->context, state->tokenStream,
1417
JSREPORT_TS | JSREPORT_ERROR,
1418
JSMSG_BAD_QUANTIFIER, state->cp - 1);
1422
state->result = NewRENode(state, REOP_FLAT);
1425
state->result->u.flat.chr = c;
1426
state->result->u.flat.length = 1;
1427
state->result->kid = (void *) (state->cp - 1);
1428
state->progLength += 3;
1431
return ParseQuantifier(state);
1435
ParseQuantifier(CompilerState *state)
1438
term = state->result;
1439
if (state->cp < state->cpend) {
1440
switch (*state->cp) {
1442
state->result = NewRENode(state, REOP_QUANT);
1445
state->result->u.range.min = 1;
1446
state->result->u.range.max = (uintN)-1;
1447
/* <PLUS>, <next> ... <ENDCHILD> */
1448
state->progLength += 4;
1451
state->result = NewRENode(state, REOP_QUANT);
1454
state->result->u.range.min = 0;
1455
state->result->u.range.max = (uintN)-1;
1456
/* <STAR>, <next> ... <ENDCHILD> */
1457
state->progLength += 4;
1460
state->result = NewRENode(state, REOP_QUANT);
1463
state->result->u.range.min = 0;
1464
state->result->u.range.max = 1;
1465
/* <OPT>, <next> ... <ENDCHILD> */
1466
state->progLength += 4;
1468
case '{': /* balance '}' */
1471
const jschar *errp = state->cp;
1473
err = ParseMinMaxQuantifier(state, JS_FALSE);
1479
js_ReportCompileErrorNumberUC(state->context,
1481
JSREPORT_TS | JSREPORT_ERROR,
1491
if (state->treeDepth == TREE_DEPTH_MAX) {
1492
js_ReportCompileErrorNumber(state->context, state->tokenStream,
1493
JSREPORT_TS | JSREPORT_ERROR,
1494
JSMSG_REGEXP_TOO_COMPLEX);
1500
state->result->kid = term;
1501
if (state->cp < state->cpend && *state->cp == '?') {
1503
state->result->u.range.greedy = JS_FALSE;
1505
state->result->u.range.greedy = JS_TRUE;
1511
ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues)
1515
const jschar *errp = state->cp++;
1520
min = GetDecimalValue(c, 0xFFFF, NULL, state);
1523
if (!ignoreValues && min == OVERFLOW_VALUE)
1524
return JSMSG_MIN_TOO_BIG;
1530
max = GetDecimalValue(c, 0xFFFF, NULL, state);
1532
if (!ignoreValues && max == OVERFLOW_VALUE)
1533
return JSMSG_MAX_TOO_BIG;
1534
if (!ignoreValues && min > max)
1535
return JSMSG_OUT_OF_ORDER;
1543
state->result = NewRENode(state, REOP_QUANT);
1546
state->result->u.range.min = min;
1547
state->result->u.range.max = max;
1549
* QUANT, <min>, <max>, <next> ... <ENDCHILD>
1550
* where <max> is written as compact(max+1) to make
1551
* (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
1553
state->progLength += (1 + GetCompactIndexWidth(min)
1554
+ GetCompactIndexWidth(max + 1)
1565
SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
1567
ptrdiff_t offset = target - jump;
1569
/* Check that target really points forward. */
1570
JS_ASSERT(offset >= 2);
1571
if ((size_t)offset > OFFSET_MAX)
1574
jump[0] = JUMP_OFFSET_HI(offset);
1575
jump[1] = JUMP_OFFSET_LO(offset);
1580
* Generate bytecode for the tree rooted at t using an explicit stack instead
1584
EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
1585
jsbytecode *pc, RENode *t)
1587
EmitStateStackEntry *emitStateSP, *emitStateStack;
1591
if (treeDepth == 0) {
1592
emitStateStack = NULL;
1595
(EmitStateStackEntry *)JS_malloc(state->context,
1596
sizeof(EmitStateStackEntry) *
1598
if (!emitStateStack)
1601
emitStateSP = emitStateStack;
1611
case REOP_ALTPREREQ2:
1612
case REOP_ALTPREREQ:
1613
JS_ASSERT(emitStateSP);
1614
emitStateSP->altHead = pc - 1;
1615
emitStateSP->endTermFixup = pc;
1617
SET_ARG(pc, t->u.altprereq.ch1);
1619
SET_ARG(pc, t->u.altprereq.ch2);
1622
emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1625
emitStateSP->continueNode = t;
1626
emitStateSP->continueOp = REOP_JUMP;
1627
emitStateSP->jumpToJumpFlag = JS_FALSE;
1629
JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1630
t = (RENode *) t->kid;
1635
emitStateSP->nextTermFixup = pc; /* offset to following term */
1637
if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
1639
emitStateSP->continueOp = REOP_ENDALT;
1641
JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1648
* If we already patched emitStateSP->nextTermFixup to jump to
1649
* a nearer jump, to avoid 16-bit immediate offset overflow, we
1652
if (emitStateSP->jumpToJumpFlag)
1656
* Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
1657
* REOP_ENDALT is executed only on successful match of the last
1658
* alternate in a group.
1660
if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1662
if (t->op != REOP_ALT) {
1663
if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
1668
* If the program is bigger than the REOP_JUMP offset range, then
1669
* we must check for alternates before this one that are part of
1670
* the same group, and fix up their jump offsets to target jumps
1671
* close enough to fit in a 16-bit unsigned offset immediate.
1673
if ((size_t)(pc - re->program) > OFFSET_MAX &&
1674
emitStateSP > emitStateStack) {
1675
EmitStateStackEntry *esp, *esp2;
1676
jsbytecode *alt, *jump;
1677
ptrdiff_t span, header;
1680
alt = esp2->altHead;
1681
for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
1682
if (esp->continueOp == REOP_ENDALT &&
1683
!esp->jumpToJumpFlag &&
1684
esp->nextTermFixup + OFFSET_LEN == alt &&
1685
(size_t)(pc - ((esp->continueNode->op != REOP_ALT)
1687
: esp->nextTermFixup)) > OFFSET_MAX) {
1689
jump = esp->nextTermFixup;
1692
* The span must be 1 less than the distance from
1693
* jump offset to jump offset, so we actually jump
1694
* to a REOP_JUMP bytecode, not to its offset!
1697
JS_ASSERT(jump < esp2->nextTermFixup);
1698
span = esp2->nextTermFixup - jump - 1;
1699
if ((size_t)span <= OFFSET_MAX)
1704
} while (esp2->continueOp != REOP_ENDALT);
1707
jump[0] = JUMP_OFFSET_HI(span);
1708
jump[1] = JUMP_OFFSET_LO(span);
1710
if (esp->continueNode->op != REOP_ALT) {
1712
* We must patch the offset at esp->endTermFixup
1713
* as well, for the REOP_ALTPREREQ{,2} opcodes.
1714
* If we're unlucky and endTermFixup is more than
1715
* OFFSET_MAX bytes from its target, we cheat by
1716
* jumping 6 bytes to the jump whose offset is at
1717
* esp->nextTermFixup, which has the same target.
1719
jump = esp->endTermFixup;
1720
header = esp->nextTermFixup - jump;
1722
if ((size_t)span > OFFSET_MAX)
1725
jump[0] = JUMP_OFFSET_HI(span);
1726
jump[1] = JUMP_OFFSET_LO(span);
1729
esp->jumpToJumpFlag = JS_TRUE;
1736
JS_ASSERT(emitStateSP);
1737
emitStateSP->altHead = pc - 1;
1738
emitStateSP->nextAltFixup = pc; /* offset to next alternate */
1740
emitStateSP->continueNode = t;
1741
emitStateSP->continueOp = REOP_JUMP;
1742
emitStateSP->jumpToJumpFlag = JS_FALSE;
1744
JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1751
* Coalesce FLATs if possible and if it would not increase bytecode
1752
* beyond preallocated limit. The latter happens only when bytecode
1753
* size for coalesced string with offset p and length 2 exceeds 6
1754
* bytes preallocated for 2 single char nodes, i.e. when
1755
* 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
1756
* GetCompactIndexWidth(p) > 4.
1757
* Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
1758
* nodes strictly decreases bytecode size, the check has to be
1759
* done only for the first coalescing.
1762
GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4)
1765
t->next->op == REOP_FLAT &&
1766
(jschar*)t->kid + t->u.flat.length ==
1767
(jschar*)t->next->kid) {
1768
t->u.flat.length += t->next->u.flat.length;
1769
t->next = t->next->next;
1772
if (t->kid && t->u.flat.length > 1) {
1773
pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
1774
pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin);
1775
pc = WriteCompactIndex(pc, t->u.flat.length);
1776
} else if (t->u.flat.chr < 256) {
1777
pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
1778
*pc++ = (jsbytecode) t->u.flat.chr;
1780
pc[-1] = (state->flags & JSREG_FOLD)
1783
SET_ARG(pc, t->u.flat.chr);
1789
JS_ASSERT(emitStateSP);
1790
pc = WriteCompactIndex(pc, t->u.parenIndex);
1791
emitStateSP->continueNode = t;
1792
emitStateSP->continueOp = REOP_RPAREN;
1794
JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1795
t = (RENode *) t->kid;
1800
pc = WriteCompactIndex(pc, t->u.parenIndex);
1804
pc = WriteCompactIndex(pc, t->u.parenIndex);
1808
JS_ASSERT(emitStateSP);
1809
emitStateSP->nextTermFixup = pc;
1811
emitStateSP->continueNode = t;
1812
emitStateSP->continueOp = REOP_ASSERTTEST;
1814
JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1815
t = (RENode *) t->kid;
1819
case REOP_ASSERTTEST:
1820
case REOP_ASSERTNOTTEST:
1821
if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1825
case REOP_ASSERT_NOT:
1826
JS_ASSERT(emitStateSP);
1827
emitStateSP->nextTermFixup = pc;
1829
emitStateSP->continueNode = t;
1830
emitStateSP->continueOp = REOP_ASSERTNOTTEST;
1832
JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1833
t = (RENode *) t->kid;
1838
JS_ASSERT(emitStateSP);
1839
if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) {
1840
pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
1841
} else if (t->u.range.min == 0 && t->u.range.max == 1) {
1842
pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
1843
} else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) {
1844
pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
1846
if (!t->u.range.greedy)
1847
pc[-1] = REOP_MINIMALQUANT;
1848
pc = WriteCompactIndex(pc, t->u.range.min);
1850
* Write max + 1 to avoid using size_t(max) + 1 bytes
1851
* for (uintN)-1 sentinel.
1853
pc = WriteCompactIndex(pc, t->u.range.max + 1);
1855
emitStateSP->nextTermFixup = pc;
1857
emitStateSP->continueNode = t;
1858
emitStateSP->continueOp = REOP_ENDCHILD;
1860
JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
1861
t = (RENode *) t->kid;
1866
if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
1871
if (!t->u.ucclass.sense)
1872
pc[-1] = REOP_NCLASS;
1873
pc = WriteCompactIndex(pc, t->u.ucclass.index);
1874
charSet = &re->classList[t->u.ucclass.index];
1875
charSet->converted = JS_FALSE;
1876
charSet->length = t->u.ucclass.bmsize;
1877
charSet->u.src.startIndex = t->u.ucclass.startIndex;
1878
charSet->u.src.length = t->u.ucclass.kidlen;
1879
charSet->sense = t->u.ucclass.sense;
1890
if (emitStateSP == emitStateStack)
1893
t = emitStateSP->continueNode;
1894
op = emitStateSP->continueOp;
1900
JS_free(state->context, emitStateStack);
1904
js_ReportCompileErrorNumber(state->context, state->tokenStream,
1905
JSREPORT_TS | JSREPORT_ERROR,
1906
JSMSG_REGEXP_TOO_COMPLEX);
1913
js_NewRegExp(JSContext *cx, JSTokenStream *ts,
1914
JSString *str, uintN flags, JSBool flat)
1918
CompilerState state;
1925
mark = JS_ARENA_MARK(&cx->tempPool);
1926
len = JSSTRING_LENGTH(str);
1929
state.tokenStream = ts;
1930
state.cp = js_UndependString(cx, str);
1933
state.cpbegin = state.cp;
1934
state.cpend = state.cp + len;
1935
state.flags = flags;
1936
state.parenCount = 0;
1937
state.classCount = 0;
1938
state.progLength = 0;
1939
state.treeDepth = 0;
1940
state.classBitmapsMem = 0;
1941
for (i = 0; i < CLASS_CACHE_SIZE; i++)
1942
state.classCache[i].start = NULL;
1944
if (len != 0 && flat) {
1945
state.result = NewRENode(&state, REOP_FLAT);
1946
state.result->u.flat.chr = *state.cpbegin;
1947
state.result->u.flat.length = len;
1948
state.result->kid = (void *) state.cpbegin;
1949
/* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */
1950
state.progLength += 1 + GetCompactIndexWidth(0)
1951
+ GetCompactIndexWidth(len);
1953
if (!ParseRegExp(&state))
1956
resize = offsetof(JSRegExp, program) + state.progLength + 1;
1957
re = (JSRegExp *) JS_malloc(cx, resize);
1962
JS_ASSERT(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT);
1963
re->classCount = state.classCount;
1964
if (re->classCount) {
1965
re->classList = (RECharSet *)
1966
JS_malloc(cx, re->classCount * sizeof(RECharSet));
1967
if (!re->classList) {
1968
js_DestroyRegExp(cx, re);
1972
for (i = 0; i < re->classCount; i++)
1973
re->classList[i].converted = JS_FALSE;
1975
re->classList = NULL;
1977
endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result);
1979
js_DestroyRegExp(cx, re);
1983
*endPC++ = REOP_END;
1985
* Check whether size was overestimated and shrink using realloc.
1986
* This is safe since no pointers to newly parsed regexp or its parts
1987
* besides re exist here.
1989
if ((size_t)(endPC - re->program) != state.progLength + 1) {
1991
JS_ASSERT((size_t)(endPC - re->program) < state.progLength + 1);
1992
resize = offsetof(JSRegExp, program) + (endPC - re->program);
1993
tmp = (JSRegExp *) JS_realloc(cx, re, resize);
2000
re->parenCount = state.parenCount;
2004
JS_ARENA_RELEASE(&cx->tempPool, mark);
2009
js_NewRegExpOpt(JSContext *cx, JSTokenStream *ts,
2010
JSString *str, JSString *opt, JSBool flat)
2019
s = JSSTRING_CHARS(opt);
2020
for (i = 0, n = JSSTRING_LENGTH(opt); i < n; i++) {
2023
flags |= JSREG_GLOB;
2026
flags |= JSREG_FOLD;
2029
flags |= JSREG_MULTILINE;
2032
charBuf[0] = (char)s[i];
2034
js_ReportCompileErrorNumber(cx, ts,
2035
JSREPORT_TS | JSREPORT_ERROR,
2036
JSMSG_BAD_FLAG, charBuf);
2041
return js_NewRegExp(cx, ts, str, flags, flat);
2045
* Save the current state of the match - the position in the input
2046
* text as well as the position in the bytecode. The state of any
2047
* parent expressions is also saved (preceding state).
2048
* Contents of parenCount parentheses from parenIndex are also saved.
2050
static REBackTrackData *
2051
PushBackTrackState(REGlobalData *gData, REOp op,
2052
jsbytecode *target, REMatchState *x, const jschar *cp,
2053
size_t parenIndex, size_t parenCount)
2056
REBackTrackData *result =
2057
(REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz);
2059
size_t sz = sizeof(REBackTrackData) +
2060
gData->stateStackTop * sizeof(REProgState) +
2061
parenCount * sizeof(RECapture);
2063
ptrdiff_t btsize = gData->backTrackStackSize;
2064
ptrdiff_t btincr = ((char *)result + sz) -
2065
((char *)gData->backTrackStack + btsize);
2068
ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
2070
btincr = JS_ROUNDUP(btincr, btsize);
2071
JS_ARENA_GROW_CAST(gData->backTrackStack, REBackTrackData *,
2072
&gData->pool, btsize, btincr);
2073
if (!gData->backTrackStack) {
2074
JS_ReportOutOfMemory(gData->cx);
2075
gData->ok = JS_FALSE;
2078
gData->backTrackStackSize = btsize + btincr;
2079
result = (REBackTrackData *) ((char *)gData->backTrackStack + offset);
2081
gData->backTrackSP = result;
2082
result->sz = gData->cursz;
2085
result->backtrack_op = op;
2086
result->backtrack_pc = target;
2088
result->parenCount = parenCount;
2090
result->saveStateStackTop = gData->stateStackTop;
2091
JS_ASSERT(gData->stateStackTop);
2092
memcpy(result + 1, gData->stateStack,
2093
sizeof(REProgState) * result->saveStateStackTop);
2095
if (parenCount != 0) {
2096
result->parenIndex = parenIndex;
2097
memcpy((char *)(result + 1) +
2098
sizeof(REProgState) * result->saveStateStackTop,
2099
&x->parens[parenIndex],
2100
sizeof(RECapture) * parenCount);
2101
for (i = 0; i != parenCount; i++)
2102
x->parens[parenIndex + i].index = -1;
2110
* Consecutive literal characters.
2113
static REMatchState *
2114
FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2118
if (length > gData->cpend - x->cp)
2120
for (i = 0; i != length; i++) {
2121
if (matchChars[i] != x->cp[i])
2129
static REMatchState *
2130
FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars,
2134
JS_ASSERT(gData->cpend >= x->cp);
2135
if (length > (size_t)(gData->cpend - x->cp))
2137
for (i = 0; i != length; i++) {
2138
if (upcase(matchChars[i]) != upcase(x->cp[i]))
2146
* 1. Evaluate DecimalEscape to obtain an EscapeValue E.
2147
* 2. If E is not a character then go to step 6.
2148
* 3. Let ch be E's character.
2149
* 4. Let A be a one-element RECharSet containing the character ch.
2150
* 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
2151
* 6. E must be an integer. Let n be that integer.
2152
* 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
2153
* 8. Return an internal Matcher closure that takes two arguments, a State x
2154
* and a Continuation c, and performs the following:
2155
* 1. Let cap be x's captures internal array.
2156
* 2. Let s be cap[n].
2157
* 3. If s is undefined, then call c(x) and return its result.
2158
* 4. Let e be x's endIndex.
2159
* 5. Let len be s's length.
2160
* 6. Let f be e+len.
2161
* 7. If f>InputLength, return failure.
2162
* 8. If there exists an integer i between 0 (inclusive) and len (exclusive)
2163
* such that Canonicalize(s[i]) is not the same character as
2164
* Canonicalize(Input [e+i]), then return failure.
2165
* 9. Let y be the State (f, cap).
2166
* 10. Call c(y) and return its result.
2168
static REMatchState *
2169
BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex)
2172
const jschar *parenContent;
2173
RECapture *cap = &x->parens[parenIndex];
2175
if (cap->index == -1)
2179
if (x->cp + len > gData->cpend)
2182
parenContent = &gData->cpbegin[cap->index];
2183
if (gData->regexp->flags & JSREG_FOLD) {
2184
for (i = 0; i < len; i++) {
2185
if (upcase(parenContent[i]) != upcase(x->cp[i]))
2189
for (i = 0; i < len; i++) {
2190
if (parenContent[i] != x->cp[i])
2199
/* Add a single character to the RECharSet */
2201
AddCharacterToCharSet(RECharSet *cs, jschar c)
2203
uintN byteIndex = (uintN)(c >> 3);
2204
JS_ASSERT(c <= cs->length);
2205
cs->u.bits[byteIndex] |= 1 << (c & 0x7);
2209
/* Add a character range, c1 to c2 (inclusive) to the RECharSet */
2211
AddCharacterRangeToCharSet(RECharSet *cs, jschar c1, jschar c2)
2215
uintN byteIndex1 = (uintN)(c1 >> 3);
2216
uintN byteIndex2 = (uintN)(c2 >> 3);
2218
JS_ASSERT((c2 <= cs->length) && (c1 <= c2));
2223
if (byteIndex1 == byteIndex2) {
2224
cs->u.bits[byteIndex1] |= ((uint8)0xFF >> (7 - (c2 - c1))) << c1;
2226
cs->u.bits[byteIndex1] |= 0xFF << c1;
2227
for (i = byteIndex1 + 1; i < byteIndex2; i++)
2228
cs->u.bits[i] = 0xFF;
2229
cs->u.bits[byteIndex2] |= (uint8)0xFF >> (7 - c2);
2233
/* Compile the source of the class into a RECharSet */
2235
ProcessCharSet(REGlobalData *gData, RECharSet *charSet)
2237
const jschar *src, *end;
2238
JSBool inRange = JS_FALSE;
2239
jschar rangeStart = 0;
2240
uintN byteLength, n;
2244
JS_ASSERT(!charSet->converted);
2246
* Assert that startIndex and length points to chars inside [] inside
2249
JS_ASSERT(1 <= charSet->u.src.startIndex);
2250
JS_ASSERT(charSet->u.src.startIndex
2251
< JSSTRING_LENGTH(gData->regexp->source));
2252
JS_ASSERT(charSet->u.src.length <= JSSTRING_LENGTH(gData->regexp->source)
2253
- 1 - charSet->u.src.startIndex);
2255
charSet->converted = JS_TRUE;
2256
src = JSSTRING_CHARS(gData->regexp->source) + charSet->u.src.startIndex;
2257
end = src + charSet->u.src.length;
2258
JS_ASSERT(src[-1] == '[');
2259
JS_ASSERT(end[0] == ']');
2261
byteLength = (charSet->length >> 3) + 1;
2262
charSet->u.bits = (uint8 *)JS_malloc(gData->cx, byteLength);
2263
if (!charSet->u.bits) {
2264
JS_ReportOutOfMemory(gData->cx);
2265
gData->ok = JS_FALSE;
2268
memset(charSet->u.bits, 0, byteLength);
2274
JS_ASSERT(charSet->sense == JS_FALSE);
2277
JS_ASSERT(charSet->sense == JS_TRUE);
2280
while (src != end) {
2305
if (src < end && JS_ISWORD(*src)) {
2306
thisCh = (jschar)(*src++ & 0x1F);
2319
for (i = 0; (i < nDigits) && (src < end); i++) {
2322
if (!isASCIIHexDigit(c, &digit)) {
2324
* Back off to accepting the original '\'
2331
n = (n << 4) | digit;
2344
* This is a non-ECMA extension - decimal escapes (in this
2345
* case, octal!) are supposed to be an error inside class
2346
* ranges, but supported here for backwards compatibility.
2350
if ('0' <= c && c <= '7') {
2352
n = 8 * n + JS7_UNDEC(c);
2354
if ('0' <= c && c <= '7') {
2356
i = 8 * n + JS7_UNDEC(c);
2367
AddCharacterRangeToCharSet(charSet, '0', '9');
2368
continue; /* don't need range processing */
2370
AddCharacterRangeToCharSet(charSet, 0, '0' - 1);
2371
AddCharacterRangeToCharSet(charSet,
2373
(jschar)charSet->length);
2376
for (i = (intN)charSet->length; i >= 0; i--)
2378
AddCharacterToCharSet(charSet, (jschar)i);
2381
for (i = (intN)charSet->length; i >= 0; i--)
2383
AddCharacterToCharSet(charSet, (jschar)i);
2386
for (i = (intN)charSet->length; i >= 0; i--)
2388
AddCharacterToCharSet(charSet, (jschar)i);
2391
for (i = (intN)charSet->length; i >= 0; i--)
2393
AddCharacterToCharSet(charSet, (jschar)i);
2408
if (gData->regexp->flags & JSREG_FOLD) {
2409
AddCharacterRangeToCharSet(charSet, upcase(rangeStart),
2411
AddCharacterRangeToCharSet(charSet, downcase(rangeStart),
2414
AddCharacterRangeToCharSet(charSet, rangeStart, thisCh);
2418
if (gData->regexp->flags & JSREG_FOLD) {
2419
AddCharacterToCharSet(charSet, upcase(thisCh));
2420
AddCharacterToCharSet(charSet, downcase(thisCh));
2422
AddCharacterToCharSet(charSet, thisCh);
2424
if (src < end - 1) {
2428
rangeStart = thisCh;
2437
js_DestroyRegExp(JSContext *cx, JSRegExp *re)
2439
if (JS_ATOMIC_DECREMENT(&re->nrefs) == 0) {
2440
if (re->classList) {
2442
for (i = 0; i < re->classCount; i++) {
2443
if (re->classList[i].converted)
2444
JS_free(cx, re->classList[i].u.bits);
2445
re->classList[i].u.bits = NULL;
2447
JS_free(cx, re->classList);
2454
ReallocStateStack(REGlobalData *gData)
2456
size_t limit = gData->stateStackLimit;
2457
size_t sz = sizeof(REProgState) * limit;
2459
JS_ARENA_GROW_CAST(gData->stateStack, REProgState *, &gData->pool, sz, sz);
2460
if (!gData->stateStack) {
2461
gData->ok = JS_FALSE;
2464
gData->stateStackLimit = limit + limit;
2468
#define PUSH_STATE_STACK(data) \
2470
++(data)->stateStackTop; \
2471
if ((data)->stateStackTop == (data)->stateStackLimit && \
2472
!ReallocStateStack((data))) { \
2478
* Apply the current op against the given input to see if it's going to match
2479
* or fail. Return false if we don't get a match, true if we do. If updatecp is
2480
* true, then update the current state's cp. Always update startpc to the next
2483
static REMatchState *
2484
SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op,
2485
jsbytecode **startpc, JSBool updatecp)
2487
REMatchState *result = NULL;
2490
size_t offset, length, index;
2491
jsbytecode *pc = *startpc; /* pc has already been incremented past op */
2493
const jschar *startcp = x->cp;
2499
if (x->cp != gData->cpbegin) {
2500
if (!gData->cx->regExpStatics.multiline &&
2501
!(gData->regexp->flags & JSREG_MULTILINE)) {
2504
if (!RE_IS_LINE_TERM(x->cp[-1]))
2510
if (x->cp != gData->cpend) {
2511
if (!gData->cx->regExpStatics.multiline &&
2512
!(gData->regexp->flags & JSREG_MULTILINE)) {
2515
if (!RE_IS_LINE_TERM(*x->cp))
2521
if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2522
!(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2527
if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^
2528
(x->cp != gData->cpend && JS_ISWORD(*x->cp))) {
2533
if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
2539
if (x->cp != gData->cpend && JS_ISDIGIT(*x->cp)) {
2545
if (x->cp != gData->cpend && !JS_ISDIGIT(*x->cp)) {
2551
if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
2557
if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
2563
if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
2569
if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
2575
pc = ReadCompactIndex(pc, &parenIndex);
2576
JS_ASSERT(parenIndex < gData->regexp->parenCount);
2577
result = BackrefMatcher(gData, x, parenIndex);
2580
pc = ReadCompactIndex(pc, &offset);
2581
JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2582
pc = ReadCompactIndex(pc, &length);
2583
JS_ASSERT(1 <= length);
2584
JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2585
if (length <= (size_t)(gData->cpend - x->cp)) {
2586
source = JSSTRING_CHARS(gData->regexp->source) + offset;
2587
for (index = 0; index != length; index++) {
2588
if (source[index] != x->cp[index])
2597
if (x->cp != gData->cpend && *x->cp == matchCh) {
2603
pc = ReadCompactIndex(pc, &offset);
2604
JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source));
2605
pc = ReadCompactIndex(pc, &length);
2606
JS_ASSERT(1 <= length);
2607
JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset);
2608
source = JSSTRING_CHARS(gData->regexp->source);
2609
result = FlatNIMatcher(gData, x, source + offset, length);
2613
if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2619
matchCh = GET_ARG(pc);
2621
if (x->cp != gData->cpend && *x->cp == matchCh) {
2627
matchCh = GET_ARG(pc);
2629
if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) {
2635
pc = ReadCompactIndex(pc, &index);
2636
JS_ASSERT(index < gData->regexp->classCount);
2637
if (x->cp != gData->cpend) {
2638
charSet = &gData->regexp->classList[index];
2639
JS_ASSERT(charSet->converted);
2642
if (charSet->length != 0 &&
2643
ch <= charSet->length &&
2644
(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2651
pc = ReadCompactIndex(pc, &index);
2652
JS_ASSERT(index < gData->regexp->classCount);
2653
if (x->cp != gData->cpend) {
2654
charSet = &gData->regexp->classList[index];
2655
JS_ASSERT(charSet->converted);
2658
if (charSet->length == 0 ||
2659
ch > charSet->length ||
2660
!(charSet->u.bits[index] & (1 << (ch & 0x7)))) {
2667
JS_ASSERT(JS_FALSE);
2679
static REMatchState *
2680
ExecuteREBytecode(REGlobalData *gData, REMatchState *x)
2682
REMatchState *result = NULL;
2683
REBackTrackData *backTrackData;
2684
jsbytecode *nextpc, *testpc;
2687
REProgState *curState;
2688
const jschar *startcp;
2689
size_t parenIndex, k;
2690
size_t parenSoFar = 0;
2692
jschar matchCh1, matchCh2;
2695
JSBranchCallback onbranch = gData->cx->branchCallback;
2696
uintN onbranchCalls = 0;
2697
#define ONBRANCH_CALLS_MASK 127
2698
#define CHECK_BRANCH() \
2701
(++onbranchCalls & ONBRANCH_CALLS_MASK) == 0 && \
2702
!(*onbranch)(gData->cx, NULL)) { \
2703
gData->ok = JS_FALSE; \
2709
jsbytecode *pc = gData->regexp->program;
2710
REOp op = (REOp) *pc++;
2713
* If the first node is a simple match, step the index into the string
2714
* until that match is made, or fail if it can't be found at all.
2716
if (REOP_IS_SIMPLE(op)) {
2718
while (x->cp <= gData->cpend) {
2719
nextpc = pc; /* reset back to start each time */
2720
result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE);
2724
pc = nextpc; /* accept skip to next opcode */
2736
if (REOP_IS_SIMPLE(op)) {
2737
result = SimpleMatch(gData, x, op, &pc, JS_TRUE);
2739
curState = &gData->stateStack[gData->stateStackTop];
2745
case REOP_ALTPREREQ2:
2746
nextpc = pc + GET_OFFSET(pc); /* start of next op */
2748
matchCh2 = GET_ARG(pc);
2753
if (x->cp != gData->cpend) {
2754
if (*x->cp == matchCh2)
2757
charSet = &gData->regexp->classList[k];
2758
if (!charSet->converted && !ProcessCharSet(gData, charSet))
2762
if ((charSet->length == 0 ||
2763
matchCh1 > charSet->length ||
2764
!(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^
2772
case REOP_ALTPREREQ:
2773
nextpc = pc + GET_OFFSET(pc); /* start of next op */
2775
matchCh1 = GET_ARG(pc);
2777
matchCh2 = GET_ARG(pc);
2779
if (x->cp == gData->cpend ||
2780
(*x->cp != matchCh1 && *x->cp != matchCh2)) {
2784
/* else false thru... */
2788
nextpc = pc + GET_OFFSET(pc); /* start of next alternate */
2789
pc += ARG_LEN; /* start of this alternate */
2790
curState->parenSoFar = parenSoFar;
2791
PUSH_STATE_STACK(gData);
2794
if (REOP_IS_SIMPLE(op)) {
2795
if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) {
2796
op = (REOp) *nextpc++;
2803
nextop = (REOp) *nextpc++;
2804
if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
2809
* Occurs at (successful) end of REOP_ALT,
2812
--gData->stateStackTop;
2813
pc += GET_OFFSET(pc);
2818
* Occurs at last (successful) end of REOP_ALT,
2821
--gData->stateStackTop;
2826
pc = ReadCompactIndex(pc, &parenIndex);
2827
JS_ASSERT(parenIndex < gData->regexp->parenCount);
2828
if (parenIndex + 1 > parenSoFar)
2829
parenSoFar = parenIndex + 1;
2830
x->parens[parenIndex].index = x->cp - gData->cpbegin;
2831
x->parens[parenIndex].length = 0;
2836
pc = ReadCompactIndex(pc, &parenIndex);
2837
JS_ASSERT(parenIndex < gData->regexp->parenCount);
2838
cap = &x->parens[parenIndex];
2841
* FIXME: https://bugzilla.mozilla.org/show_bug.cgi?id=346090
2842
* This wallpaper prevents a case where we somehow took a step
2843
* backward in input while minimally-matching an empty string.
2845
if (x->cp < gData->cpbegin + cap->index)
2847
cap->length = x->cp - (gData->cpbegin + cap->index);
2852
nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */
2853
pc += ARG_LEN; /* start of ASSERT child */
2856
if (REOP_IS_SIMPLE(op) &&
2857
!SimpleMatch(gData, x, op, &testpc, JS_FALSE)) {
2861
curState->u.assertion.top =
2862
(char *)gData->backTrackSP - (char *)gData->backTrackStack;
2863
curState->u.assertion.sz = gData->cursz;
2864
curState->index = x->cp - gData->cpbegin;
2865
curState->parenSoFar = parenSoFar;
2866
PUSH_STATE_STACK(gData);
2867
if (!PushBackTrackState(gData, REOP_ASSERTTEST,
2868
nextpc, x, x->cp, 0, 0)) {
2873
case REOP_ASSERT_NOT:
2874
nextpc = pc + GET_OFFSET(pc);
2878
if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ &&
2879
SimpleMatch(gData, x, op, &testpc, JS_FALSE) &&
2880
*testpc == REOP_ASSERTNOTTEST) {
2884
curState->u.assertion.top
2885
= (char *)gData->backTrackSP -
2886
(char *)gData->backTrackStack;
2887
curState->u.assertion.sz = gData->cursz;
2888
curState->index = x->cp - gData->cpbegin;
2889
curState->parenSoFar = parenSoFar;
2890
PUSH_STATE_STACK(gData);
2891
if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST,
2892
nextpc, x, x->cp, 0, 0)) {
2897
case REOP_ASSERTTEST:
2898
--gData->stateStackTop;
2900
x->cp = gData->cpbegin + curState->index;
2901
gData->backTrackSP =
2902
(REBackTrackData *) ((char *)gData->backTrackStack +
2903
curState->u.assertion.top);
2904
gData->cursz = curState->u.assertion.sz;
2909
case REOP_ASSERTNOTTEST:
2910
--gData->stateStackTop;
2912
x->cp = gData->cpbegin + curState->index;
2913
gData->backTrackSP =
2914
(REBackTrackData *) ((char *)gData->backTrackStack +
2915
curState->u.assertion.top);
2916
gData->cursz = curState->u.assertion.sz;
2917
result = (!result) ? x : NULL;
2926
curState->u.quantifier.min = 0;
2927
curState->u.quantifier.max = (uintN)-1;
2930
curState->u.quantifier.min = 1;
2931
curState->u.quantifier.max = (uintN)-1;
2934
curState->u.quantifier.min = 0;
2935
curState->u.quantifier.max = 1;
2938
pc = ReadCompactIndex(pc, &k);
2939
curState->u.quantifier.min = k;
2940
pc = ReadCompactIndex(pc, &k);
2941
/* max is k - 1 to use one byte for (uintN)-1 sentinel. */
2942
curState->u.quantifier.max = k - 1;
2943
JS_ASSERT(curState->u.quantifier.min
2944
<= curState->u.quantifier.max);
2946
if (curState->u.quantifier.max == 0) {
2947
pc = pc + GET_OFFSET(pc);
2952
/* Step over <next> */
2953
nextpc = pc + ARG_LEN;
2954
op = (REOp) *nextpc++;
2956
if (REOP_IS_SIMPLE(op)) {
2957
if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
2958
if (curState->u.quantifier.min == 0)
2962
pc = pc + GET_OFFSET(pc);
2965
op = (REOp) *nextpc++;
2968
curState->index = startcp - gData->cpbegin;
2969
curState->continue_op = REOP_REPEAT;
2970
curState->continue_pc = pc;
2971
curState->parenSoFar = parenSoFar;
2972
PUSH_STATE_STACK(gData);
2973
if (curState->u.quantifier.min == 0 &&
2974
!PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp,
2981
case REOP_ENDCHILD: /* marks the end of a quantifier child */
2982
pc = curState[-1].continue_pc;
2983
op = curState[-1].continue_op;
2990
--gData->stateStackTop;
2992
/* Failed, see if we have enough children. */
2993
if (curState->u.quantifier.min == 0)
2997
if (curState->u.quantifier.min == 0 &&
2998
x->cp == gData->cpbegin + curState->index) {
2999
/* matched an empty string, that'll get us nowhere */
3003
if (curState->u.quantifier.min != 0)
3004
curState->u.quantifier.min--;
3005
if (curState->u.quantifier.max != (uintN) -1)
3006
curState->u.quantifier.max--;
3007
if (curState->u.quantifier.max == 0)
3009
nextpc = pc + ARG_LEN;
3010
nextop = (REOp) *nextpc;
3012
if (REOP_IS_SIMPLE(nextop)) {
3014
if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
3015
if (curState->u.quantifier.min == 0)
3022
curState->index = startcp - gData->cpbegin;
3023
PUSH_STATE_STACK(gData);
3024
if (curState->u.quantifier.min == 0 &&
3025
!PushBackTrackState(gData, REOP_REPEAT,
3027
curState->parenSoFar,
3029
curState->parenSoFar)) {
3032
} while (*nextpc == REOP_ENDCHILD);
3035
parenSoFar = curState->parenSoFar;
3040
pc += GET_OFFSET(pc);
3043
case REOP_MINIMALSTAR:
3044
curState->u.quantifier.min = 0;
3045
curState->u.quantifier.max = (uintN)-1;
3046
goto minimalquantcommon;
3047
case REOP_MINIMALPLUS:
3048
curState->u.quantifier.min = 1;
3049
curState->u.quantifier.max = (uintN)-1;
3050
goto minimalquantcommon;
3051
case REOP_MINIMALOPT:
3052
curState->u.quantifier.min = 0;
3053
curState->u.quantifier.max = 1;
3054
goto minimalquantcommon;
3055
case REOP_MINIMALQUANT:
3056
pc = ReadCompactIndex(pc, &k);
3057
curState->u.quantifier.min = k;
3058
pc = ReadCompactIndex(pc, &k);
3059
/* See REOP_QUANT comments about k - 1. */
3060
curState->u.quantifier.max = k - 1;
3061
JS_ASSERT(curState->u.quantifier.min
3062
<= curState->u.quantifier.max);
3064
curState->index = x->cp - gData->cpbegin;
3065
curState->parenSoFar = parenSoFar;
3066
PUSH_STATE_STACK(gData);
3067
if (curState->u.quantifier.min != 0) {
3068
curState->continue_op = REOP_MINIMALREPEAT;
3069
curState->continue_pc = pc;
3070
/* step over <next> */
3074
if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3075
pc, x, x->cp, 0, 0)) {
3078
--gData->stateStackTop;
3079
pc = pc + GET_OFFSET(pc);
3084
case REOP_MINIMALREPEAT:
3086
--gData->stateStackTop;
3091
* Non-greedy failure - try to consume another child.
3093
if (curState->u.quantifier.max == (uintN) -1 ||
3094
curState->u.quantifier.max > 0) {
3095
curState->index = x->cp - gData->cpbegin;
3096
curState->continue_op = REOP_MINIMALREPEAT;
3097
curState->continue_pc = pc;
3099
for (k = curState->parenSoFar; k < parenSoFar; k++)
3100
x->parens[k].index = -1;
3101
PUSH_STATE_STACK(gData);
3105
/* Don't need to adjust pc since we're going to pop. */
3108
if (curState->u.quantifier.min == 0 &&
3109
x->cp == gData->cpbegin + curState->index) {
3110
/* Matched an empty string, that'll get us nowhere. */
3114
if (curState->u.quantifier.min != 0)
3115
curState->u.quantifier.min--;
3116
if (curState->u.quantifier.max != (uintN) -1)
3117
curState->u.quantifier.max--;
3118
if (curState->u.quantifier.min != 0) {
3119
curState->continue_op = REOP_MINIMALREPEAT;
3120
curState->continue_pc = pc;
3122
for (k = curState->parenSoFar; k < parenSoFar; k++)
3123
x->parens[k].index = -1;
3124
curState->index = x->cp - gData->cpbegin;
3125
PUSH_STATE_STACK(gData);
3129
curState->index = x->cp - gData->cpbegin;
3130
curState->parenSoFar = parenSoFar;
3131
PUSH_STATE_STACK(gData);
3132
if (!PushBackTrackState(gData, REOP_MINIMALREPEAT,
3134
curState->parenSoFar,
3135
parenSoFar - curState->parenSoFar)) {
3138
--gData->stateStackTop;
3139
pc = pc + GET_OFFSET(pc);
3144
JS_ASSERT(JS_FALSE);
3151
* If the match failed and there's a backtrack option, take it.
3152
* Otherwise this is a complete and utter failure.
3155
if (gData->cursz == 0)
3157
backTrackData = gData->backTrackSP;
3158
gData->cursz = backTrackData->sz;
3159
gData->backTrackSP =
3160
(REBackTrackData *) ((char *)backTrackData - backTrackData->sz);
3161
x->cp = backTrackData->cp;
3162
pc = backTrackData->backtrack_pc;
3163
op = backTrackData->backtrack_op;
3164
gData->stateStackTop = backTrackData->saveStateStackTop;
3165
JS_ASSERT(gData->stateStackTop);
3167
memcpy(gData->stateStack, backTrackData + 1,
3168
sizeof(REProgState) * backTrackData->saveStateStackTop);
3169
curState = &gData->stateStack[gData->stateStackTop - 1];
3171
if (backTrackData->parenCount) {
3172
memcpy(&x->parens[backTrackData->parenIndex],
3173
(char *)(backTrackData + 1) +
3174
sizeof(REProgState) * backTrackData->saveStateStackTop,
3175
sizeof(RECapture) * backTrackData->parenCount);
3176
parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
3178
for (k = curState->parenSoFar; k < parenSoFar; k++)
3179
x->parens[k].index = -1;
3180
parenSoFar = curState->parenSoFar;
3187
* Continue with the expression.
3194
static REMatchState *
3195
MatchRegExp(REGlobalData *gData, REMatchState *x)
3197
REMatchState *result;
3198
const jschar *cp = x->cp;
3203
* Have to include the position beyond the last character
3204
* in order to detect end-of-input/line condition.
3206
for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
3207
gData->skipped = cp2 - cp;
3209
for (j = 0; j < gData->regexp->parenCount; j++)
3210
x->parens[j].index = -1;
3211
result = ExecuteREBytecode(gData, x);
3212
if (!gData->ok || result)
3214
gData->backTrackSP = gData->backTrackStack;
3216
gData->stateStackTop = 0;
3217
cp2 = cp + gData->skipped;
3223
static REMatchState *
3224
InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re)
3226
REMatchState *result;
3229
gData->backTrackStackSize = INITIAL_BACKTRACK;
3230
JS_ARENA_ALLOCATE_CAST(gData->backTrackStack, REBackTrackData *,
3233
if (!gData->backTrackStack)
3236
gData->backTrackSP = gData->backTrackStack;
3239
gData->stateStackLimit = INITIAL_STATESTACK;
3240
JS_ARENA_ALLOCATE_CAST(gData->stateStack, REProgState *,
3242
sizeof(REProgState) * INITIAL_STATESTACK);
3243
if (!gData->stateStack)
3246
gData->stateStackTop = 0;
3249
gData->ok = JS_TRUE;
3251
JS_ARENA_ALLOCATE_CAST(result, REMatchState *,
3253
offsetof(REMatchState, parens)
3254
+ re->parenCount * sizeof(RECapture));
3258
for (i = 0; i < re->classCount; i++) {
3259
if (!re->classList[i].converted &&
3260
!ProcessCharSet(gData, &re->classList[i])) {
3268
JS_ReportOutOfMemory(cx);
3269
gData->ok = JS_FALSE;
3274
js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
3275
JSBool test, jsval *rval)
3278
REMatchState *x, *result;
3280
const jschar *cp, *ep;
3281
size_t i, length, start;
3282
JSSubString *morepar;
3284
JSRegExpStatics *res;
3287
JSString *parstr, *matchstr;
3290
RECapture *parsub = NULL;
3293
* It's safe to load from cp because JSStrings have a zero at the end,
3294
* and we never let cp get beyond cpend.
3297
length = JSSTRING_LENGTH(str);
3300
cp = JSSTRING_CHARS(str);
3302
gData.cpend = cp + length;
3304
gData.start = start;
3307
JS_InitArenaPool(&gData.pool, "RegExpPool", 8096, 4);
3308
x = InitMatch(cx, &gData, re);
3316
* Call the recursive matcher to do the real work. Return null on mismatch
3317
* whether testing or not. On match, return an extended Array object.
3319
result = MatchRegExp(&gData, x);
3328
i = cp - gData.cpbegin;
3330
matchlen = i - (start + gData.skipped);
3336
* Testing for a match and updating cx->regExpStatics: don't allocate
3337
* an array object, do return true.
3341
/* Avoid warning. (gcc doesn't detect that obj is needed iff !test); */
3345
* The array returned on match has element 0 bound to the matched
3346
* string, elements 1 through state.parenCount bound to the paren
3347
* matches, an index property telling the length of the left context,
3348
* and an input property referring to the input string.
3350
obj = js_NewArrayObject(cx, 0, NULL);
3355
*rval = OBJECT_TO_JSVAL(obj);
3357
#define DEFVAL(val, id) { \
3358
ok = js_DefineProperty(cx, obj, id, val, \
3359
JS_PropertyStub, JS_PropertyStub, \
3360
JSPROP_ENUMERATE, NULL); \
3362
cx->weakRoots.newborn[GCX_OBJECT] = NULL; \
3363
cx->weakRoots.newborn[GCX_STRING] = NULL; \
3368
matchstr = js_NewStringCopyN(cx, cp, matchlen, 0);
3370
cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3374
DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSID(0));
3377
res = &cx->regExpStatics;
3379
res->parenCount = re->parenCount;
3380
if (re->parenCount == 0) {
3381
res->lastParen = js_EmptySubString;
3383
for (num = 0; num < re->parenCount; num++) {
3384
parsub = &result->parens[num];
3386
if (parsub->index == -1) {
3387
res->parens[num].chars = NULL;
3388
res->parens[num].length = 0;
3390
res->parens[num].chars = gData.cpbegin + parsub->index;
3391
res->parens[num].length = parsub->length;
3395
morepar = res->moreParens;
3397
res->moreLength = 10;
3398
morepar = (JSSubString*)
3399
JS_malloc(cx, 10 * sizeof(JSSubString));
3400
} else if (morenum >= res->moreLength) {
3401
res->moreLength += 10;
3402
morepar = (JSSubString*)
3403
JS_realloc(cx, morepar,
3404
res->moreLength * sizeof(JSSubString));
3407
cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3408
cx->weakRoots.newborn[GCX_STRING] = NULL;
3412
res->moreParens = morepar;
3413
if (parsub->index == -1) {
3414
morepar[morenum].chars = NULL;
3415
morepar[morenum].length = 0;
3417
morepar[morenum].chars = gData.cpbegin + parsub->index;
3418
morepar[morenum].length = parsub->length;
3423
if (parsub->index == -1) {
3424
ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3425
JSVAL_VOID, NULL, NULL,
3426
JSPROP_ENUMERATE, NULL);
3428
parstr = js_NewStringCopyN(cx, gData.cpbegin + parsub->index,
3431
cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3432
cx->weakRoots.newborn[GCX_STRING] = NULL;
3436
ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1),
3437
STRING_TO_JSVAL(parstr), NULL, NULL,
3438
JSPROP_ENUMERATE, NULL);
3441
cx->weakRoots.newborn[GCX_OBJECT] = NULL;
3442
cx->weakRoots.newborn[GCX_STRING] = NULL;
3446
if (parsub->index == -1) {
3447
res->lastParen = js_EmptySubString;
3449
res->lastParen.chars = gData.cpbegin + parsub->index;
3450
res->lastParen.length = parsub->length;
3456
* Define the index and input properties last for better for/in loop
3457
* order (so they come after the elements).
3459
DEFVAL(INT_TO_JSVAL(start + gData.skipped),
3460
ATOM_TO_JSID(cx->runtime->atomState.indexAtom));
3461
DEFVAL(STRING_TO_JSVAL(str),
3462
ATOM_TO_JSID(cx->runtime->atomState.inputAtom));
3467
res->lastMatch.chars = cp;
3468
res->lastMatch.length = matchlen;
3471
* For JS1.3 and ECMAv2, emulate Perl5 exactly:
3473
* js1.3 "hi", "hi there" "hihitherehi therebye"
3475
res->leftContext.chars = JSSTRING_CHARS(str);
3476
res->leftContext.length = start + gData.skipped;
3477
res->rightContext.chars = ep;
3478
res->rightContext.length = gData.cpend - ep;
3481
JS_FinishArenaPool(&gData.pool);
3485
/************************************************************************/
3487
enum regexp_tinyid {
3490
REGEXP_IGNORE_CASE = -3,
3491
REGEXP_LAST_INDEX = -4,
3492
REGEXP_MULTILINE = -5
3495
#define REGEXP_PROP_ATTRS (JSPROP_PERMANENT|JSPROP_SHARED)
3497
static JSPropertySpec regexp_props[] = {
3498
{"source", REGEXP_SOURCE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3499
{"global", REGEXP_GLOBAL, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3500
{"ignoreCase", REGEXP_IGNORE_CASE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3501
{"lastIndex", REGEXP_LAST_INDEX, REGEXP_PROP_ATTRS,0,0},
3502
{"multiline", REGEXP_MULTILINE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
3507
regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3512
if (!JSVAL_IS_INT(id))
3514
slot = JSVAL_TO_INT(id);
3515
if (slot == REGEXP_LAST_INDEX)
3516
return JS_GetReservedSlot(cx, obj, 0, vp);
3518
JS_LOCK_OBJ(cx, obj);
3519
re = (JSRegExp *) JS_GetInstancePrivate(cx, obj, &js_RegExpClass, NULL);
3523
*vp = STRING_TO_JSVAL(re->source);
3526
*vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
3528
case REGEXP_IGNORE_CASE:
3529
*vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
3531
case REGEXP_MULTILINE:
3532
*vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0);
3536
JS_UNLOCK_OBJ(cx, obj);
3541
regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3548
if (!JSVAL_IS_INT(id))
3550
slot = JSVAL_TO_INT(id);
3551
if (slot == REGEXP_LAST_INDEX) {
3552
if (!js_ValueToNumber(cx, *vp, &lastIndex))
3554
lastIndex = js_DoubleToInteger(lastIndex);
3555
ok = js_NewNumberValue(cx, lastIndex, vp) &&
3556
JS_SetReservedSlot(cx, obj, 0, *vp);
3562
* RegExp class static properties and their Perl counterparts:
3565
* RegExp.multiline $*
3566
* RegExp.lastMatch $&
3567
* RegExp.lastParen $+
3568
* RegExp.leftContext $`
3569
* RegExp.rightContext $'
3571
enum regexp_static_tinyid {
3572
REGEXP_STATIC_INPUT = -1,
3573
REGEXP_STATIC_MULTILINE = -2,
3574
REGEXP_STATIC_LAST_MATCH = -3,
3575
REGEXP_STATIC_LAST_PAREN = -4,
3576
REGEXP_STATIC_LEFT_CONTEXT = -5,
3577
REGEXP_STATIC_RIGHT_CONTEXT = -6
3581
js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3583
JS_ClearRegExpStatics(cx);
3584
return js_AddRoot(cx, &res->input, "res->input");
3588
js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res)
3590
if (res->moreParens) {
3591
JS_free(cx, res->moreParens);
3592
res->moreParens = NULL;
3594
js_RemoveRoot(cx->runtime, &res->input);
3598
regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3601
JSRegExpStatics *res;
3605
res = &cx->regExpStatics;
3606
if (!JSVAL_IS_INT(id))
3608
slot = JSVAL_TO_INT(id);
3610
case REGEXP_STATIC_INPUT:
3611
*vp = res->input ? STRING_TO_JSVAL(res->input)
3612
: JS_GetEmptyStringValue(cx);
3614
case REGEXP_STATIC_MULTILINE:
3615
*vp = BOOLEAN_TO_JSVAL(res->multiline);
3617
case REGEXP_STATIC_LAST_MATCH:
3618
sub = &res->lastMatch;
3620
case REGEXP_STATIC_LAST_PAREN:
3621
sub = &res->lastParen;
3623
case REGEXP_STATIC_LEFT_CONTEXT:
3624
sub = &res->leftContext;
3626
case REGEXP_STATIC_RIGHT_CONTEXT:
3627
sub = &res->rightContext;
3630
sub = REGEXP_PAREN_SUBSTRING(res, slot);
3633
str = js_NewStringCopyN(cx, sub->chars, sub->length, 0);
3636
*vp = STRING_TO_JSVAL(str);
3641
regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
3643
JSRegExpStatics *res;
3645
if (!JSVAL_IS_INT(id))
3647
res = &cx->regExpStatics;
3648
/* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
3649
if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
3650
if (!JSVAL_IS_STRING(*vp) &&
3651
!JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
3654
res->input = JSVAL_TO_STRING(*vp);
3655
} else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
3656
if (!JSVAL_IS_BOOLEAN(*vp) &&
3657
!JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
3660
res->multiline = JSVAL_TO_BOOLEAN(*vp);
3665
static JSPropertySpec regexp_static_props[] = {
3667
REGEXP_STATIC_INPUT,
3668
JSPROP_ENUMERATE|JSPROP_SHARED,
3669
regexp_static_getProperty, regexp_static_setProperty},
3671
REGEXP_STATIC_MULTILINE,
3672
JSPROP_ENUMERATE|JSPROP_SHARED,
3673
regexp_static_getProperty, regexp_static_setProperty},
3675
REGEXP_STATIC_LAST_MATCH,
3676
JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3677
regexp_static_getProperty, regexp_static_getProperty},
3679
REGEXP_STATIC_LAST_PAREN,
3680
JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3681
regexp_static_getProperty, regexp_static_getProperty},
3683
REGEXP_STATIC_LEFT_CONTEXT,
3684
JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3685
regexp_static_getProperty, regexp_static_getProperty},
3687
REGEXP_STATIC_RIGHT_CONTEXT,
3688
JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3689
regexp_static_getProperty, regexp_static_getProperty},
3691
/* XXX should have block scope and local $1, etc. */
3692
{"$1", 0, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3693
regexp_static_getProperty, regexp_static_getProperty},
3694
{"$2", 1, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3695
regexp_static_getProperty, regexp_static_getProperty},
3696
{"$3", 2, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3697
regexp_static_getProperty, regexp_static_getProperty},
3698
{"$4", 3, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3699
regexp_static_getProperty, regexp_static_getProperty},
3700
{"$5", 4, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3701
regexp_static_getProperty, regexp_static_getProperty},
3702
{"$6", 5, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3703
regexp_static_getProperty, regexp_static_getProperty},
3704
{"$7", 6, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3705
regexp_static_getProperty, regexp_static_getProperty},
3706
{"$8", 7, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3707
regexp_static_getProperty, regexp_static_getProperty},
3708
{"$9", 8, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
3709
regexp_static_getProperty, regexp_static_getProperty},
3715
regexp_finalize(JSContext *cx, JSObject *obj)
3719
re = (JSRegExp *) JS_GetPrivate(cx, obj);
3722
js_DestroyRegExp(cx, re);
3725
/* Forward static prototype. */
3727
regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3731
regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
3733
return regexp_exec(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv, rval);
3738
#include "jsxdrapi.h"
3741
regexp_xdrObject(JSXDRState *xdr, JSObject **objp)
3748
if (xdr->mode == JSXDR_ENCODE) {
3749
re = (JSRegExp *) JS_GetPrivate(xdr->cx, *objp);
3752
source = re->source;
3753
flagsword = ((uint32)re->cloneIndex << 16) | re->flags;
3755
if (!JS_XDRString(xdr, &source) ||
3756
!JS_XDRUint32(xdr, &flagsword)) {
3759
if (xdr->mode == JSXDR_DECODE) {
3760
obj = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL);
3763
re = js_NewRegExp(xdr->cx, NULL, source, (uint16)flagsword, JS_FALSE);
3766
if (!JS_SetPrivate(xdr->cx, obj, re) ||
3767
!js_SetLastIndex(xdr->cx, obj, 0)) {
3768
js_DestroyRegExp(xdr->cx, re);
3771
re->cloneIndex = (uint16)(flagsword >> 16);
3777
#else /* !JS_HAS_XDR */
3779
#define regexp_xdrObject NULL
3781
#endif /* !JS_HAS_XDR */
3784
regexp_mark(JSContext *cx, JSObject *obj, void *arg)
3786
JSRegExp *re = (JSRegExp *) JS_GetPrivate(cx, obj);
3788
GC_MARK(cx, re->source, "source");
3792
JSClass js_RegExpClass = {
3794
JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(1) |
3795
JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp),
3796
JS_PropertyStub, JS_PropertyStub,
3797
regexp_getProperty, regexp_setProperty,
3798
JS_EnumerateStub, JS_ResolveStub,
3799
JS_ConvertStub, regexp_finalize,
3802
regexp_xdrObject, NULL,
3806
static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0};
3809
js_regexp_toString(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3813
const jschar *source;
3815
size_t length, nflags;
3819
if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
3821
JS_LOCK_OBJ(cx, obj);
3822
re = (JSRegExp *) JS_GetPrivate(cx, obj);
3824
JS_UNLOCK_OBJ(cx, obj);
3825
*rval = STRING_TO_JSVAL(cx->runtime->emptyString);
3829
source = JSSTRING_CHARS(re->source);
3830
length = JSSTRING_LENGTH(re->source);
3832
source = empty_regexp_ucstr;
3833
length = sizeof(empty_regexp_ucstr) / sizeof(jschar) - 1;
3837
for (flags = re->flags; flags != 0; flags &= flags - 1)
3839
chars = (jschar*) JS_malloc(cx, (length + nflags + 1) * sizeof(jschar));
3841
JS_UNLOCK_OBJ(cx, obj);
3846
js_strncpy(&chars[1], source, length - 2);
3847
chars[length-1] = '/';
3849
if (re->flags & JSREG_GLOB)
3850
chars[length++] = 'g';
3851
if (re->flags & JSREG_FOLD)
3852
chars[length++] = 'i';
3853
if (re->flags & JSREG_MULTILINE)
3854
chars[length++] = 'm';
3856
JS_UNLOCK_OBJ(cx, obj);
3859
str = js_NewString(cx, chars, length, 0);
3864
*rval = STRING_TO_JSVAL(str);
3869
regexp_compile(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3872
JSString *opt, *str;
3873
JSRegExp *oldre, *re;
3876
size_t length, nbytes;
3877
const jschar *cp, *start, *end;
3878
jschar *nstart, *ncp, *tmp;
3880
if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
3884
str = cx->runtime->emptyString;
3886
if (JSVAL_IS_OBJECT(argv[0])) {
3888
* If we get passed in a RegExp object we construct a new
3889
* RegExp that is a duplicate of it by re-compiling the
3890
* original source code. ECMA requires that it be an error
3891
* here if the flags are specified. (We must use the flags
3892
* from the original RegExp also).
3894
obj2 = JSVAL_TO_OBJECT(argv[0]);
3895
if (obj2 && OBJ_GET_CLASS(cx, obj2) == &js_RegExpClass) {
3896
if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */
3897
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
3898
JSMSG_NEWREGEXP_FLAGGED);
3901
JS_LOCK_OBJ(cx, obj2);
3902
re = (JSRegExp *) JS_GetPrivate(cx, obj2);
3904
JS_UNLOCK_OBJ(cx, obj2);
3907
re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
3908
JS_UNLOCK_OBJ(cx, obj2);
3912
str = js_ValueToString(cx, argv[0]);
3915
argv[0] = STRING_TO_JSVAL(str);
3917
if (JSVAL_IS_VOID(argv[1])) {
3920
opt = js_ValueToString(cx, argv[1]);
3923
argv[1] = STRING_TO_JSVAL(opt);
3927
/* Escape any naked slashes in the regexp source. */
3928
length = JSSTRING_LENGTH(str);
3929
start = JSSTRING_CHARS(str);
3930
end = start + length;
3931
nstart = ncp = NULL;
3932
for (cp = start; cp < end; cp++) {
3933
if (*cp == '/' && (cp == start || cp[-1] != '\\')) {
3934
nbytes = (++length + 1) * sizeof(jschar);
3936
nstart = (jschar *) JS_malloc(cx, nbytes);
3939
ncp = nstart + (cp - start);
3940
js_strncpy(nstart, start, cp - start);
3942
tmp = (jschar *) JS_realloc(cx, nstart, nbytes);
3944
JS_free(cx, nstart);
3947
ncp = tmp + (ncp - nstart);
3957
/* Don't forget to store the backstop after the new string. */
3958
JS_ASSERT((size_t)(ncp - nstart) == length);
3960
str = js_NewString(cx, nstart, length, 0);
3962
JS_free(cx, nstart);
3965
argv[0] = STRING_TO_JSVAL(str);
3969
re = js_NewRegExpOpt(cx, NULL, str, opt, JS_FALSE);
3973
JS_LOCK_OBJ(cx, obj);
3974
oldre = (JSRegExp *) JS_GetPrivate(cx, obj);
3975
ok = JS_SetPrivate(cx, obj, re);
3976
ok2 = js_SetLastIndex(cx, obj, 0);
3977
JS_UNLOCK_OBJ(cx, obj);
3979
js_DestroyRegExp(cx, re);
3983
js_DestroyRegExp(cx, oldre);
3984
*rval = OBJECT_TO_JSVAL(obj);
3989
regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
3990
JSBool test, jsval *rval)
3998
ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv);
4001
JS_LOCK_OBJ(cx, obj);
4002
re = (JSRegExp *) JS_GetPrivate(cx, obj);
4004
JS_UNLOCK_OBJ(cx, obj);
4008
/* NB: we must reach out: after this paragraph, in order to drop re. */
4009
HOLD_REGEXP(cx, re);
4010
if (re->flags & JSREG_GLOB) {
4011
ok = js_GetLastIndex(cx, obj, &lastIndex);
4015
JS_UNLOCK_OBJ(cx, obj);
4019
/* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */
4021
str = cx->regExpStatics.input;
4023
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4025
JS_GetStringBytes(re->source),
4026
(re->flags & JSREG_GLOB) ? "g" : "",
4027
(re->flags & JSREG_FOLD) ? "i" : "",
4028
(re->flags & JSREG_MULTILINE) ? "m" : "");
4033
str = js_ValueToString(cx, argv[0]);
4038
argv[0] = STRING_TO_JSVAL(str);
4041
if (lastIndex < 0 || JSSTRING_LENGTH(str) < lastIndex) {
4042
ok = js_SetLastIndex(cx, obj, 0);
4045
i = (size_t) lastIndex;
4046
ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
4047
if (ok && (re->flags & JSREG_GLOB))
4048
ok = js_SetLastIndex(cx, obj, (*rval == JSVAL_NULL) ? 0 : i);
4052
DROP_REGEXP(cx, re);
4057
regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4059
return regexp_exec_sub(cx, obj, argc, argv, JS_FALSE, rval);
4063
regexp_test(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4065
if (!regexp_exec_sub(cx, obj, argc, argv, JS_TRUE, rval))
4067
if (*rval != JSVAL_TRUE)
4068
*rval = JSVAL_FALSE;
4072
static JSFunctionSpec regexp_methods[] = {
4074
{js_toSource_str, js_regexp_toString, 0,0,0},
4076
{js_toString_str, js_regexp_toString, 0,0,0},
4077
{"compile", regexp_compile, 1,0,0},
4078
{"exec", regexp_exec, 0,0,0},
4079
{"test", regexp_test, 0,0,0},
4084
RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
4086
if (!(cx->fp->flags & JSFRAME_CONSTRUCTING)) {
4088
* If first arg is regexp and no flags are given, just return the arg.
4089
* (regexp_compile detects the regexp + flags case and throws a
4090
* TypeError.) See 10.15.3.1.
4092
if ((argc < 2 || JSVAL_IS_VOID(argv[1])) &&
4093
!JSVAL_IS_PRIMITIVE(argv[0]) &&
4094
OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(argv[0])) == &js_RegExpClass) {
4099
/* Otherwise, replace obj with a new RegExp object. */
4100
obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
4105
* regexp_compile does not use rval to root its temporaries
4106
* so we can use it to root obj.
4108
*rval = OBJECT_TO_JSVAL(obj);
4110
return regexp_compile(cx, obj, argc, argv, rval);
4114
js_InitRegExpClass(JSContext *cx, JSObject *obj)
4116
JSObject *proto, *ctor;
4119
proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
4120
regexp_props, regexp_methods,
4121
regexp_static_props, NULL);
4123
if (!proto || !(ctor = JS_GetConstructor(cx, proto)))
4125
if (!JS_AliasProperty(cx, ctor, "input", "$_") ||
4126
!JS_AliasProperty(cx, ctor, "multiline", "$*") ||
4127
!JS_AliasProperty(cx, ctor, "lastMatch", "$&") ||
4128
!JS_AliasProperty(cx, ctor, "lastParen", "$+") ||
4129
!JS_AliasProperty(cx, ctor, "leftContext", "$`") ||
4130
!JS_AliasProperty(cx, ctor, "rightContext", "$'")) {
4134
/* Give RegExp.prototype private data so it matches the empty string. */
4135
if (!regexp_compile(cx, proto, 0, NULL, &rval))
4140
JS_DeleteProperty(cx, obj, js_RegExpClass.name);
4145
js_NewRegExpObject(JSContext *cx, JSTokenStream *ts,
4146
jschar *chars, size_t length, uintN flags)
4151
JSTempValueRooter tvr;
4153
str = js_NewStringCopyN(cx, chars, length, 0);
4156
re = js_NewRegExp(cx, ts, str, flags, JS_FALSE);
4159
JS_PUSH_TEMP_ROOT_STRING(cx, str, &tvr);
4160
obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
4161
if (!obj || !JS_SetPrivate(cx, obj, re)) {
4162
js_DestroyRegExp(cx, re);
4165
if (obj && !js_SetLastIndex(cx, obj, 0))
4167
JS_POP_TEMP_ROOT(cx, &tvr);
4172
js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *parent)
4177
JS_ASSERT(OBJ_GET_CLASS(cx, obj) == &js_RegExpClass);
4178
clone = js_NewObject(cx, &js_RegExpClass, NULL, parent);
4181
re = JS_GetPrivate(cx, obj);
4182
if (!JS_SetPrivate(cx, clone, re) || !js_SetLastIndex(cx, clone, 0)) {
4183
cx->weakRoots.newborn[GCX_OBJECT] = NULL;
4186
HOLD_REGEXP(cx, re);
4191
js_GetLastIndex(JSContext *cx, JSObject *obj, jsdouble *lastIndex)
4195
return JS_GetReservedSlot(cx, obj, 0, &v) &&
4196
js_ValueToNumber(cx, v, lastIndex);
4200
js_SetLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex)
4204
return js_NewNumberValue(cx, lastIndex, &v) &&
4205
JS_SetReservedSlot(cx, obj, 0, v);