88
m_mainTable.deleteTable();
91
void Lexer::setCode(const SourceCode& source)
93
yylineno = source.firstLine();
94
m_restrKeyword = false;
70
m_keywordTable.deleteTable();
73
inline const UChar* Lexer::currentCharacter() const
78
inline int Lexer::currentOffset() const
80
return currentCharacter() - m_codeStart;
83
ALWAYS_INLINE void Lexer::shift1()
88
if (LIKELY(m_code < m_codeEnd))
96
ALWAYS_INLINE void Lexer::shift2()
100
if (LIKELY(m_code + 1 < m_codeEnd)) {
104
m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
111
ALWAYS_INLINE void Lexer::shift3()
114
if (LIKELY(m_code + 2 < m_codeEnd)) {
119
m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
120
m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
127
ALWAYS_INLINE void Lexer::shift4()
129
if (LIKELY(m_code + 3 < m_codeEnd)) {
130
m_current = m_code[0];
135
m_current = m_code < m_codeEnd ? m_code[0] : -1;
136
m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
137
m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
144
void Lexer::setCode(const SourceCode& source, ParserArena& arena)
146
m_arena = &arena.identifierArena();
148
m_lineNumber = source.firstLine();
95
149
m_delimited = false;
96
m_eatNextIdentifier = false;
100
m_position = source.startOffset();
152
const UChar* data = source.provider()->data();
101
154
m_source = &source;
102
m_code = source.provider()->data();
103
m_length = source.endOffset();
156
m_code = data + source.startOffset();
157
m_codeEnd = data + source.endOffset();
107
159
m_atLineStart = true;
109
// read first characters
113
void Lexer::shift(unsigned p)
115
// ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
116
// see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
122
m_currentOffset = m_nextOffset1;
123
m_nextOffset1 = m_nextOffset2;
124
m_nextOffset2 = m_nextOffset3;
126
if (m_position >= m_length) {
127
m_nextOffset3 = m_position;
161
// ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
162
// See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
163
if (source.provider()->hasBOMs()) {
164
for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
165
if (UNLIKELY(*p == byteOrderMark)) {
166
copyCodeWithoutBOMs();
132
m_nextOffset3 = m_position;
133
m_next3 = m_code[m_position++];
134
} while (m_next3 == 0xFEFF);
138
// called on each new line
139
void Lexer::nextLine()
142
m_atLineStart = true;
145
void Lexer::setDone(State s)
172
// Read the first characters into the 4-character buffer.
174
ASSERT(currentOffset() == source.startOffset());
177
void Lexer::copyCodeWithoutBOMs()
179
// Note: In this case, the character offset data for debugging will be incorrect.
180
// If it's important to correctly debug code with extraneous BOMs, then the caller
181
// should strip the BOMs when creating the SourceProvider object and do its own
182
// mapping of offsets within the stripped text to original text offset.
184
m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code);
185
for (const UChar* p = m_code; p < m_codeEnd; ++p) {
187
if (c != byteOrderMark)
188
m_codeWithoutBOMs.append(c);
190
ptrdiff_t startDelta = m_codeStart - m_code;
191
m_code = m_codeWithoutBOMs.data();
192
m_codeStart = m_code + startDelta;
193
m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
196
void Lexer::shiftLineTerminator()
198
ASSERT(isLineTerminator(m_current));
200
// Allow both CRLF and LFCR.
201
if (m_current + m_next1 == '\n' + '\r')
209
ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
211
return &JSC::makeIdentifier(*m_arena, m_globalData, characters, length);
214
inline bool Lexer::lastTokenWasRestrKeyword() const
216
return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
219
static NEVER_INLINE bool isNonASCIIIdentStart(int c)
221
return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
224
static inline bool isIdentStart(int c)
226
return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
229
static NEVER_INLINE bool isNonASCIIIdentPart(int c)
231
return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
232
| Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
235
static inline bool isIdentPart(int c)
237
return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
240
static inline int singleEscape(int c)
260
inline void Lexer::record8(int c)
264
m_buffer8.append(static_cast<char>(c));
267
inline void Lexer::record16(UChar c)
269
m_buffer16.append(c);
272
inline void Lexer::record16(int c)
275
ASSERT(c <= USHRT_MAX);
276
record16(UChar(static_cast<unsigned short>(c)));
151
279
int Lexer::lex(void* p1, void* p2)
282
ASSERT(m_buffer8.isEmpty());
283
ASSERT(m_buffer16.isEmpty());
153
285
YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
154
286
YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
157
unsigned short stringType = 0; // either single or double quotes
161
288
m_terminator = false;
165
// did we push a token on the stack previously ?
166
// (after an automatic semicolon insertion)
167
if (m_stackToken >= 0) {
169
token = m_stackToken;
172
int startOffset = m_currentOffset;
174
if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
176
if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
178
if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one
185
startOffset = m_currentOffset;
186
if (isWhiteSpace()) {
188
} else if (m_current == '/' && m_next1 == '/') {
190
m_state = InSingleLineComment;
191
} else if (m_current == '/' && m_next1 == '*') {
193
m_state = InMultiLineComment;
194
} else if (m_current == -1) {
195
if (!m_terminator && !m_delimited && !m_isReparsing) {
196
// automatic semicolon insertion if program incomplete
202
} else if (isLineTerminator()) {
205
if (m_restrKeyword) {
209
} else if (m_current == '"' || m_current == '\'') {
211
stringType = static_cast<unsigned short>(m_current);
212
} else if (isIdentStart(m_current)) {
214
m_state = InIdentifierOrKeyword;
215
} else if (m_current == '\\')
216
m_state = InIdentifierStartUnicodeEscapeStart;
217
else if (m_current == '0') {
220
} else if (isDecimalDigit(m_current)) {
223
} else if (m_current == '.' && isDecimalDigit(m_next1)) {
226
// <!-- marks the beginning of a line comment (for www usage)
227
} else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
229
m_state = InSingleLineComment;
231
} else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') {
233
m_state = InSingleLineComment;
235
token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
243
if (m_current == stringType) {
246
} else if (isLineTerminator() || m_current == -1)
248
else if (m_current == '\\')
249
m_state = InEscapeSequence;
253
// Escape Sequences inside of strings
254
case InEscapeSequence:
255
if (isOctalDigit(m_current)) {
256
if (m_current >= '0' && m_current <= '3' &&
257
isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
258
record16(convertOctal(m_current, m_next1, m_next2));
261
} else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
262
record16(convertOctal('0', m_current, m_next1));
265
} else if (isOctalDigit(m_current)) {
266
record16(convertOctal('0', '0', m_current));
270
} else if (m_current == 'x')
271
m_state = InHexEscape;
272
else if (m_current == 'u')
273
m_state = InUnicodeEscape;
274
else if (isLineTerminator()) {
278
record16(singleEscape(static_cast<unsigned short>(m_current)));
283
if (isHexDigit(m_current) && isHexDigit(m_next1)) {
285
record16(convertHex(m_current, m_next1));
287
} else if (m_current == stringType) {
297
case InUnicodeEscape:
298
if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
299
record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
302
} else if (m_current == stringType) {
309
case InSingleLineComment:
310
if (isLineTerminator()) {
313
if (m_restrKeyword) {
318
} else if (m_current == -1)
321
case InMultiLineComment:
324
else if (isLineTerminator())
326
else if (m_current == '*' && m_next1 == '/') {
331
case InIdentifierOrKeyword:
333
if (isIdentPart(m_current))
335
else if (m_current == '\\')
336
m_state = InIdentifierPartUnicodeEscapeStart;
338
setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
341
if (m_current == 'x' || m_current == 'X') {
344
} else if (m_current == '.') {
347
} else if (m_current == 'e' || m_current == 'E') {
349
m_state = InExponentIndicator;
350
} else if (isOctalDigit(m_current)) {
353
} else if (isDecimalDigit(m_current)) {
360
if (isHexDigit(m_current))
366
if (isOctalDigit(m_current))
368
else if (isDecimalDigit(m_current)) {
375
if (isDecimalDigit(m_current))
377
else if (m_current == '.') {
380
} else if (m_current == 'e' || m_current == 'E') {
382
m_state = InExponentIndicator;
387
if (isDecimalDigit(m_current))
389
else if (m_current == 'e' || m_current == 'E') {
391
m_state = InExponentIndicator;
395
case InExponentIndicator:
396
if (m_current == '+' || m_current == '-')
398
else if (isDecimalDigit(m_current)) {
400
m_state = InExponent;
405
if (isDecimalDigit(m_current))
410
case InIdentifierStartUnicodeEscapeStart:
411
if (m_current == 'u')
412
m_state = InIdentifierStartUnicodeEscape;
416
case InIdentifierPartUnicodeEscapeStart:
417
if (m_current == 'u')
418
m_state = InIdentifierPartUnicodeEscape;
422
case InIdentifierStartUnicodeEscape:
423
if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
427
token = convertUnicode(m_current, m_next1, m_next2, m_next3);
429
if (!isIdentStart(token)) {
434
m_state = InIdentifier;
436
case InIdentifierPartUnicodeEscape:
437
if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
441
token = convertUnicode(m_current, m_next1, m_next2, m_next3);
443
if (!isIdentPart(token)) {
448
m_state = InIdentifier;
451
ASSERT(!"Unhandled state in switch statement");
454
// move on to the next character
457
if (m_state != Start && m_state != InSingleLineComment)
458
m_atLineStart = false;
461
// no identifiers allowed directly after numeric literal, e.g. "3in" is bad
462
if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current))
466
m_buffer8.append('\0');
469
fprintf(stderr, "line: %d ", lineNo());
470
fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
471
fprintf(stderr, "%s ", m_buffer8.data());
475
if (m_state == Number)
476
dval = WTF::strtod(m_buffer8.data(), 0L);
477
else if (m_state == Hex) { // scan hex numbers
478
const char* p = m_buffer8.data() + 2;
479
while (char c = *p++) {
481
dval += convertHex(c);
484
if (dval >= mantissaOverflowLowerBound)
485
dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
488
} else if (m_state == Octal) { // scan octal number
489
const char* p = m_buffer8.data() + 1;
490
while (char c = *p++) {
495
if (dval >= mantissaOverflowLowerBound)
496
dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
510
printf("(Identifier)/(Keyword)\n");
513
printf("(String)\n");
516
printf("(Number)\n");
523
if (m_state != Identifier)
524
m_eatNextIdentifier = false;
526
m_restrKeyword = false;
291
while (isWhiteSpace(m_current))
294
int startOffset = currentOffset();
296
if (m_current == -1) {
297
if (!m_terminator && !m_delimited && !m_isReparsing) {
298
// automatic semicolon insertion if program incomplete
527
305
m_delimited = false;
528
llocp->first_line = yylineno;
529
llocp->last_line = yylineno;
530
llocp->first_column = startOffset;
531
llocp->last_column = m_currentOffset;
537
if (token == '}' || token == ';')
541
// Apply anonymous-function hack below (eat the identifier).
542
if (m_eatNextIdentifier) {
543
m_eatNextIdentifier = false;
544
token = lex(lvalp, llocp);
547
lvalp->ident = makeIdentifier(m_buffer16);
550
case IdentifierOrKeyword: {
551
lvalp->ident = makeIdentifier(m_buffer16);
552
const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
554
// Lookup for keyword failed, means this is an identifier.
558
token = entry->lexerValue();
559
// Hack for "f = function somename() { ... }"; too hard to get into the grammar.
560
m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
561
if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
562
m_restrKeyword = true;
566
// Atomize constant strings in case they're later used in property lookup.
567
lvalp->ident = makeIdentifier(m_buffer16);
571
lvalp->doubleValue = dval;
576
fprintf(stderr, "yylex: ERROR.\n");
581
ASSERT(!"unhandled numeration value in switch");
589
bool Lexer::isWhiteSpace() const
591
return isWhiteSpace(m_current);
594
bool Lexer::isLineTerminator()
596
bool cr = (m_current == '\r');
597
bool lf = (m_current == '\n');
602
return cr || lf || m_current == 0x2028 || m_current == 0x2029;
605
bool Lexer::isIdentStart(int c)
607
return isASCIIAlpha(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)));
610
bool Lexer::isIdentPart(int c)
612
return isASCIIAlphanumeric(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
613
| Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)));
616
static bool isDecimalDigit(int c)
618
return isASCIIDigit(c);
621
bool Lexer::isHexDigit(int c)
623
return isASCIIHexDigit(c);
626
bool Lexer::isOctalDigit(int c)
628
return isASCIIOctalDigit(c);
631
int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
633
if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
637
if (c1 == '=' && c2 == '=' && c3 == '=') {
641
if (c1 == '!' && c2 == '=' && c3 == '=') {
645
if (c1 == '>' && c2 == '>' && c3 == '>') {
649
if (c1 == '<' && c2 == '<' && c3 == '=') {
653
if (c1 == '>' && c2 == '>' && c3 == '=') {
657
if (c1 == '<' && c2 == '=') {
661
if (c1 == '>' && c2 == '=') {
665
if (c1 == '!' && c2 == '=') {
669
if (c1 == '+' && c2 == '+') {
675
if (c1 == '-' && c2 == '-') {
678
return AUTOMINUSMINUS;
681
if (c1 == '=' && c2 == '=') {
685
if (c1 == '+' && c2 == '=') {
689
if (c1 == '-' && c2 == '=') {
693
if (c1 == '*' && c2 == '=') {
697
if (c1 == '/' && c2 == '=') {
701
if (c1 == '&' && c2 == '=') {
705
if (c1 == '^' && c2 == '=') {
709
if (c1 == '%' && c2 == '=') {
713
if (c1 == '|' && c2 == '=') {
717
if (c1 == '<' && c2 == '<') {
721
if (c1 == '>' && c2 == '>') {
725
if (c1 == '&' && c2 == '&') {
729
if (c1 == '|' && c2 == '|') {
308
if (m_next1 == '>' && m_next2 == '>') {
309
if (m_next3 == '=') {
311
token = URSHIFTEQUAL;
318
if (m_next1 == '>') {
319
if (m_next2 == '=') {
328
if (m_next1 == '=') {
337
if (m_next1 == '=') {
338
if (m_next2 == '=') {
351
if (m_next1 == '=') {
352
if (m_next2 == '=') {
365
if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
366
// <!-- marks the beginning of a line comment (for www usage)
368
goto inSingleLineComment;
370
if (m_next1 == '<') {
371
if (m_next2 == '=') {
380
if (m_next1 == '=') {
389
if (m_next1 == '+') {
392
token = AUTOPLUSPLUS;
398
if (m_next1 == '=') {
407
if (m_next1 == '-') {
408
if (m_atLineStart && m_next2 == '>') {
410
goto inSingleLineComment;
414
token = AUTOMINUSMINUS;
420
if (m_next1 == '=') {
429
if (m_next1 == '=') {
438
if (m_next1 == '/') {
440
goto inSingleLineComment;
443
goto inMultiLineComment;
444
if (m_next1 == '=') {
453
if (m_next1 == '&') {
458
if (m_next1 == '=') {
467
if (m_next1 == '=') {
476
if (m_next1 == '=') {
485
if (m_next1 == '=') {
490
if (m_next1 == '|') {
499
if (isASCIIDigit(m_next1)) {
502
goto inNumberAfterDecimalPoint;
758
return static_cast<int>(c1);
760
charPos = m_currentOffset;
524
lvalp->intValue = currentOffset();
764
charPos = m_currentOffset;
772
unsigned short Lexer::singleEscape(unsigned short c)
529
lvalp->intValue = currentOffset();
535
goto startIdentifierWithBackslash;
537
goto startNumberWithZeroDigit;
798
unsigned short Lexer::convertOctal(int c1, int c2, int c3)
800
return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
803
unsigned char Lexer::convertHex(int c)
805
if (c >= '0' && c <= '9')
806
return static_cast<unsigned char>(c - '0');
807
if (c >= 'a' && c <= 'f')
808
return static_cast<unsigned char>(c - 'a' + 10);
809
return static_cast<unsigned char>(c - 'A' + 10);
812
unsigned char Lexer::convertHex(int c1, int c2)
814
return ((convertHex(c1) << 4) + convertHex(c2));
817
UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
819
unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
820
unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
821
return (highByte << 8 | lowByte);
824
void Lexer::record8(int c)
828
m_buffer8.append(static_cast<char>(c));
831
void Lexer::record16(int c)
834
ASSERT(c <= USHRT_MAX);
835
record16(UChar(static_cast<unsigned short>(c)));
838
void Lexer::record16(UChar c)
840
m_buffer16.append(c);
843
bool Lexer::scanRegExp()
552
if (isIdentStart(m_current))
553
goto startIdentifierOrKeyword;
554
if (isLineTerminator(m_current)) {
555
shiftLineTerminator();
556
m_atLineStart = true;
558
if (lastTokenWasRestrKeyword()) {
567
m_atLineStart = false;
571
int stringQuoteCharacter = m_current;
574
const UChar* stringStart = currentCharacter();
575
while (m_current != stringQuoteCharacter) {
576
// Fast check for characters that require special handling.
577
// Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
578
// as possible, and lets through all common ASCII characters.
579
if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
580
m_buffer16.append(stringStart, currentCharacter() - stringStart);
585
lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
587
m_atLineStart = false;
593
while (m_current != stringQuoteCharacter) {
594
if (m_current == '\\')
595
goto inStringEscapeSequence;
596
if (UNLIKELY(isLineTerminator(m_current)))
598
if (UNLIKELY(m_current == -1))
605
inStringEscapeSequence:
607
if (m_current == 'x') {
609
if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
610
record16(convertHex(m_current, m_next1));
615
if (m_current == stringQuoteCharacter)
619
if (m_current == 'u') {
621
if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
622
record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
626
if (m_current == stringQuoteCharacter) {
632
if (isASCIIOctalDigit(m_current)) {
633
if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
634
record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
638
if (isASCIIOctalDigit(m_next1)) {
639
record16((m_current - '0') * 8 + m_next1 - '0');
643
record16(m_current - '0');
647
if (isLineTerminator(m_current)) {
648
shiftLineTerminator();
651
record16(singleEscape(m_current));
656
startIdentifierWithBackslash:
658
if (UNLIKELY(m_current != 'u'))
661
if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
663
token = convertUnicode(m_current, m_next1, m_next2, m_next3);
664
if (UNLIKELY(!isIdentStart(token)))
666
goto inIdentifierAfterCharacterCheck;
668
startIdentifierOrKeyword: {
669
const UChar* identifierStart = currentCharacter();
671
while (isIdentPart(m_current))
673
if (LIKELY(m_current != '\\')) {
674
lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
675
goto doneIdentifierOrKeyword;
677
m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
682
if (UNLIKELY(m_current != 'u'))
685
if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
687
token = convertUnicode(m_current, m_next1, m_next2, m_next3);
688
if (UNLIKELY(!isIdentPart(token)))
690
inIdentifierAfterCharacterCheck:
694
while (isIdentPart(m_current)) {
698
} while (UNLIKELY(m_current == '\\'));
702
while (!isLineTerminator(m_current)) {
703
if (UNLIKELY(m_current == -1))
707
shiftLineTerminator();
708
m_atLineStart = true;
710
if (lastTokenWasRestrKeyword())
716
while (m_current != '*' || m_next1 != '/') {
717
if (isLineTerminator(m_current))
718
shiftLineTerminator();
721
if (UNLIKELY(m_current == -1))
726
m_atLineStart = false;
729
startNumberWithZeroDigit:
731
if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
735
if (m_current == '.') {
739
goto inNumberAfterDecimalPoint;
741
if ((m_current | 0x20) == 'e') {
745
goto inExponentIndicator;
747
if (isASCIIOctalDigit(m_current))
749
if (isASCIIDigit(m_current))
751
lvalp->doubleValue = 0;
754
inNumberAfterDecimalPoint:
755
while (isASCIIDigit(m_current)) {
759
if ((m_current | 0x20) == 'e') {
762
goto inExponentIndicator;
767
if (m_current == '+' || m_current == '-') {
771
if (!isASCIIDigit(m_current))
776
} while (isASCIIDigit(m_current));
783
} while (isASCIIOctalDigit(m_current));
784
if (isASCIIDigit(m_current))
789
const char* end = m_buffer8.end();
790
for (const char* p = m_buffer8.data(); p < end; ++p) {
794
if (dval >= mantissaOverflowLowerBound)
795
dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
799
lvalp->doubleValue = dval;
807
} while (isASCIIHexDigit(m_current));
811
const char* end = m_buffer8.end();
812
for (const char* p = m_buffer8.data(); p < end; ++p) {
814
dval += toASCIIHexValue(*p);
816
if (dval >= mantissaOverflowLowerBound)
817
dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
821
lvalp->doubleValue = dval;
828
while (isASCIIDigit(m_current)) {
832
if (m_current == '.') {
835
goto inNumberAfterDecimalPoint;
837
if ((m_current | 0x20) == 'e') {
840
goto inExponentIndicator;
843
// Fall through into doneNumber.
846
// Null-terminate string for strtod.
847
m_buffer8.append('\0');
848
lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
851
// Fall through into doneNumeric.
854
// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
855
if (UNLIKELY(isIdentStart(m_current)))
858
m_atLineStart = false;
869
m_atLineStart = false;
871
lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
872
m_buffer16.resize(0);
876
doneIdentifierOrKeyword: {
877
m_atLineStart = false;
879
m_buffer16.resize(0);
880
const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
881
token = entry ? entry->lexerValue() : IDENT;
886
// Atomize constant strings in case they're later used in property lookup.
888
m_atLineStart = false;
890
lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
891
m_buffer16.resize(0);
894
// Fall through into returnToken.
897
int lineNumber = m_lineNumber;
898
llocp->first_line = lineNumber;
899
llocp->last_line = lineNumber;
900
llocp->first_column = startOffset;
901
llocp->last_column = currentOffset();
912
bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar prefix)
914
ASSERT(m_buffer16.isEmpty());
846
916
bool lastWasEscape = false;
847
917
bool inBrackets = false;
850
if (isLineTerminator() || m_current == -1)
923
if (isLineTerminator(m_current) || m_current == -1) {
924
m_buffer16.resize(0);
852
else if (m_current != '/' || lastWasEscape == true || inBrackets == true) {
927
if (m_current != '/' || lastWasEscape || inBrackets) {
853
928
// keep track of '[' and ']'
854
929
if (!lastWasEscape) {
855
if ( m_current == '[' && !inBrackets )
930
if (m_current == '[' && !inBrackets)
856
931
inBrackets = true;
857
if ( m_current == ']' && inBrackets )
932
if (m_current == ']' && inBrackets)
858
933
inBrackets = false;
860
935
record16(m_current);
862
!lastWasEscape && (m_current == '\\');
936
lastWasEscape = !lastWasEscape && m_current == '\\';
863
937
} else { // end of regexp
864
m_pattern = UString(m_buffer16);
938
pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
939
m_buffer16.resize(0);
872
946
while (isIdentPart(m_current)) {
873
947
record16(m_current);
876
m_flags = UString(m_buffer16);
950
flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
951
m_buffer16.resize(0);
956
bool Lexer::skipRegExp()
958
bool lastWasEscape = false;
959
bool inBrackets = false;
962
if (isLineTerminator(m_current) || m_current == -1)
964
if (m_current != '/' || lastWasEscape || inBrackets) {
965
// keep track of '[' and ']'
966
if (!lastWasEscape) {
967
if (m_current == '[' && !inBrackets)
969
if (m_current == ']' && inBrackets)
972
lastWasEscape = !lastWasEscape && m_current == '\\';
973
} else { // end of regexp
980
while (isIdentPart(m_current))
881
986
void Lexer::clear()
883
m_identifiers.clear();
989
m_codeWithoutBOMs.clear();
885
991
Vector<char> newBuffer8;
886
992
newBuffer8.reserveInitialCapacity(initialReadBufferCapacity);