1
/****************************************************************************
3
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
4
** Contact: http://www.qt-project.org/
6
** This file is part of the QtQml module of the Qt Toolkit.
8
** $QT_BEGIN_LICENSE:LGPL$
9
** GNU Lesser General Public License Usage
10
** This file may be used under the terms of the GNU Lesser General Public
11
** License version 2.1 as published by the Free Software Foundation and
12
** appearing in the file LICENSE.LGPL included in the packaging of this
13
** file. Please review the following information to ensure the GNU Lesser
14
** General Public License version 2.1 requirements will be met:
15
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
17
** In addition, as a special exception, Nokia gives you certain additional
18
** rights. These rights are described in the Nokia Qt LGPL Exception
19
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
21
** GNU General Public License Usage
22
** Alternatively, this file may be used under the terms of the GNU General
23
** Public License version 3.0 as published by the Free Software Foundation
24
** and appearing in the file LICENSE.GPL included in the packaging of this
25
** file. Please review the following information to ensure the GNU General
26
** Public License version 3.0 requirements will be met:
27
** http://www.gnu.org/copyleft/gpl.html.
30
** Alternatively, this file may be used in accordance with the terms and
31
** conditions contained in a signed written agreement between you and Nokia.
40
****************************************************************************/
42
#include "qqmljslexer_p.h"
43
#include "qqmljsengine_p.h"
44
#include "qqmljsmemorypool_p.h"
46
#include <QtCore/QCoreApplication>
47
#include <QtCore/QVarLengthArray>
48
#include <QtCore/QDebug>
51
Q_CORE_EXPORT double qstrtod(const char *s00, char const **se, bool *ok);
54
using namespace QQmlJS;
56
// Maps a regular-expression flag character to the matching Lexer::RegExp_*
// constant: 'g' -> RegExp_Global, 'i' -> RegExp_IgnoreCase,
// 'm' -> RegExp_Multiline.
// NOTE(review): the value produced for any other character is not visible
// here -- confirm before relying on it.
static int regExpFlagFromChar(const QChar &ch)
58
switch (ch.unicode()) {
59
case 'g': return Lexer::RegExp_Global;
60
case 'i': return Lexer::RegExp_IgnoreCase;
61
case 'm': return Lexer::RegExp_Multiline;
66
// Converts one hexadecimal digit (given as a UTF-16 code unit) to its numeric
// value. Three ranges are distinguished: '0'..'9', 'a'..'f' (-> 10..15) and a
// final case computed as c - 'A' + 10.
// NOTE(review): no range check on the final case is visible here, so callers
// presumably have to pass a valid hex digit -- confirm.
static unsigned char convertHex(ushort c)
68
if (c >= '0' && c <= '9')
70
else if (c >= 'a' && c <= 'f')
71
return (c - 'a' + 10);
73
return (c - 'A' + 10);
76
// Combines two hexadecimal digit characters into one QChar:
// c1 supplies the high nibble, c2 the low nibble.
static QChar convertHex(QChar c1, QChar c2)
78
return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
81
// Builds a QChar from the four hexadecimal digits c1 c2 c3 c4.
// The byte formed from c3/c4 is passed as the first constructor argument and
// the byte formed from c1/c2 as the second (QChar's two-byte constructor takes
// the low byte, "cell", first and the high byte, "row", second).
static QChar convertUnicode(QChar c1, QChar c2, QChar c3, QChar c4)
83
return QChar((convertHex(c3.unicode()) << 4) + convertHex(c4.unicode()),
84
(convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
87
// Constructs a lexer in its initial state and registers it with the engine
// through Engine::setLexer().
// The current character starts out as '\n'; scanChar() treats a current '\n'
// as a line break when it is next called.
Lexer::Lexer(Engine *engine)
93
, _char(QLatin1Char('\n'))
95
, _currentLineNumber(0)
97
, _parenthesesState(IgnoreParentheses)
98
, _parenthesesCount(0)
104
, _validTokenText(false)
105
, _prohibitAutomaticSemicolon(false)
106
, _restrictedKeyword(false)
108
, _followsClosingBrace(false)
113
engine->setLexer(this);
116
bool Lexer::qmlMode() const
121
QString Lexer::code() const
126
// Resets the lexer so that it scans a new piece of source text.
//
// code    - the source text; it is also handed to the engine.
// lineno  - value assigned to the current line number.
// qmlMode - NOTE(review): how this flag is stored is not visible here.
//
// All scanning pointers are set to the start of the text, and the error,
// parentheses and automatic-semicolon state is cleared.
void Lexer::setCode(const QString &code, int lineno, bool qmlMode)
129
_engine->setCode(code);
134
_tokenText.reserve(1024);
135
_errorMessage.clear();
136
_tokenSpell = QStringRef();
138
_codePtr = code.unicode();
139
_lastLinePtr = _codePtr;
140
_tokenLinePtr = _codePtr;
141
_tokenStartPtr = _codePtr;
143
// Prime the current character with '\n': scanChar() treats a current '\n'
// as a line break (it updates _lastLinePtr and increments the line number).
_char = QLatin1Char('\n');
144
_errorCode = NoError;
146
_currentLineNumber = lineno;
150
_parenthesesState = IgnoreParentheses;
151
_parenthesesCount = 0;
159
_validTokenText = false;
160
_prohibitAutomaticSemicolon = false;
161
_restrictedKeyword = false;
163
_followsClosingBrace = false;
167
// Line bookkeeping done when moving past the current character: if the
// current character is '\n', the start-of-line pointer and the line counter
// are updated.
// NOTE(review): the statement that actually fetches the next character is
// not visible here.
void Lexer::scanChar()
171
if (_char == QLatin1Char('\n')) {
172
_lastLinePtr = _codePtr; // points to the first character after the newline
173
++_currentLineNumber;
179
// Scans one token with scanToken() and then updates the state that depends
// on the token just read:
//  - _followsClosingBrace records whether the *previous* token was T_RBRACE;
//  - _restrictedKeyword is cleared and then set again for certain token kinds
//    (NOTE(review): the case labels are not visible here);
//  - the parentheses state machine below tracks when a parenthesised group
//    has been closed, which syncProhibitAutomaticSemicolon() consumes.
const int previousTokenKind = _tokenKind;
181
_tokenSpell = QStringRef();
182
_tokenKind = scanToken();
183
// _tokenStartPtr is set to _codePtr - 1 when a token starts, so _codePtr
// presumably runs one character ahead of _char; hence the "- 1".
_tokenLength = _codePtr - _tokenStartPtr - 1;
186
_restrictedKeyword = false;
187
_followsClosingBrace = (previousTokenKind == T_RBRACE);
190
switch (_tokenKind) {
201
_parenthesesState = CountParentheses;
202
_parenthesesCount = 0;
206
_parenthesesState = BalancedParentheses;
213
_restrictedKeyword = true;
217
// update the parentheses state
218
switch (_parenthesesState) {
219
case IgnoreParentheses:
222
case CountParentheses:
223
if (_tokenKind == T_RPAREN) {
225
// a count of zero means the outermost group has just been closed
if (_parenthesesCount == 0)
226
_parenthesesState = BalancedParentheses;
227
} else if (_tokenKind == T_LPAREN) {
232
case BalancedParentheses:
233
_parenthesesState = IgnoreParentheses;
240
// Tests whether the four characters chars[0]..chars[3] are all hexadecimal
// digits, i.e. whether they can form the XXXX part of a \uXXXX escape.
// Reads up to four characters starting at chars.
bool Lexer::isUnicodeEscapeSequence(const QChar *chars)
242
if (isHexDigit(chars[0]) && isHexDigit(chars[1]) && isHexDigit(chars[2]) && isHexDigit(chars[3]))
248
// Decodes a uXXXX escape at the current position (the leading backslash has
// already been consumed by the caller). When the current character is 'u' and
// the four characters after it are hex digits, they are consumed and the
// decoded character is returned.
// NOTE(review): how *ok is set and what is returned on failure is not
// visible here.
QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok)
250
if (_char == QLatin1Char('u') && isUnicodeEscapeSequence(&_codePtr[0])) {
251
scanChar(); // skip u
253
const QChar c1 = _char;
256
const QChar c2 = _char;
259
const QChar c3 = _char;
262
const QChar c4 = _char;
268
return convertUnicode(c1, c2, c3, c4);
275
// Scans the next token from the source text and returns its kind.
//
// Overview of the visible logic:
//  - a token previously stashed in _stackToken is picked up first (the
//    "++" / "--" handling below stores T_PLUS_PLUS / T_MINUS_MINUS there);
//  - leading whitespace is skipped, with special handling of newlines after a
//    restricted keyword (automatic semicolon insertion);
//  - punctuators are recognised by switching on the first character;
//  - comments are reported to the engine through addComment();
//  - string literals, identifiers/keywords and numeric literals each have
//    their own section.
// Errors are reported through _errorCode / _errorMessage.
int Lexer::scanToken()
277
if (_stackToken != -1) {
278
int tk = _stackToken;
286
_validTokenText = false;
287
_tokenLinePtr = _lastLinePtr;
289
// skip whitespace in front of the token
while (_char.isSpace()) {
290
if (_char == QLatin1Char('\n')) {
291
_tokenLinePtr = _codePtr;
293
if (_restrictedKeyword) {
294
// automatic semicolon insertion
295
_tokenLine = _currentLineNumber;
296
_tokenStartPtr = _codePtr - 1; // ### TODO: insert it before the optional \r sequence.
300
syncProhibitAutomaticSemicolon();
307
// the token starts at the current character (_codePtr is one ahead of it)
_tokenStartPtr = _codePtr - 1;
308
_tokenLine = _currentLineNumber;
313
const QChar ch = _char;
316
switch (ch.unicode()) {
317
case '~': return T_TILDE;
318
case '}': return T_RBRACE;
321
if (_char == QLatin1Char('|')) {
324
} else if (_char == QLatin1Char('=')) {
330
case '{': return T_LBRACE;
333
if (_char == QLatin1Char('=')) {
339
case ']': return T_RBRACKET;
340
case '[': return T_LBRACKET;
341
case '?': return T_QUESTION;
344
if (_char == QLatin1Char('>')) {
346
if (_char == QLatin1Char('>')) {
348
if (_char == QLatin1Char('=')) {
350
return T_GT_GT_GT_EQ;
353
} else if (_char == QLatin1Char('=')) {
358
} else if (_char == QLatin1Char('=')) {
365
if (_char == QLatin1Char('=')) {
367
if (_char == QLatin1Char('=')) {
376
if (_char == QLatin1Char('=')) {
379
} else if (_char == QLatin1Char('<')) {
381
if (_char == QLatin1Char('=')) {
389
case ';': return T_SEMICOLON;
390
case ':': return T_COLON;
393
// block comment: the reported range skips the two-character opening
// delimiter (+ 2) and excludes both delimiters from the length (- 4)
if (_char == QLatin1Char('*')) {
395
while (!_char.isNull()) {
396
if (_char == QLatin1Char('*')) {
398
if (_char == QLatin1Char('/')) {
402
_engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 4,
403
tokenStartLine(), tokenStartColumn() + 2);
412
// line comment: runs up to (not including) the next '\n' or the end of input
} else if (_char == QLatin1Char('/')) {
413
while (!_char.isNull() && _char != QLatin1Char('\n')) {
417
_engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 2,
418
tokenStartLine(), tokenStartColumn() + 2);
421
} if (_char == QLatin1Char('=')) {
428
// numeric literal that starts with '.': collect the characters into a
// narrow buffer and let qstrtod() convert them
if (_char.isDigit()) {
429
QVarLengthArray<char,32> chars;
431
chars.append(ch.unicode()); // append the `.'
433
while (_char.isDigit()) {
434
chars.append(_char.unicode());
438
if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
439
if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
440
_codePtr[1].isDigit())) {
442
chars.append(_char.unicode());
443
scanChar(); // consume `e'
445
if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
446
chars.append(_char.unicode());
447
scanChar(); // consume the sign
450
while (_char.isDigit()) {
451
chars.append(_char.unicode());
459
const char *begin = chars.constData();
463
_tokenValue = qstrtod(begin, &end, &ok);
465
// the conversion must have consumed the whole buffer; "size() - 1"
// presumably excludes a terminating '\0' appended above (not visible here)
if (end - begin != chars.size() - 1) {
466
_errorCode = IllegalExponentIndicator;
467
_errorMessage = QCoreApplication::translate("QQmlParser", "Illegal syntax for exponential number");
471
return T_NUMERIC_LITERAL;
476
if (_char == QLatin1Char('=')) {
479
} else if (_char == QLatin1Char('-')) {
482
if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
483
_stackToken = T_MINUS_MINUS;
487
return T_MINUS_MINUS;
491
case ',': return T_COMMA;
494
if (_char == QLatin1Char('=')) {
497
} else if (_char == QLatin1Char('+')) {
500
if (_terminator && !_delimited && !_prohibitAutomaticSemicolon) {
501
_stackToken = T_PLUS_PLUS;
510
if (_char == QLatin1Char('=')) {
516
case ')': return T_RPAREN;
517
case '(': return T_LPAREN;
520
if (_char == QLatin1Char('=')) {
523
} else if (_char == QLatin1Char('&')) {
530
if (_char == QLatin1Char('=')) {
532
return T_REMAINDER_EQ;
537
if (_char == QLatin1Char('=')) {
539
if (_char == QLatin1Char('=')) {
549
// string literal; ch is the opening quote character
const QChar quote = ch;
550
bool multilineStringLiteral = false;
552
const QChar *startCode = _codePtr;
555
// first pass: look for the closing quote. If it is reached, the spelling
// is taken directly from the source buffer; a newline or backslash gets
// special treatment (presumably it ends this pass -- not visible here).
while (!_char.isNull()) {
556
if (_char == QLatin1Char('\n') || _char == QLatin1Char('\\')) {
558
} else if (_char == quote) {
559
_tokenSpell = _engine->midRef(startCode - _code.unicode() - 1, _codePtr - startCode);
562
return T_STRING_LITERAL;
568
// second pass: build the text in _tokenText, starting with the characters
// already scanned, then decode escapes and line continuations
_validTokenText = true;
569
_tokenText.resize(0);
571
while (startCode != _codePtr - 1)
572
_tokenText += *startCode++;
574
while (! _char.isNull()) {
575
if (_char == QLatin1Char('\n')) {
576
multilineStringLiteral = true;
579
} else if (_char == quote) {
583
_tokenSpell = _engine->newStringRef(_tokenText);
585
return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
586
} else if (_char == QLatin1Char('\\')) {
592
switch (_char.unicode()) {
593
// unicode escape sequence
595
u = decodeUnicodeEscapeCharacter(&ok);
600
// hex escape sequence
603
if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) {
606
const QChar c1 = _char;
609
const QChar c2 = _char;
612
u = convertHex(c1, c2);
618
// single character escape sequence
619
case '\\': u = QLatin1Char('\\'); scanChar(); break;
620
case '\'': u = QLatin1Char('\''); scanChar(); break;
621
case '\"': u = QLatin1Char('\"'); scanChar(); break;
622
case 'b': u = QLatin1Char('\b'); scanChar(); break;
623
case 'f': u = QLatin1Char('\f'); scanChar(); break;
624
case 'n': u = QLatin1Char('\n'); scanChar(); break;
625
case 'r': u = QLatin1Char('\r'); scanChar(); break;
626
case 't': u = QLatin1Char('\t'); scanChar(); break;
627
case 'v': u = QLatin1Char('\v'); scanChar(); break;
630
if (! _codePtr[1].isDigit()) {
632
u = QLatin1Char('\0');
634
// ### parse deprecated octal escape sequence ?
640
while (_char == QLatin1Char('\r'))
643
if (_char == QLatin1Char('\n')) {
647
u = QLatin1Char('\n');
658
// non escape character
670
_errorCode = UnclosedStringLiteral;
671
_errorMessage = QCoreApplication::translate("QQmlParser", "Unclosed string at end of line");
676
// identifier or keyword; an identifier may also start with a \uXXXX escape
if (ch.isLetter() || ch == QLatin1Char('$') || ch == QLatin1Char('_') || (ch == QLatin1Char('\\') && _char == QLatin1Char('u'))) {
677
bool identifierWithEscapeChars = false;
678
if (ch == QLatin1Char('\\')) {
679
identifierWithEscapeChars = true;
680
_tokenText.resize(0);
682
_tokenText += decodeUnicodeEscapeCharacter(&ok);
683
_validTokenText = true;
685
_errorCode = IllegalUnicodeEscapeSequence;
686
_errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
691
if (_char.isLetterOrNumber() || _char == QLatin1Char('$') || _char == QLatin1Char('_')) {
692
if (identifierWithEscapeChars)
696
} else if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) {
697
// first escape in this identifier: switch to building the decoded text
// in _tokenText, seeded with the characters scanned so far
if (! identifierWithEscapeChars) {
698
identifierWithEscapeChars = true;
699
_tokenText.resize(0);
700
_tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1);
701
_validTokenText = true;
704
scanChar(); // skip '\\'
706
_tokenText += decodeUnicodeEscapeCharacter(&ok);
708
_errorCode = IllegalUnicodeEscapeSequence;
709
_errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
713
_tokenLength = _codePtr - _tokenStartPtr - 1;
715
int kind = T_IDENTIFIER;
717
// only identifiers written without escapes are checked against the keywords
if (! identifierWithEscapeChars)
718
kind = classify(_tokenStartPtr, _tokenLength, _qmlMode);
721
if (kind == T_IDENTIFIER && identifierWithEscapeChars)
722
_tokenSpell = _engine->newStringRef(_tokenText);
724
_tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
730
} else if (ch.isDigit()) {
731
// literal with a non-zero first digit: accumulate the integer value
// while looking ahead; if no '.', 'e' or 'E' follows, that value is the
// token value and no string conversion is needed
if (ch != QLatin1Char('0')) {
732
double integer = ch.unicode() - '0';
735
const QChar *code = _codePtr;
736
while (n.isDigit()) {
737
integer = integer * 10 + (n.unicode() - '0');
741
if (n != QLatin1Char('.') && n != QLatin1Char('e') && n != QLatin1Char('E')) {
742
if (code != _codePtr) {
746
_tokenValue = integer;
747
return T_NUMERIC_LITERAL;
751
// general path: collect the literal's characters into a narrow buffer
QVarLengthArray<char,32> chars;
752
chars.append(ch.unicode());
754
if (ch == QLatin1Char('0') && (_char == QLatin1Char('x') || _char == QLatin1Char('X'))) {
755
// parse hex integer literal
757
chars.append(_char.unicode());
758
scanChar(); // consume `x'
760
while (isHexDigit(_char)) {
761
chars.append(_char.unicode());
765
_tokenValue = integerFromString(chars.constData(), chars.size(), 16);
766
return T_NUMERIC_LITERAL;
769
// decimal integer literal
770
while (_char.isDigit()) {
771
chars.append(_char.unicode());
772
scanChar(); // consume the digit
775
if (_char == QLatin1Char('.')) {
776
chars.append(_char.unicode());
777
scanChar(); // consume `.'
779
while (_char.isDigit()) {
780
chars.append(_char.unicode());
784
if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
785
if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
786
_codePtr[1].isDigit())) {
788
chars.append(_char.unicode());
789
scanChar(); // consume `e'
791
if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
792
chars.append(_char.unicode());
793
scanChar(); // consume the sign
796
while (_char.isDigit()) {
797
chars.append(_char.unicode());
802
} else if (_char == QLatin1Char('e') || _char == QLatin1Char('E')) {
803
if (_codePtr[0].isDigit() || ((_codePtr[0] == QLatin1Char('+') || _codePtr[0] == QLatin1Char('-')) &&
804
_codePtr[1].isDigit())) {
806
chars.append(_char.unicode());
807
scanChar(); // consume `e'
809
if (_char == QLatin1Char('+') || _char == QLatin1Char('-')) {
810
chars.append(_char.unicode());
811
scanChar(); // consume the sign
814
while (_char.isDigit()) {
815
chars.append(_char.unicode());
823
const char *begin = chars.constData();
827
_tokenValue = qstrtod(begin, &end, &ok);
829
// the conversion must have consumed the whole buffer; "size() - 1"
// presumably excludes a terminating '\0' appended above (not visible here)
if (end - begin != chars.size() - 1) {
830
_errorCode = IllegalExponentIndicator;
831
_errorMessage = QCoreApplication::translate("QQmlParser", "Illegal syntax for exponential number");
835
return T_NUMERIC_LITERAL;
844
// Scans a regular-expression literal (pattern, optional character classes and
// trailing flags) starting at the current character.
//
// prefix - when EqualPrefix, the collected pattern text starts with '='
//          (presumably because a "/=" token was already consumed -- confirm).
//
// The flags are accumulated in _patternFlags. On failure _errorMessage is set
// to a translated description.
// NOTE(review): the return statements are not visible here.
bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
846
_tokenText.resize(0);
847
_validTokenText = true;
850
if (prefix == EqualPrefix)
851
_tokenText += QLatin1Char('=');
854
switch (_char.unicode()) {
856
case '\n': case '\r': // line terminator
857
_errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression literal");
865
// trailing flags: every identifier letter must map to a known flag
while (isIdentLetter(_char)) {
866
int flag = regExpFlagFromChar(_char);
868
_errorMessage = QCoreApplication::translate("QQmlParser", "Invalid regular expression flag '%0'")
872
_patternFlags |= flag;
876
_tokenLength = _codePtr - _tokenStartPtr - 1;
880
// regular expression backslash sequence
884
if (_char.isNull() || isLineTerminator()) {
885
_errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence");
894
// regular expression class
898
while (! _char.isNull() && ! isLineTerminator()) {
899
if (_char == QLatin1Char(']'))
901
else if (_char == QLatin1Char('\\')) {
902
// regular expression backslash sequence
906
if (_char.isNull() || isLineTerminator()) {
907
_errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence");
919
// the class loop ended without reaching the closing ']'
if (_char != QLatin1Char(']')) {
920
_errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression class");
925
scanChar(); // skip ]
937
// Returns true when the current character is '\n' or '\r'.
bool Lexer::isLineTerminator() const
939
return (_char == QLatin1Char('\n') || _char == QLatin1Char('\r'));
942
// Classifies ch as an identifier letter. ASCII letters, '$' and '_' are
// tested first, then the remaining ASCII range (code points below 128) is
// handled separately, and everything else is decided by
// QChar::isLetterOrNumber().
// NOTE(review): the results returned by the first two tests are not visible
// here (presumably true and false respectively) -- confirm.
bool Lexer::isIdentLetter(QChar ch)
944
// ASCII-biased, since all reserved words are ASCII, and hence the
945
// bulk of content to be parsed.
946
if ((ch >= QLatin1Char('a') && ch <= QLatin1Char('z'))
947
|| (ch >= QLatin1Char('A') && ch <= QLatin1Char('Z'))
948
|| ch == QLatin1Char('$')
949
|| ch == QLatin1Char('_'))
951
if (ch.unicode() < 128)
953
return ch.isLetterOrNumber();
956
// Returns true when c is one of the ASCII digits '0'..'9'.
bool Lexer::isDecimalDigit(ushort c)
958
return (c >= '0' && c <= '9');
961
// Returns true when c is an ASCII hexadecimal digit:
// '0'..'9', 'a'..'f' or 'A'..'F'.
bool Lexer::isHexDigit(QChar c)
963
return ((c >= QLatin1Char('0') && c <= QLatin1Char('9'))
964
|| (c >= QLatin1Char('a') && c <= QLatin1Char('f'))
965
|| (c >= QLatin1Char('A') && c <= QLatin1Char('F')));
968
// Returns true when c is one of the ASCII octal digits '0'..'7'.
bool Lexer::isOctalDigit(ushort c)
970
return (c >= '0' && c <= '7');
973
int Lexer::tokenKind() const
978
// Returns the offset of the current token's first character, counted in
// QChars from the start of the source text (_code).
int Lexer::tokenOffset() const
980
return _tokenStartPtr - _code.unicode();
983
int Lexer::tokenLength() const
988
int Lexer::tokenStartLine() const
993
// Returns the 1-based column of the current token's first character,
// measured from _tokenLinePtr (the start of the token's line).
int Lexer::tokenStartColumn() const
995
return _tokenStartPtr - _tokenLinePtr + 1;
998
// Returns the scanner's current line number, used as the line on which the
// current token ends.
int Lexer::tokenEndLine() const
1000
return _currentLineNumber;
1003
// Returns the distance from the start of the scanner's current line
// (_lastLinePtr) to the scan position (_codePtr), used as the end column of
// the current token.
int Lexer::tokenEndColumn() const
1005
return _codePtr - _lastLinePtr;
1008
QStringRef Lexer::tokenSpell() const
1013
double Lexer::tokenValue() const
1018
// Returns the text of the current token.
//  - When _validTokenText is set, a separately built text is used
//    (NOTE(review): that return statement is not visible here; presumably it
//    returns _tokenText).
//  - For a string literal the text is copied from the source without its
//    first and last character (the quotes).
//  - Otherwise the token's source characters are copied as they are.
QString Lexer::tokenText() const
1020
if (_validTokenText)
1023
if (_tokenKind == T_STRING_LITERAL)
1024
return QString(_tokenStartPtr + 1, _tokenLength - 2);
1026
return QString(_tokenStartPtr, _tokenLength);
1029
Lexer::Error Lexer::errorCode() const
1034
// Returns the translated message describing the most recent lexing error
// (empty after setCode() clears it).
QString Lexer::errorMessage() const
1036
return _errorMessage;
1039
// Updates _prohibitAutomaticSemicolon from the parentheses state: it is set
// right after a balanced parenthesised group (and the state is then reset to
// IgnoreParentheses), and cleared otherwise.
void Lexer::syncProhibitAutomaticSemicolon()
1041
if (_parenthesesState == BalancedParentheses) {
1042
// we have seen something like "if (foo)", which means we should
1043
// never insert an automatic semicolon at this point, since it would
1044
// then be expanded into an empty statement (ECMA-262 7.9.1)
1045
_prohibitAutomaticSemicolon = true;
1046
_parenthesesState = IgnoreParentheses;
1048
_prohibitAutomaticSemicolon = false;
1052
bool Lexer::prevTerminator() const
1057
// Returns the _followsClosingBrace flag, which records whether the token
// before the current one was a closing brace (T_RBRACE).
bool Lexer::followsClosingBrace() const
1059
return _followsClosingBrace;
1062
// Tells whether an automatic semicolon may be inserted in front of the given
// token: true when the token is T_RBRACE or EOF_SYMBOL, or when the current
// token follows a closing brace.
// NOTE(review): one further alternative of this condition is not visible
// here.
bool Lexer::canInsertAutomaticSemicolon(int token) const
1064
return token == T_RBRACE
1065
|| token == EOF_SYMBOL
1067
|| _followsClosingBrace;
1070
// Scans the leading ".pragma library" and ".import ..." directives of a
// JavaScript file and reports each one to the given Directives object.
//
// Accepted forms (each must fit on a single line):
//   .pragma library
//   .import "file" as Identifier
//   .import Module.Name <version> as Identifier
//
// directives - receives pragmaLibrary(), importFile() and importModule()
//              callbacks.
//
// Returns false as soon as something that is not a well-formed directive is
// found. NOTE(review): the successful return path is not visible here.
bool Lexer::scanDirectives(Directives *directives)
1073
// the directives are a Javascript-only extension.
1077
lex(); // fetch the first token
1079
if (_tokenKind != T_DOT)
1083
lex(); // skip T_DOT
1085
// remembered so that the end of the directive can be checked to be on the same line
const int lineNumber = tokenStartLine();
1087
if (! (_tokenKind == T_IDENTIFIER || _tokenKind == T_RESERVED_WORD))
1088
return false; // expected a valid QML/JS directive
1090
const QString directiveName = tokenText();
1092
if (! (directiveName == QLatin1String("pragma") ||
1093
directiveName == QLatin1String("import")))
1094
return false; // not a valid directive name
1096
// it must be a pragma or an import directive.
1097
if (directiveName == QLatin1String("pragma")) {
1099
if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library")))
1100
return false; // expected `library'
1102
// we found a .pragma library directive
1103
directives->pragmaLibrary();
1106
Q_ASSERT(directiveName == QLatin1String("import"));
1107
lex(); // skip .import
1111
bool fileImport = false; // file or uri import
1113
if (_tokenKind == T_STRING_LITERAL) {
1114
// .import T_STRING_LITERAL as T_IDENTIFIER
1117
pathOrUri = tokenText();
1119
} else if (_tokenKind == T_IDENTIFIER) {
1120
// .import T_IDENTIFIER (. T_IDENTIFIER)* T_NUMERIC_LITERAL as T_IDENTIFIER
1122
pathOrUri = tokenText();
1124
lex(); // skip the first T_IDENTIFIER
1125
// append the remaining ".Identifier" segments of the module URI
for (; _tokenKind == T_DOT; lex()) {
1126
if (lex() != T_IDENTIFIER)
1129
pathOrUri += QLatin1Char('.');
1130
pathOrUri += tokenText();
1133
if (_tokenKind != T_NUMERIC_LITERAL)
1134
return false; // expected the module version number
1136
version = tokenText();
1140
// recognize the mandatory `as' followed by the module name
1142
if (! (lex() == T_RESERVED_WORD && tokenText() == QLatin1String("as")))
1143
return false; // expected `as'
1145
if (lex() != T_IDENTIFIER)
1146
return false; // expected module name
1148
const QString module = tokenText();
1151
directives->importFile(pathOrUri, module);
1153
directives->importModule(pathOrUri, version, module);
1156
if (tokenStartLine() != lineNumber)
1157
return false; // the directives cannot span over multiple lines
1159
// fetch the first token after the .pragma/.import directive
1161
} while (_tokenKind == T_DOT);
1166
#include "qqmljskeywords_p.h"