1
/***************************************************************************
2
* This file is part of KDevelop *
3
* Copyright 2008 Niko Sams <niko.sams@gmail.com> *
5
* This program is free software; you can redistribute it and/or modify *
6
* it under the terms of the GNU Library General Public License as *
7
* published by the Free Software Foundation; either version 2 of the *
8
* License, or (at your option) any later version. *
10
* This program is distributed in the hope that it will be useful, *
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13
* GNU General Public License for more details. *
15
* You should have received a copy of the GNU Library General Public *
16
* License along with this program; if not, write to the *
17
* Free Software Foundation, Inc., *
18
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19
***************************************************************************/
23
#include "phpparser.h"
24
#include "tokenstream.h"
26
#include <QtCore/QString>
27
#include <QtCore/QStringList>
28
#include <QtCore/QRegExp>
29
#include <QtCore/QDebug>
36
// Creates a lexer over `content`.
//
// tokenStream   stream whose location table is told about newlines (see
//               createNewline(), which tolerates a null pointer).
// content       text to tokenize; its size is cached in m_contentSize.
// initialState  lexer state pushed last, i.e. the state lexing starts in.
//
// Scanning starts at position 0 with an empty token span and no pending
// __halt_compiler tracking (m_haltCompiler == 0).
Lexer::Lexer(TokenStream* tokenStream, const QString& content, int initialState):
37
m_content(content), m_tokenStream(tokenStream),
38
m_curpos(0), m_contentSize(m_content.size()),
39
m_tokenBegin(0), m_tokenEnd(0), m_haltCompiler(0)
41
// ErrorState is pushed first, so it ends up below every other state.
pushState(ErrorState);
42
// NOTE(review): the statement guarded by this condition is not visible here.
// Presumably it pushes HtmlState beneath DefaultState (nextTokenKind() pops
// back to HtmlState on a close tag) - confirm against the full file.
if (initialState == DefaultState) {
45
pushState(initialState);
48
// Returns the state stored `deepness` entries below the newest entry of
// m_state: 0 is the newest entry, 1 the one before it, and so on.
// No range check is done here; the caller must keep deepness < m_state.size().
// Callers in this file also invoke state() without an argument, so the
// declaration presumably defaults deepness to 0 - confirm in the header.
int Lexer::state(int deepness) const
50
return m_state.at(m_state.size() - deepness - 1);
52
// Debugging aid: writes the name of a lexer state to qDebug().
// NOTE(review): the line that initializes `s` and the first branch condition
// are not visible here; `s` is presumably the current state() - confirm.
void Lexer::printState()
56
qDebug() << "ErrorState";
57
else if (s == HtmlState)
58
qDebug() << "HtmlState";
59
else if (s == DefaultState)
60
qDebug() << "DefaultState";
63
else if (s == StringVariable)
64
qDebug() << "StringVariable";
65
else if (s == StringVariableBracket)
66
qDebug() << "StringVariableBracket";
67
else if (s == StringVariableObjectOperator)
68
qDebug() << "StringVariableObjectOperator";
69
else if (s == StringVariableCurly)
70
qDebug() << "StringVariableCurly";
71
else if (s == StringVarname)
72
qDebug() << "StringVarname";
73
else if (s == StringHeredoc)
74
qDebug() << "StringHeredoc";
75
else if (s == StringBacktick)
76
qDebug() << "StringBacktick";
79
// Makes `state` the current lexer state.
// NOTE(review): the body is not visible here; presumably it appends to
// m_state, the container state() reads from - confirm.
void Lexer::pushState(int state)
84
// Leaves the current lexer state.
// NOTE(review): the body is not visible here; presumably it removes the
// newest entry of m_state so the previous state becomes current - confirm.
void Lexer::popState()
89
// Determines the Parser::Token_* kind of the token that starts at m_curpos
// (collected in `token`) and records the token span in m_tokenBegin and
// m_tokenEnd.
//
// What is recognized depends on the current lexer state (see state()):
//  - outside PHP code: open tags and inline HTML,
//  - regular PHP code (also used inside {$...} within strings): whitespace,
//    numbers, variables, casts, operators, comments, string starts, keywords,
//  - interpolated strings, backtick strings and heredocs: the pieces between
//    and around embedded variables,
//  - the helper states entered after "$var[", "$var->" and "${".
//
// Newlines encountered while scanning are reported through createNewline().
//
// NOTE(review): a number of statements of this function (mostly the ones that
// advance the position, plus several case labels and else branches) are not
// visible here. The comments below only describe what the visible lines show.
int Lexer::nextTokenKind()
91
// Result when nothing below recognizes the input.
int token = Parser::Token_INVALID;
92
// Nothing left to lex.
if (m_curpos >= m_contentSize) {
95
createNewline(m_curpos);
98
// Raw pointer into the content; lookahead below is written as (it + k).
// NOTE(review): presumably moved forward to m_curpos on a line not visible
// here - confirm.
QChar* it = m_content.data();
100
m_tokenBegin = m_curpos;
103
// "<?" opens PHP code unless it is followed by "xml" in any letter case.
if (it->unicode() == '<' && (it + 1)->unicode() == '?'
104
///TODO: per-project configuration to set whether we use short tags
105
/// or not. In the former case we'd need to raise an error here
106
&& !( (it + 2)->toLower().unicode() == 'x'
107
&& (it + 3)->toLower().unicode() == 'm'
108
&& (it + 4)->toLower().unicode() == 'l' ) )
110
token = Parser::Token_OPEN_TAG;
111
// "<?=" is the open tag with implicit echo.
if ((it + 2)->unicode() == '=') {
112
token = Parser::Token_OPEN_TAG_WITH_ECHO;
115
// "<?php" (any letter case) must be followed by a whitespace character.
} else if ((it + 2)->toLower().unicode() == 'p'
116
&& (it + 3)->toLower().unicode() == 'h'
117
&& (it + 4)->toLower().unicode() == 'p'
118
&& (it + 5)->isSpace()) {
120
// A newline right after "<?php" is reported as a line break.
if ((it + 5)->unicode() == '\n') createNewline(m_curpos + 1);
123
// From here on regular PHP code is lexed.
pushState(DefaultState);
125
// No open tag at this position: everything is inline HTML.
token = Parser::Token_INLINE_HTML;
126
while (m_curpos < m_contentSize) {
127
if (it->unicode() == '\n') createNewline(m_curpos);
128
// Look one character ahead for the next "<?".
if ((it + 1)->unicode() == '<' && (it + 2)->unicode() == '?') {
137
// Code inside {$...} of a string is lexed by the same rules that follow.
// NOTE(review): other case labels sharing this body are not visible here.
case StringVariableCurly: {
139
// A run of whitespace forms one token; contained newlines are reported.
token = Parser::Token_WHITESPACE;
140
while (m_curpos < m_contentSize && it->isSpace()) {
141
if (it->unicode() == '\n') createNewline(m_curpos);
146
// Number literal: starts with a digit, or with '.' followed by a digit.
} else if (it->isDigit() || (it->unicode() == '.' && (it + 1)->isDigit())) {
147
QString num;bool hasPoint = false;
149
// NOTE(review): only a lowercase 'x' is accepted as hex prefix here, while
// PHP itself also accepts "0X" - confirm whether that is intended.
if (it->unicode() == '0' && (it + 1)->unicode() == 'x') {
154
// Accepted characters: at most one '.' in a non-hex literal, and the
// letters a-f (any case) in a hex literal.
while (m_curpos < m_contentSize && (
156
|| (!hex && !hasPoint && it->unicode() == '.')
157
|| (hex && (it->toLower() == 'a' || it->toLower() == 'b' ||
158
it->toLower() == 'c' || it->toLower() == 'd' ||
159
it->toLower() == 'e' || it->toLower() == 'f')))) {
160
if (it->unicode() == '.') hasPoint = true;
165
// Exponent: 'e' or 'E' followed by a digit, or by a sign and a digit.
if (!hex && it->toLower() == 'e' &&
166
((it + 1)->isDigit() ||
167
(((it + 1)->unicode() == '-' || (it + 1)->unicode() == '+') && (it + 2)->isDigit()))) {
169
token = Parser::Token_DNUMBER;
172
if (it->unicode() == '-' || it->unicode() == '+') {
176
while (m_curpos < m_contentSize && (it->isDigit())) {
184
token = Parser::Token_DNUMBER;
187
//check if string can be converted to long
188
//if we get an overflow use double
189
num.toLong(&ok, hex ? 16 : 10);
191
token = Parser::Token_LNUMBER;
193
token = Parser::Token_DNUMBER;
198
// "$name": processVariable() recognizes it.
} else if (processVariable(it)) {
199
token = Parser::Token_VARIABLE;
200
} else if (it->unicode() == '$') {
201
//when it was not recognized as variable
202
token = Parser::Token_DOLLAR;
203
} else if (it->unicode() == '}') {
204
token = Parser::Token_RBRACE;
205
// NOTE(review): body not visible here; presumably leaves the curly state.
if (state() == StringVariableCurly) {
208
} else if (it->unicode() == '{') {
209
token = Parser::Token_LBRACE;
210
// A nested '{' inside {$...} adds another StringVariableCurly level.
if (state() == StringVariableCurly) {
211
pushState(StringVariableCurly);
213
} else if (it->unicode() == ')') {
214
token = Parser::Token_RPAREN;
215
} else if (it->unicode() == '(') {
217
// Look ahead for a cast: '(' optional spaces, letters, optional spaces, ')'.
int pos = m_curpos + 1;
218
while (pos < m_contentSize && it->isSpace()) {
223
while (pos < m_contentSize && it->isLetter()) {
228
while (pos < m_contentSize && it->isSpace()) {
232
// The type name is compared in lower case, so casts are case-insensitive.
name = name.toLower();
233
if (it->unicode() == ')') {
234
if (name == "int" || name == "integer") {
235
token = Parser::Token_INT_CAST;
236
} else if (name == "real" || name == "double" || name == "float") {
237
token = Parser::Token_DOUBLE_CAST;
238
} else if (name == "string") {
239
token = Parser::Token_STRING_CAST;
240
// "(binary)" yields the same token as "(string)".
} else if (name == "binary") {
242
token = Parser::Token_STRING_CAST;
243
} else if (name == "array") {
244
token = Parser::Token_ARRAY_CAST;
245
} else if (name == "object") {
246
token = Parser::Token_OBJECT_CAST;
247
} else if (name == "bool" || name == "boolean") {
248
token = Parser::Token_BOOL_CAST;
249
} else if (name == "unset") {
250
token = Parser::Token_UNSET_CAST;
252
token = Parser::Token_LPAREN;
255
// NOTE(review): body not visible here; presumably a recognized cast commits
// the lookahead position - confirm.
if (token != Parser::Token_LPAREN) {
259
token = Parser::Token_LPAREN;
261
} else if (it->unicode() == ']') {
262
token = Parser::Token_RBRACKET;
263
} else if (it->unicode() == '[') {
264
token = Parser::Token_LBRACKET;
265
} else if (it->unicode() == ',') {
266
token = Parser::Token_COMMA;
267
} else if (it->unicode() == '@') {
268
token = Parser::Token_AT;
269
// '!' family: "!==", "!=", "!".
} else if (it->unicode() == '!') {
270
if ((it + 1)->unicode() == '=') {
272
if ((it + 2)->unicode() == '=') {
274
token = Parser::Token_IS_NOT_IDENTICAL;
276
token = Parser::Token_IS_NOT_EQUAL;
279
token = Parser::Token_BANG;
281
// '<' family: heredoc start "<<<", "<<=", "<<", "<=", "<>", "<".
} else if (it->unicode() == '<') {
282
if ((it + 1)->unicode() == '<') {
284
// A heredoc is not started while lexing inside {$...}.
if ((it + 2)->unicode() == '<' && state() != StringVariableCurly) {
285
//HEREDOC string (<<< EOD\nfoo\nEOD;\n)
287
// Skip blanks and tabs between "<<<" and the identifier.
while (m_curpos + pos < m_contentSize &&
288
((it + pos)->unicode() == ' ' || (it + pos)->unicode() == '\t')) {
291
if ((it + pos)->isLetter() || (it + pos)->unicode() == '_') { //identifier must start with a letter or underscore
292
// Collect the identifier; isHeredocEnd() compares against it later.
m_heredocIdentifier.clear();
293
while (m_curpos + pos < m_contentSize &&
294
((it + pos)->isDigit() || (it + pos)->isLetter() || (it + pos)->unicode() == '_')) {
295
m_heredocIdentifier.append(*(it + pos));
298
if ((it + pos)->unicode() == '\n') {
299
//identifier must be followed by newline, newline is part of HEREDOC token
300
token = Parser::Token_START_HEREDOC;
301
pushState(StringHeredoc);
303
createNewline(m_curpos);
308
// Not a heredoc after all: plain shift operators.
if (token != Parser::Token_START_HEREDOC) {
309
if ((it + 2)->unicode() == '=') {
311
token = Parser::Token_SL_ASSIGN;
313
token = Parser::Token_SL;
316
} else if ((it + 1)->unicode() == '=') {
318
token = Parser::Token_IS_SMALLER_OR_EQUAL;
319
// "<>" produces the same token as "!=".
} else if ((it + 1)->unicode() == '>') {
321
token = Parser::Token_IS_NOT_EQUAL;
323
token = Parser::Token_IS_SMALLER;
325
// '>' family: ">>=", ">>", ">=", ">".
} else if (it->unicode() == '>') {
326
if ((it + 1)->unicode() == '>') {
328
if ((it + 2)->unicode() == '=') {
330
token = Parser::Token_SR_ASSIGN;
332
token = Parser::Token_SR;
334
} else if ((it + 1)->unicode() == '=') {
336
token = Parser::Token_IS_GREATER_OR_EQUAL;
338
token = Parser::Token_IS_GREATER;
340
} else if (it->unicode() == '~') {
341
token = Parser::Token_TILDE;
342
} else if (it->unicode() == ':') {
343
if ((it + 1)->unicode() == ':') {
345
token = Parser::Token_PAAMAYIM_NEKUDOTAYIM;
347
token = Parser::Token_COLON;
349
} else if (it->unicode() == '?') {
350
if ((it + 1)->unicode() == '>') {
351
//accept CLOSE_TAG inside StringVariableCurly too, as php does
352
token = Parser::Token_CLOSE_TAG;
354
// Drop every state above HtmlState, however deeply nested we are.
while (state() != HtmlState) popState();
356
token = Parser::Token_QUESTION;
358
} else if (it->unicode() == '-' && (it + 1)->unicode() == '>') {
360
token = Parser::Token_OBJECT_OPERATOR;
361
// With an identifier character after "->" the next token is produced by
// the StringVariableObjectOperator case further down.
if (isValidVariableIdentifier(it + 2)) {
362
pushState(StringVariableObjectOperator);
364
} else if (it->unicode() == '%') {
365
if ((it + 1)->unicode() == '=') {
367
token = Parser::Token_MOD_ASSIGN;
369
token = Parser::Token_MOD;
371
// '/' family: "/=", "//" and "///" line comments, "/*" block comments, "/".
} else if (it->unicode() == '/') {
372
if ((it + 1)->unicode() == '=') {
374
token = Parser::Token_DIV_ASSIGN;
375
} else if ((it + 1)->unicode() == '/') {
376
//accept COMMENT inside StringVariableCurly too, as php does
377
// "///" marks a doc comment, plain "//" an ordinary comment.
if ((it + 2)->unicode() == '/') {
378
token = Parser::Token_DOC_COMMENT;
380
token = Parser::Token_COMMENT;
382
// A line comment is also ended by a close tag "?>".
while (m_curpos < m_contentSize &&
383
!((it)->unicode() == '?' && (it + 1)->unicode() == '>')) {
384
if ( it->unicode() == '\n' ) {
385
createNewline(m_curpos);
386
if ( token == Parser::Token_COMMENT ) {
389
// lookahead to check whether this doc comment spans multiple lines
391
int pos = m_curpos + 1;
392
// Skip whitespace at the start of the next line, but not another newline.
while ( pos < m_contentSize && (it2)->isSpace() && (it2)->unicode() != '\n' ) {
396
if ( it2->unicode() == '/' && (it2 + 1)->unicode() == '/'
397
&& (it2 + 2)->unicode() == '/' ) {
398
// seems to be a multi-line doc-comment
403
// not a multi-line doc-comment
411
} else if ((it + 1)->unicode() == '*') {
412
//accept COMMENT inside StringVariableCurly too, as php does
413
// Only "/**" followed by whitespace counts as a doc comment.
if ((it + 2)->unicode() == '*' && (it + 3)->isSpace()) {
414
token = Parser::Token_DOC_COMMENT;
416
token = Parser::Token_COMMENT;
420
// Scan up to the closing "*/", reporting newlines on the way.
while (m_curpos < m_contentSize && !(it->unicode() == '*' && (it + 1)->unicode() == '/')) {
421
if (it->unicode() == '\n') {
422
createNewline(m_curpos);
429
token = Parser::Token_DIV;
431
} else if (it->unicode() == '#') {
432
//accept COMMENT inside StringVariableCurly too, as php does
433
token = Parser::Token_COMMENT;
434
while (m_curpos < m_contentSize) {
435
if (it->unicode() == '\n') {
436
createNewline(m_curpos);
442
} else if (it->unicode() == '^') {
443
if ((it + 1)->unicode() == '=') {
445
token = Parser::Token_XOR_ASSIGN;
447
token = Parser::Token_BIT_XOR;
449
} else if (it->unicode() == '*') {
450
if ((it + 1)->unicode() == '=') {
452
token = Parser::Token_MUL_ASSIGN;
454
token = Parser::Token_MUL;
456
} else if (it->unicode() == '|') {
457
if ((it + 1)->unicode() == '|') {
459
token = Parser::Token_BOOLEAN_OR;
460
} else if ((it + 1)->unicode() == '=') {
462
token = Parser::Token_OR_ASSIGN;
464
token = Parser::Token_BIT_OR;
466
} else if (it->unicode() == '&') {
467
if ((it + 1)->unicode() == '&') {
469
token = Parser::Token_BOOLEAN_AND;
470
} else if ((it + 1)->unicode() == '=') {
472
token = Parser::Token_AND_ASSIGN;
474
token = Parser::Token_BIT_AND;
476
} else if (it->unicode() == '+') {
477
if ((it + 1)->unicode() == '+') {
479
token = Parser::Token_INC;
480
} else if ((it + 1)->unicode() == '=') {
482
token = Parser::Token_PLUS_ASSIGN;
484
token = Parser::Token_PLUS;
486
} else if (it->unicode() == '-') {
487
if ((it + 1)->unicode() == '-') {
489
token = Parser::Token_DEC;
490
} else if ((it + 1)->unicode() == '=') {
492
token = Parser::Token_MINUS_ASSIGN;
494
token = Parser::Token_MINUS;
496
} else if (it->unicode() == '.') {
497
if ((it + 1)->unicode() == '=') {
499
token = Parser::Token_CONCAT_ASSIGN;
501
token = Parser::Token_CONCAT;
504
} else if (it->unicode() == ';') {
505
token = Parser::Token_SEMICOLON;
506
// Single-quoted string: one token up to the next unescaped quote.
} else if (it->unicode() == '\'') {
507
token = Parser::Token_CONSTANT_ENCAPSED_STRING;
510
int startPos = m_curpos;
511
while (m_curpos < m_contentSize
512
&& (it->unicode() != '\'' || isEscapedWithBackslash(it, m_curpos, startPos))) {
513
if (it->unicode() == '\n') createNewline(m_curpos);
517
// if the string is never terminated, make sure we don't overflow the boundaries
518
if ( m_curpos == m_contentSize ) {
521
// Double-quoted string: scan ahead to the closing quote and check whether
// the string contains an embedded variable.
} else if (it->unicode() == '"') {
525
bool foundVar = false;
526
while (m_curpos + stringSize < m_contentSize
527
&& (it->unicode() != '"' || isEscapedWithBackslash(it, m_curpos + stringSize, m_curpos)))
529
// An unescaped '$' followed by '{' or by the start of an identifier.
if (it->unicode() == '$' && !isEscapedWithBackslash(it, m_curpos + stringSize, m_curpos)
530
&& ((it + 1)->unicode() == '{'
531
|| (isValidVariableIdentifier(it + 1) && !(it + 1)->isDigit()))) {
539
// if the string is never terminated, make sure we don't overflow the boundaries
540
if ( m_curpos + stringSize == m_contentSize ) {
543
// Constant string: the whole literal becomes one token and the newlines it
// contains are reported.
token = Parser::Token_CONSTANT_ENCAPSED_STRING;
545
for (int j = 0; j < stringSize; j++) {
546
if (it->unicode() == '\n') {
547
createNewline(m_curpos + j);
551
m_curpos += stringSize;
553
// properly set the token pos to the starting double quote
555
// With an embedded variable only the opening quote is returned here.
token = Parser::Token_DOUBLE_QUOTE;
558
} else if (it->unicode() == '`') {
559
token = Parser::Token_BACKTICK;
560
pushState(StringBacktick);
561
// '=' family: "===", "==", "=>", "=".
} else if (it->unicode() == '=') {
562
if ((it + 1)->unicode() == '=') {
564
if ((it + 2)->unicode() == '=') {
566
token = Parser::Token_IS_IDENTICAL;
568
token = Parser::Token_IS_EQUAL;
570
} else if ((it + 1)->unicode() == '>') {
572
token = Parser::Token_DOUBLE_ARROW;
574
token = Parser::Token_ASSIGN;
576
// Identifier or keyword: identifier characters, not starting with a digit.
} else if (isValidVariableIdentifier(it) && !it->isDigit()) {
578
while (m_curpos < m_contentSize && (isValidVariableIdentifier(it))) {
584
// Keywords are matched on the lower-cased word, so they are
// case-insensitive. A word that matches nothing below is a plain STRING.
name = name.toLower();
585
if (name == "echo") {
586
token = Parser::Token_ECHO;
587
} else if (name == "include") {
588
token = Parser::Token_INCLUDE;
589
} else if (name == "include_once") {
590
token = Parser::Token_INCLUDE_ONCE;
591
} else if (name == "require") {
592
token = Parser::Token_REQUIRE;
593
} else if (name == "require_once") {
594
token = Parser::Token_REQUIRE_ONCE;
595
} else if (name == "eval") {
596
token = Parser::Token_EVAL;
597
} else if (name == "print") {
598
token = Parser::Token_PRINT;
599
} else if (name == "abstract") {
600
token = Parser::Token_ABSTRACT;
601
} else if (name == "break") {
602
token = Parser::Token_BREAK;
603
} else if (name == "case") {
604
token = Parser::Token_CASE;
605
} else if (name == "catch") {
606
token = Parser::Token_CATCH;
607
} else if (name == "class") {
608
token = Parser::Token_CLASS;
609
} else if (name == "const") {
610
token = Parser::Token_CONST;
611
} else if (name == "continue") {
612
token = Parser::Token_CONTINUE;
613
} else if (name == "default") {
614
token = Parser::Token_DEFAULT;
615
} else if (name == "do") {
616
token = Parser::Token_DO;
617
} else if (name == "else") {
618
token = Parser::Token_ELSE;
619
} else if (name == "extends") {
620
token = Parser::Token_EXTENDS;
621
} else if (name == "final") {
622
token = Parser::Token_FINAL;
623
} else if (name == "for") {
624
token = Parser::Token_FOR;
625
} else if (name == "if") {
626
token = Parser::Token_IF;
627
} else if (name == "implements") {
628
token = Parser::Token_IMPLEMENTS;
629
} else if (name == "instanceof") {
630
token = Parser::Token_INSTANCEOF;
631
} else if (name == "interface") {
632
token = Parser::Token_INTERFACE;
633
} else if (name == "new") {
634
token = Parser::Token_NEW;
635
} else if (name == "private") {
636
token = Parser::Token_PRIVATE;
637
} else if (name == "protected") {
638
token = Parser::Token_PROTECTED;
639
} else if (name == "public") {
640
token = Parser::Token_PUBLIC;
641
} else if (name == "return") {
642
token = Parser::Token_RETURN;
643
} else if (name == "static") {
644
token = Parser::Token_STATIC;
645
} else if (name == "switch") {
646
token = Parser::Token_SWITCH;
647
} else if (name == "throw") {
648
token = Parser::Token_THROW;
649
} else if (name == "try") {
650
token = Parser::Token_TRY;
651
} else if (name == "while") {
652
token = Parser::Token_WHILE;
653
} else if (name == "clone") {
654
token = Parser::Token_CLONE;
655
// "die" yields the same token as "exit".
} else if (name == "exit" || name == "die") {
656
token = Parser::Token_EXIT;
657
} else if (name == "elseif") {
658
token = Parser::Token_ELSEIF;
659
} else if (name == "endif") {
660
token = Parser::Token_ENDIF;
661
} else if (name == "endwhile") {
662
token = Parser::Token_ENDWHILE;
663
} else if (name == "endfor") {
664
token = Parser::Token_ENDFOR;
665
} else if (name == "foreach") {
666
token = Parser::Token_FOREACH;
667
} else if (name == "endforeach") {
668
token = Parser::Token_ENDFOREACH;
669
} else if (name == "declare") {
670
token = Parser::Token_DECLARE;
671
} else if (name == "enddeclare") {
672
token = Parser::Token_ENDDECLARE;
673
} else if (name == "as") {
674
token = Parser::Token_AS;
675
} else if (name == "endswitch") {
676
token = Parser::Token_ENDSWITCH;
677
} else if (name == "function") {
678
token = Parser::Token_FUNCTION;
679
} else if (name == "use") {
680
token = Parser::Token_USE;
681
} else if (name == "global") {
682
token = Parser::Token_GLOBAL;
683
} else if (name == "var") {
684
token = Parser::Token_VAR;
685
} else if (name == "unset") {
686
token = Parser::Token_UNSET;
687
} else if (name == "isset") {
688
token = Parser::Token_ISSET;
689
} else if (name == "empty") {
690
token = Parser::Token_EMPTY;
691
} else if (name == "__halt_compiler") {
692
token = Parser::Token_HALT_COMPILER;
693
} else if (name == "list") {
694
token = Parser::Token_LIST;
695
} else if (name == "array") {
696
token = Parser::Token_ARRAY;
697
} else if (name == "__class__") {
698
token = Parser::Token_CLASS_C;
699
} else if (name == "__method__") {
700
token = Parser::Token_METHOD_C;
701
} else if (name == "__function__") {
702
token = Parser::Token_FUNC_C;
703
} else if (name == "__line__") {
704
token = Parser::Token_LINE;
705
} else if (name == "__file__") {
706
token = Parser::Token_FILE;
707
} else if (name == "or") {
708
token = Parser::Token_LOGICAL_OR;
709
} else if (name == "and") {
710
token = Parser::Token_LOGICAL_AND;
711
} else if (name == "xor") {
712
token = Parser::Token_LOGICAL_XOR;
714
token = Parser::Token_STRING;
724
// Inside an interpolated string, backtick string or heredoc.
// NOTE(review): the case labels of this section are not visible here.
// state(1) is consulted as well because StringVariable may sit on top of the
// string state right after an embedded variable.
if ((state() == String || state(1) == String) && it->unicode() == '"') {
725
token = Parser::Token_DOUBLE_QUOTE;
726
if (state() == StringVariable) popState();
728
} else if ((state() == StringBacktick || state(1) == StringBacktick) && it->unicode() == '`') {
729
token = Parser::Token_BACKTICK;
730
if (state() == StringVariable) popState();
732
} else if ((state() == StringHeredoc || state(1) == StringHeredoc) && isHeredocEnd(it)) {
733
token = Parser::Token_END_HEREDOC;
734
// Move m_curpos forward by the identifier length minus one.
m_curpos += m_heredocIdentifier.length() - 1;
735
if (state() == StringVariable) popState();
737
} else if (processVariable(it)) {
738
token = Parser::Token_VARIABLE;
739
// StringVariable enables the "[" and "->" handling below for the next token.
if (state() != StringVariable) pushState(StringVariable);
740
// "${" introduces a variable name in braces.
} else if (state() != StringVariable && it->unicode() == '$' && (it + 1)->unicode() == '{') {
741
token = Parser::Token_DOLLAR_OPEN_CURLY_BRACES;
744
//check if a valid variable follows
745
if ((isValidVariableIdentifier(it) && !it->isDigit())) {
746
pushState(StringVarname);
749
// "$var[": the index is lexed by the StringVariableBracket case.
} else if (state() == StringVariable && it->unicode() == '[') {
750
token = Parser::Token_LBRACKET;
751
pushState(StringVariableBracket);
752
// "{$" followed by the start of an identifier or by '{'.
} else if (state() != StringVariable && it->unicode() == '{' && (it + 1)->unicode() == '$'
753
&& ((isValidVariableIdentifier(it + 2) && !(it + 2)->isDigit()) || (it + 2)->unicode() == '{')) {
754
token = Parser::Token_CURLY_OPEN;
755
pushState(StringVariableCurly);
756
// "$var->name": only when an identifier start follows the arrow.
} else if (state() == StringVariable
757
&& it->unicode() == '-' && (it + 1)->unicode() == '>'
758
&& isValidVariableIdentifier(it + 2) && !(it + 2)->isDigit()) {
759
token = Parser::Token_OBJECT_OPERATOR;
761
pushState(StringVariableObjectOperator);
763
// Anything else is literal string content up to the next special construct.
if (state() == StringVariable) popState();
764
token = Parser::Token_ENCAPSED_AND_WHITESPACE;
765
int startPos = m_curpos;
766
while (m_curpos < m_contentSize) {
767
// An unescaped "${", "{$name" or "$name" starts an embedded variable.
if (!isEscapedWithBackslash(it, m_curpos, startPos) &&
768
((it->unicode() == '$' && (it + 1)->unicode() == '{') ||
769
(it->unicode() == '{' && (it + 1)->unicode() == '$' && isValidVariableIdentifier(it + 2)) ||
770
(it->unicode() == '$' && isValidVariableIdentifier(it + 1) && !(it + 1)->isDigit()))) {
771
//variable is next ${var} or {$var}
774
// An unescaped closing delimiter of the enclosing string.
if (state() == String && it->unicode() == '"'
775
&& !isEscapedWithBackslash(it, m_curpos, startPos)) {
779
if (state() == StringBacktick && it->unicode() == '`'
780
&& !isEscapedWithBackslash(it, m_curpos, startPos)) {
785
if (it->unicode() == '\n') createNewline(m_curpos);
789
// The heredoc terminator is only looked for directly after a newline.
if (state() == StringHeredoc && (it - 1)->unicode() == '\n') {
790
//check for end of heredoc (\nEOD;\n)
791
if (state() == StringHeredoc && isHeredocEnd(it)) {
799
// Index part of "$var[...]" inside a string.
case StringVariableBracket:
800
if (it->unicode() == ']') {
801
token = Parser::Token_RBRACKET;
804
// A run of digits is a numeric index.
} else if (it->isDigit()) {
805
token = Parser::Token_NUM_STRING;
806
while (m_curpos < m_contentSize && it->isDigit()) {
812
// Everything else up to ']' is one STRING token.
token = Parser::Token_STRING;
813
while (m_curpos < m_contentSize && (it->unicode() != ']')) {
814
if (it->unicode() == '\n') createNewline(m_curpos);
821
// Member name following "->".
case StringVariableObjectOperator:
822
token = Parser::Token_STRING;
823
while (m_curpos < m_contentSize && isValidVariableIdentifier(it)) {
829
if (state() == StringVariable) popState();
833
// Variable name after "${"; the rest up to '}' is lexed in StringVariableCurly.
// NOTE(review): the case label for this section is not visible here,
// presumably StringVarname - confirm.
pushState(StringVariableCurly);
834
token = Parser::Token_STRING_VARNAME;
835
while (m_curpos < m_contentSize && isValidVariableIdentifier(it)) {
842
token = Parser::Token_INVALID;
845
// The position ended up beyond the end of the content.
// NOTE(review): the handling itself is not visible here.
if (m_curpos > m_contentSize) {
850
m_tokenEnd = m_curpos;
853
// m_haltCompiler is non-zero once a __halt_compiler keyword has been seen
// and then tracks the tokens that follow it: 1 waits for '(', 2 for ')',
// 3 for ';'. Whitespace and comments have their own branch, and so does the
// value 4. NOTE(review): the branch bodies are not visible here.
if (m_haltCompiler) {
854
//look for __halt_compiler(); and stop lexer there
855
if (m_haltCompiler == 4) {
857
} else if (token == Parser::Token_WHITESPACE || token == Parser::Token_COMMENT || token == Parser::Token_DOC_COMMENT) {
859
} else if (m_haltCompiler == 1 && token == Parser::Token_LPAREN) {
861
} else if (m_haltCompiler == 2 && token == Parser::Token_RPAREN) {
863
} else if (m_haltCompiler == 3 && token == Parser::Token_SEMICOLON) {
869
// First __halt_compiler keyword: tracking starts here.
if (token == Parser::Token_HALT_COMPILER && !m_haltCompiler) {
875
// Accessor for the start of the most recently lexed token.
// NOTE(review): the body is not visible here; presumably it returns
// m_tokenBegin, which nextTokenKind() sets - confirm.
qint64 Lexer::tokenBegin() const
880
// Accessor for the end of the most recently lexed token.
// NOTE(review): the body is not visible here; presumably it returns
// m_tokenEnd, which nextTokenKind() sets - confirm.
qint64 Lexer::tokenEnd() const
885
// Checks whether the text at `it` is the terminator of the current heredoc:
// the identifier stored in m_heredocIdentifier, followed either by a newline
// or by ';' and a newline.
// `it` is expected to point at position m_curpos of the content, because the
// end-of-content check below is based on m_curpos.
// NOTE(review): the declaration of lineStart and the return statements are
// not visible here.
bool Lexer::isHeredocEnd(QChar* it)
887
int identiferLen = m_heredocIdentifier.length();
889
// Copy as many characters as the identifier is long, stopping early at the
// end of the content.
for (int i = 0; i < identiferLen; i++) {
890
if (m_curpos + i >= m_contentSize) break;
891
lineStart.append(*(it + i));
893
if (lineStart == m_heredocIdentifier &&
894
((it + identiferLen)->unicode() == '\n'
895
|| ((it + identiferLen)->unicode() == ';' &&
896
(it + identiferLen + 1)->unicode() == '\n'))) {
902
// Used for strings, to check whether a character such as " is escaped:
// \" is escaped, \\" is not.
//
// it        pointer used to inspect the backslashes in front of the character
// curPos    position of the character within the content
// startPos  lower bound for the backward scan (start of the string)
//
// Returns true when the number of backslashes counted in `cnt` is odd.
// NOTE(review): the lines that move `it` / `curPos` and increment `cnt` are
// not visible here.
bool Lexer::isEscapedWithBackslash(QChar* it, int curPos, int startPos)
907
while (curPos > startPos && it->unicode() == '\\') {
911
return (cnt % 2) == 1;
914
// Recognizes a variable at `it`: a '$' followed by an identifier character
// that is not a digit, then scans over the identifier characters.
// nextTokenKind() emits Token_VARIABLE when this returns true.
// NOTE(review): the definition of c2 (presumably it + 1), the position
// updates inside the loop and the return statements are not visible here.
bool Lexer::processVariable(QChar* it)
917
if (it->unicode() == '$' && (isValidVariableIdentifier(c2) && !c2->isDigit())) {
920
while (m_curpos < m_contentSize
921
&& (isValidVariableIdentifier(it))) {
931
// Returns true when the character at `it` may appear inside an identifier:
// a letter, a digit, '_' or any code unit above 0x7f.
// Digits are accepted here, so callers that test the first character of an
// identifier add their own !isDigit() check.
bool Lexer::isValidVariableIdentifier(QChar* it)
933
return it->isLetter() || it->isDigit() || it->unicode() == '_' || it->unicode() > 0x7f;
936
// Reports a line break at content position `pos` to the location table of
// the token stream. Does nothing when the lexer has no token stream.
void Lexer::createNewline(int pos)
938
if (m_tokenStream) m_tokenStream->locationTable()->newline(pos);