~ubuntu-branches/ubuntu/utopic/kdevelop-php/utopic

« back to all changes in this revision

Viewing changes to parser/phplexer.cpp

Committer: Bazaar Package Importer
Author(s): Fathi Boudra
Date: 2010-01-17 17:10:22 UTC
Revision ID: james.westby@ubuntu.com-20100117171022-q2xlgd9ekewo2ijx

Tags: upstream-1.0.0~beta2

Import upstream version 1.0.0~beta2

files added:

.kateconfig

CMakeLists.txt

HACKING

Messages.sh

TODO

app_templates

app_templates/CMakeLists.txt

app_templates/simple_phpapp

app_templates/simple_phpapp/%{APPNAME}.php

app_templates/simple_phpapp/%{PROJECTDIRNAME}.kdev4

app_templates/simple_phpapp/simple_phpapp.kdevtemplate

cmake

cmake/FindKDevPlatform.cmake

completion

completion/CMakeLists.txt

completion/context.cpp

completion/context.h

completion/helpers.cpp

completion/helpers.h

completion/implementationitem.cpp

completion/implementationitem.h

completion/includefileitem.cpp

completion/includefileitem.h

completion/item.cpp

completion/item.h

completion/keyworditem.cpp

completion/keyworditem.h

completion/model.cpp

completion/model.h

completion/phpcompletionexport.h

completion/tests

completion/tests/CMakeLists.txt

completion/tests/test_completion.cpp

completion/tests/test_completion.h

completion/worker.cpp

completion/worker.h

create_functions.php

duchain

duchain/CMakeLists.txt

duchain/classdeclaration.cpp

duchain/classdeclaration.h

duchain/classmethoddeclaration.cpp

duchain/classmethoddeclaration.h

duchain/contextbuilder.cpp

duchain/contextbuilder.h

duchain/declarationbuilder.cpp

duchain/declarationbuilder.h

duchain/dumptypes.cpp

duchain/dumptypes.h

duchain/editorintegrator.cpp

duchain/editorintegrator.h

duchain/expressionevaluationresult.cpp

duchain/expressionevaluationresult.h

duchain/expressionparser.cpp

duchain/expressionparser.h

duchain/expressionvisitor.cpp

duchain/expressionvisitor.h

duchain/functiondeclaration.cpp

duchain/functiondeclaration.h

duchain/helper.cpp

duchain/helper.h

duchain/includebuilder.cpp

duchain/includebuilder.h

duchain/integraltypeextended.cpp

duchain/integraltypeextended.h

duchain/navigation

duchain/navigation/declarationnavigationcontext.cpp

duchain/navigation/declarationnavigationcontext.h

duchain/navigation/includenavigationcontext.cpp

duchain/navigation/includenavigationcontext.h

duchain/navigation/magicconstantnavigationcontext.cpp

duchain/navigation/magicconstantnavigationcontext.h

duchain/navigation/navigationwidget.cpp

duchain/navigation/navigationwidget.h

duchain/phpduchainexport.h

duchain/phpducontext.cpp

duchain/phpducontext.h

duchain/predeclarationbuilder.cpp

duchain/predeclarationbuilder.h

duchain/structuretype.cpp

duchain/structuretype.h

duchain/tests

duchain/tests/CMakeLists.txt

duchain/tests/benchmarks.cpp

duchain/tests/benchmarks.h

duchain/tests/duchaintestbase.cpp

duchain/tests/duchaintestbase.h

duchain/tests/test_duchain.cpp

duchain/tests/test_duchain.h

duchain/tests/test_expressionparser.cpp

duchain/tests/test_expressionparser.h

duchain/tests/test_uses.cpp

duchain/tests/test_uses.h

duchain/typebuilder.cpp

duchain/typebuilder.h

duchain/usebuilder.cpp

duchain/usebuilder.h

duchain/variabledeclaration.cpp

duchain/variabledeclaration.h

kdevphpsupport.desktop

parser

parser/CMakeLists.txt

parser/generated

parser/generated/kdevelop-pg-qt

parser/generated/kdevelop-pg-qt/CMakeLists.txt

parser/generated/kdevelop-pg-qt/kdev-pg-allocator.h

parser/generated/kdevelop-pg-qt/kdev-pg-list.h

parser/generated/kdevelop-pg-qt/kdev-pg-location-table.h

parser/generated/kdevelop-pg-qt/kdev-pg-memory-pool.h

parser/generated/kdevelop-pg-qt/kdev-pg-token-stream.h

parser/generated/phpast.h

parser/generated/phpdebugvisitor.h

parser/generated/phpdefaultvisitor.cpp

parser/generated/phpdefaultvisitor.h

parser/generated/phpparser.cpp

parser/generated/phpparser.h

parser/generated/phptokentext.h

parser/generated/phpvisitor.cpp

parser/generated/phpvisitor.h

parser/main.cpp

parser/parserexport.h

parser/parsesession.cpp

parser/parsesession.h

parser/php.g

parser/phplexer.cpp

parser/phplexer.h

parser/test

parser/test/CMakeLists.txt

parser/test/lexertest.cpp

parser/test/lexertest.h

parser/test/test-tokenize.php

parser/test/tests.php

parser/test/tokenize.php

parser/tokenstream.h

php.kdev4

phpfunctions.php

phplanguagesupport.cpp

phplanguagesupport.h

phpparsejob.cpp

phpparsejob.h

Show diffs side-by-side

added added

removed removed

parser/phplexer.cpp

/***************************************************************************

* This file is part of KDevelop *

* *

* This program is free software; you can redistribute it and/or modify *

* it under the terms of the GNU Library General Public License as *

* published by the Free Software Foundation; either version 2 of the *

* License, or (at your option) any later version. *

* *

* This program is distributed in the hope that it will be useful, *

* but WITHOUT ANY WARRANTY; without even the implied warranty of *

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *

* GNU General Public License for more details. *

* *

* You should have received a copy of the GNU Library General Public *

* License along with this program; if not, write to the *

* Free Software Foundation, Inc., *

* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *

***************************************************************************/

#include "phplexer.h"

#include "phpparser.h"

#include "tokenstream.h"

#include <QtCore/QString>

#include <QtCore/QStringList>

#include <QtCore/QRegExp>

#include <QtCore/QDebug>

#include <iostream>

namespace Php

{

/**
 * Creates a lexer that tokenizes @p content.
 *
 * @param tokenStream  stream whose location table receives newline
 *                     positions (may be null, see createNewline()).
 * @param content      the text to tokenize; a copy is kept in m_content.
 * @param initialState the state the lexer starts in.
 *
 * The state stack always has ErrorState at the bottom. When lexing starts
 * in DefaultState an HtmlState entry is placed beneath it, because the
 * close-tag handling in nextTokenKind() pops states until it reaches
 * HtmlState.
 */
Lexer::Lexer(TokenStream* tokenStream, const QString& content, int initialState)
    : m_content(content)
    , m_tokenStream(tokenStream)
    , m_curpos(0)
    , m_contentSize(m_content.size())
    , m_tokenBegin(0)
    , m_tokenEnd(0)
    , m_haltCompiler(0)
{
    pushState(ErrorState);

    const bool startsInsidePhp = (initialState == DefaultState);
    if (startsInsidePhp) {
        pushState(HtmlState);
    }

    pushState(initialState);
}

/**
 * Returns an entry of the state stack without modifying it.
 *
 * @param deepness distance from the top of the stack; 0 is the current
 *                 state, 1 the state below it, and so on.
 * @return the state stored at that position.
 */
int Lexer::state(int deepness) const
{
    const int topIndex = m_state.size() - 1;
    return m_state.at(topIndex - deepness);
}

/**
 * Debug helper: writes the name of the current (top-most) lexer state to
 * qDebug(). States without a name in the list below print nothing.
 */
void Lexer::printState()
{
    switch (state()) {
    case ErrorState:
        qDebug() << "ErrorState";
        break;
    case HtmlState:
        qDebug() << "HtmlState";
        break;
    case DefaultState:
        qDebug() << "DefaultState";
        break;
    case String:
        qDebug() << "String";
        break;
    case StringVariable:
        qDebug() << "StringVariable";
        break;
    case StringVariableBracket:
        qDebug() << "StringVariableBracket";
        break;
    case StringVariableObjectOperator:
        qDebug() << "StringVariableObjectOperator";
        break;
    case StringVariableCurly:
        qDebug() << "StringVariableCurly";
        break;
    case StringVarname:
        qDebug() << "StringVarname";
        break;
    case StringHeredoc:
        qDebug() << "StringHeredoc";
        break;
    case StringBacktick:
        qDebug() << "StringBacktick";
        break;
    default:
        break;
    }
}

/**
 * Enters a new lexer state by putting it on top of the state stack.
 *
 * @param state the state that becomes the current one.
 */
void Lexer::pushState(int state)
{
    m_state.push(state);
}

/**
 * Leaves the current lexer state by removing the top of the state stack;
 * the state beneath it becomes current again.
 */
void Lexer::popState()
{
    m_state.pop();
}

namespace
{

/// One row of a "lower-cased name -> token" lookup table.
struct NamedToken {
    const char* name;
    int token;
};

/// Type names that turn "( name )" into a cast token.
const NamedToken castNames[] = {
    { "int", Parser::Token_INT_CAST },
    { "integer", Parser::Token_INT_CAST },
    { "real", Parser::Token_DOUBLE_CAST },
    { "double", Parser::Token_DOUBLE_CAST },
    { "float", Parser::Token_DOUBLE_CAST },
    { "string", Parser::Token_STRING_CAST },
    { "binary", Parser::Token_STRING_CAST }, // as in php
    { "array", Parser::Token_ARRAY_CAST },
    { "object", Parser::Token_OBJECT_CAST },
    { "bool", Parser::Token_BOOL_CAST },
    { "boolean", Parser::Token_BOOL_CAST },
    { "unset", Parser::Token_UNSET_CAST }
};

/// Reserved words and magic constants (matched case-insensitively).
const NamedToken keywords[] = {
    { "echo", Parser::Token_ECHO },
    { "include", Parser::Token_INCLUDE },
    { "include_once", Parser::Token_INCLUDE_ONCE },
    { "require", Parser::Token_REQUIRE },
    { "require_once", Parser::Token_REQUIRE_ONCE },
    { "eval", Parser::Token_EVAL },
    { "print", Parser::Token_PRINT },
    { "abstract", Parser::Token_ABSTRACT },
    { "break", Parser::Token_BREAK },
    { "case", Parser::Token_CASE },
    { "catch", Parser::Token_CATCH },
    { "class", Parser::Token_CLASS },
    { "const", Parser::Token_CONST },
    { "continue", Parser::Token_CONTINUE },
    { "default", Parser::Token_DEFAULT },
    { "do", Parser::Token_DO },
    { "else", Parser::Token_ELSE },
    { "extends", Parser::Token_EXTENDS },
    { "final", Parser::Token_FINAL },
    { "for", Parser::Token_FOR },
    { "if", Parser::Token_IF },
    { "implements", Parser::Token_IMPLEMENTS },
    { "instanceof", Parser::Token_INSTANCEOF },
    { "interface", Parser::Token_INTERFACE },
    { "new", Parser::Token_NEW },
    { "private", Parser::Token_PRIVATE },
    { "protected", Parser::Token_PROTECTED },
    { "public", Parser::Token_PUBLIC },
    { "return", Parser::Token_RETURN },
    { "static", Parser::Token_STATIC },
    { "switch", Parser::Token_SWITCH },
    { "throw", Parser::Token_THROW },
    { "try", Parser::Token_TRY },
    { "while", Parser::Token_WHILE },
    { "clone", Parser::Token_CLONE },
    { "exit", Parser::Token_EXIT },
    { "die", Parser::Token_EXIT },
    { "elseif", Parser::Token_ELSEIF },
    { "endif", Parser::Token_ENDIF },
    { "endwhile", Parser::Token_ENDWHILE },
    { "endfor", Parser::Token_ENDFOR },
    { "foreach", Parser::Token_FOREACH },
    { "endforeach", Parser::Token_ENDFOREACH },
    { "declare", Parser::Token_DECLARE },
    { "enddeclare", Parser::Token_ENDDECLARE },
    { "as", Parser::Token_AS },
    { "endswitch", Parser::Token_ENDSWITCH },
    { "function", Parser::Token_FUNCTION },
    { "use", Parser::Token_USE },
    { "global", Parser::Token_GLOBAL },
    { "var", Parser::Token_VAR },
    { "unset", Parser::Token_UNSET },
    { "isset", Parser::Token_ISSET },
    { "empty", Parser::Token_EMPTY },
    { "__halt_compiler", Parser::Token_HALT_COMPILER },
    { "list", Parser::Token_LIST },
    { "array", Parser::Token_ARRAY },
    { "__class__", Parser::Token_CLASS_C },
    { "__method__", Parser::Token_METHOD_C },
    { "__function__", Parser::Token_FUNC_C },
    { "__line__", Parser::Token_LINE },
    { "__file__", Parser::Token_FILE },
    { "or", Parser::Token_LOGICAL_OR },
    { "and", Parser::Token_LOGICAL_AND },
    { "xor", Parser::Token_LOGICAL_XOR }
};

/// Returns the token registered for @p lowerName in @p table, or @p fallback.
int lookupToken(const NamedToken* table, int count, const QString& lowerName, int fallback)
{
    for (int i = 0; i < count; ++i) {
        if (lowerName == table[i].name) {
            return table[i].token;
        }
    }
    return fallback;
}

}

/**
 * Scans the next token starting at m_curpos.
 *
 * On return m_tokenBegin / m_tokenEnd hold the inclusive character range of
 * the token and m_curpos points at the first character after it. Newlines
 * met while scanning are reported through createNewline(). What is
 * recognised depends on the current state() (HTML, PHP code, or one of the
 * string sub-states); the state stack is pushed/popped as strings, variables
 * inside strings and open/close tags are entered and left.
 *
 * Look-ahead reads such as (it + 1) rely on the terminating null character
 * of the QString buffer combined with short-circuit evaluation.
 *
 * @return the Parser::Token_* kind, or 0 at the end of the content and after
 *         a complete "__halt_compiler();" sequence. When 0 is returned for
 *         end of content, m_tokenBegin and m_tokenEnd are set to -1.
 */
int Lexer::nextTokenKind()
{
    int token = Parser::Token_INVALID;
    if (m_curpos >= m_contentSize) {
        m_tokenBegin = -1;
        m_tokenEnd = -1;
        createNewline(m_curpos);
        return 0;
    }
    QChar* it = m_content.data();
    it += m_curpos;
    m_tokenBegin = m_curpos;

    switch (state()) {
    case HtmlState:
        ///TODO: per-project configuration to set whether we use shortags
        /// or not. In the former case we'd need to rise an error here
        if (it->unicode() == '<' && (it + 1)->unicode() == '?'
                // "<?xml" is not a PHP open tag
                && !((it + 2)->toLower().unicode() == 'x'
                     && (it + 3)->toLower().unicode() == 'm'
                     && (it + 4)->toLower().unicode() == 'l')) {
            token = Parser::Token_OPEN_TAG;
            if ((it + 2)->unicode() == '=') {
                token = Parser::Token_OPEN_TAG_WITH_ECHO;
                m_curpos++;
                it++;
            } else if ((it + 2)->toLower().unicode() == 'p'
                       && (it + 3)->toLower().unicode() == 'h'
                       && (it + 4)->toLower().unicode() == 'p'
                       && (it + 5)->isSpace()) {
                // the whitespace character after "<?php" belongs to the tag
                m_curpos += 4;
                if ((it + 5)->unicode() == '\n') createNewline(m_curpos + 1);
            }
            m_curpos++;
            pushState(DefaultState);
        } else {
            token = Parser::Token_INLINE_HTML;
            while (m_curpos < m_contentSize) {
                if (it->unicode() == '\n') createNewline(m_curpos);
                if ((it + 1)->unicode() == '<' && (it + 2)->unicode() == '?') {
                    break;
                }
                it++;
                m_curpos++;
            }
        }
        break;
    case DefaultState:
    case StringVariableCurly: {
        if (it->isSpace()) {
            token = Parser::Token_WHITESPACE;
            while (m_curpos < m_contentSize && it->isSpace()) {
                if (it->unicode() == '\n') createNewline(m_curpos);
                it++;
                m_curpos++;
            }
            m_curpos--;
        } else if (it->isDigit() || (it->unicode() == '.' && (it + 1)->isDigit())) {
            QString num;
            bool hasPoint = false;
            bool hex = false;
            // PHP accepts both "0x" and "0X" as hexadecimal prefix
            if (it->unicode() == '0' && (it + 1)->toLower().unicode() == 'x') {
                it += 2;
                m_curpos += 2;
                hex = true;
            }
            while (m_curpos < m_contentSize && (
                        it->isDigit()
                        || (!hex && !hasPoint && it->unicode() == '.')
                        || (hex && it->toLower().unicode() >= 'a' && it->toLower().unicode() <= 'f'))) {
                if (it->unicode() == '.') hasPoint = true;
                num.append(*it);
                it++;
                m_curpos++;
            }
            if (!hex && it->toLower() == 'e' &&
                    ((it + 1)->isDigit() ||
                     (((it + 1)->unicode() == '-' || (it + 1)->unicode() == '+') && (it + 2)->isDigit()))) {
                //exponential number
                token = Parser::Token_DNUMBER;
                m_curpos++;
                it++;
                if (it->unicode() == '-' || it->unicode() == '+') {
                    it++;
                    m_curpos++;
                }
                while (m_curpos < m_contentSize && (it->isDigit())) {
                    it++;
                    m_curpos++;
                }
                m_curpos--;
            } else {
                m_curpos--;
                if (hasPoint) {
                    token = Parser::Token_DNUMBER;
                } else {
                    bool ok;
                    //check if string can be converted to long
                    //if we get an overflow use double
                    num.toLong(&ok, hex ? 16 : 10);
                    if (ok) {
                        token = Parser::Token_LNUMBER;
                    } else {
                        token = Parser::Token_DNUMBER;
                    }
                }
            }
        } else if (processVariable(it)) {
            token = Parser::Token_VARIABLE;
        } else if (it->unicode() == '$') {
            //when it was not recognized as variable
            token = Parser::Token_DOLLAR;
        } else if (it->unicode() == '}') {
            token = Parser::Token_RBRACE;
            if (state() == StringVariableCurly) {
                popState();
            }
        } else if (it->unicode() == '{') {
            token = Parser::Token_LBRACE;
            if (state() == StringVariableCurly) {
                // nested brace inside "{$...}": matched by the pop on '}'
                pushState(StringVariableCurly);
            }
        } else if (it->unicode() == ')') {
            token = Parser::Token_RPAREN;
        } else if (it->unicode() == '(') {
            // look ahead for "( typename )" which forms a single cast token
            QChar* look = it + 1;
            int pos = m_curpos + 1;
            while (pos < m_contentSize && look->isSpace()) {
                look++;
                pos++;
            }
            QString name;
            while (pos < m_contentSize && look->isLetter()) {
                name.append(*look);
                look++;
                pos++;
            }
            while (pos < m_contentSize && look->isSpace()) {
                look++;
                pos++;
            }
            name = name.toLower();
            if (look->unicode() == ')') {
                token = lookupToken(castNames, sizeof(castNames) / sizeof(castNames[0]),
                                    name, Parser::Token_LPAREN);
                if (token != Parser::Token_LPAREN) {
                    // the cast token extends up to the closing parenthesis
                    m_curpos = pos;
                }
            } else {
                token = Parser::Token_LPAREN;
            }
        } else if (it->unicode() == ']') {
            token = Parser::Token_RBRACKET;
        } else if (it->unicode() == '[') {
            token = Parser::Token_LBRACKET;
        } else if (it->unicode() == ',') {
            token = Parser::Token_COMMA;
        } else if (it->unicode() == '@') {
            token = Parser::Token_AT;
        } else if (it->unicode() == '!') {
            if ((it + 1)->unicode() == '=') {
                m_curpos++;
                if ((it + 2)->unicode() == '=') {
                    m_curpos++;
                    token = Parser::Token_IS_NOT_IDENTICAL;
                } else {
                    token = Parser::Token_IS_NOT_EQUAL;
                }
            } else {
                token = Parser::Token_BANG;
            }
        } else if (it->unicode() == '<') {
            if ((it + 1)->unicode() == '<') {
                m_curpos++;
                if ((it + 2)->unicode() == '<' && state() != StringVariableCurly) {
                    //HEREDOC string (<<< EOD\nfoo\nEOD;\n)
                    int pos = 3;
                    while (m_curpos + pos < m_contentSize &&
                            ((it + pos)->unicode() == ' ' || (it + pos)->unicode() == '\t')) {
                        pos++;
                    }
                    if ((it + pos)->isLetter() || (it + pos)->unicode() == '_') { //identifier must start with a letter
                        m_heredocIdentifier.clear();
                        while (m_curpos + pos < m_contentSize &&
                                ((it + pos)->isDigit() || (it + pos)->isLetter() || (it + pos)->unicode() == '_')) {
                            m_heredocIdentifier.append(*(it + pos));
                            pos++;
                        }
                        if ((it + pos)->unicode() == '\n') {
                            //identifier must be followed by newline, newline is part of HEREDOC token
                            token = Parser::Token_START_HEREDOC;
                            pushState(StringHeredoc);
                            m_curpos += pos - 1;
                            createNewline(m_curpos);
                        }
                    }
                }

                if (token != Parser::Token_START_HEREDOC) {
                    if ((it + 2)->unicode() == '=') {
                        m_curpos++;
                        token = Parser::Token_SL_ASSIGN;
                    } else {
                        token = Parser::Token_SL;
                    }
                }
            } else if ((it + 1)->unicode() == '=') {
                m_curpos++;
                token = Parser::Token_IS_SMALLER_OR_EQUAL;
            } else if ((it + 1)->unicode() == '>') {
                m_curpos++;
                token = Parser::Token_IS_NOT_EQUAL;
            } else {
                token = Parser::Token_IS_SMALLER;
            }
        } else if (it->unicode() == '>') {
            if ((it + 1)->unicode() == '>') {
                m_curpos++;
                if ((it + 2)->unicode() == '=') {
                    m_curpos++;
                    token = Parser::Token_SR_ASSIGN;
                } else {
                    token = Parser::Token_SR;
                }
            } else if ((it + 1)->unicode() == '=') {
                m_curpos++;
                token = Parser::Token_IS_GREATER_OR_EQUAL;
            } else {
                token = Parser::Token_IS_GREATER;
            }
        } else if (it->unicode() == '~') {
            token = Parser::Token_TILDE;
        } else if (it->unicode() == ':') {
            if ((it + 1)->unicode() == ':') {
                m_curpos++;
                token = Parser::Token_PAAMAYIM_NEKUDOTAYIM;
            } else {
                token = Parser::Token_COLON;
            }
        } else if (it->unicode() == '?') {
            if ((it + 1)->unicode() == '>') {
                //accept CLOSE_TAG inside StringVariableCurly too, as php does
                token = Parser::Token_CLOSE_TAG;
                m_curpos++;
                while (state() != HtmlState) popState();
            } else {
                token = Parser::Token_QUESTION;
            }
        } else if (it->unicode() == '-' && (it + 1)->unicode() == '>') {
            m_curpos++;
            token = Parser::Token_OBJECT_OPERATOR;
            if (isValidVariableIdentifier(it + 2)) {
                // the member name directly after "->" is lexed as plain STRING
                pushState(StringVariableObjectOperator);
            }
        } else if (it->unicode() == '%') {
            if ((it + 1)->unicode() == '=') {
                m_curpos++;
                token = Parser::Token_MOD_ASSIGN;
            } else {
                token = Parser::Token_MOD;
            }
        } else if (it->unicode() == '/') {
            if ((it + 1)->unicode() == '=') {
                m_curpos++;
                token = Parser::Token_DIV_ASSIGN;
            } else if ((it + 1)->unicode() == '/') {
                //accept COMMENT inside StringVariableCurly too, as php does
                if ((it + 2)->unicode() == '/') {
                    token = Parser::Token_DOC_COMMENT;
                } else {
                    token = Parser::Token_COMMENT;
                }
                while (m_curpos < m_contentSize) {
                    if (it->unicode() == '?' && (it + 1)->unicode() == '>') {
                        // a one-line comment ends in front of the close tag;
                        // step back so that "?>" is lexed as its own token
                        --it;
                        --m_curpos;
                        break;
                    }
                    if (it->unicode() == '\n') {
                        createNewline(m_curpos);
                        if (token == Parser::Token_COMMENT) {
                            break;
                        } else {
                            // lookahead to check whether this doc comment spans multiple lines
                            QChar* it2 = it + 1;
                            int pos = m_curpos + 1;
                            while (pos < m_contentSize && (it2)->isSpace() && (it2)->unicode() != '\n') {
                                ++it2;
                                ++pos;
                            }
                            if (it2->unicode() == '/' && (it2 + 1)->unicode() == '/'
                                    && (it2 + 2)->unicode() == '/') {
                                // seems to be a multi-line doc-comment
                                it = it2 + 2;
                                m_curpos = pos + 2;
                                continue;
                            } else {
                                // not a multi-line doc-comment
                                break;
                            }
                        }
                    }
                    it++;
                    m_curpos++;
                }
            } else if ((it + 1)->unicode() == '*') {
                //accept COMMENT inside StringVariableCurly too, as php does
                if ((it + 2)->unicode() == '*' && (it + 3)->isSpace()) {
                    token = Parser::Token_DOC_COMMENT;
                } else {
                    token = Parser::Token_COMMENT;
                }
                it += 2;
                m_curpos += 2;
                while (m_curpos < m_contentSize && !(it->unicode() == '*' && (it + 1)->unicode() == '/')) {
                    if (it->unicode() == '\n') {
                        createNewline(m_curpos);
                    }
                    it++;
                    m_curpos++;
                }
                // include the '/' of the terminating "*/"
                m_curpos++;
            } else {
                token = Parser::Token_DIV;
            }
        } else if (it->unicode() == '#') {
            //accept COMMENT inside StringVariableCurly too, as php does
            token = Parser::Token_COMMENT;
            while (m_curpos < m_contentSize) {
                if (it->unicode() == '?' && (it + 1)->unicode() == '>') {
                    // same rule as for "//": the close tag ends the comment
                    --it;
                    --m_curpos;
                    break;
                }
                if (it->unicode() == '\n') {
                    createNewline(m_curpos);
                    break;
                }
                it++;
                m_curpos++;
            }
        } else if (it->unicode() == '^') {
            if ((it + 1)->unicode() == '=') {
                m_curpos++;
                token = Parser::Token_XOR_ASSIGN;
            } else {
                token = Parser::Token_BIT_XOR;
            }
        } else if (it->unicode() == '*') {
            if ((it + 1)->unicode() == '=') {
                m_curpos++;
                token = Parser::Token_MUL_ASSIGN;
            } else {
                token = Parser::Token_MUL;
            }
        } else if (it->unicode() == '|') {
            if ((it + 1)->unicode() == '|') {
                m_curpos++;
                token = Parser::Token_BOOLEAN_OR;
            } else if ((it + 1)->unicode() == '=') {
                m_curpos++;
                token = Parser::Token_OR_ASSIGN;
            } else {
                token = Parser::Token_BIT_OR;
            }
        } else if (it->unicode() == '&') {
            if ((it + 1)->unicode() == '&') {
                m_curpos++;
                token = Parser::Token_BOOLEAN_AND;
            } else if ((it + 1)->unicode() == '=') {
                m_curpos++;
                token = Parser::Token_AND_ASSIGN;
            } else {
                token = Parser::Token_BIT_AND;
            }
        } else if (it->unicode() == '+') {
            if ((it + 1)->unicode() == '+') {
                m_curpos++;
                token = Parser::Token_INC;
            } else if ((it + 1)->unicode() == '=') {
                m_curpos++;
                token = Parser::Token_PLUS_ASSIGN;
            } else {
                token = Parser::Token_PLUS;
            }
        } else if (it->unicode() == '-') {
            if ((it + 1)->unicode() == '-') {
                m_curpos++;
                token = Parser::Token_DEC;
            } else if ((it + 1)->unicode() == '=') {
                m_curpos++;
                token = Parser::Token_MINUS_ASSIGN;
            } else {
                token = Parser::Token_MINUS;
            }
        } else if (it->unicode() == '.') {
            if ((it + 1)->unicode() == '=') {
                m_curpos++;
                token = Parser::Token_CONCAT_ASSIGN;
            } else {
                token = Parser::Token_CONCAT;
            }
        } else if (it->unicode() == ';') {
            token = Parser::Token_SEMICOLON;
        } else if (it->unicode() == '\'') {
            token = Parser::Token_CONSTANT_ENCAPSED_STRING;
            it++;
            m_curpos++;
            int startPos = m_curpos;
            while (m_curpos < m_contentSize
                    && (it->unicode() != '\'' || isEscapedWithBackslash(it, m_curpos, startPos))) {
                if (it->unicode() == '\n') createNewline(m_curpos);
                it++;
                m_curpos++;
            }
            // if the string is never terminated, make sure we don't overflow the boundaries
            if (m_curpos == m_contentSize) {
                --m_curpos;
            }
        } else if (it->unicode() == '"') {
            it++;
            m_curpos++;
            // scan ahead without moving m_curpos: only a string without
            // embedded variables becomes a single token
            int stringSize = 0;
            bool foundVar = false;
            while (m_curpos + stringSize < m_contentSize
                    && (it->unicode() != '"' || isEscapedWithBackslash(it, m_curpos + stringSize, m_curpos))) {
                if (it->unicode() == '$' && !isEscapedWithBackslash(it, m_curpos + stringSize, m_curpos)
                        && ((it + 1)->unicode() == '{'
                            || (isValidVariableIdentifier(it + 1) && !(it + 1)->isDigit()))) {
                    foundVar = true;
                    break;
                }
                it++;
                stringSize++;
            }
            if (!foundVar) {
                // if the string is never terminated, make sure we don't overflow the boundaries
                if (m_curpos + stringSize == m_contentSize) {
                    m_curpos--;
                }
                token = Parser::Token_CONSTANT_ENCAPSED_STRING;
                it -= stringSize;
                for (int j = 0; j < stringSize; j++) {
                    if (it->unicode() == '\n') {
                        createNewline(m_curpos + j);
                    }
                    it++;
                }
                m_curpos += stringSize;
            } else {
                // properly set the token pos to the starting double quote
                m_curpos--;
                token = Parser::Token_DOUBLE_QUOTE;
                pushState(String);
            }
        } else if (it->unicode() == '`') {
            token = Parser::Token_BACKTICK;
            pushState(StringBacktick);
        } else if (it->unicode() == '=') {
            if ((it + 1)->unicode() == '=') {
                m_curpos++;
                if ((it + 2)->unicode() == '=') {
                    m_curpos++;
                    token = Parser::Token_IS_IDENTICAL;
                } else {
                    token = Parser::Token_IS_EQUAL;
                }
            } else if ((it + 1)->unicode() == '>') {
                m_curpos++;
                token = Parser::Token_DOUBLE_ARROW;
            } else {
                token = Parser::Token_ASSIGN;
            }
        } else if (isValidVariableIdentifier(it) && !it->isDigit()) {
            QString name;
            while (m_curpos < m_contentSize && (isValidVariableIdentifier(it))) {
                name.append(*it);
                it++;
                m_curpos++;
            }
            m_curpos--;
            // keywords are case-insensitive; anything else is a plain STRING
            token = lookupToken(keywords, sizeof(keywords) / sizeof(keywords[0]),
                                name.toLower(), Parser::Token_STRING);
        }
        break;
    }

    case StringVariable:
    case String:
    case StringHeredoc:
    case StringBacktick:
        if ((state() == String || state(1) == String) && it->unicode() == '"') {
            token = Parser::Token_DOUBLE_QUOTE;
            if (state() == StringVariable) popState();
            popState();
        } else if ((state() == StringBacktick || state(1) == StringBacktick) && it->unicode() == '`') {
            token = Parser::Token_BACKTICK;
            if (state() == StringVariable) popState();
            popState();
        } else if ((state() == StringHeredoc || state(1) == StringHeredoc) && isHeredocEnd(it)) {
            token = Parser::Token_END_HEREDOC;
            m_curpos += m_heredocIdentifier.length() - 1;
            if (state() == StringVariable) popState();
            popState();
        } else if (processVariable(it)) {
            token = Parser::Token_VARIABLE;
            if (state() != StringVariable) pushState(StringVariable);
        } else if (state() != StringVariable && it->unicode() == '$' && (it + 1)->unicode() == '{') {
            token = Parser::Token_DOLLAR_OPEN_CURLY_BRACES;
            m_curpos++;
            it += 2;
            //check if a valid variable follows
            if ((isValidVariableIdentifier(it) && !it->isDigit())) {
                pushState(StringVarname);
            }
        } else if (state() == StringVariable && it->unicode() == '[') {
            token = Parser::Token_LBRACKET;
            pushState(StringVariableBracket);
        } else if (state() != StringVariable && it->unicode() == '{' && (it + 1)->unicode() == '$'
                   && ((isValidVariableIdentifier(it + 2) && !(it + 2)->isDigit()) || (it + 2)->unicode() == '{')) {
            token = Parser::Token_CURLY_OPEN;
            pushState(StringVariableCurly);
        } else if (state() == StringVariable
                   && it->unicode() == '-' && (it + 1)->unicode() == '>'
                   && isValidVariableIdentifier(it + 2) && !(it + 2)->isDigit()) {
            token = Parser::Token_OBJECT_OPERATOR;
            m_curpos++;
            pushState(StringVariableObjectOperator);
        } else {
            if (state() == StringVariable) popState();
            token = Parser::Token_ENCAPSED_AND_WHITESPACE;
            int startPos = m_curpos;
            while (m_curpos < m_contentSize) {
                // "{$" followed by a digit is not accepted as CURLY_OPEN by
                // the branch above, so it must not end this token either:
                // otherwise every following call would stop here again with
                // an empty token and the lexer would never advance.
                if (!isEscapedWithBackslash(it, m_curpos, startPos) &&
                        ((it->unicode() == '$' && (it + 1)->unicode() == '{') ||
                         (it->unicode() == '{' && (it + 1)->unicode() == '$'
                          && isValidVariableIdentifier(it + 2) && !(it + 2)->isDigit()) ||
                         (it->unicode() == '$' && isValidVariableIdentifier(it + 1) && !(it + 1)->isDigit()))) {
                    //variable is next ${var} or {$var}
                    break;
                }
                if (state() == String && it->unicode() == '"'
                        && !isEscapedWithBackslash(it, m_curpos, startPos)) {
                    //end of string
                    break;
                }
                if (state() == StringBacktick && it->unicode() == '`'
                        && !isEscapedWithBackslash(it, m_curpos, startPos)) {
                    //end of string
                    break;
                }

                if (it->unicode() == '\n') createNewline(m_curpos);
                m_curpos++;
                it++;

                //check for end of heredoc (\nEOD;\n)
                if (state() == StringHeredoc && (it - 1)->unicode() == '\n' && isHeredocEnd(it)) {
                    break;
                }
            }
            m_curpos--;
        }
        break;
    case StringVariableBracket:
        if (it->unicode() == ']') {
            token = Parser::Token_RBRACKET;
            // leave both the bracket state and the StringVariable below it
            popState();
            popState();
        } else if (it->isDigit()) {
            token = Parser::Token_NUM_STRING;
            while (m_curpos < m_contentSize && it->isDigit()) {
                it++;
                m_curpos++;
            }
            m_curpos--;
        } else {
            token = Parser::Token_STRING;
            while (m_curpos < m_contentSize && (it->unicode() != ']')) {
                if (it->unicode() == '\n') createNewline(m_curpos);
                it++;
                m_curpos++;
            }
            m_curpos--;
        }
        break;
    case StringVariableObjectOperator:
        token = Parser::Token_STRING;
        while (m_curpos < m_contentSize && isValidVariableIdentifier(it)) {
            it++;
            m_curpos++;
        }
        m_curpos--;
        popState();
        if (state() == StringVariable) popState();
        break;
    case StringVarname:
        popState();
        pushState(StringVariableCurly);
        token = Parser::Token_STRING_VARNAME;
        while (m_curpos < m_contentSize && isValidVariableIdentifier(it)) {
            it++;
            m_curpos++;
        }
        m_curpos--;
        break;
    default:
        token = Parser::Token_INVALID;
        break;
    }

    if (m_curpos > m_contentSize) {
        m_tokenBegin = -1;
        m_tokenEnd = -1;
        return 0;
    }
    m_tokenEnd = m_curpos;
    m_curpos++;

    if (m_haltCompiler) {
        //look for __halt_compiler(); and stop lexer there
        // m_haltCompiler counts the matched parts: 1 = keyword, 2 = '(',
        // 3 = ')', 4 = ';' (sequence complete)
        if (m_haltCompiler == 4) {
            token = 0; //EOF
        } else if (token == Parser::Token_WHITESPACE || token == Parser::Token_COMMENT || token == Parser::Token_DOC_COMMENT) {
            //ignore
        } else if (m_haltCompiler == 1 && token == Parser::Token_LPAREN) {
            m_haltCompiler++;
        } else if (m_haltCompiler == 2 && token == Parser::Token_RPAREN) {
            m_haltCompiler++;
        } else if (m_haltCompiler == 3 && token == Parser::Token_SEMICOLON) {
            m_haltCompiler++;
        } else {
            m_haltCompiler = 0;
        }
    }
    if (token == Parser::Token_HALT_COMPILER && !m_haltCompiler) {
        m_haltCompiler = 1;
    }
    return token;
}

874

875

qint64 Lexer::tokenBegin() const

876

{

877

return m_tokenBegin;

878

}

879

880

qint64 Lexer::tokenEnd() const

881

{

882

return m_tokenEnd;

883

}

884

885

bool Lexer::isHeredocEnd(QChar* it)

886

{

887

int identiferLen = m_heredocIdentifier.length();

888

QString lineStart;

889

for (int i = 0; i < identiferLen; i++) {

890

if (m_curpos + i >= m_contentSize) break;

891

lineStart.append(*(it + i));

892

}

893

if (lineStart == m_heredocIdentifier &&

894

((it + identiferLen)->unicode() == '\n'

895

|| ((it + identiferLen)->unicode() == ';' &&

896

(it + identiferLen + 1)->unicode() == '\n'))) {

897

return true;

898

}

899

return false;

900

}

901

902

//used for strings, to check if " is escaped (\" is, \\" not)

903

bool Lexer::isEscapedWithBackslash(QChar* it, int curPos, int startPos)

904

{

905

int cnt = 0;

906

it--;

907

while (curPos > startPos && it->unicode() == '\\') {

908

cnt++;

909

it--;

910

}

911

return (cnt % 2) == 1;

912

}

913

914

bool Lexer::processVariable(QChar* it)

915

{

916

QChar* c2 = it + 1;

917

if (it->unicode() == '$' && (isValidVariableIdentifier(c2) && !c2->isDigit())) {

918

it++;

919

m_curpos++;

920

while (m_curpos < m_contentSize

921

&& (isValidVariableIdentifier(it))) {

922

it++;

923

m_curpos++;

924

}

925

m_curpos--;

926

return true;

927

} else {

928

return false;

929

}

930

}

931

bool Lexer::isValidVariableIdentifier(QChar* it)

932

{

933

return it->isLetter() || it->isDigit() || it->unicode() == '_' || it->unicode() > 0x7f;

934

}

935

936

void Lexer::createNewline(int pos)

937

{

938

if (m_tokenStream) m_tokenStream->locationTable()->newline(pos);

939

}

940

941

}

942

Older »