8
* @package PHP_CodeSniffer
9
* @author Greg Sherwood <gsherwood@squiz.net>
10
* @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
11
* @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
12
* @link http://pear.php.net/package/PHP_CodeSniffer
19
* @package PHP_CodeSniffer
20
* @author Greg Sherwood <gsherwood@squiz.net>
21
* @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
22
* @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
23
* @version Release: 1.5.3
24
* @link http://pear.php.net/package/PHP_CodeSniffer
26
class PHP_CodeSniffer_Tokenizers_PHP
30
* A list of tokens that are allowed to open a scope.
32
* This array also contains information about what kind of token the scope
33
* opener uses to open and close the scope, if the token strictly requires
34
* an opener, if the token can share a scope closer, and who it can be shared
35
* with. An example of a token that shares a scope closer is a CASE scope.
39
public $scopeOpeners = array(
46
T_CLOSE_CURLY_BRACKET,
59
'start' => array(T_OPEN_CURLY_BRACKET),
60
'end' => array(T_CLOSE_CURLY_BRACKET),
66
'start' => array(T_OPEN_CURLY_BRACKET),
67
'end' => array(T_CLOSE_CURLY_BRACKET),
73
'start' => array(T_OPEN_CURLY_BRACKET),
74
'end' => array(T_CLOSE_CURLY_BRACKET),
85
T_CLOSE_CURLY_BRACKET,
101
T_CLOSE_CURLY_BRACKET,
115
T_OPEN_CURLY_BRACKET,
119
T_CLOSE_CURLY_BRACKET,
128
T_OPEN_CURLY_BRACKET,
132
T_CLOSE_CURLY_BRACKET,
139
T_INTERFACE => array(
140
'start' => array(T_OPEN_CURLY_BRACKET),
141
'end' => array(T_CLOSE_CURLY_BRACKET),
147
'start' => array(T_OPEN_CURLY_BRACKET),
148
'end' => array(T_CLOSE_CURLY_BRACKET),
154
'start' => array(T_OPEN_CURLY_BRACKET),
155
'end' => array(T_CLOSE_CURLY_BRACKET),
161
'start' => array(T_OPEN_CURLY_BRACKET),
162
'end' => array(T_CLOSE_CURLY_BRACKET),
167
T_NAMESPACE => array(
168
'start' => array(T_OPEN_CURLY_BRACKET),
169
'end' => array(T_CLOSE_CURLY_BRACKET),
176
T_OPEN_CURLY_BRACKET,
180
T_CLOSE_CURLY_BRACKET,
188
'start' => array(T_OPEN_CURLY_BRACKET),
189
'end' => array(T_CLOSE_CURLY_BRACKET),
195
'start' => array(T_OPEN_CURLY_BRACKET),
196
'end' => array(T_CLOSE_CURLY_BRACKET),
240
T_START_HEREDOC => array(
241
'start' => array(T_START_HEREDOC),
242
'end' => array(T_END_HEREDOC),
250
* A list of tokens that end the scope.
252
* This array is just a unique collection of the end tokens
253
* from the $scopeOpeners array. The data is duplicated here to
254
* save time during parsing of the file.
258
public $endScopeTokens = array(
259
T_CLOSE_CURLY_BRACKET,
266
* Creates an array of tokens when given some PHP code.
268
* Starts by using token_get_all() but does a lot of extra processing
269
* to insert information about the context of the token.
271
* @param string $string The string to tokenize.
272
* @param string $eolChar The EOL character to use for splitting strings.
276
public function tokenizeString($string, $eolChar="\n")
278
$tokens = @token_get_all($string);
279
$finalTokens = array();
282
$numTokens = count($tokens);
284
$insideInlineIf = false;
286
for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
287
$token = $tokens[$stackPtr];
288
$tokenIsArray = is_array($token);
291
If we are using \r\n newline characters, the \r and \n are sometimes
292
split over two tokens. This normally occurs after comments. We need
293
to merge these two characters together so that our line endings are
294
consistent for all lines.
297
if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
298
if (isset($tokens[($stackPtr + 1)]) === true
299
&& is_array($tokens[($stackPtr + 1)]) === true
300
&& $tokens[($stackPtr + 1)][1][0] === "\n"
304
if ($tokens[($stackPtr + 1)][1] === "\n") {
305
// The next token's content has been merged into this token,
306
// so we can skip it.
309
$tokens[($stackPtr + 1)][1]
310
= substr($tokens[($stackPtr + 1)][1], 1);
316
If this is a double quoted string, PHP will tokenise the whole
317
thing which causes problems with the scope map when braces are
318
within the string. So we need to merge the tokens together to
319
provide a single string.
322
if ($tokenIsArray === false && $token === '"') {
324
$nestedVars = array();
325
for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
326
$subTokenIsArray = is_array($tokens[$i]);
328
if ($subTokenIsArray === true) {
329
$tokenContent .= $tokens[$i][1];
330
if ($tokens[$i][1] === '{'
331
&& $tokens[$i][0] !== T_ENCAPSED_AND_WHITESPACE
336
$tokenContent .= $tokens[$i];
337
if ($tokens[$i] === '}') {
338
array_pop($nestedVars);
342
if ($subTokenIsArray === false
343
&& $tokens[$i] === '"'
344
&& empty($nestedVars) === true
346
// We found the other end of the double quoted string.
353
// Convert each line within the double quoted string to a
354
// new token, so it conforms with other multiple line tokens.
355
$tokenLines = explode($eolChar, $tokenContent);
356
$numLines = count($tokenLines);
359
for ($j = 0; $j < $numLines; $j++) {
360
$newToken['content'] = $tokenLines[$j];
361
if ($j === ($numLines - 1)) {
362
if ($tokenLines[$j] === '') {
366
$newToken['content'] .= $eolChar;
369
$newToken['code'] = T_DOUBLE_QUOTED_STRING;
370
$newToken['type'] = 'T_DOUBLE_QUOTED_STRING';
371
$finalTokens[$newStackPtr] = $newToken;
375
// Continue, as we're done with this token.
380
If this is a heredoc, PHP will tokenise the whole
381
thing which causes problems when heredocs don't
382
contain real PHP code, which they almost never do.
383
We want to leave the start and end heredoc tokens
387
if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
388
// Add the start heredoc token to the final array.
389
$finalTokens[$newStackPtr]
390
= PHP_CodeSniffer::standardiseToken($token);
392
// Check if this is actually a nowdoc and use a different token
393
// to help the sniffs.
395
if ($token[1][3] === "'") {
396
$finalTokens[$newStackPtr]['code'] = T_START_NOWDOC;
397
$finalTokens[$newStackPtr]['type'] = 'T_START_NOWDOC';
404
for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
405
$subTokenIsArray = is_array($tokens[$i]);
406
if ($subTokenIsArray === true
407
&& $tokens[$i][0] === T_END_HEREDOC
409
// We found the other end of the heredoc.
413
if ($subTokenIsArray === true) {
414
$tokenContent .= $tokens[$i][1];
416
$tokenContent .= $tokens[$i];
422
// Convert each line within the heredoc to a
423
// new token, so it conforms with other multiple line tokens.
424
$tokenLines = explode($eolChar, $tokenContent);
425
$numLines = count($tokenLines);
428
for ($j = 0; $j < $numLines; $j++) {
429
$newToken['content'] = $tokenLines[$j];
430
if ($j === ($numLines - 1)) {
431
if ($tokenLines[$j] === '') {
435
$newToken['content'] .= $eolChar;
438
if ($nowdoc === true) {
439
$newToken['code'] = T_NOWDOC;
440
$newToken['type'] = 'T_NOWDOC';
442
$newToken['code'] = T_HEREDOC;
443
$newToken['type'] = 'T_HEREDOC';
446
$finalTokens[$newStackPtr] = $newToken;
450
// Add the end heredoc token to the final array.
451
$finalTokens[$newStackPtr]
452
= PHP_CodeSniffer::standardiseToken($tokens[$stackPtr]);
454
if ($nowdoc === true) {
455
$finalTokens[$newStackPtr]['code'] = T_END_NOWDOC;
456
$finalTokens[$newStackPtr]['type'] = 'T_END_NOWDOC';
462
// Continue, as we're done with this token.
467
PHP doesn't assign a token to goto labels, so we have to.
468
These are just string tokens with a single colon after them. Double
469
colons are already tokenized and so don't interfere with this check.
470
But we do have to account for CASE statements, which look just like
474
if ($tokenIsArray === true
475
&& $token[0] === T_STRING
476
&& $tokens[($stackPtr + 1)] === ':'
477
&& $tokens[($stackPtr - 1)][0] !== T_PAAMAYIM_NEKUDOTAYIM
482
T_OPEN_CURLY_BRACKET,
486
for ($x = ($newStackPtr - 1); $x > 0; $x--) {
487
if (in_array($finalTokens[$x]['code'], $stopTokens) === true) {
492
if ($finalTokens[$x]['code'] !== T_CASE
493
&& $finalTokens[$x]['code'] !== T_INLINE_THEN
495
$finalTokens[$newStackPtr] = array(
496
'content' => $token[1].':',
497
'code' => T_GOTO_LABEL,
498
'type' => 'T_GOTO_LABEL',
507
If this token has newlines in its content, split each line up
508
and create a new token for each line. We do this so it's easier
509
to ascertain where errors occur on a line.
510
Note that $token[1] is the token's content.
513
if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
514
$tokenLines = explode($eolChar, $token[1]);
515
$numLines = count($tokenLines);
516
$tokenName = token_name($token[0]);
518
for ($i = 0; $i < $numLines; $i++) {
519
$newToken['content'] = $tokenLines[$i];
520
if ($i === ($numLines - 1)) {
521
if ($tokenLines[$i] === '') {
525
$newToken['content'] .= $eolChar;
528
$newToken['type'] = $tokenName;
529
$newToken['code'] = $token[0];
530
$finalTokens[$newStackPtr] = $newToken;
534
$newToken = PHP_CodeSniffer::standardiseToken($token);
536
// Convert colons that are actually the ELSE component of an
537
// inline IF statement.
538
if ($newToken['code'] === T_INLINE_THEN) {
539
$insideInlineIf = true;
540
} else if ($insideInlineIf === true && $newToken['code'] === T_COLON) {
541
$insideInlineIf = false;
542
$newToken['code'] = T_INLINE_ELSE;
543
$newToken['type'] = 'T_INLINE_ELSE';
546
// This is a special condition for T_ARRAY tokens used for
547
// type hinting function arguments as being arrays. We want to keep
548
// the parenthesis map clean, so let's tag these tokens as
550
if ($newToken['code'] === T_ARRAY) {
551
// Recalculate number of tokens.
552
$numTokens = count($tokens);
553
for ($i = $stackPtr; $i < $numTokens; $i++) {
554
if (is_array($tokens[$i]) === false) {
555
if ($tokens[$i] === '(') {
558
} else if ($tokens[$i][0] === T_VARIABLE) {
559
$newToken['code'] = T_ARRAY_HINT;
560
$newToken['type'] = 'T_ARRAY_HINT';
566
// This is a special case for the PHP 5.5 classname::class syntax
567
// where "class" should be T_STRING instead of T_CLASS.
568
if ($newToken['code'] === T_CLASS
569
&& $finalTokens[($newStackPtr - 1)]['code'] === T_DOUBLE_COLON
571
$newToken['code'] = T_STRING;
572
$newToken['type'] = 'T_STRING';
575
$finalTokens[$newStackPtr] = $newToken;
582
}//end tokenizeString()
586
* Performs additional processing after main tokenizing.
588
* This additional processing checks for CASE statements that are using curly
589
* braces for scope openers and closers. It also turns some T_FUNCTION tokens
590
* into T_CLOSURE when they are not standard function definitions. It also
591
* detects short array syntax and converts those square brackets into new tokens.
592
* It also corrects some usage of the static and class keywords.
594
* @param array &$tokens The array of tokens to process.
595
* @param string $eolChar The EOL character to use for splitting strings.
599
public function processAdditional(&$tokens, $eolChar)
601
if (PHP_CODESNIFFER_VERBOSITY > 1) {
602
echo "\t*** START ADDITIONAL PHP PROCESSING ***".PHP_EOL;
605
$numTokens = count($tokens);
606
for ($i = ($numTokens - 1); $i >= 0; $i--) {
607
// Check for any unset scope conditions due to alternate IF/ENDIF syntax.
608
if (isset($tokens[$i]['scope_opener']) === true
609
&& isset($tokens[$i]['scope_condition']) === false
611
$tokens[$i]['scope_condition'] = $tokens[$tokens[$i]['scope_opener']]['scope_condition'];
614
// Looking for functions that are actually closures.
615
if ($tokens[$i]['code'] === T_FUNCTION && isset($tokens[$i]['scope_opener']) === true) {
616
for ($x = ($i + 1); $x < $numTokens; $x++) {
617
if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
622
if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
623
$tokens[$i]['code'] = T_CLOSURE;
624
$tokens[$i]['type'] = 'T_CLOSURE';
625
if (PHP_CODESNIFFER_VERBOSITY > 1) {
626
$line = $tokens[$i]['line'];
627
echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
630
for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
631
if (isset($tokens[$x]['conditions'][$i]) === false) {
635
$tokens[$x]['conditions'][$i] = T_CLOSURE;
636
if (PHP_CODESNIFFER_VERBOSITY > 1) {
637
$type = $tokens[$x]['type'];
638
echo "\t\t* cleaned $x ($type) *".PHP_EOL;
644
} else if ($tokens[$i]['code'] === T_OPEN_SQUARE_BRACKET) {
645
// Unless there is a variable or a bracket before this token,
646
// it is the start of an array being defined using the short syntax.
647
for ($x = ($i - 1); $x > 0; $x--) {
648
if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
654
T_CLOSE_SQUARE_BRACKET,
660
if (in_array($tokens[$x]['code'], $allowed) === false) {
661
$tokens[$i]['code'] = T_OPEN_SHORT_ARRAY;
662
$tokens[$i]['type'] = 'T_OPEN_SHORT_ARRAY';
664
$closer = $tokens[$i]['bracket_closer'];
665
$tokens[$closer]['code'] = T_CLOSE_SHORT_ARRAY;
666
$tokens[$closer]['type'] = 'T_CLOSE_SHORT_ARRAY';
667
if (PHP_CODESNIFFER_VERBOSITY > 1) {
668
$line = $tokens[$i]['line'];
669
echo "\t* token $i on line $line changed from T_OPEN_SQUARE_BRACKET to T_OPEN_SHORT_ARRAY".PHP_EOL;
670
$line = $tokens[$closer]['line'];
671
echo "\t* token $closer on line $line changed from T_CLOSE_SQUARE_BRACKET to T_CLOSE_SHORT_ARRAY".PHP_EOL;
676
} else if ($tokens[$i]['code'] === T_STATIC) {
677
for ($x = ($i - 1); $x > 0; $x--) {
678
if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
683
if ($tokens[$x]['code'] === T_INSTANCEOF) {
684
$tokens[$i]['code'] = T_STRING;
685
$tokens[$i]['type'] = 'T_STRING';
687
if (PHP_CODESNIFFER_VERBOSITY > 1) {
688
$line = $tokens[$i]['line'];
689
echo "\t* token $i on line $line changed from T_STATIC to T_STRING".PHP_EOL;
696
if (($tokens[$i]['code'] !== T_CASE
697
&& $tokens[$i]['code'] !== T_DEFAULT)
698
|| isset($tokens[$i]['scope_opener']) === false
700
// Only interested in CASE and DEFAULT statements from here on in.
704
$scopeOpener = $tokens[$i]['scope_opener'];
705
$scopeCloser = $tokens[$i]['scope_closer'];
707
// If the first char after the opener is a curly brace
708
// and that brace has been ignored, it is actually
709
// opening this case statement and the opener and closer are
710
// probably set incorrectly.
711
for ($x = ($scopeOpener + 1); $x < $numTokens; $x++) {
712
if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
713
// Non-whitespace content.
718
if ($tokens[$x]['code'] === T_CASE) {
719
// Special case for multiple CASE statements that share the same
720
// closer. Because we are going backwards through the file, this next
721
// CASE statement is already fixed, so just use its closer and don't
722
// worry about fixing anything.
723
$newCloser = $tokens[$x]['scope_closer'];
724
$tokens[$i]['scope_closer'] = $newCloser;
725
if (PHP_CODESNIFFER_VERBOSITY > 1) {
726
$oldType = $tokens[$scopeCloser]['type'];
727
$newType = $tokens[$newCloser]['type'];
728
$line = $tokens[$i]['line'];
729
echo "\t* token $i (T_CASE) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
735
if ($tokens[$x]['code'] !== T_OPEN_CURLY_BRACKET
736
|| isset($tokens[$x]['scope_condition']) === true
738
// Not a CASE with a curly brace opener.
742
// The closer for this CASE/DEFAULT should be the closing curly brace and
743
// not whatever it already is. The opener needs to be the opening curly
744
// brace so everything matches up.
745
$newCloser = $tokens[$x]['bracket_closer'];
746
$tokens[$i]['scope_closer'] = $newCloser;
747
$tokens[$x]['scope_closer'] = $newCloser;
748
$tokens[$i]['scope_opener'] = $x;
749
$tokens[$x]['scope_condition'] = $i;
750
$tokens[$newCloser]['scope_condition'] = $i;
751
$tokens[$newCloser]['scope_opener'] = $x;
752
if (PHP_CODESNIFFER_VERBOSITY > 1) {
753
$line = $tokens[$i]['line'];
754
$tokenType = $tokens[$i]['type'];
756
$oldType = $tokens[$scopeOpener]['type'];
757
$newType = $tokens[$x]['type'];
758
echo "\t* token $i ($tokenType) on line $line opener changed from $scopeOpener ($oldType) to $x ($newType)".PHP_EOL;
760
$oldType = $tokens[$scopeCloser]['type'];
761
$newType = $tokens[$newCloser]['type'];
762
echo "\t* token $i ($tokenType) on line $line closer changed from $scopeCloser ($oldType) to $newCloser ($newType)".PHP_EOL;
765
// Now fix up all the tokens that think they are
766
// inside the CASE/DEFAULT statement when they are really outside.
767
for ($x = $newCloser; $x < $scopeCloser; $x++) {
768
foreach ($tokens[$x]['conditions'] as $num => $oldCond) {
769
if ($oldCond === $tokens[$i]['code']) {
770
$oldConditions = $tokens[$x]['conditions'];
771
unset($tokens[$x]['conditions'][$num]);
773
if (PHP_CODESNIFFER_VERBOSITY > 1) {
774
$type = $tokens[$x]['type'];
776
foreach ($oldConditions as $condition) {
777
$oldConds .= token_name($condition).',';
780
$oldConds = rtrim($oldConds, ',');
783
foreach ($tokens[$x]['conditions'] as $condition) {
784
$newConds .= token_name($condition).',';
787
$newConds = rtrim($newConds, ',');
789
echo "\t\t* cleaned $x ($type) *".PHP_EOL;
790
echo "\t\t\t=> conditions changed from $oldConds to $newConds".PHP_EOL;
799
if (PHP_CODESNIFFER_VERBOSITY > 1) {
800
echo "\t*** END ADDITIONAL PHP PROCESSING ***".PHP_EOL;
803
}//end processAdditional()