8
* @package PHP_CodeSniffer
9
* @author Greg Sherwood <gsherwood@squiz.net>
10
* @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
11
* @license http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
12
* @version CVS: $Id: PHP.php,v 1.2 2008/06/20 03:33:58 squiz Exp $
13
* @link http://pear.php.net/package/PHP_CodeSniffer
20
* @package PHP_CodeSniffer
21
* @author Greg Sherwood <gsherwood@squiz.net>
22
* @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
23
* @license http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
24
* @version Release: 1.1.0
25
* @link http://pear.php.net/package/PHP_CodeSniffer
27
class PHP_CodeSniffer_Tokenizers_PHP
31
* A list of tokens that are allowed to open a scope.
33
* This array also contains information about what kind of token the scope
34
* opener uses to open and close the scope, if the token strictly requires
35
* an opener, if the token can share a scope closer, and which tokens it can be shared
36
* with. An example of a token that shares a scope closer is a CASE scope.
40
public $scopeOpeners = array(
42
'start' => T_OPEN_CURLY_BRACKET,
43
'end' => T_CLOSE_CURLY_BRACKET,
49
'start' => T_OPEN_CURLY_BRACKET,
50
'end' => T_CLOSE_CURLY_BRACKET,
56
'start' => T_OPEN_CURLY_BRACKET,
57
'end' => T_CLOSE_CURLY_BRACKET,
63
'start' => T_OPEN_CURLY_BRACKET,
64
'end' => T_CLOSE_CURLY_BRACKET,
70
'start' => T_OPEN_CURLY_BRACKET,
71
'end' => T_CLOSE_CURLY_BRACKET,
77
'start' => T_OPEN_CURLY_BRACKET,
78
'end' => T_CLOSE_CURLY_BRACKET,
84
'start' => T_OPEN_CURLY_BRACKET,
85
'end' => T_CLOSE_CURLY_BRACKET,
91
'start' => T_OPEN_CURLY_BRACKET,
92
'end' => T_CLOSE_CURLY_BRACKET,
98
'start' => T_OPEN_CURLY_BRACKET,
99
'end' => T_CLOSE_CURLY_BRACKET,
105
'start' => T_OPEN_CURLY_BRACKET,
106
'end' => T_CLOSE_CURLY_BRACKET,
112
'start' => T_OPEN_CURLY_BRACKET,
113
'end' => T_CLOSE_CURLY_BRACKET,
119
'start' => T_OPEN_CURLY_BRACKET,
120
'end' => T_CLOSE_CURLY_BRACKET,
126
'start' => T_OPEN_CURLY_BRACKET,
127
'end' => T_CLOSE_CURLY_BRACKET,
147
'with' => array(T_CASE),
149
T_START_HEREDOC => array(
150
'start' => T_START_HEREDOC,
151
'end' => T_END_HEREDOC,
159
* A list of tokens that end the scope.
161
* This array is just a unique collection of the end tokens
162
* from the $scopeOpeners array. The data is duplicated here to
163
* save time during parsing of the file.
167
public $endScopeTokens = array(
168
T_CLOSE_CURLY_BRACKET,
175
* Creates an array of tokens when given some PHP code.
177
* Starts by using token_get_all() but does a lot of extra processing
178
* to insert information about the context of the token.
180
* @param string $string The string to tokenize.
181
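* @param string $eolChar The EOL character to use for splitting strings.
*                        NOTE: the default is the single-quoted string '\n'
*                        (a backslash followed by "n"), not a newline
*                        character, so callers should pass the real EOL
*                        sequence explicitly.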
185
public function tokenizeString($string, $eolChar='\n')
187
$tokens = @token_get_all($string);
188
$finalTokens = array();
191
$numTokens = count($tokens);
192
for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
193
$token = $tokens[$stackPtr];
194
$tokenIsArray = is_array($token);
197
If we are using \r\n newline characters, the \r and \n are sometimes
198
split over two tokens. This normally occurs after comments. We need
199
to merge these two characters together so that our line endings are
200
consistent for all lines.
203
if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
204
if (isset($tokens[($stackPtr + 1)]) === true && is_array($tokens[($stackPtr + 1)]) === true && $tokens[($stackPtr + 1)][1][0] === "\n") {
207
if ($tokens[($stackPtr + 1)][1] === "\n") {
208
// The next token's content has been merged into this token,
209
// so we can skip it.
212
$tokens[($stackPtr + 1)][1] = substr($tokens[($stackPtr + 1)][1], 1);
218
If this is a double quoted string, PHP will tokenise the whole
219
thing which causes problems with the scope map when braces are
220
within the string. So we need to merge the tokens together to
221
provide a single string.
224
if ($tokenIsArray === false && $token === '"') {
226
$nestedVars = array();
227
for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
228
$subTokenIsArray = is_array($tokens[$i]);
230
if ($subTokenIsArray === true) {
231
$tokenContent .= $tokens[$i][1];
232
if ($tokens[$i][1] === '{') {
236
$tokenContent .= $tokens[$i];
237
if ($tokens[$i] === '}') {
238
array_pop($nestedVars);
242
if ($subTokenIsArray === false && $tokens[$i] === '"' && empty($nestedVars) === true) {
243
// We found the other end of the double quoted string.
250
// Convert each line within the double quoted string to a
251
// new token, so it conforms with other multiple line tokens.
252
$tokenLines = explode($eolChar, $tokenContent);
253
$numLines = count($tokenLines);
256
for ($j = 0; $j < $numLines; $j++) {
257
$newToken['content'] = $tokenLines[$j];
258
if ($j === ($numLines - 1)) {
259
if ($tokenLines[$j] === '') {
263
$newToken['content'] .= $eolChar;
266
$newToken['code'] = T_DOUBLE_QUOTED_STRING;
267
$newToken['type'] = 'T_DOUBLE_QUOTED_STRING';
268
$finalTokens[$newStackPtr] = $newToken;
272
// Continue, as we're done with this token.
277
If this is a heredoc, PHP will tokenise the whole
278
thing which causes problems when heredocs don't
279
contain real PHP code, and they almost never do.
280
We want to leave the start and end heredoc tokens
284
if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
285
// Add the start heredoc token to the final array.
286
$finalTokens[$newStackPtr] = PHP_CodeSniffer::standardiseToken($token);
290
for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
291
$subTokenIsArray = is_array($tokens[$i]);
292
if ($subTokenIsArray === true && $tokens[$i][0] === T_END_HEREDOC) {
293
// We found the other end of the heredoc.
297
if ($subTokenIsArray === true) {
298
$tokenContent .= $tokens[$i][1];
300
$tokenContent .= $tokens[$i];
306
// Convert each line within the heredoc to a
307
// new token, so it conforms with other multiple line tokens.
308
$tokenLines = explode($eolChar, $tokenContent);
309
$numLines = count($tokenLines);
312
for ($j = 0; $j < $numLines; $j++) {
313
$newToken['content'] = $tokenLines[$j];
314
if ($j === ($numLines - 1)) {
315
if ($tokenLines[$j] === '') {
319
$newToken['content'] .= $eolChar;
322
$newToken['code'] = T_HEREDOC;
323
$newToken['type'] = 'T_HEREDOC';
324
$finalTokens[$newStackPtr] = $newToken;
328
// Add the end heredoc token to the final array.
329
$finalTokens[$newStackPtr] = PHP_CodeSniffer::standardiseToken($tokens[$stackPtr]);
332
// Continue, as we're done with this token.
337
If this token has newlines in its content, split each line up
338
and create a new token for each line. We do this so it's easier
339
to ascertain where errors occur on a line.
340
Note that $token[1] is the token's content.
343
if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
344
$tokenLines = explode($eolChar, $token[1]);
345
$numLines = count($tokenLines);
346
$tokenName = token_name($token[0]);
348
for ($i = 0; $i < $numLines; $i++) {
349
$newToken['content'] = $tokenLines[$i];
350
if ($i === ($numLines - 1)) {
351
if ($tokenLines[$i] === '') {
355
$newToken['content'] .= $eolChar;
358
$newToken['type'] = $tokenName;
359
$newToken['code'] = $token[0];
360
$finalTokens[$newStackPtr] = $newToken;
364
$newToken = PHP_CodeSniffer::standardiseToken($token);
366
// This is a special condition for T_ARRAY tokens used to
367
// type hint function arguments as being arrays. We want to keep
368
// the parenthesis map clean, so let's tag these tokens as
370
if ($newToken['code'] === T_ARRAY) {
371
// Recalculate number of tokens.
372
$numTokens = count($tokens);
373
for ($i = $stackPtr; $i < $numTokens; $i++) {
374
if (is_array($tokens[$i]) === false) {
375
if ($tokens[$i] === '(') {
378
} else if ($tokens[$i][0] === T_VARIABLE) {
379
$newToken['code'] = T_ARRAY_HINT;
380
$newToken['type'] = 'T_ARRAY_HINT';
386
$finalTokens[$newStackPtr] = $newToken;
393
}//end tokenizeString()