~ubuntu-branches/ubuntu/trusty/php-codesniffer/trusty

« back to all changes in this revision

Viewing changes to PHP_CodeSniffer-1.1.0/CodeSniffer/Tokenizers/PHP.php

  • Committer: Bazaar Package Importer
  • Author(s): Jack Bates
  • Date: 2008-10-01 17:39:43 UTC
  • mfrom: (1.1.1 upstream)
  • Revision ID: james.westby@ubuntu.com-20081001173943-2dy06n1e8zwyw1o8
Tags: 1.1.0-1
* New upstream release
* Acknowledge NMU, thanks Jan

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
<?php
 
2
/**
 
3
 * Tokenizes PHP code.
 
4
 *
 
5
 * PHP version 5
 
6
 *
 
7
 * @category  PHP
 
8
 * @package   PHP_CodeSniffer
 
9
 * @author    Greg Sherwood <gsherwood@squiz.net>
 
10
 * @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
 
11
 * @license   http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
 
12
 * @version   CVS: $Id: PHP.php,v 1.2 2008/06/20 03:33:58 squiz Exp $
 
13
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 
14
 */
 
15
 
 
16
/**
 
17
 * Tokenizes PHP code.
 
18
 *
 
19
 * @category  PHP
 
20
 * @package   PHP_CodeSniffer
 
21
 * @author    Greg Sherwood <gsherwood@squiz.net>
 
22
 * @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
 
23
 * @license   http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
 
24
 * @version   Release: 1.1.0
 
25
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 
26
 */
 
27
class PHP_CodeSniffer_Tokenizers_PHP
 
28
{
 
29
 
 
30
    /**
 
31
     * A list of tokens that are allowed to open a scope.
 
32
     *
 
33
     * This array also contains information about what kind of token the scope
 
34
     * opener uses to open and close the scope, if the token strictly requires
 
35
     * an opener, if the token can share a scope closer, and who it can be shared
 
36
     * with. An example of a token that shares a scope closer is a CASE scope.
 
37
     *
 
38
     * @var array
 
39
     */
 
40
    public $scopeOpeners = array(
 
41
                            T_IF            => array(
 
42
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
43
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
44
                                                'strict' => false,
 
45
                                                'shared' => false,
 
46
                                                'with'   => array(),
 
47
                                               ),
 
48
                            T_TRY           => array(
 
49
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
50
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
51
                                                'strict' => true,
 
52
                                                'shared' => false,
 
53
                                                'with'   => array(),
 
54
                                               ),
 
55
                            T_CATCH         => array(
 
56
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
57
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
58
                                                'strict' => true,
 
59
                                                'shared' => false,
 
60
                                                'with'   => array(),
 
61
                                               ),
 
62
                            T_ELSE          => array(
 
63
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
64
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
65
                                                'strict' => false,
 
66
                                                'shared' => false,
 
67
                                                'with'   => array(),
 
68
                                               ),
 
69
                            T_ELSEIF        => array(
 
70
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
71
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
72
                                                'strict' => false,
 
73
                                                'shared' => false,
 
74
                                                'with'   => array(),
 
75
                                               ),
 
76
                            T_FOR           => array(
 
77
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
78
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
79
                                                'strict' => false,
 
80
                                                'shared' => false,
 
81
                                                'with'   => array(),
 
82
                                               ),
 
83
                            T_FOREACH       => array(
 
84
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
85
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
86
                                                'strict' => false,
 
87
                                                'shared' => false,
 
88
                                                'with'   => array(),
 
89
                                               ),
 
90
                            T_INTERFACE     => array(
 
91
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
92
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
93
                                                'strict' => true,
 
94
                                                'shared' => false,
 
95
                                                'with'   => array(),
 
96
                                               ),
 
97
                            T_FUNCTION      => array(
 
98
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
99
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
100
                                                'strict' => false,
 
101
                                                'shared' => false,
 
102
                                                'with'   => array(),
 
103
                                               ),
 
104
                            T_CLASS         => array(
 
105
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
106
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
107
                                                'strict' => true,
 
108
                                                'shared' => false,
 
109
                                                'with'   => array(),
 
110
                                               ),
 
111
                            T_WHILE         => array(
 
112
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
113
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
114
                                                'strict' => false,
 
115
                                                'shared' => false,
 
116
                                                'with'   => array(),
 
117
                                               ),
 
118
                            T_DO            => array(
 
119
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
120
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
121
                                                'strict' => true,
 
122
                                                'shared' => false,
 
123
                                                'with'   => array(),
 
124
                                               ),
 
125
                            T_SWITCH        => array(
 
126
                                                'start'  => T_OPEN_CURLY_BRACKET,
 
127
                                                'end'    => T_CLOSE_CURLY_BRACKET,
 
128
                                                'strict' => true,
 
129
                                                'shared' => false,
 
130
                                                'with'   => array(),
 
131
                                               ),
 
132
                            T_CASE          => array(
 
133
                                                'start'  => T_COLON,
 
134
                                                'end'    => T_BREAK,
 
135
                                                'strict' => true,
 
136
                                                'shared' => true,
 
137
                                                'with'   => array(
 
138
                                                             T_DEFAULT,
 
139
                                                             T_CASE,
 
140
                                                            ),
 
141
                                               ),
 
142
                            T_DEFAULT       => array(
 
143
                                                'start'  => T_COLON,
 
144
                                                'end'    => T_BREAK,
 
145
                                                'strict' => true,
 
146
                                                'shared' => true,
 
147
                                                'with'   => array(T_CASE),
 
148
                                               ),
 
149
                            T_START_HEREDOC => array(
 
150
                                                'start'  => T_START_HEREDOC,
 
151
                                                'end'    => T_END_HEREDOC,
 
152
                                                'strict' => true,
 
153
                                                'shared' => false,
 
154
                                                'with'   => array(),
 
155
                                               ),
 
156
                           );
 
157
 
 
158
    /**
 
159
     * A list of tokens that end the scope.
 
160
     *
 
161
     * This array is just a unique collection of the end tokens
 
162
     * from the $scopeOpeners array. The data is duplicated here to
 
163
     * save time during parsing of the file.
 
164
     *
 
165
     * @var array
 
166
     */
 
167
    public $endScopeTokens = array(
 
168
                              T_CLOSE_CURLY_BRACKET,
 
169
                              T_BREAK,
 
170
                              T_END_HEREDOC,
 
171
                             );
 
172
 
 
173
 
 
174
    /**
 
175
     * Creates an array of tokens when given some PHP code.
 
176
     *
 
177
     * Starts by using token_get_all() but does a lot of extra processing
 
178
     * to insert information about the context of the token.
 
179
     *
 
180
     * @param string $string  The string to tokenize.
 
181
     * @param string $eolChar The EOL character to use for splitting strings.
 
182
     *
 
183
     * @return array
 
184
     */
 
185
    public function tokenizeString($string, $eolChar='\n')
 
186
    {
 
187
        $tokens      = @token_get_all($string);
 
188
        $finalTokens = array();
 
189
 
 
190
        $newStackPtr = 0;
 
191
        $numTokens   = count($tokens);
 
192
        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
 
193
            $token        = $tokens[$stackPtr];
 
194
            $tokenIsArray = is_array($token);
 
195
 
 
196
            /*
 
197
                If we are using \r\n newline characters, the \r and \n are sometimes
 
198
                split over two tokens. This normally occurs after comments. We need
 
199
                to merge these two characters together so that our line endings are
 
200
                consistent for all lines.
 
201
            */
 
202
 
 
203
            if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
 
204
                if (isset($tokens[($stackPtr + 1)]) === true && is_array($tokens[($stackPtr + 1)]) === true && $tokens[($stackPtr + 1)][1][0] === "\n") {
 
205
                    $token[1] .= "\n";
 
206
 
 
207
                    if ($tokens[($stackPtr + 1)][1] === "\n") {
 
208
                        // The next token's content has been merged into this token,
 
209
                        // so we can skip it.
 
210
                        $stackPtr++;
 
211
                    } else {
 
212
                        $tokens[($stackPtr + 1)][1] = substr($tokens[($stackPtr + 1)][1], 1);
 
213
                    }
 
214
                }
 
215
            }//end if
 
216
 
 
217
            /*
 
218
                If this is a double quoted string, PHP will tokenise the whole
 
219
                thing which causes problems with the scope map when braces are
 
220
                within the string. So we need to merge the tokens together to
 
221
                provide a single string.
 
222
            */
 
223
 
 
224
            if ($tokenIsArray === false && $token === '"') {
 
225
                $tokenContent = '"';
 
226
                $nestedVars   = array();
 
227
                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
 
228
                    $subTokenIsArray = is_array($tokens[$i]);
 
229
 
 
230
                    if ($subTokenIsArray === true) {
 
231
                        $tokenContent .= $tokens[$i][1];
 
232
                        if ($tokens[$i][1] === '{') {
 
233
                            $nestedVars[] = $i;
 
234
                        }
 
235
                    } else {
 
236
                        $tokenContent .= $tokens[$i];
 
237
                        if ($tokens[$i] === '}') {
 
238
                            array_pop($nestedVars);
 
239
                        }
 
240
                    }
 
241
 
 
242
                    if ($subTokenIsArray === false && $tokens[$i] === '"' && empty($nestedVars) === true) {
 
243
                        // We found the other end of the double quoted string.
 
244
                        break;
 
245
                    }
 
246
                }
 
247
 
 
248
                $stackPtr = $i;
 
249
 
 
250
                // Convert each line within the double quoted string to a
 
251
                // new token, so it conforms with other multiple line tokens.
 
252
                $tokenLines = explode($eolChar, $tokenContent);
 
253
                $numLines   = count($tokenLines);
 
254
                $newToken   = array();
 
255
 
 
256
                for ($j = 0; $j < $numLines; $j++) {
 
257
                    $newToken['content'] = $tokenLines[$j];
 
258
                    if ($j === ($numLines - 1)) {
 
259
                        if ($tokenLines[$j] === '') {
 
260
                            break;
 
261
                        }
 
262
                    } else {
 
263
                        $newToken['content'] .= $eolChar;
 
264
                    }
 
265
 
 
266
                    $newToken['code']          = T_DOUBLE_QUOTED_STRING;
 
267
                    $newToken['type']          = 'T_DOUBLE_QUOTED_STRING';
 
268
                    $finalTokens[$newStackPtr] = $newToken;
 
269
                    $newStackPtr++;
 
270
                }
 
271
 
 
272
                // Continue, as we're done with this token.
 
273
                continue;
 
274
            }//end if
 
275
 
 
276
            /*
 
277
                If this is a heredoc, PHP will tokenise the whole
 
278
                thing which causes problems when heredocs don't
 
279
                contain real PHP code, which is almost never.
 
280
                We want to leave the start and end heredoc tokens
 
281
                alone though.
 
282
            */
 
283
 
 
284
            if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
 
285
                // Add the start heredoc token to the final array.
 
286
                $finalTokens[$newStackPtr] = PHP_CodeSniffer::standardiseToken($token);
 
287
                $newStackPtr++;
 
288
 
 
289
                $tokenContent = '';
 
290
                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
 
291
                    $subTokenIsArray = is_array($tokens[$i]);
 
292
                    if ($subTokenIsArray === true && $tokens[$i][0] === T_END_HEREDOC) {
 
293
                        // We found the other end of the heredoc.
 
294
                        break;
 
295
                    }
 
296
 
 
297
                    if ($subTokenIsArray === true) {
 
298
                        $tokenContent .= $tokens[$i][1];
 
299
                    } else {
 
300
                        $tokenContent .= $tokens[$i];
 
301
                    }
 
302
                }
 
303
 
 
304
                $stackPtr = $i;
 
305
 
 
306
                // Convert each line within the heredoc to a
 
307
                // new token, so it conforms with other multiple line tokens.
 
308
                $tokenLines = explode($eolChar, $tokenContent);
 
309
                $numLines   = count($tokenLines);
 
310
                $newToken   = array();
 
311
 
 
312
                for ($j = 0; $j < $numLines; $j++) {
 
313
                    $newToken['content'] = $tokenLines[$j];
 
314
                    if ($j === ($numLines - 1)) {
 
315
                        if ($tokenLines[$j] === '') {
 
316
                            break;
 
317
                        }
 
318
                    } else {
 
319
                        $newToken['content'] .= $eolChar;
 
320
                    }
 
321
 
 
322
                    $newToken['code']          = T_HEREDOC;
 
323
                    $newToken['type']          = 'T_HEREDOC';
 
324
                    $finalTokens[$newStackPtr] = $newToken;
 
325
                    $newStackPtr++;
 
326
                }
 
327
 
 
328
                // Add the end heredoc token to the final array.
 
329
                $finalTokens[$newStackPtr] = PHP_CodeSniffer::standardiseToken($tokens[$stackPtr]);
 
330
                $newStackPtr++;
 
331
 
 
332
                // Continue, as we're done with this token.
 
333
                continue;
 
334
            }//end if
 
335
 
 
336
            /*
 
337
                If this token has newlines in its content, split each line up
 
338
                and create a new token for each line. We do this so it's easier
 
339
                to asertain where errors occur on a line.
 
340
                Note that $token[1] is the token's content.
 
341
            */
 
342
 
 
343
            if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
 
344
                $tokenLines = explode($eolChar, $token[1]);
 
345
                $numLines   = count($tokenLines);
 
346
                $tokenName  = token_name($token[0]);
 
347
 
 
348
                for ($i = 0; $i < $numLines; $i++) {
 
349
                    $newToken['content'] = $tokenLines[$i];
 
350
                    if ($i === ($numLines - 1)) {
 
351
                        if ($tokenLines[$i] === '') {
 
352
                            break;
 
353
                        }
 
354
                    } else {
 
355
                        $newToken['content'] .= $eolChar;
 
356
                    }
 
357
 
 
358
                    $newToken['type']          = $tokenName;
 
359
                    $newToken['code']          = $token[0];
 
360
                    $finalTokens[$newStackPtr] = $newToken;
 
361
                    $newStackPtr++;
 
362
                }
 
363
            } else {
 
364
                $newToken = PHP_CodeSniffer::standardiseToken($token);
 
365
 
 
366
                // This is a special condition for T_ARRAY tokens use to
 
367
                // type hint function arguments as being arrays. We want to keep
 
368
                // the parenthsis map clean, so let's tag these tokens as
 
369
                // T_ARRAY_HINT.
 
370
                if ($newToken['code'] === T_ARRAY) {
 
371
                    // Recalculate number of tokens.
 
372
                    $numTokens = count($tokens);
 
373
                    for ($i = $stackPtr; $i < $numTokens; $i++) {
 
374
                        if (is_array($tokens[$i]) === false) {
 
375
                            if ($tokens[$i] === '(') {
 
376
                                break;
 
377
                            }
 
378
                        } else if ($tokens[$i][0] === T_VARIABLE) {
 
379
                            $newToken['code'] = T_ARRAY_HINT;
 
380
                            $newToken['type'] = 'T_ARRAY_HINT';
 
381
                            break;
 
382
                        }
 
383
                    }
 
384
                }
 
385
 
 
386
                $finalTokens[$newStackPtr] = $newToken;
 
387
                $newStackPtr++;
 
388
            }//end if
 
389
        }//end for
 
390
 
 
391
        return $finalTokens;
 
392
 
 
393
    }//end tokenizeString()
 
394
 
 
395
 
 
396
}//end class
 
397
 
 
398
?>