Subversion Repositories Applications.framework

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5 aurelien 1
<?php
2
/**
3
 * Tokenizes PHP code.
4
 *
5
 * PHP version 5
6
 *
7
 * @category  PHP
8
 * @package   PHP_CodeSniffer
9
 * @author    Greg Sherwood <gsherwood@squiz.net>
10
 * @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
11
 * @license   http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
12
 * @version   CVS: $Id: PHP.php,v 1.6 2009/01/20 23:55:15 squiz Exp $
13
 * @link      http://pear.php.net/package/PHP_CodeSniffer
14
 */
15
 
16
/**
 * Tokenizes PHP code.
 *
 * Wraps PHP's native token_get_all() and post-processes its output so that
 * double quoted strings and heredoc bodies become single-purpose tokens and
 * every token that spans several lines is split into one token per line.
 *
 * @category  PHP
 * @package   PHP_CodeSniffer
 * @author    Greg Sherwood <gsherwood@squiz.net>
 * @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
 * @license   http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
 * @version   Release: 1.2.0RC1
 * @link      http://pear.php.net/package/PHP_CodeSniffer
 */
class PHP_CodeSniffer_Tokenizers_PHP
{

    /**
     * A list of tokens that are allowed to open a scope.
     *
     * This array also contains information about what kind of token the scope
     * opener uses to open and close the scope ('start' and 'end'), if the
     * token strictly requires an opener ('strict'), if the token can share a
     * scope closer ('shared'), and who it can be shared with ('with').
     * An example of a token that shares a scope closer is a CASE scope.
     *
     * @var array
     */
    public $scopeOpeners = array(
                            T_IF            => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => false,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_TRY           => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => true,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_CATCH         => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => true,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_ELSE          => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => false,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_ELSEIF        => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => false,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_FOR           => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => false,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_FOREACH       => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => false,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_INTERFACE     => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => true,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_FUNCTION      => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => false,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_CLASS         => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => true,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_WHILE         => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => false,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_DO            => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => true,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_SWITCH        => array(
                                                'start'  => T_OPEN_CURLY_BRACKET,
                                                'end'    => T_CLOSE_CURLY_BRACKET,
                                                'strict' => true,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                            T_CASE          => array(
                                                'start'  => T_COLON,
                                                'end'    => T_BREAK,
                                                'strict' => true,
                                                'shared' => true,
                                                'with'   => array(
                                                             T_DEFAULT,
                                                             T_CASE,
                                                             T_SWITCH,
                                                            ),
                                               ),
                            T_DEFAULT       => array(
                                                'start'  => T_COLON,
                                                'end'    => T_BREAK,
                                                'strict' => true,
                                                'shared' => true,
                                                'with'   => array(
                                                             T_CASE,
                                                             T_SWITCH,
                                                            ),
                                               ),
                            T_START_HEREDOC => array(
                                                'start'  => T_START_HEREDOC,
                                                'end'    => T_END_HEREDOC,
                                                'strict' => true,
                                                'shared' => false,
                                                'with'   => array(),
                                               ),
                           );

    /**
     * A list of tokens that end the scope.
     *
     * This array is just a unique collection of the end tokens
     * from the $scopeOpeners array. The data is duplicated here to
     * save time during parsing of the file.
     *
     * @var array
     */
    public $endScopeTokens = array(
                              T_CLOSE_CURLY_BRACKET,
                              T_BREAK,
                              T_END_HEREDOC,
                             );


    /**
     * Creates an array of tokens when given some PHP code.
     *
     * Starts by using token_get_all() but does a lot of extra processing
     * to insert information about the context of the token. Each returned
     * token is an array with 'code', 'type' and 'content' entries, and no
     * returned token spans more than one line.
     *
     * @param string $string  The string to tokenize.
     * @param string $eolChar The EOL character to use for splitting strings.
     *                        Defaults to a real newline character.
     *
     * @return array
     */
    public function tokenizeString($string, $eolChar="\n")
    {
        // Error suppression is kept from the original implementation;
        // presumably it hides warnings raised while lexing malformed code.
        $tokens      = @token_get_all($string);
        $finalTokens = array();

        $newStackPtr = 0;
        $numTokens   = count($tokens);
        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
            $token        = $tokens[$stackPtr];
            $tokenIsArray = is_array($token);

            /*
                If we are using \r\n newline characters, the \r and \n are sometimes
                split over two tokens. This normally occurs after comments. We need
                to merge these two characters together so that our line endings are
                consistent for all lines.
            */

            if ($tokenIsArray === true && substr($token[1], -1) === "\r") {
                if (isset($tokens[($stackPtr + 1)]) === true
                    && is_array($tokens[($stackPtr + 1)]) === true
                    && $tokens[($stackPtr + 1)][1][0] === "\n"
                ) {
                    $token[1] .= "\n";

                    if ($tokens[($stackPtr + 1)][1] === "\n") {
                        // The next token's content has been merged into this token,
                        // so we can skip it.
                        $stackPtr++;
                    } else {
                        // Only the leading \n was merged; leave the rest of the
                        // next token's content in place.
                        $tokens[($stackPtr + 1)][1]
                            = substr($tokens[($stackPtr + 1)][1], 1);
                    }
                }
            }//end if

            /*
                If this is a double quoted string, PHP will tokenise the whole
                thing which causes problems with the scope map when braces are
                within the string. So we need to merge the tokens together to
                provide a single string.
            */

            if ($tokenIsArray === false && $token === '"') {
                $tokenContent = '"';
                $nestedVars   = array();
                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                    $subTokenIsArray = is_array($tokens[$i]);

                    if ($subTokenIsArray === true) {
                        $tokenContent .= $tokens[$i][1];
                        if ($tokens[$i][1] === '{'
                            && $tokens[$i][0] !== T_ENCAPSED_AND_WHITESPACE
                        ) {
                            // Opening brace of a nested {$var} expression.
                            $nestedVars[] = $i;
                        }
                    } else {
                        $tokenContent .= $tokens[$i];
                        if ($tokens[$i] === '}') {
                            array_pop($nestedVars);
                        }
                    }

                    if ($subTokenIsArray === false
                        && $tokens[$i] === '"'
                        && empty($nestedVars) === true
                    ) {
                        // We found the other end of the double quoted string.
                        break;
                    }
                }//end for

                $stackPtr = $i;

                // Convert each line within the double quoted string to a
                // new token, so it conforms with other multiple line tokens.
                $lineTokens = $this->_splitIntoLineTokens(
                    $tokenContent,
                    $eolChar,
                    T_DOUBLE_QUOTED_STRING,
                    'T_DOUBLE_QUOTED_STRING'
                );

                foreach ($lineTokens as $lineToken) {
                    $finalTokens[$newStackPtr] = $lineToken;
                    $newStackPtr++;
                }

                // Continue, as we're done with this token.
                continue;
            }//end if

            /*
                If this is a heredoc, PHP will tokenise the whole thing,
                which causes problems because heredoc bodies almost never
                contain real PHP code. We want to leave the start and end
                heredoc tokens alone though.
            */

            if ($tokenIsArray === true && $token[0] === T_START_HEREDOC) {
                // Add the start heredoc token to the final array.
                $finalTokens[$newStackPtr]
                    = PHP_CodeSniffer::standardiseToken($token);
                $newStackPtr++;

                $tokenContent = '';
                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                    $subTokenIsArray = is_array($tokens[$i]);
                    if ($subTokenIsArray === true
                        && $tokens[$i][0] === T_END_HEREDOC
                    ) {
                        // We found the other end of the heredoc.
                        break;
                    }

                    if ($subTokenIsArray === true) {
                        $tokenContent .= $tokens[$i][1];
                    } else {
                        $tokenContent .= $tokens[$i];
                    }
                }

                $stackPtr = $i;

                // Convert each line within the heredoc to a
                // new token, so it conforms with other multiple line tokens.
                $lineTokens = $this->_splitIntoLineTokens(
                    $tokenContent,
                    $eolChar,
                    T_HEREDOC,
                    'T_HEREDOC'
                );

                foreach ($lineTokens as $lineToken) {
                    $finalTokens[$newStackPtr] = $lineToken;
                    $newStackPtr++;
                }

                // Add the end heredoc token to the final array. If the end
                // of the token list was reached without finding one, there
                // is no token at $stackPtr and nothing to add.
                if (isset($tokens[$stackPtr]) === true) {
                    $finalTokens[$newStackPtr]
                        = PHP_CodeSniffer::standardiseToken($tokens[$stackPtr]);
                    $newStackPtr++;
                }

                // Continue, as we're done with this token.
                continue;
            }//end if

            /*
                If this token has newlines in its content, split each line up
                and create a new token for each line. We do this so it's easier
                to ascertain where errors occur on a line.
                Note that $token[1] is the token's content.
            */

            if ($tokenIsArray === true && strpos($token[1], $eolChar) !== false) {
                $lineTokens = $this->_splitIntoLineTokens(
                    $token[1],
                    $eolChar,
                    $token[0],
                    token_name($token[0])
                );

                foreach ($lineTokens as $lineToken) {
                    $finalTokens[$newStackPtr] = $lineToken;
                    $newStackPtr++;
                }
            } else {
                $newToken = PHP_CodeSniffer::standardiseToken($token);

                // This is a special condition for T_ARRAY tokens used to
                // type hint function arguments as being arrays. We want to keep
                // the parenthesis map clean, so let's tag these tokens as
                // T_ARRAY_HINT.
                if ($newToken['code'] === T_ARRAY) {
                    // Look ahead: an opening parenthesis first means a real
                    // array() construct, a variable first means a type hint.
                    for ($i = $stackPtr; $i < $numTokens; $i++) {
                        if (is_array($tokens[$i]) === false) {
                            if ($tokens[$i] === '(') {
                                break;
                            }
                        } else if ($tokens[$i][0] === T_VARIABLE) {
                            $newToken['code'] = T_ARRAY_HINT;
                            $newToken['type'] = 'T_ARRAY_HINT';
                            break;
                        }
                    }
                }

                $finalTokens[$newStackPtr] = $newToken;
                $newStackPtr++;
            }//end if
        }//end for

        return $finalTokens;

    }//end tokenizeString()


    /**
     * Splits some token content into one token per line.
     *
     * Every line except the last has the EOL character re-appended to its
     * content. The last line is dropped when it is empty, which is the case
     * when the content ends with the EOL character (or is empty).
     *
     * @param string $content The content to split.
     * @param string $eolChar The EOL character to split the content on.
     * @param mixed  $code    The token code to give each new token.
     * @param string $type    The token type name to give each new token.
     *
     * @return array A list of tokens, each an array with 'content',
     *               'code' and 'type' entries.
     */
    private function _splitIntoLineTokens($content, $eolChar, $code, $type)
    {
        $lineTokens = array();
        $lines      = explode($eolChar, $content);
        $lastLine   = (count($lines) - 1);

        foreach ($lines as $lineNum => $line) {
            if ($lineNum === $lastLine) {
                if ($line === '') {
                    break;
                }
            } else {
                $line .= $eolChar;
            }

            $lineTokens[] = array(
                             'content' => $line,
                             'code'    => $code,
                             'type'    => $type,
                            );
        }

        return $lineTokens;

    }//end _splitIntoLineTokens()


    /**
     * Performs additional processing after main tokenizing.
     *
     * This tokenizer performs no additional processing; the method
     * body is empty and the token array is left untouched.
     *
     * @param array  &$tokens The array of tokens to process.
     * @param string $eolChar The EOL character to use for splitting strings.
     *
     * @return void
     */
    public function processAdditional(&$tokens, $eolChar)
    {

    }//end processAdditional()


}//end class
428
 
429
?>