Subversion Repositories Applications.framework

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5 aurelien 1
<?php
2
/**
3
 * Tokenizes JS code.
4
 *
5
 * PHP version 5
6
 *
7
 * @category  PHP
8
 * @package   PHP_CodeSniffer
9
 * @author    Greg Sherwood <gsherwood@squiz.net>
10
 * @author    Marc McIntyre <mmcintyre@squiz.net>
11
 * @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
12
 * @license   http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
34 aurelien 13
 * @version   CVS: $Id: JS.php 34 2009-04-09 07:34:39Z aurelien $
5 aurelien 14
 * @link      http://pear.php.net/package/PHP_CodeSniffer
15
 */
16
 
17
/**
18
 * Tokenizes JS code.
19
 *
20
 * @category  PHP
21
 * @package   PHP_CodeSniffer
22
 * @author    Greg Sherwood <gsherwood@squiz.net>
23
 * @copyright 2006 Squiz Pty Ltd (ABN 77 084 670 600)
24
 * @license   http://matrix.squiz.net/developer/tools/php_cs/licence BSD Licence
25
 * @version   Release: 1.2.0RC1
26
 * @link      http://pear.php.net/package/PHP_CodeSniffer
27
 */
28
class PHP_CodeSniffer_Tokenizers_JS
29
{
30
 
31
    /**
32
     * A list of tokens that are allowed to open a scope.
33
     *
34
     * This array also contains information about what kind of token the scope
35
     * opener uses to open and close the scope, if the token strictly requires
36
     * an opener, if the token can share a scope closer, and who it can be shared
37
     * with. An example of a token that shares a scope closer is a CASE scope.
38
     *
39
     * @var array
40
     */
41
    public $scopeOpeners = array(
42
                            T_IF       => array(
43
                                           'start'  => T_OPEN_CURLY_BRACKET,
44
                                           'end'    => T_CLOSE_CURLY_BRACKET,
45
                                           'strict' => false,
46
                                           'shared' => false,
47
                                           'with'   => array(),
48
                                          ),
49
                            T_TRY      => array(
50
                                           'start'  => T_OPEN_CURLY_BRACKET,
51
                                           'end'    => T_CLOSE_CURLY_BRACKET,
52
                                           'strict' => true,
53
                                           'shared' => false,
54
                                           'with'   => array(),
55
                                          ),
56
                            T_CATCH    => array(
57
                                           'start'  => T_OPEN_CURLY_BRACKET,
58
                                           'end'    => T_CLOSE_CURLY_BRACKET,
59
                                           'strict' => true,
60
                                           'shared' => false,
61
                                           'with'   => array(),
62
                                          ),
63
                            T_ELSE     => array(
64
                                           'start'  => T_OPEN_CURLY_BRACKET,
65
                                           'end'    => T_CLOSE_CURLY_BRACKET,
66
                                           'strict' => false,
67
                                           'shared' => false,
68
                                           'with'   => array(),
69
                                          ),
70
                            T_FOR      => array(
71
                                           'start'  => T_OPEN_CURLY_BRACKET,
72
                                           'end'    => T_CLOSE_CURLY_BRACKET,
73
                                           'strict' => false,
74
                                           'shared' => false,
75
                                           'with'   => array(),
76
                                          ),
77
                            T_FUNCTION => array(
78
                                           'start'  => T_OPEN_CURLY_BRACKET,
79
                                           'end'    => T_CLOSE_CURLY_BRACKET,
80
                                           'strict' => false,
81
                                           'shared' => false,
82
                                           'with'   => array(),
83
                                          ),
84
                            T_WHILE    => array(
85
                                           'start'  => T_OPEN_CURLY_BRACKET,
86
                                           'end'    => T_CLOSE_CURLY_BRACKET,
87
                                           'strict' => false,
88
                                           'shared' => false,
89
                                           'with'   => array(),
90
                                          ),
91
                            T_DO       => array(
92
                                           'start'  => T_OPEN_CURLY_BRACKET,
93
                                           'end'    => T_CLOSE_CURLY_BRACKET,
94
                                           'strict' => true,
95
                                           'shared' => false,
96
                                           'with'   => array(),
97
                                          ),
98
                            T_SWITCH   => array(
99
                                           'start'  => T_OPEN_CURLY_BRACKET,
100
                                           'end'    => T_CLOSE_CURLY_BRACKET,
101
                                           'strict' => true,
102
                                           'shared' => false,
103
                                           'with'   => array(),
104
                                          ),
105
                            T_CASE     => array(
106
                                           'start'  => T_COLON,
107
                                           'end'    => T_BREAK,
108
                                           'strict' => true,
109
                                           'shared' => true,
110
                                           'with'   => array(
111
                                                        T_DEFAULT,
112
                                                        T_CASE,
113
                                                        T_SWITCH,
114
                                                       ),
115
                                          ),
116
                            T_DEFAULT  => array(
117
                                           'start'  => T_COLON,
118
                                           'end'    => T_BREAK,
119
                                           'strict' => true,
120
                                           'shared' => true,
121
                                           'with'   => array(
122
                                                        T_CASE,
123
                                                        T_SWITCH,
124
                                                       ),
125
                                          ),
126
                           );
127
 
128
    /**
129
     * A list of tokens that end the scope.
130
     *
131
     * This array is just a unique collection of the end tokens
132
     * from the $scopeOpeners array. The data is duplicated here to
133
     * save time during parsing of the file.
134
     *
135
     * @var array
136
     */
137
    public $endScopeTokens = array(
138
                              T_CLOSE_CURLY_BRACKET,
139
                              T_BREAK,
140
                             );
141
 
142
    /**
143
     * A list of special JS tokens and their types.
144
     *
145
     * @var array
146
     */
147
    protected $tokenValues = array(
148
                              'function'  => 'T_FUNCTION',
149
                              'prototype' => 'T_PROTOTYPE',
150
                              'try'       => 'T_TRY',
151
                              'catch'     => 'T_CATCH',
152
                              'return'    => 'T_RETURN',
153
                              'break'     => 'T_BREAK',
154
                              'switch'    => 'T_SWITCH',
155
                              'continue'  => 'T_CONTINUE',
156
                              'if'        => 'T_IF',
157
                              'else'      => 'T_ELSE',
158
                              'do'        => 'T_DO',
159
                              'while'     => 'T_WHILE',
160
                              'for'       => 'T_FOR',
161
                              'var'       => 'T_VAR',
162
                              'case'      => 'T_CASE',
163
                              'default'   => 'T_DEFAULT',
164
                              'true'      => 'T_TRUE',
165
                              'false'     => 'T_FALSE',
166
                              'null'      => 'T_NULL',
167
                              'this'      => 'T_THIS',
168
                              '('         => 'T_OPEN_PARENTHESIS',
169
                              ')'         => 'T_CLOSE_PARENTHESIS',
170
                              '{'         => 'T_OPEN_CURLY_BRACKET',
171
                              '}'         => 'T_CLOSE_CURLY_BRACKET',
172
                              '['         => 'T_OPEN_SQUARE_BRACKET',
173
                              ']'         => 'T_CLOSE_SQUARE_BRACKET',
174
                              '?'         => 'T_INLINE_THEN',
175
                              '.'         => 'T_OBJECT_OPERATOR',
176
                              '+'         => 'T_PLUS',
177
                              '-'         => 'T_MINUS',
178
                              '*'         => 'T_MULTIPLY',
179
                              '%'         => 'T_MODULUS',
180
                              '/'         => 'T_DIVIDE',
181
                              ','         => 'T_COMMA',
182
                              ';'         => 'T_SEMICOLON',
183
                              ':'         => 'T_COLON',
184
                              '<'         => 'T_LESS_THAN',
185
                              '>'         => 'T_GREATER_THAN',
186
                              '<='        => 'T_IS_SMALLER_OR_EQUAL',
187
                              '>='        => 'T_IS_GREATER_OR_EQUAL',
188
                              '!'         => 'T_BOOLEAN_NOT',
189
                              '!='        => 'T_IS_NOT_EQUAL',
190
                              '!=='       => 'T_IS_NOT_IDENTICAL',
191
                              '='         => 'T_EQUAL',
192
                              '=='        => 'T_IS_EQUAL',
193
                              '==='       => 'T_IS_IDENTICAL',
194
                              '-='        => 'T_MINUS_EQUAL',
195
                              '+='        => 'T_PLUS_EQUAL',
196
                              '*='        => 'T_MUL_EQUAL',
197
                              '/='        => 'T_DIV_EQUAL',
198
                              '++'        => 'T_INC',
199
                              '--'        => 'T_DEC',
200
                              '//'        => 'T_COMMENT',
201
                              '/*'        => 'T_COMMENT',
202
                              '/**'       => 'T_DOC_COMMENT',
203
                              '*/'        => 'T_COMMENT',
204
                             );
205
 
206
    /**
207
     * A list of string delimiters.
208
     *
209
     * @var array
210
     */
211
    protected $stringTokens = array(
212
                               '\'',
213
                               '"',
214
                              );
215
 
216
    /**
217
     * A list of tokens that start and end comments.
218
     *
219
     * @var array
220
     */
221
    protected $commentTokens = array(
222
                                '//'  => null,
223
                                '/*'  => '*/',
224
                                '/**' => '*/',
225
                               );
226
 
227
 
228
    /**
229
     * Creates an array of tokens when given some JS code.
230
     *
231
     * Scans the string character by character and does extra processing
232
     * to insert information about the context of the token.
233
     *
234
     * @param string $string  The string to tokenize.
235
     * @param string $eolChar The EOL character to use for splitting strings.
236
     *
237
     * @return array
238
     */
239
    public function tokenizeString($string, $eolChar='\n')
240
    {
241
        $tokenTypes = array_keys($this->tokenValues);
242
 
243
        $maxTokenLength = 0;
244
        foreach ($tokenTypes as $token) {
245
            if (strlen($token) > $maxTokenLength) {
246
                $maxTokenLength = strlen($token);
247
            }
248
        }
249
 
250
        $tokens          = array();
251
        $inString        = '';
252
        $stringChar      = null;
253
        $inComment       = '';
254
        $buffer          = '';
255
        $preStringBuffer = '';
256
        $cleanBuffer     = false;
257
 
258
        $tokens[] = array(
259
                     'code'    => T_OPEN_TAG,
260
                     'type'    => 'T_OPEN_TAG',
261
                     'content' => '',
262
                    );
263
 
264
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
265
            echo "\t*** START TOKENIZING ***".PHP_EOL;
266
        }
267
 
268
        // Convert newlines to single characters for ease of
269
        // processing. We will change them back later.
270
        $string = str_replace($eolChar, "\n", $string);
271
 
272
        $chars    = str_split($string);
273
        $numChars = count($chars);
274
        for ($i = 0; $i < $numChars; $i++) {
275
            $char = $chars[$i];
276
 
277
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
278
                $content = str_replace("\n", '\n', $char);
279
                $bufferContent = str_replace("\n", '\n', $buffer);
280
                if ($inString !== '') {
281
                    echo "\t";
282
                }
283
 
284
                if ($inComment !== '') {
285
                    echo "\t";
286
                }
287
 
288
                echo "Process char $i => $content (buffer: $bufferContent)".PHP_EOL;
289
            }
290
 
291
            if ($inString === '' && $inComment === '' && $buffer !== '') {
292
                // If the buffer only has whitespace and we are about to
293
                // add a character, store the whitespace first.
294
                if (trim($char) !== '' && trim($buffer) === '') {
295
                    $tokens[] = array(
296
                                 'code'    => T_WHITESPACE,
297
                                 'type'    => 'T_WHITESPACE',
298
                                 'content' => str_replace("\n", $eolChar, $buffer),
299
                                );
300
 
301
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
302
                        $content = str_replace("\n", '\n', $buffer);
303
                        echo "=> Added token T_WHITESPACE ($content)".PHP_EOL;
304
                    }
305
 
306
                    $buffer = '';
307
                }
308
 
309
                // If the buffer is not whitespace and we are about to
310
                // add a whitespace character, store the content first.
311
                if ($inString === ''
312
                    && $inComment === ''
313
                    && trim($char) === ''
314
                    && trim($buffer) !== ''
315
                ) {
316
                    $tokens[] = array(
317
                                 'code'    => T_STRING,
318
                                 'type'    => 'T_STRING',
319
                                 'content' => str_replace("\n", $eolChar, $buffer),
320
                                );
321
 
322
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
323
                        $content = str_replace("\n", '\n', $buffer);
324
                        echo "=> Added token T_STRING ($content)".PHP_EOL;
325
                    }
326
 
327
                    $buffer = '';
328
                }
329
            }//end if
330
 
331
            // Process strings.
332
            if ($inComment === '' && in_array($char, $this->stringTokens) === true) {
333
                if ($inString === $char) {
334
                    // This could be the end of the string, but make sure it
335
                    // is not escaped first.
336
                    $escapes = 0;
337
                    for ($x = ($i - 1); $x >= 0; $x--) {
338
                        if ($chars[$x] !== '\\') {
339
                            break;
340
                        }
341
 
342
                        $escapes++;
343
                    }
344
 
345
                    if ($escapes === 0 || ($escapes % 2) === 0) {
346
                        // There is an even number of escape chars,
347
                        // so this is not escaped, it is the end of the string.
348
                        $tokens[] = array(
349
                                     'code'    => T_CONSTANT_ENCAPSED_STRING,
350
                                     'type'    => 'T_CONSTANT_ENCAPSED_STRING',
351
                                     'content' => str_replace("\n", $eolChar, $buffer).$char,
352
                                    );
353
 
354
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
355
                            echo "\t* found end of string *".PHP_EOL;
356
                            $content = str_replace("\n", '\n', $buffer.$char);
357
                            echo "=> Added token T_CONSTANT_ENCAPSED_STRING $content)".PHP_EOL;
358
                        }
359
 
360
                        $buffer          = '';
361
                        $preStringBuffer = '';
362
                        $inString        = '';
363
                        $stringChar      = null;
364
                        continue;
365
                    }
366
                } else if ($inString === '') {
367
                    $inString        = $char;
368
                    $stringChar      = $i;
369
                    $preStringBuffer = $buffer;
370
 
371
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
372
                        echo "\t* looking for string closer *".PHP_EOL;
373
                    }
374
                }//end if
375
            }//end if
376
 
377
            if ($inString !== '' && $char === "\n") {
378
                // Unless this newline character is escaped, the string did not
379
                // end before the end of the line, which means it probably
380
                // wasn't a string at all (maybe a regex).
381
                if ($chars[($i - 1)] !== '\\') {
382
                    $i               = $stringChar;
383
                    $buffer          = $preStringBuffer;
384
                    $preStringBuffer = '';
385
                    $inString        = '';
386
                    $stringChar      = null;
387
                    $char            = $chars[$i];
388
 
389
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
390
                        echo "\t* found newline before end of string, bailing *".PHP_EOL;
391
                    }
392
                }
393
            }
394
 
395
            $buffer .= $char;
396
 
397
            // We don't look for special tokens inside strings,
398
            // so if we are in a string, we can continue here now
399
            // that the current char is in the buffer.
400
            if ($inString !== '') {
401
                continue;
402
            }
403
 
404
            // Check for known tokens, but ignore tokens found that are not at
405
            // the end of a string, like FOR and this.FORmat.
406
            if (in_array(strtolower($buffer), $tokenTypes) === true
407
                && (preg_match('|[a-zA-z0-9_]|', $char) === 0
408
                || preg_match('|[a-zA-z0-9_]|', $chars[($i + 1)]) === 0)
409
            ) {
410
                $matchedToken    = false;
411
                $lookAheadLength = ($maxTokenLength - strlen($buffer));
412
 
413
                if ($lookAheadLength > 0) {
414
                    // The buffer contains a token type, but we need
415
                    // to look ahead at the next chars to see if this is
416
                    // actually part of a larger token. For example,
417
                    // FOR and FOREACH.
418
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
419
                        echo "\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
420
                    }
421
 
422
                    $charBuffer = $buffer;
423
                    for ($x = 1; $x <= $lookAheadLength; $x++) {
424
                        if (isset($chars[($i + $x)]) === false) {
425
                            break;
426
                        }
427
 
428
                        $charBuffer .= $chars[($i + $x)];
429
 
430
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
431
                            $content = str_replace("\n", '\n', $charBuffer);
432
                            echo "\t=> Looking ahead $x chars => $content".PHP_EOL;
433
                        }
434
 
435
                        if (in_array(strtolower($charBuffer), $tokenTypes) === true) {
436
                            // We've found something larger that matches
437
                            // so we can ignore this char.
438
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
439
                                $type = $this->tokenValues[strtolower($charBuffer)];
440
                                echo "\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
441
                            }
442
 
443
                            $matchedToken = true;
444
                            break;
445
                        }
446
                    }//end for
447
                }//end if
448
 
449
                if ($matchedToken === false) {
450
                    $value    = $this->tokenValues[strtolower($buffer)];
451
                    $tokens[] = array(
452
                                 'code'    => constant($value),
453
                                 'type'    => $value,
454
                                 'content' => $buffer,
455
                                );
456
 
457
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
458
                        if ($lookAheadLength > 0) {
459
                            echo "\t* look ahead found nothing *".PHP_EOL;
460
                        }
461
 
462
                        $content = str_replace("\n", '\n', $buffer);
463
                        echo "=> Added token $value ($content)".PHP_EOL;
464
                    }
465
 
466
                    $cleanBuffer = true;
467
                }
468
            } else if (in_array(strtolower($char), $tokenTypes) === true) {
469
                // No matter what token we end up using, we don't
470
                // need the content in the buffer any more because we have
471
                // found a valid token.
472
                $newContent = substr(str_replace("\n", $eolChar, $buffer), 0, -1);
473
                if ($newContent !== '') {
474
                    $tokens[] = array(
475
                                 'code'    => T_STRING,
476
                                 'type'    => 'T_STRING',
477
                                 'content' => $newContent,
478
                                );
479
 
480
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
481
                        $content = str_replace("\n", '\n', substr($buffer, 0, -1));
482
                        echo "=> Added token T_STRING ($content)".PHP_EOL;
483
                    }
484
                }
485
 
486
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
487
                    echo "\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
488
                }
489
 
490
                // The char is a token type, but we need to look ahead at the
491
                // next chars to see if this is actually part of a larger token.
492
                // For example, = and ===.
493
                $charBuffer   = $char;
494
                $matchedToken = false;
495
                for ($x = 1; $x <= $maxTokenLength; $x++) {
496
                    if (isset($chars[($i + $x)]) === false) {
497
                        break;
498
                    }
499
 
500
                    $charBuffer .= $chars[($i + $x)];
501
 
502
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
503
                        $content = str_replace("\n", '\n', $charBuffer);
504
                        echo "\t=> Looking ahead $x chars => $content".PHP_EOL;
505
                    }
506
 
507
                    if (in_array(strtolower($charBuffer), $tokenTypes) === true) {
508
                        // We've found something larger that matches
509
                        // so we can ignore this char.
510
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
511
                            $type = $this->tokenValues[strtolower($charBuffer)];
512
                            echo "\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
513
                        }
514
 
515
                        $matchedToken = true;
516
                        break;
517
                    }
518
                }//end for
519
 
520
                if ($matchedToken === false) {
521
                    $value    = $this->tokenValues[strtolower($char)];
522
                    $tokens[] = array(
523
                                 'code'    => constant($value),
524
                                 'type'    => $value,
525
                                 'content' => $char,
526
                                );
527
 
528
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
529
                        echo "\t* look ahead found nothing *".PHP_EOL;
530
                        $content = str_replace("\n", '\n', $char);
531
                        echo "=> Added token $value ($content)".PHP_EOL;
532
                    }
533
 
534
                    $cleanBuffer = true;
535
                } else {
536
                    $buffer = $char;
537
                }
538
            }//end if
539
 
540
            // Keep track of content inside comments.
541
            if ($inComment === ''
542
                && array_key_exists($buffer, $this->commentTokens) === true
543
            ) {
544
                // We have started a comment.
545
                $inComment = $buffer;
546
 
547
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
548
                    echo "\t* looking for end of comment *".PHP_EOL;
549
                }
550
            } else if ($inComment !== '') {
551
                if ($this->commentTokens[$inComment] === null) {
552
                    // Comment ends at the next newline.
553
                    if (strpos($buffer, "\n") !== false) {
554
                        $inComment = '';
555
                    }
556
                } else {
557
                    if ($this->commentTokens[$inComment] === $buffer) {
558
                        $inComment = '';
559
                    }
560
                }
561
 
562
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
563
                    if ($inComment === '') {
564
                        echo "\t* found end of comment *".PHP_EOL;
565
                    }
566
                }
567
 
568
                if ($inComment === '' && $cleanBuffer === false) {
569
                    $tokens[] = array(
570
                                 'code'    => T_STRING,
571
                                 'type'    => 'T_STRING',
572
                                 'content' => str_replace("\n", $eolChar, $buffer),
573
                                );
574
 
575
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
576
                        $content = str_replace("\n", '\n', $buffer);
577
                        echo "=> Added token T_STRING ($content)".PHP_EOL;
578
                    }
579
 
580
                    $buffer = '';
581
                }
582
            }//end if
583
 
584
            if ($cleanBuffer === true) {
585
                $buffer      = '';
586
                $cleanBuffer = false;
587
            }
588
        }//end foreach
589
 
590
        // Trim the last newline off the end of the buffer before
591
        // adding its contents to the token stack.
592
        // This is so we don't count the very final newline of a file.
593
        $buffer = substr($buffer, 0, -1);
594
 
595
        if (empty($buffer) === false) {
596
            // Buffer contains whitespace from the end of the file, and not
597
            // just the final newline.
598
            $tokens[] = array(
599
                         'code'    => T_WHITESPACE,
600
                         'type'    => 'T_WHITESPACE',
601
                         'content' => str_replace("\n", $eolChar, $buffer),
602
                        );
603
 
604
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
605
                $content = str_replace($eolChar, '\n', $buffer);
606
                echo "=> Added token T_WHITESPACE ($content)".PHP_EOL;
607
            }
608
        }
609
 
610
        $tokens[] = array(
611
                     'code'    => T_CLOSE_TAG,
612
                     'type'    => 'T_CLOSE_TAG',
613
                     'content' => '',
614
                    );
615
 
616
        /*
617
            Now that we have done some basic tokenizing, we need to
618
            modify the tokens to join some together and split some apart
619
            so they match what the PHP tokenizer does.
620
        */
621
 
622
        $finalTokens = array();
623
        $newStackPtr = 0;
624
        $numTokens   = count($tokens);
625
        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
626
            $token = $tokens[$stackPtr];
627
 
628
            /*
629
                Look for regular expressions and join the tokens together.
630
            */
631
 
632
            if ($token['code'] === T_DIVIDE) {
633
                $beforeTokens = array(
634
                                 T_EQUAL,
635
                                 T_OPEN_PARENTHESIS,
636
                                 T_RETURN,
637
                                );
638
 
639
                $afterTokens = array(
640
                                 T_COMMA,
641
                                 T_CLOSE_PARENTHESIS,
642
                                 T_SEMICOLON,
643
                                 T_WHITESPACE,
644
                                 T_OBJECT_OPERATOR,
645
                                );
646
 
647
                for ($prev = ($stackPtr - 1); $prev >= 0; $prev--) {
648
                    if (in_array($tokens[$prev]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
649
                        break;
650
                    }
651
                }
652
 
653
                if (in_array($tokens[$prev]['code'], $beforeTokens) === true) {
654
                    // This is probably a regular expression,
655
                    // so look for the end of it.
656
                    for ($next = ($stackPtr + 1); $next < $numTokens; $next++) {
657
                        if ($tokens[$next]['code'] === T_DIVIDE) {
658
                            // Just make sure this is not escaped first.
659
                            if (substr($tokens[($next - 1)]['content'], -1) !== '\\') {
660
                                break;
661
                            }
662
                        } else if (strpos($tokens[$next]['content'], $eolChar) !== false) {
663
                            // This is the last token on the line, and regular
664
                            // expressions need to be defined on a single line.
665
                            break;
666
                        }
667
                    }
668
 
669
                    if ($tokens[$next]['code'] === T_DIVIDE) {
670
                        if ($tokens[($next + 1)]['code'] === T_STRING) {
671
                            // The token directly after the end of the regex can
672
                            // be modifiers like global and case insensitive
673
                            // (e.g., /pattern/gi).
674
                            $next++;
675
                        }
676
 
677
                        $regexEnd = $next;
678
 
679
                        for ($next = ($next + 1); $next < $numTokens; $next++) {
680
                            if (in_array($tokens[$next]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
681
                                break;
682
                            } else if (strpos($tokens[$next]['content'], $eolChar) !== false) {
683
                                // If this is the last token on the line.
684
                                break;
685
                            }
686
                        }
687
 
688
                        if (in_array($tokens[$next]['code'], $afterTokens) === true) {
689
                            // This is a regular expression, so join all the
690
                            // tokens together.
691
                            for ($i = ($stackPtr + 1); $i <= $regexEnd; $i++) {
692
                                $token['content'] .= $tokens[$i]['content'];
693
                            }
694
 
695
                            $token['code'] = T_REGULAR_EXPRESSION;
696
                            $token['type'] = 'T_REGULAR_EXPRESSION';
697
                            $stackPtr      = $regexEnd;
698
                        }
699
                    }//end if
700
                }//end if
701
            }//end if
702
 
703
            /*
704
                Look for comments and join the tokens together.
705
            */
706
 
707
            if (array_key_exists($token['content'], $this->commentTokens) === true) {
708
                $newContent   = '';
709
                $tokenContent = $token['content'];
710
                $endContent   = $this->commentTokens[$tokenContent];
711
                while ($tokenContent !== $endContent) {
712
                    if ($endContent === null
713
                        && strpos($tokenContent, $eolChar) !== false
714
                    ) {
715
                        // A null end token means the comment ends at the end of
716
                        // the line so we look for newlines and split the token.
717
                        $tokens[$stackPtr]['content'] = substr(
718
                            $tokenContent,
719
                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
720
                        );
721
 
722
                        $tokenContent = substr(
723
                            $tokenContent,
724
                            0,
725
                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
726
                        );
727
 
728
                        // If the substr failed, skip the token as the content
729
                        // will now be blank.
730
                        if ($tokens[$stackPtr]['content'] !== false) {
731
                            $stackPtr--;
732
                        }
733
 
734
                        break;
735
                    }//end if
736
 
737
                    $stackPtr++;
738
                    $newContent  .= $tokenContent;
739
                    if (isset($tokens[$stackPtr]) === false) {
740
                        break;
741
                    }
742
 
743
                    $tokenContent = $tokens[$stackPtr]['content'];
744
                }//end while
745
 
746
                // Save the new content in the current token so
747
                // the code below can chop it up on newlines.
748
                $token['content'] = $newContent.$tokenContent;
749
            }//end if
750
 
751
            /*
752
                If this token has newlines in its content, split each line up
753
                and create a new token for each line. We do this so it's easier
754
                to ascertain where errors occur on a line.
755
                Note that $token['content'] is the token's content.
756
            */
757
 
758
            if (strpos($token['content'], $eolChar) !== false) {
759
                $tokenLines = explode($eolChar, $token['content']);
760
                $numLines   = count($tokenLines);
761
 
762
                for ($i = 0; $i < $numLines; $i++) {
763
                    $newToken['content'] = $tokenLines[$i];
764
                    if ($i === ($numLines - 1)) {
765
                        if ($tokenLines[$i] === '') {
766
                            break;
767
                        }
768
                    } else {
769
                        $newToken['content'] .= $eolChar;
770
                    }
771
 
772
                    $newToken['type']          = $token['type'];
773
                    $newToken['code']          = $token['code'];
774
                    $finalTokens[$newStackPtr] = $newToken;
775
                    $newStackPtr++;
776
                }
777
            } else {
778
                $finalTokens[$newStackPtr] = $token;
779
                $newStackPtr++;
780
            }//end if
781
 
782
            // Convert numbers, including decimals.
783
            if ($token['code'] === T_STRING
784
                || $token['code'] === T_OBJECT_OPERATOR
785
            ) {
786
                $newContent  = '';
787
                $oldStackPtr = $stackPtr;
788
                while (preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0) {
789
                    $newContent .= $tokens[$stackPtr]['content'];
790
                    $stackPtr++;
791
                }
792
 
793
                if ($newContent !== '' && $newContent !== '.') {
794
                    $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
795
                    if (ctype_digit($newContent) === true) {
796
                        $finalTokens[($newStackPtr - 1)]['code']
797
                            = constant('T_LNUMBER');
798
                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
799
                    } else {
800
                        $finalTokens[($newStackPtr - 1)]['code']
801
                            = constant('T_DNUMBER');
802
                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
803
                    }
804
 
805
                    $stackPtr--;
806
                } else {
807
                    $stackPtr = $oldStackPtr;
808
                }
809
            }//end if
810
        }//end for
811
 
812
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
813
            echo "\t*** END TOKENIZING ***".PHP_EOL;
814
        }
815
 
816
        return $finalTokens;
817
 
818
    }//end tokenizeString()
819
 
820
 
821
    /**
     * Performs additional processing after main tokenizing.
     *
     * This additional processing looks for properties, labels and objects.
     * The token array is modified in place:
     *
     * - An opening curly bracket without a scope condition is pushed onto a
     *   stack of open property blocks. If it is preceded by "name =", the
     *   first token of the (possibly dotted) name is converted to T_OBJECT
     *   and linked to the bracket pair as its scope opener and closer.
     * - A closing curly bracket that has no scope condition, or whose scope
     *   condition is a T_OBJECT, pops that stack.
     * - A T_STRING directly before a colon is converted to T_PROPERTY while
     *   the stack is not empty, and to T_LABEL otherwise. Colons that already
     *   have a scope condition, or that follow a T_INLINE_THEN on the same
     *   line, are ignored. A property whose value starts with an opening
     *   curly bracket is linked to that bracket pair as well.
     *
     * @param array  &$tokens The array of tokens to process.
     * @param string $eolChar The EOL character to use for splitting strings.
     *                        Only used here to make debug output readable.
     *
     * @return void
     */
    public function processAdditional(&$tokens, $eolChar)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
        }

        $numTokens  = count($tokens);
        $classStack = array();

        // Tokens that may form the name on the left of an object definition.
        // E.g., WantedString.DontWantThis.prototype
        $wantedTokens = array(
                         T_STRING,
                         T_PROTOTYPE,
                         T_OBJECT_OPERATOR,
                        );

        for ($i = 0; $i < $numTokens; $i++) {
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $type    = $tokens[$i]['type'];
                $content = str_replace($eolChar, '\n', $tokens[$i]['content']);
                echo str_repeat("\t", count($classStack));

                echo "\tProcess token $i: $type => $content".PHP_EOL;
            }

            if ($tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
                && isset($tokens[$i]['scope_condition']) === false
            ) {
                $classStack[] = $i;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t=> Found property opener".PHP_EOL;
                }

                // This could also be an object definition, so find the
                // previous non-empty token. $x ends up as -1 when there is
                // none (the bracket is the first real token in the file).
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                        // Non-whitespace content.
                        break;
                    }
                }

                if ($x >= 0 && $tokens[$x]['code'] === T_EQUAL) {
                    // Find the non-empty token before the equals sign.
                    for ($x--; $x >= 0; $x--) {
                        if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                            break;
                        }
                    }

                    // An unmatched bracket has no closer to link to, so it
                    // cannot be turned into an object scope.
                    if ($x >= 0
                        && ($tokens[$x]['code'] === T_STRING
                        || $tokens[$x]['code'] === T_PROTOTYPE)
                        && isset($tokens[$i]['bracket_closer']) === true
                    ) {
                        // Find the first string in this definition.
                        // E.g., WantedString.DontWantThis.prototype
                        for ($x--; $x >= 0; $x--) {
                            if (in_array($tokens[$x]['code'], $wantedTokens) === false) {
                                $x++;
                                break;
                            }
                        }

                        if ($x < 0) {
                            // The name runs back to the very first token, so
                            // the loop ended without stepping forward again.
                            $x = 0;
                        }

                        $closer = $tokens[$i]['bracket_closer'];
                        $tokens[$i]['scope_condition']      = $x;
                        $tokens[$i]['scope_closer']         = $closer;
                        $tokens[$closer]['scope_condition'] = $x;
                        $tokens[$closer]['scope_opener']    = $i;
                        $tokens[$x]['scope_opener']         = $i;
                        $tokens[$x]['scope_closer']         = $closer;
                        $tokens[$x]['code']                 = T_OBJECT;
                        $tokens[$x]['type']                 = 'T_OBJECT';

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo str_repeat("\t", count($classStack));
                            echo "\t* token $x converted from T_STRING to T_OBJECT *".PHP_EOL;
                            echo str_repeat("\t", count($classStack));
                            echo "\t* set scope opener ($i) and closer ($closer) for token $x *".PHP_EOL;
                        }
                    }//end if
                }//end if
            } else if ($tokens[$i]['code'] === T_CLOSE_CURLY_BRACKET
                && (isset($tokens[$i]['scope_condition']) === false
                || $tokens[$tokens[$i]['scope_condition']]['code'] === T_OBJECT)
            ) {
                // array_pop() yields NULL for an unbalanced closer; the value
                // is only used for the debug output below.
                $opener = array_pop($classStack);

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t\t=> Found property closer for $opener".PHP_EOL;
                }
            } else if ($tokens[$i]['code'] === T_COLON) {
                // If it is a scope opener, it belongs to a
                // DEFAULT or CASE statement.
                if (isset($tokens[$i]['scope_condition']) === true) {
                    continue;
                }

                // Make sure this is not part of an inline IF statement.
                // Only tokens on the same line as the colon are inspected.
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if ($tokens[$x]['code'] === T_INLINE_THEN) {
                        continue 2;
                    } else if ($tokens[$x]['line'] < $tokens[$i]['line']) {
                        break;
                    }
                }

                // The string to the left of the colon is either a property or label.
                for ($label = ($i - 1); $label >= 0; $label--) {
                    if (in_array($tokens[$label]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                        break;
                    }
                }

                // $label is -1 when nothing but empty tokens precede the colon.
                if ($label < 0 || $tokens[$label]['code'] !== T_STRING) {
                    continue;
                }

                if (empty($classStack) === false) {
                    $tokens[$label]['code'] = T_PROPERTY;
                    $tokens[$label]['type'] = 'T_PROPERTY';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
                    }

                    // If the next token after the colon is a curly brace,
                    // this property is actually an object, so we can give it
                    // an opener and closer.
                    for ($x = ($i + 1); $x < $numTokens; $x++) {
                        if (in_array($tokens[$x]['code'], PHP_CodeSniffer_Tokens::$emptyTokens) === false) {
                            break;
                        }
                    }

                    // $x equals $numTokens when the colon is the last real
                    // token; an unmatched bracket has no closer to link to.
                    if ($x < $numTokens
                        && $tokens[$x]['code'] === T_OPEN_CURLY_BRACKET
                        && isset($tokens[$x]['bracket_closer']) === true
                    ) {
                        $closer = $tokens[$x]['bracket_closer'];
                        $tokens[$label]['scope_opener']     = $x;
                        $tokens[$label]['scope_closer']     = $closer;
                        $tokens[$x]['scope_condition']      = $label;
                        $tokens[$x]['scope_closer']         = $closer;
                        $tokens[$closer]['scope_condition'] = $label;
                        $tokens[$closer]['scope_opener']    = $x;
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo str_repeat("\t", count($classStack));
                            echo "\t* set scope opener ($x) and closer ($closer) for token $label *".PHP_EOL;
                        }
                    }
                } else {
                    $tokens[$label]['code'] = T_LABEL;
                    $tokens[$label]['type'] = 'T_LABEL';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
                    }
                }//end if
            }//end if
        }//end for

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
        }

    }//end processAdditional()
994
 
995
 
996
}//end class
997
 
998
?>