Subversion Repositories Applications.papyrus

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1713 jp_milcent 1
<?php
//
// +----------------------------------------------------------------------+
// | PHP Version 4                                                        |
// +----------------------------------------------------------------------+
// | Copyright (c) 1997-2004 The PHP Group                                |
// +----------------------------------------------------------------------+
// | This source file is subject to version 3.0 of the PHP license,       |
// | that is bundled with this package in the file LICENSE, and is        |
// | available at through the world-wide-web at                           |
// | http://www.php.net/license/3_0.txt.                                  |
// | If you did not receive a copy of the PHP license and are unable to   |
// | obtain it through the world-wide-web, please send a note to          |
// | license@php.net so we can mail you a copy immediately.               |
// +----------------------------------------------------------------------+
// | Author: Stig Bakken <ssb@fast.no>                                    |
// |         Tomas V.V.Cox <cox@idecnet.com>                              |
// |         Stephan Schmidt <schst@php-tools.net>                        |
// +----------------------------------------------------------------------+
//
// $Id: Parser.php,v 1.3 2007-11-19 15:11:00 jp_milcent Exp $

/**
 * XML Parser class.
 *
 * This is an XML parser based on PHP's "xml" extension,
 * based on the bundled expat library.
 *
 * @category XML
 * @package XML_Parser
 * @author  Stig Bakken <ssb@fast.no>
 * @author  Tomas V.V.Cox <cox@idecnet.com>
 * @author  Stephan Schmidt <schst@php-tools.net>
 */

/**
 * uses PEAR's error handling
 */
require_once 'PEAR.php';

/**
 * error code: the XML parser resource could not be created
 */
define('XML_PARSER_ERROR_NO_RESOURCE', 200);

/**
 * error code: an unsupported parsing mode was requested
 * (only "event" and "func" are supported)
 */
define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);

/**
 * error code: an invalid source or target encoding was given
 */
define('XML_PARSER_ERROR_INVALID_ENCODING', 202);

/**
 * error code: the specified file could not be opened for reading
 */
define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);

/**
 * error code: the input is neither a stream, a file nor a string
 */
define('XML_PARSER_ERROR_INVALID_INPUT', 204);

/**
 * error code: a remote (http/ftp) file was requested, but the
 * "allow_url_fopen" ini setting is disabled
 */
define('XML_PARSER_ERROR_REMOTE', 205);

/**
 * XML Parser class.
 *
 * This is an XML parser based on PHP's "xml" extension,
 * based on the bundled expat library.
 *
 * Typical usage: extend this class (or register a separate handler object
 * with setHandlerObj()), provide the input with setInputFile(),
 * setInputString() or setInput(), then call parse().
 *
 * Notes:
 * - It requires PHP 4.0.4pl1 or greater
 * - From revision 1.17, the function names used by the 'func' mode
 *   are in the format "xmltag_$elem", for example: use "xmltag_name"
 *   to handle the <name></name> tags of your xml file.
 *
 * @category XML
 * @package XML_Parser
 * @author  Stig Bakken <ssb@fast.no>
 * @author  Tomas V.V.Cox <cox@idecnet.com>
 * @author  Stephan Schmidt <schst@php-tools.net>
 * @todo    create XML_Parser_Namespace to parse documents with namespaces
 * @todo    create XML_Parser_Pull
 * @todo    Tests that need to be made:
 *          - mixing character encodings
 *          - a test using all expat handlers
 *          - options (folding, output charset)
 *          - different parsing modes
 */
class XML_Parser extends PEAR
{
    // {{{ properties

    /**
     * XML parser handle
     *
     * A resource up to PHP 7, an XMLParser object as of PHP 8.
     *
     * @var  resource|object
     * @see  xml_parser_create()
     */
    var $parser;

    /**
     * Input to parse: a file handle when parsing from a file or stream,
     * the XML document itself when parsing from a string
     *
     * @var  resource|string
     */
    var $fp;

    /**
     * Whether to do case folding
     *
     * If set to true, all tag and attribute names will
     * be converted to UPPER CASE.
     *
     * @var  boolean
     */
    var $folding = true;

    /**
     * Mode of operation, one of "event" or "func"
     *
     * @var  string
     */
    var $mode;

    /**
     * Mapping from expat handler function to class method.
     *
     * The key is the suffix of the matching xml_set_*() function,
     * the value is the name of the method on the handler object.
     *
     * @var  array
     */
    var $handler = array(
        'character_data_handler'            => 'cdataHandler',
        'default_handler'                   => 'defaultHandler',
        'processing_instruction_handler'    => 'piHandler',
        'unparsed_entity_decl_handler'      => 'unparsedHandler',
        'notation_decl_handler'             => 'notationHandler',
        'external_entity_ref_handler'       => 'entityrefHandler'
    );

    /**
     * source encoding
     *
     * @var string
     */
    var $srcenc;

    /**
     * target encoding
     *
     * @var string
     */
    var $tgtenc;

    /**
     * handler object
     *
     * @var object
     */
    var $_handlerObj;

    /**
     * valid encodings
     *
     * @var array
     */
    var $_validEncodings = array('ISO-8859-1', 'UTF-8', 'US-ASCII');

    // }}}
    // {{{ constructor

    /**
     * Creates an XML parser.
     *
     * This is needed for PHP4 compatibility, it will
     * call the constructor, when a new instance is created.
     *
     * @param string $srcenc source charset encoding, use NULL (default) to use
     *                       whatever the document specifies
     * @param string $mode   how this parser object should work, "event" for
     *                       startelement/endelement-type events, "func"
     *                       to have it call functions named after elements
     * @param string $tgtenc a valid target encoding
     */
    function XML_Parser($srcenc = null, $mode = 'event', $tgtenc = null)
    {
        XML_Parser::__construct($srcenc, $mode, $tgtenc);
    }

    /**
     * PHP5 constructor
     *
     * Only stores the settings; the parser itself is created lazily by
     * _create() so that failures can be reported as error objects.
     *
     * @param string $srcenc source charset encoding, use NULL (default) to use
     *                       whatever the document specifies
     * @param string $mode   how this parser object should work, "event" for
     *                       startelement/endelement-type events, "func"
     *                       to have it call functions named after elements
     * @param string $tgtenc a valid target encoding
     */
    function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
    {
        $this->PEAR('XML_Parser_Error');

        $this->mode   = $mode;
        $this->srcenc = $srcenc;
        $this->tgtenc = $tgtenc;
    }

    // }}}
    // {{{ setMode()

    /**
     * Sets the mode of the parser.
     *
     * Possible modes are:
     * - func
     * - event
     *
     * You can set the mode using the second parameter
     * in the constructor.
     *
     * This method is only needed, when switching to a new
     * mode at a later point.
     *
     * @access  public
     * @param   string          mode, either 'func' or 'event'
     * @return  boolean|object  true on success, PEAR_Error otherwise
     */
    function setMode($mode)
    {
        if ($mode != 'func' && $mode != 'event') {
            // the error has to be returned, otherwise the invalid mode
            // would be stored and reported as a success
            return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
        }

        $this->mode = $mode;
        return true;
    }

    // }}}
    // {{{ setHandlerObj()

    /**
     * Sets the object, that will handle the XML events
     *
     * This allows you to create a handler object independent of the
     * parser object that you are using and easily switch the underlying
     * parser.
     *
     * If no object will be set, XML_Parser assumes that you
     * extend this class and handle the events in $this.
     *
     * @access  public
     * @param   object      object to handle the events
     * @return  boolean     will always return true
     * @since   v1.2.0beta3
     */
    function setHandlerObj(&$obj)
    {
        $this->_handlerObj = &$obj;
        return true;
    }

    // }}}
    // {{{ _isParserHandle()

    /**
     * Checks whether a value is a usable parser handle.
     *
     * xml_parser_create() returns a resource up to PHP 7 and an
     * XMLParser object as of PHP 8; both are accepted.
     *
     * @access  private
     * @param   mixed   $handle value to check
     * @return  boolean
     */
    function _isParserHandle($handle)
    {
        if (is_resource($handle)) {
            return true;
        }
        // class names are compared case-insensitively, PHP 4 reports them in lower case
        return is_object($handle) && strtolower(get_class($handle)) == 'xmlparser';
    }

    // }}}
    // {{{ _initHandlers()

    /**
     * Init the element handlers
     *
     * Registers the handler object and the element handlers that match the
     * current mode, followed by every optional handler from $this->handler
     * that the handler object implements.
     *
     * @access  private
     * @return  mixed   false if no parser has been created, a PEAR_Error
     *                  if the mode is not supported, nothing otherwise
     */
    function _initHandlers()
    {
        if (!isset($this->parser) || !$this->_isParserHandle($this->parser)) {
            return false;
        }

        // without an explicit handler object, the parser handles its own events
        if (!is_object($this->_handlerObj)) {
            $this->_handlerObj = &$this;
        }

        switch ($this->mode) {
            case 'func':
                // element events always go through the dispatchers of this
                // class, which forward them to xmltag_* on the handler object
                xml_set_object($this->parser, $this->_handlerObj);
                xml_set_element_handler($this->parser, array(&$this, 'funcStartHandler'), array(&$this, 'funcEndHandler'));
                break;

            case 'event':
                xml_set_object($this->parser, $this->_handlerObj);
                xml_set_element_handler($this->parser, 'startHandler', 'endHandler');
                break;

            default:
                return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
        }

        /**
         * set additional handlers for character data, entities, etc.
         */
        foreach ($this->handler as $xml_func => $method) {
            if (method_exists($this->_handlerObj, $method)) {
                $xml_func = 'xml_set_' . $xml_func;
                $xml_func($this->parser, $method);
            }
        }
    }

    // }}}
    // {{{ _create()

    /**
     * create the XML parser resource
     *
     * Has been moved from the constructor to avoid
     * problems with object references.
     *
     * Furthermore it allows us returning an error
     * if something fails.
     *
     * @access   private
     * @return   boolean|object     true on success, PEAR_Error otherwise
     *
     * @see xml_parser_create
     */
    function _create()
    {
        if ($this->srcenc === null) {
            $xp = @xml_parser_create();
        } else {
            $xp = @xml_parser_create($this->srcenc);
        }

        if ($this->_isParserHandle($xp)) {
            if ($this->tgtenc !== null) {
                if (!@xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING,
                                            $this->tgtenc)) {
                    return $this->raiseError('invalid target encoding', XML_PARSER_ERROR_INVALID_ENCODING);
                }
            }
            $this->parser = $xp;
            $result = $this->_initHandlers();
            if ($this->isError($result)) {
                return $result;
            }
            xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);
            return true;
        }

        // The encoding can only be blamed if one has been requested;
        // with no source encoding the failure has another cause.
        if ($this->srcenc !== null
            && !in_array(strtoupper($this->srcenc), $this->_validEncodings)) {
            return $this->raiseError('invalid source encoding', XML_PARSER_ERROR_INVALID_ENCODING);
        }
        return $this->raiseError('Unable to create XML parser resource.', XML_PARSER_ERROR_NO_RESOURCE);
    }

    // }}}
    // {{{ reset()

    /**
     * Reset the parser.
     *
     * This allows you to use one parser instance
     * to parse multiple XML documents.
     *
     * @access   public
     * @return   boolean|object     true on success, PEAR_Error otherwise
     */
    function reset()
    {
        $result = $this->_create();
        if ($this->isError($result)) {
            return $result;
        }
        return true;
    }

    // }}}
    // {{{ setInputFile()

    /**
     * Sets the input xml file to be parsed
     *
     * @param    string      Filename (full path) or http/ftp URL
     * @return   resource|object  fopen handle of the given file,
     *                            PEAR_Error if it cannot be opened
     * @throws   XML_Parser_Error
     * @see      setInput(), setInputString(), parse()
     * @access   public
     */
    function setInputFile($file)
    {
        /**
         * check, if file is a remote file
         */
        if (preg_match('#^(http|ftp)://#i', substr($file, 0, 10))) {
            // the setting that is actually checked is allow_url_fopen; the
            // message is kept as it is for callers that match on it
            if (!ini_get('allow_url_fopen')) {
                return $this->raiseError('Remote files cannot be parsed, as safe mode is enabled.', XML_PARSER_ERROR_REMOTE);
            }
        }

        $fp = @fopen($file, 'rb');
        if (is_resource($fp)) {
            $this->fp = $fp;
            return $fp;
        }
        return $this->raiseError('File could not be opened.', XML_PARSER_ERROR_FILE_NOT_READABLE);
    }

    // }}}
    // {{{ setInputString()

    /**
     * XML_Parser::setInputString()
     *
     * Sets the xml input from a string
     *
     * @param string $data a string containing the XML document
     * @return null
     **/
    function setInputString($data)
    {
        $this->fp = $data;
        return null;
    }

    // }}}
    // {{{ setInput()

    /**
     * Sets the file handle to use with parse().
     *
     * You should use setInputFile() or setInputString() if you
     * pass a string
     *
     * @param    mixed  $fp  Can be either a resource returned from fopen(),
     *                       a URL, a local filename or a string.
     * @return   mixed       true for a resource or a string, the result of
     *                       setInputFile() for a URL or a filename,
     *                       PEAR_Error for input that cannot be used
     * @access   public
     * @see      parse()
     * @uses     setInputString(), setInputFile()
     */
    function setInput($fp)
    {
        if (is_resource($fp)) {
            $this->fp = $fp;
            return true;
        }

        // arrays and objects that cannot be converted to a string are
        // neither a location nor a document
        if (is_array($fp) || (is_object($fp) && !method_exists($fp, '__toString'))) {
            return $this->raiseError('Illegal input format', XML_PARSER_ERROR_INVALID_INPUT);
        }

        // see if it's an absolute URL (has a scheme at the beginning)
        if (preg_match('#^[a-z]+://#i', substr($fp, 0, 10))) {
            return $this->setInputFile($fp);
        }

        // see if it's a local file
        if (file_exists($fp)) {
            return $this->setInputFile($fp);
        }

        // it must be a string
        $this->fp = $fp;
        return true;
    }

    // }}}
    // {{{ parse()

    /**
     * Central parsing function.
     *
     * Creates a fresh parser, feeds it the input that has been set with
     * setInput(), setInputFile() or setInputString() and releases the
     * parser and the input afterwards.
     *
     * @return   true|object PEAR error     returns true on success, or a PEAR_Error otherwise
     * @access   public
     */
    function parse()
    {
        /**
         * reset the parser
         */
        $result = $this->reset();
        if ($this->isError($result)) {
            return $result;
        }

        // if $this->fp was fopened previously
        if (is_resource($this->fp)) {
            $final = false;
            while (!$final) {
                $data = fread($this->fp, 4096);
                if ($data === false || $data === '') {
                    // Nothing left to read: the parser still has to be told
                    // that the document is complete, otherwise a truncated
                    // or empty document would pass unnoticed.
                    $data  = '';
                    $final = true;
                } else {
                    $final = feof($this->fp);
                }

                if (!$this->_parseString($data, $final)) {
                    // build the error before free() destroys the parser
                    $error = $this->raiseError();
                    $this->free();
                    return $error;
                }
            }
        // otherwise, $this->fp must be a string
        } else {
            if (!$this->_parseString($this->fp, true)) {
                $error = $this->raiseError();
                $this->free();
                return $error;
            }
        }
        $this->free();

        return true;
    }

    // }}}
    // {{{ _parseString()

    /**
     * XML_Parser::_parseString()
     *
     * Hands one chunk of data to the underlying parser.
     *
     * @param string $data
     * @param boolean $eof
     * @return bool
     * @access private
     * @see parseString()
     **/
    function _parseString($data, $eof = false)
    {
        return xml_parse($this->parser, $data, $eof);
    }

    // }}}
    // {{{ parseString()

    /**
     * XML_Parser::parseString()
     *
     * Parses a string.
     *
     * @param    string  $data XML data
     * @param    boolean $eof  If set and TRUE, data is the last piece of data sent in this parser
     * @throws   XML_Parser_Error
     * @return   Pear Error|true   true on success or a PEAR Error
     * @see      _parseString()
     */
    function parseString($data, $eof = false)
    {
        if (!isset($this->parser) || !$this->_isParserHandle($this->parser)) {
            // without a parser there is nothing to feed the data to
            $result = $this->reset();
            if ($this->isError($result)) {
                return $result;
            }
        }

        if (!$this->_parseString($data, $eof)) {
            // build the error before free() destroys the parser
            $error = $this->raiseError();
            $this->free();
            return $error;
        }

        if ($eof === true) {
            $this->free();
        }
        return true;
    }

    // }}}
    // {{{ free()

    /**
     * XML_Parser::free()
     *
     * Free the internal resources associated with the parser
     *
     * @return null
     **/
    function free()
    {
        if (isset($this->parser) && $this->_isParserHandle($this->parser)) {
            xml_parser_free($this->parser);
            unset($this->parser);
        }
        if (isset($this->fp) && is_resource($this->fp)) {
            fclose($this->fp);
        }
        unset($this->fp);
        return null;
    }

    // }}}
    // {{{ raiseError()

    /**
     * XML_Parser::raiseError()
     *
     * Throws a XML_Parser_Error
     *
     * If no message is given, the parser handle is passed on, so that the
     * error object describes the last error of the parser.
     *
     * @param string  $msg   the error message
     * @param integer $ecode the error message code
     * @return XML_Parser_Error
     **/
    function raiseError($msg = null, $ecode = 0)
    {
        if (is_null($msg)) {
            $msg = isset($this->parser) ? $this->parser : 'unknown error';
        }
        $err = new XML_Parser_Error($msg, $ecode);
        return parent::raiseError($err);
    }

    // }}}
    // {{{ funcStartHandler()

    /**
     * Start element dispatcher of the "func" mode.
     *
     * Calls xmltag_$elem() on the handler object, with '.', '-' and ':'
     * in the element name replaced by '_'; falls back to xmltag() if no
     * such method exists.
     *
     * @param mixed  $xp      parser handle
     * @param string $elem    element name
     * @param array  $attribs attributes of the element
     */
    function funcStartHandler($xp, $elem, $attribs)
    {
        $func = 'xmltag_' . $elem;
        $func = str_replace(array('.', '-', ':'), '_', $func);
        if (method_exists($this->_handlerObj, $func)) {
            call_user_func(array(&$this->_handlerObj, $func), $xp, $elem, $attribs);
        } elseif (method_exists($this->_handlerObj, 'xmltag')) {
            call_user_func(array(&$this->_handlerObj, 'xmltag'), $xp, $elem, $attribs);
        }
    }

    // }}}
    // {{{ funcEndHandler()

    /**
     * End element dispatcher of the "func" mode.
     *
     * Calls xmltag_$elem_() (note the trailing underscore) on the handler
     * object, with '.', '-' and ':' in the element name replaced by '_';
     * falls back to xmltag_() if no such method exists.
     *
     * @param mixed  $xp   parser handle
     * @param string $elem element name
     */
    function funcEndHandler($xp, $elem)
    {
        $func = 'xmltag_' . $elem . '_';
        $func = str_replace(array('.', '-', ':'), '_', $func);
        if (method_exists($this->_handlerObj, $func)) {
            call_user_func(array(&$this->_handlerObj, $func), $xp, $elem);
        } elseif (method_exists($this->_handlerObj, 'xmltag_')) {
            call_user_func(array(&$this->_handlerObj, 'xmltag_'), $xp, $elem);
        }
    }

    // }}}
    // {{{ startHandler()

    /**
     * Start element handler of the "event" mode, does nothing by default.
     *
     * @abstract
     * @param mixed  $xp      parser handle
     * @param string $elem    element name
     * @param array  $attribs attributes of the element
     */
    function startHandler($xp, $elem, &$attribs)
    {
        return null;
    }

    // }}}
    // {{{ endHandler()

    /**
     * End element handler of the "event" mode, does nothing by default.
     *
     * @abstract
     * @param mixed  $xp   parser handle
     * @param string $elem element name
     */
    function endHandler($xp, $elem)
    {
        return null;
    }

    // }}}
}

/**
 * error class, replaces PEAR_Error
 *
 * An instance of this class will be returned
 * if an error occurs inside XML_Parser.
 *
 * There are three advantages over using the standard PEAR_Error:
 * - All messages will be prefixed
 * - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
 * - messages can be generated from the xml_parser resource
 *
 * @package XML_Parser
 * @access  public
 * @see     PEAR_Error
 */
class XML_Parser_Error extends PEAR_Error
{
    // {{{ properties

   /**
    * prefix for all messages
    *
    * @var      string
    */
    var $error_message_prefix = 'XML_Parser: ';

    // }}}
    // {{{ constructor()

   /**
    * construct a new error instance
    *
    * You may either pass a message or an xml_parser resource as first
    * parameter. If a resource has been passed, the last error that
    * happened will be retrieved and returned.
    *
    * The parser handle is a resource up to PHP 7 and an XMLParser
    * object as of PHP 8; both are recognized.
    *
    * @access   public
    * @param    string|resource     message or parser resource
    * @param    integer             error code
    * @param    integer             error handling
    * @param    integer             error level
    */
    function __construct($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
    {
        $isParser = is_resource($msgorparser)
            || (is_object($msgorparser) && strtolower(get_class($msgorparser)) == 'xmlparser');

        if ($isParser) {
            // replace the handle by a description of the parser's last error
            $code = xml_get_error_code($msgorparser);
            $msgorparser = sprintf('%s at XML input line %d:%d',
                                   xml_error_string($code),
                                   xml_get_current_line_number($msgorparser),
                                   xml_get_current_column_number($msgorparser));
        }
        $this->PEAR_Error($msgorparser, $code, $mode, $level);
    }

   /**
    * PHP4 constructor
    *
    * Kept for backwards compatibility, it only forwards to __construct().
    *
    * @access   public
    * @param    string|resource     message or parser resource
    * @param    integer             error code
    * @param    integer             error handling
    * @param    integer             error level
    */
    function XML_Parser_Error($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
    {
        XML_Parser_Error::__construct($msgorparser, $code, $mode, $level);
    }

    // }}}
}
690
?>