Subversion Repositories Applications.papyrus

Rev

Rev 1371 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
341 jpm 1
<?php
2
//
3
// +----------------------------------------------------------------------+
4
// | PHP Version 4                                                        |
5
// +----------------------------------------------------------------------+
6
// | Copyright (c) 1997-2004 The PHP Group                                |
7
// +----------------------------------------------------------------------+
8
// | This source file is subject to version 3.0 of the PHP license,       |
9
// | that is bundled with this package in the file LICENSE, and is        |
10
// | available at through the world-wide-web at                           |
11
// | http://www.php.net/license/3_0.txt.                                  |
12
// | If you did not receive a copy of the PHP license and are unable to   |
13
// | obtain it through the world-wide-web, please send a note to          |
14
// | license@php.net so we can mail you a copy immediately.               |
15
// +----------------------------------------------------------------------+
16
// | Author: Stig Bakken <ssb@fast.no>                                    |
17
// |         Tomas V.V.Cox <cox@idecnet.com>                              |
18
// |         Stephan Schmidt <schst@php-tools.net>                        |
19
// +----------------------------------------------------------------------+
20
//
21
// $Id: Parser.php,v 1.1 2005-04-18 16:13:31 jpm Exp $
22
 
23
/**
24
 * XML Parser class.
25
 *
26
 * This is an XML parser based on PHP's "xml" extension,
27
 * based on the bundled expat library.
28
 *
29
 * @category XML
30
 * @package XML_Parser
31
 * @author  Stig Bakken <ssb@fast.no>
32
 * @author  Tomas V.V.Cox <cox@idecnet.com>
33
 * @author  Stephan Schmidt <schst@php-tools.net>
34
 */
35
 
36
/**
37
 * uses PEAR's error handling
38
 */
39
require_once 'PEAR.php';
40
 
41
/**
 * Error code: the XML parser resource could not be created
 */
define('XML_PARSER_ERROR_NO_RESOURCE', 200);

/**
 * Error code: the requested parsing mode is not supported
 * (only "event" and "func" are known)
 */
define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);

/**
 * Error code: an invalid (target) encoding was given
 */
define('XML_PARSER_ERROR_INVALID_ENCODING', 202);

/**
 * Error code: the specified file could not be opened for reading
 */
define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);

/**
 * Error code: the supplied input is invalid
 */
define('XML_PARSER_ERROR_INVALID_INPUT', 204);

/**
 * Error code: a remote (http/ftp) file cannot be retrieved
 * because allow_url_fopen is disabled
 */
define('XML_PARSER_ERROR_REMOTE', 205);
70
 
71
/**
72
 * XML Parser class.
73
 *
74
 * This is an XML parser based on PHP's "xml" extension,
75
 * based on the bundled expat library.
76
 *
77
 * Notes:
78
 * - It requires PHP 4.0.4pl1 or greater
79
 * - From revision 1.17, the function names used by the 'func' mode
80
 *   are in the format "xmltag_$elem", for example: use "xmltag_name"
81
 *   to handle the <name></name> tags of your xml file.
82
 *
83
 * @category XML
84
 * @package XML_Parser
85
 * @author  Stig Bakken <ssb@fast.no>
86
 * @author  Tomas V.V.Cox <cox@idecnet.com>
87
 * @author  Stephan Schmidt <schst@php-tools.net>
88
 * @todo    create XML_Parser_Namespace to parse documents with namespaces
89
 * @todo    create XML_Parser_Pull
90
 * @todo    Tests that need to be made:
91
 *          - mixing character encodings
92
 *          - a test using all expat handlers
93
 *          - options (folding, output charset)
94
 *          - different parsing modes
95
 */
96
class XML_Parser extends PEAR
97
{
98
    // {{{ properties

    /**
     * Handle of the underlying XML parser
     *
     * @var  resource
     * @see  xml_parser_create()
     */
    var $parser;

    /**
     * Input source
     *
     * Holds a file handle when parsing from a file, or the XML
     * document itself when the input was supplied as a string.
     *
     * @var  resource|string
     */
    var $fp;

    /**
     * Whether to do case folding
     *
     * If set to true, all tag and attribute names will
     * be converted to UPPER CASE.
     *
     * @var  boolean
     */
    var $folding = true;

    /**
     * Mode of operation, one of "event" or "func"
     *
     * @var  string
     */
    var $mode;

    /**
     * Mapping from expat handler function to class method.
     *
     * The key is the suffix of the matching xml_set_*() function,
     * the value is the name of the method that gets registered
     * if the handler object implements it.
     *
     * @var  array
     */
    var $handler = array(
        'character_data_handler'            => 'cdataHandler',
        'default_handler'                   => 'defaultHandler',
        'processing_instruction_handler'    => 'piHandler',
        'unparsed_entity_decl_handler'      => 'unparsedHandler',
        'notation_decl_handler'             => 'notationHandler',
        'external_entity_ref_handler'       => 'entityrefHandler'
    );

    /**
     * Source encoding, null to use whatever the document specifies
     *
     * @var string
     */
    var $srcenc;

    /**
     * Target encoding, null to leave the parser default untouched
     *
     * @var string
     */
    var $tgtenc;

    /**
     * Object that receives the XML events
     *
     * @var object
     */
    var $_handlerObj;
166
 
167
    // }}}
168
    // {{{ constructor
169
 
170
    /**
     * PHP4 constructor.
     *
     * Exists for PHP4 compatibility only; it forwards all of its
     * arguments unchanged to __construct().
     *
     * @param string $srcenc source charset encoding, use NULL (default) to use
     *                       whatever the document specifies
     * @param string $mode   how this parser object should work, "event" for
     *                       startelement/endelement-type events, "func"
     *                       to have it call functions named after elements
     * @param string $tgtenc a valid target encoding
     */
    function XML_Parser($srcenc = null, $mode = 'event', $tgtenc = null)
    {
        // explicit class name: always run XML_Parser's own initialisation
        XML_Parser::__construct($srcenc, $mode, $tgtenc);
    }
187
    // }}}
188
 
189
    /**
     * PHP5 constructor
     *
     * Calls the PEAR base constructor with 'XML_Parser_Error' and
     * stores the encodings and the mode. The parser resource itself
     * is not created here, see _create().
     *
     * @param string $srcenc source charset encoding, use NULL (default) to use
     *                       whatever the document specifies
     * @param string $mode   how this parser object should work, "event" for
     *                       startelement/endelement-type events, "func"
     *                       to have it call functions named after elements
     * @param string $tgtenc a valid target encoding
     */
    function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
    {
        $this->PEAR('XML_Parser_Error');

        $this->srcenc = $srcenc;
        $this->tgtenc = $tgtenc;
        $this->mode   = $mode;
    }
207
    // }}}
208
 
209
    /**
     * Sets the mode of the parser.
     *
     * Possible modes are:
     * - func
     * - event
     *
     * You can set the mode using the second parameter
     * in the constructor.
     *
     * This method is only needed, when switching to a new
     * mode at a later point.
     *
     * If an unsupported mode is given, the current mode is left
     * untouched and an error object is returned.
     *
     * @access  public
     * @param   string          mode, either 'func' or 'event'
     * @return  boolean|object  true on success, PEAR_Error otherwise
     */
    function setMode($mode)
    {
        // strict comparison, so that e.g. integer 0 is not accepted as 'func'
        if ($mode !== 'func' && $mode !== 'event') {
            return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
        }

        $this->mode = $mode;
        return true;
    }
235
 
236
    /**
     * Sets the object that will handle the XML events
     *
     * Using a separate handler object keeps the event handling
     * independent of the parser object, so the underlying parser
     * can be switched easily.
     *
     * If no object is set, _initHandlers() falls back to $this,
     * i.e. XML_Parser assumes that you extend this class.
     *
     * @access  public
     * @param   object      object to handle the events (stored by reference)
     * @return  boolean     will always return true
     * @since   v1.2.0beta3
     */
    function setHandlerObj(&$obj)
    {
        // keep a reference, not a copy, of the handler object
        $this->_handlerObj = &$obj;

        return true;
    }
256
 
257
    /**
     * Init the element handlers
     *
     * Registers the start/end element handlers that match the current
     * mode and then every additional handler listed in $this->handler
     * that the handler object actually implements.
     *
     * @access  private
     * @return  boolean|object  true on success, false if no parser resource
     *                          exists, PEAR_Error if the mode is unsupported
     */
    function _initHandlers()
    {
        if (!is_resource($this->parser)) {
            return false;
        }

        // without an explicit handler object the events are handled in $this
        if (!is_object($this->_handlerObj)) {
            $this->_handlerObj = &$this;
        }

        switch ($this->mode) {

            case 'func':
                // element events go through funcStartHandler()/funcEndHandler()
                // of the parser, which dispatch to xmltag_* methods of the handler
                xml_set_object($this->parser, $this->_handlerObj);
                xml_set_element_handler($this->parser, array(&$this, 'funcStartHandler'), array(&$this, 'funcEndHandler'));
                break;

            case 'event':
                xml_set_object($this->parser, $this->_handlerObj);
                xml_set_element_handler($this->parser, 'startHandler', 'endHandler');
                break;

            default:
                return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
        }

        /**
         * set additional handlers for character data, entities, etc.
         */
        foreach ($this->handler as $xml_func => $method) {
            if (method_exists($this->_handlerObj, $method)) {
                $xml_func = 'xml_set_' . $xml_func;
                $xml_func($this->parser, $method);
            }
        }

        return true;
    }
298
 
299
    // {{{ _create()
300
 
301
    /**
     * create the XML parser resource
     *
     * Creating the resource here instead of in the constructor
     * avoids problems with object references and makes it possible
     * to return an error if something fails.
     *
     * On success the new resource is stored in $this->parser, the
     * handlers are registered and the case folding option is applied.
     *
     * @access   private
     * @return   boolean|object     true on success, PEAR_Error otherwise
     *
     * @see xml_parser_create
     */
    function _create()
    {
        // a null source encoding lets the parser use its own default
        $xp = ($this->srcenc === null)
            ? @xml_parser_create()
            : @xml_parser_create($this->srcenc);

        if (!is_resource($xp)) {
            return $this->raiseError('Unable to create XML parser resource.', XML_PARSER_ERROR_NO_RESOURCE);
        }

        if ($this->tgtenc !== null
            && !@xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING, $this->tgtenc)) {
            return $this->raiseError('invalid target encoding', XML_PARSER_ERROR_INVALID_ENCODING);
        }

        $this->parser = $xp;

        $status = $this->_initHandlers($this->mode);
        if ($this->isError($status)) {
            return $status;
        }

        xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);

        return true;
    }
340
 
341
    // }}}
342
    // {{{ reset()
343
 
344
    /**
     * Reset the parser.
     *
     * Creates a fresh parser resource via _create(), which allows
     * one parser instance to be used for multiple XML documents.
     *
     * @access   public
     * @return   boolean|object     true on success, PEAR_Error otherwise
     */
    function reset()
    {
        $created = $this->_create();

        return $this->isError($created) ? $created : true;
    }
361
 
362
    // }}}
363
    // {{{ setInputFile()
364
 
365
    /**
     * Sets the input xml file to be parsed
     *
     * The file is opened in binary read mode and the handle is stored
     * in $this->fp. http:// and ftp:// locations are rejected with
     * XML_PARSER_ERROR_REMOTE when allow_url_fopen is disabled.
     *
     * @param    string      Filename (full path)
     * @return   resource    fopen handle of the given file
     * @throws   XML_Parser_Error
     * @see      setInput(), setInputString(), parse()
     * @access   public
     */
    function setInputFile($file)
    {
        /**
         * check, if file is a remote file
         *
         * preg_match() is used instead of eregi(), because the POSIX
         * regex functions are deprecated and no longer available in
         * current PHP versions.
         */
        if (preg_match('/^(http|ftp):\/\//i', substr($file, 0, 10))) {
            if (!ini_get('allow_url_fopen')) {
                return $this->raiseError('Remote files cannot be parsed, as safe mode is enabled.', XML_PARSER_ERROR_REMOTE);
            }
        }

        $fp = @fopen($file, 'rb');
        if (is_resource($fp)) {
            $this->fp = $fp;
            return $fp;
        }
        return $this->raiseError('File could not be opened.', XML_PARSER_ERROR_FILE_NOT_READABLE);
    }
392
 
393
    // }}}
394
    // {{{ setInputString()
395
 
396
    /**
     * XML_Parser::setInputString()
     *
     * Sets the xml input from a string. The string is stored in
     * $this->fp, the same property that holds the file handle
     * when parsing from a file.
     *
     * @param string $data a string containing the XML document
     * @return null
     **/
    function setInputString($data)
    {
        $this->fp = $data;

        return null;
    }
409
 
410
    // }}}
411
    // {{{ setInput()
412
 
413
    /**
     * Sets the file handle to use with parse().
     *
     * You should use setInputFile() or setInputString() if you
     * pass a string
     *
     * The input is classified in this order: open resource, absolute
     * URL (has a scheme at the beginning), existing local file, and
     * finally a string containing the document itself. Non-scalar
     * values (arrays, objects, null) are rejected.
     *
     * @param    mixed  $fp  Can be either a resource returned from fopen(),
     *                       a URL, a local filename or a string.
     * @return   boolean|resource|object  true if a resource or string has been
     *                       set, the result of setInputFile() for URLs and
     *                       filenames, PEAR_Error on illegal input
     * @access   public
     * @see      parse()
     * @uses     setInputString(), setInputFile()
     */
    function setInput($fp)
    {
        if (is_resource($fp)) {
            $this->fp = $fp;
            return true;
        }

        // neither a handle nor anything that can be read as filename or document
        if (!is_scalar($fp)) {
            return $this->raiseError('Illegal input format', XML_PARSER_ERROR_INVALID_INPUT);
        }

        // see if it's an absolute URL (has a scheme at the beginning);
        // preg_match() replaces the deprecated eregi()
        if (preg_match('/^[a-z]+:\/\//i', substr($fp, 0, 10))) {
            return $this->setInputFile($fp);
        }

        // see if it's a local file
        if (file_exists($fp)) {
            return $this->setInputFile($fp);
        }

        // it must be a string
        $this->fp = $fp;
        return true;
    }
447
 
448
    // }}}
449
    // {{{ parse()
450
 
451
    /**
     * Central parsing function.
     *
     * Resets the parser and then parses the input stored in $this->fp,
     * which is either an open file handle (read in chunks of 4096 bytes)
     * or a string containing the whole document. The parser and the
     * input are freed before returning, on success as well as on error.
     *
     * @return   true|object PEAR error     returns true on success, or a PEAR_Error otherwise
     * @access   public
     */
    function parse()
    {
        /**
         * reset the parser
         */
        $result = $this->reset();
        if ($this->isError($result)) {
            return $result;
        }

        // if $this->fp was fopened previously
        if (is_resource($this->fp)) {

            do {
                $data = fread($this->fp, 4096);
                if ($data === false) {
                    // read failure: treat like end of input
                    $data = '';
                }

                // Compare against '' explicitly: a chunk like "0" is falsy but
                // still data. An empty read also ends the input, which makes
                // sure the parser always sees a final chunk, even for empty
                // files or files whose size is a multiple of the chunk size.
                $eof = ($data === '') || feof($this->fp);

                if (!$this->_parseString($data, $eof)) {
                    // build the error before free() destroys the parser resource
                    $error = $this->raiseError();
                    $this->free();
                    return $error;
                }
            } while (!$eof);

        // otherwise, $this->fp must be a string
        } else {
            if (!$this->_parseString($this->fp, true)) {
                // build the error before free() destroys the parser resource
                $error = $this->raiseError();
                $this->free();
                return $error;
            }
        }
        $this->free();

        return true;
    }
488
 
489
    /**
     * XML_Parser::_parseString()
     *
     * Hands one piece of data to xml_parse() using $this->parser.
     *
     * @param string $data   piece of the XML document
     * @param boolean $eof   true if $data is the last piece of the document
     * @return bool          result of xml_parse()
     * @access private
     * @see parseString()
     **/
    function _parseString($data, $eof = false)
    {
        $parsed = xml_parse($this->parser, $data, $eof);

        return $parsed;
    }
502
 
503
    // }}}
504
    // {{{ parseString()
505
 
506
    /**
     * XML_Parser::parseString()
     *
     * Parses a string.
     *
     * A parser resource is created on demand if none exists yet. The
     * resources are freed when parsing fails or when $eof is true.
     *
     * @param    string  $data XML data
     * @param    boolean $eof  If set and TRUE, data is the last piece of data sent in this parser
     * @throws   XML_Parser_Error
     * @return   Pear Error|true   true on success or a PEAR Error
     * @see      _parseString()
     */
    function parseString($data, $eof = false)
    {
        if (!isset($this->parser) || !is_resource($this->parser)) {
            // do not go on without a parser, report why it could not be created
            $result = $this->reset();
            if ($this->isError($result)) {
                return $result;
            }
        }

        if (!$this->_parseString($data, $eof)) {
            // build the error before free() destroys the parser resource
            $error = $this->raiseError();
            $this->free();
            return $error;
        }

        if ($eof === true) {
            $this->free();
        }
        return true;
    }
534
 
535
    /**
     * XML_Parser::free()
     *
     * Free the internal resources associated with the parser:
     * the parser resource is released and the input is closed
     * (if it is a file handle) and dropped.
     *
     * @return null
     **/
    function free()
    {
        $hasParser = isset($this->parser) && is_resource($this->parser);
        if ($hasParser) {
            xml_parser_free($this->parser);
            unset($this->parser);
        }

        $hasOpenFile = isset($this->fp) && is_resource($this->fp);
        if ($hasOpenFile) {
            fclose($this->fp);
        }
        // the input is dropped in any case, be it a handle or a string
        unset($this->fp);

        return null;
    }
554
 
555
    /**
     * XML_Parser::raiseError()
     *
     * Throws a XML_Parser_Error
     *
     * If no message is given, the parser resource is handed to
     * XML_Parser_Error, which builds the message from the last
     * error of that parser.
     *
     * @param string  $msg   the error message
     * @param integer $ecode the error message code
     * @return XML_Parser_Error
     **/
    function raiseError($msg = null, $ecode = 0)
    {
        $msg = !is_null($msg) ? $msg : $this->parser;

        // plain "new": assigning the result of new by reference is deprecated
        $err = new XML_Parser_Error($msg, $ecode);

        return parent::raiseError($err);
    }
570
 
571
    // }}}
572
    // {{{ funcStartHandler()
573
 
574
    /**
     * Start element handler used in 'func' mode.
     *
     * Calls the method "xmltag_$elem" of the handler object if it
     * exists, otherwise its generic "xmltag" method if that exists.
     * The characters '.', '-' and ':' are legal in element names but
     * not in method names, so they are replaced by '_'.
     *
     * @param resource $xp      XML parser resource
     * @param string   $elem    name of the element
     * @param array    $attribs attributes of the element
     * @return void
     */
    function funcStartHandler($xp, $elem, $attribs)
    {
        $func = str_replace(array('.', '-', ':'), '_', 'xmltag_' . $elem);

        if (method_exists($this->_handlerObj, $func)) {
            call_user_func(array(&$this->_handlerObj, $func), $xp, $elem, $attribs);
        } elseif (method_exists($this->_handlerObj, 'xmltag')) {
            call_user_func(array(&$this->_handlerObj, 'xmltag'), $xp, $elem, $attribs);
        }
    }
586
 
587
    // }}}
588
    // {{{ funcEndHandler()
589
 
590
    /**
     * End element handler used in 'func' mode.
     *
     * Calls the method "xmltag_$elem_" (note the trailing underscore)
     * of the handler object if it exists, otherwise its generic
     * "xmltag_" method if that exists. The characters '.', '-' and ':'
     * are legal in element names but not in method names, so they are
     * replaced by '_'.
     *
     * @param resource $xp   XML parser resource
     * @param string   $elem name of the element
     * @return void
     */
    function funcEndHandler($xp, $elem)
    {
        $func = str_replace(array('.', '-', ':'), '_', 'xmltag_' . $elem . '_');

        if (method_exists($this->_handlerObj, $func)) {
            call_user_func(array(&$this->_handlerObj, $func), $xp, $elem);
        } elseif (method_exists($this->_handlerObj, 'xmltag_')) {
            call_user_func(array(&$this->_handlerObj, 'xmltag_'), $xp, $elem);
        }
    }
602
 
603
    // }}}
604
    // {{{ startHandler()
605
 
606
    /**
     * Default start element handler for 'event' mode.
     *
     * Does nothing; meant to be overridden.
     *
     * @abstract
     * @param resource $xp      XML parser resource
     * @param string   $elem    name of the element
     * @param array    $attribs attributes of the element
     * @return null
     */
    function startHandler($xp, $elem, &$attribs)
    {
        return null;
    }
614
 
615
    // }}}
616
    // {{{ endHandler()
617
 
618
    /**
     * Default end element handler for 'event' mode.
     *
     * Does nothing; meant to be overridden.
     *
     * @abstract
     * @param resource $xp   XML parser resource
     * @param string   $elem name of the element
     * @return null
     */
    function endHandler($xp, $elem)
    {
        return null;
    }
626
 
627
 
628
    // }}}
629
}
630
 
631
/**
 * error class, replaces PEAR_Error
 *
 * An instance of this class will be returned
 * if an error occurs inside XML_Parser.
 *
 * There are three advantages over using the standard PEAR_Error:
 * - All messages will be prefixed
 * - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
 * - messages can be generated from the xml_parser resource
 *
 * @package XML_Parser
 * @access  public
 * @see     PEAR_Error
 */
class XML_Parser_Error extends PEAR_Error
{
    // {{{ properties

    /**
     * prefix for all messages
     *
     * @var      string
     */
    var $error_message_prefix = 'XML_Parser: ';

    // }}}
    // {{{ constructor()

    /**
     * construct a new error instance
     *
     * The first parameter is either a ready-made message or an
     * xml_parser resource. For a resource, the code of the parser's
     * last error replaces $code and the message is built from the
     * matching error string and the current input line number.
     *
     * @access   public
     * @param    string|resource     message or parser resource
     * @param    integer             error code
     * @param    integer             error handling
     * @param    integer             error level
     */
    function XML_Parser_Error($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
    {
        if (is_resource($msgorparser)) {
            $parser      = $msgorparser;
            $code        = xml_get_error_code($parser);
            $description = xml_error_string($code);
            $line        = xml_get_current_line_number($parser);
            $msgorparser = sprintf('%s at XML input line %d', $description, $line);
        }

        $this->PEAR_Error($msgorparser, $code, $mode, $level);
    }

    // }}}
}
684
?>