Blame | Last modification | View Log | RSS feed
<?php
//
// +----------------------------------------------------------------------+
// | PHP Version 4 |
// +----------------------------------------------------------------------+
// | Copyright (c) 1997-2004 The PHP Group |
// +----------------------------------------------------------------------+
// | This source file is subject to version 3.0 of the PHP license, |
// | that is bundled with this package in the file LICENSE, and is |
// | available at through the world-wide-web at |
// | http://www.php.net/license/3_0.txt. |
// | If you did not receive a copy of the PHP license and are unable to |
// | obtain it through the world-wide-web, please send a note to |
// | license@php.net so we can mail you a copy immediately. |
// +----------------------------------------------------------------------+
// | Author: Stig Bakken <ssb@fast.no> |
// | Tomas V.V.Cox <cox@idecnet.com> |
// | Stephan Schmidt <schst@php-tools.net> |
// +----------------------------------------------------------------------+
//
// $Id: Parser.php,v 1.2.2.2 2007-11-19 13:12:51 alexandre_tb Exp $
/**
* XML Parser class.
*
* This is an XML parser based on PHP's "xml" extension,
* based on the bundled expat library.
*
* @category XML
* @package XML_Parser
* @author Stig Bakken <ssb@fast.no>
* @author Tomas V.V.Cox <cox@idecnet.com>
* @author Stephan Schmidt <schst@php-tools.net>
*/
/**
* uses PEAR's error handling
*/
require_once 'PEAR.php';
/**
* resource could not be created
*/
define('XML_PARSER_ERROR_NO_RESOURCE', 200);
/**
* unsupported mode
*/
define('XML_PARSER_ERROR_UNSUPPORTED_MODE', 201);
/**
* invalid encoding was given
*/
define('XML_PARSER_ERROR_INVALID_ENCODING', 202);
/**
* specified file could not be read
*/
define('XML_PARSER_ERROR_FILE_NOT_READABLE', 203);
/**
* invalid input
*/
define('XML_PARSER_ERROR_INVALID_INPUT', 204);
/**
* remote file cannot be retrieved in safe mode
*/
define('XML_PARSER_ERROR_REMOTE', 205);
/**
* XML Parser class.
*
* This is an XML parser based on PHP's "xml" extension,
* based on the bundled expat library.
*
* Notes:
* - It requires PHP 4.0.4pl1 or greater
* - From revision 1.17, the function names used by the 'func' mode
* are in the format "xmltag_$elem", for example: use "xmltag_name"
* to handle the <name></name> tags of your xml file.
*
* @category XML
* @package XML_Parser
* @author Stig Bakken <ssb@fast.no>
* @author Tomas V.V.Cox <cox@idecnet.com>
* @author Stephan Schmidt <schst@php-tools.net>
* @todo create XML_Parser_Namespace to parse documents with namespaces
* @todo create XML_Parser_Pull
* @todo Tests that need to be made:
* - mixing character encodings
* - a test using all expat handlers
* - options (folding, output charset)
* - different parsing modes
*/
class XML_Parser extends PEAR
{
// {{{ properties
/**
* XML parser handle
*
* @var resource
* @see xml_parser_create()
*/
var $parser;
/**
* File handle if parsing from a file
*
* @var resource
*/
var $fp;
/**
* Whether to do case folding
*
* If set to true, all tag and attribute names will
* be converted to UPPER CASE.
*
* @var boolean
*/
var $folding = true;
/**
* Mode of operation, one of "event" or "func"
*
* @var string
*/
var $mode;
/**
* Mapping from expat handler function to class method.
*
* @var array
*/
var $handler = array(
'character_data_handler' => 'cdataHandler',
'default_handler' => 'defaultHandler',
'processing_instruction_handler' => 'piHandler',
'unparsed_entity_decl_handler' => 'unparsedHandler',
'notation_decl_handler' => 'notationHandler',
'external_entity_ref_handler' => 'entityrefHandler'
);
/**
* source encoding
*
* @var string
*/
var $srcenc;
/**
* target encoding
*
* @var string
*/
var $tgtenc;
/**
* handler object
*
* @var object
*/
var $_handlerObj;
/**
* valid encodings
*
* @var array
*/
var $_validEncodings = array('ISO-8859-1', 'UTF-8', 'US-ASCII');
// }}}
// {{{ constructor
/**
* Creates an XML parser.
*
* This is needed for PHP4 compatibility, it will
* call the constructor, when a new instance is created.
*
* @param string $srcenc source charset encoding, use NULL (default) to use
* whatever the document specifies
* @param string $mode how this parser object should work, "event" for
* startelement/endelement-type events, "func"
* to have it call functions named after elements
* @param string $tgenc a valid target encoding
*/
function XML_Parser($srcenc = null, $mode = 'event', $tgtenc = null)
{
XML_Parser::__construct($srcenc, $mode, $tgtenc);
}
// }}}
/**
* PHP5 constructor
*
* @param string $srcenc source charset encoding, use NULL (default) to use
* whatever the document specifies
* @param string $mode how this parser object should work, "event" for
* startelement/endelement-type events, "func"
* to have it call functions named after elements
* @param string $tgenc a valid target encoding
*/
function __construct($srcenc = null, $mode = 'event', $tgtenc = null)
{
$this->PEAR('XML_Parser_Error');
$this->mode = $mode;
$this->srcenc = $srcenc;
$this->tgtenc = $tgtenc;
}
// }}}
/**
* Sets the mode of the parser.
*
* Possible modes are:
* - func
* - event
*
* You can set the mode using the second parameter
* in the constructor.
*
* This method is only needed, when switching to a new
* mode at a later point.
*
* @access public
* @param string mode, either 'func' or 'event'
* @return boolean|object true on success, PEAR_Error otherwise
*/
function setMode($mode)
{
if ($mode != 'func' && $mode != 'event') {
$this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
}
$this->mode = $mode;
return true;
}
/**
* Sets the object, that will handle the XML events
*
* This allows you to create a handler object independent of the
* parser object that you are using and easily switch the underlying
* parser.
*
* If no object will be set, XML_Parser assumes that you
* extend this class and handle the events in $this.
*
* @access public
* @param object object to handle the events
* @return boolean will always return true
* @since v1.2.0beta3
*/
function setHandlerObj(&$obj)
{
$this->_handlerObj = &$obj;
return true;
}
/**
* Init the element handlers
*
* @access private
*/
function _initHandlers()
{
if (!is_resource($this->parser)) {
return false;
}
if (!is_object($this->_handlerObj)) {
$this->_handlerObj = &$this;
}
switch ($this->mode) {
case 'func':
xml_set_object($this->parser, $this->_handlerObj);
xml_set_element_handler($this->parser, array(&$this, 'funcStartHandler'), array(&$this, 'funcEndHandler'));
break;
case 'event':
xml_set_object($this->parser, $this->_handlerObj);
xml_set_element_handler($this->parser, 'startHandler', 'endHandler');
break;
default:
return $this->raiseError('Unsupported mode given', XML_PARSER_ERROR_UNSUPPORTED_MODE);
break;
}
/**
* set additional handlers for character data, entities, etc.
*/
foreach ($this->handler as $xml_func => $method) {
if (method_exists($this->_handlerObj, $method)) {
$xml_func = 'xml_set_' . $xml_func;
$xml_func($this->parser, $method);
}
}
}
// {{{ _create()
/**
* create the XML parser resource
*
* Has been moved from the constructor to avoid
* problems with object references.
*
* Furthermore it allows us returning an error
* if something fails.
*
* @access private
* @return boolean|object true on success, PEAR_Error otherwise
*
* @see xml_parser_create
*/
function _create()
{
if ($this->srcenc === null) {
$xp = @xml_parser_create();
} else {
$xp = @xml_parser_create($this->srcenc);
}
if (is_resource($xp)) {
if ($this->tgtenc !== null) {
if (!@xml_parser_set_option($xp, XML_OPTION_TARGET_ENCODING,
$this->tgtenc)) {
return $this->raiseError('invalid target encoding', XML_PARSER_ERROR_INVALID_ENCODING);
}
}
$this->parser = $xp;
$result = $this->_initHandlers($this->mode);
if ($this->isError($result)) {
return $result;
}
xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, $this->folding);
return true;
}
if (!in_array(strtoupper($this->srcenc), $this->_validEncodings)) {
return $this->raiseError('invalid source encoding', XML_PARSER_ERROR_INVALID_ENCODING);
}
return $this->raiseError('Unable to create XML parser resource.', XML_PARSER_ERROR_NO_RESOURCE);
}
// }}}
// {{{ reset()
/**
* Reset the parser.
*
* This allows you to use one parser instance
* to parse multiple XML documents.
*
* @access public
* @return boolean|object true on success, PEAR_Error otherwise
*/
function reset()
{
$result = $this->_create();
if ($this->isError( $result )) {
return $result;
}
return true;
}
// }}}
// {{{ setInputFile()
/**
* Sets the input xml file to be parsed
*
* @param string Filename (full path)
* @return resource fopen handle of the given file
* @throws XML_Parser_Error
* @see setInput(), setInputString(), parse()
* @access public
*/
function setInputFile($file)
{
/**
* check, if file is a remote file
*/
if (eregi('^(http|ftp)://', substr($file, 0, 10))) {
if (!ini_get('allow_url_fopen')) {
return $this->raiseError('Remote files cannot be parsed, as safe mode is enabled.', XML_PARSER_ERROR_REMOTE);
}
}
$fp = @fopen($file, 'rb');
if (is_resource($fp)) {
$this->fp = $fp;
return $fp;
}
return $this->raiseError('File could not be opened.', XML_PARSER_ERROR_FILE_NOT_READABLE);
}
// }}}
// {{{ setInputString()
/**
* XML_Parser::setInputString()
*
* Sets the xml input from a string
*
* @param string $data a string containing the XML document
* @return null
**/
function setInputString($data)
{
$this->fp = $data;
return null;
}
// }}}
// {{{ setInput()
/**
* Sets the file handle to use with parse().
*
* You should use setInputFile() or setInputString() if you
* pass a string
*
* @param mixed $fp Can be either a resource returned from fopen(),
* a URL, a local filename or a string.
* @access public
* @see parse()
* @uses setInputString(), setInputFile()
*/
function setInput($fp)
{
if (is_resource($fp)) {
$this->fp = $fp;
return true;
}
// see if it's an absolute URL (has a scheme at the beginning)
elseif (eregi('^[a-z]+://', substr($fp, 0, 10))) {
return $this->setInputFile($fp);
}
// see if it's a local file
elseif (file_exists($fp)) {
return $this->setInputFile($fp);
}
// it must be a string
else {
$this->fp = $fp;
return true;
}
return $this->raiseError('Illegal input format', XML_PARSER_ERROR_INVALID_INPUT);
}
// }}}
// {{{ parse()
/**
* Central parsing function.
*
* @return true|object PEAR error returns true on success, or a PEAR_Error otherwise
* @access public
*/
function parse()
{
/**
* reset the parser
*/
$result = $this->reset();
if ($this->isError($result)) {
return $result;
}
// if $this->fp was fopened previously
if (is_resource($this->fp)) {
while ($data = fread($this->fp, 4096)) {
if (!$this->_parseString($data, feof($this->fp))) {
$error = &$this->raiseError();
$this->free();
return $error;
}
}
// otherwise, $this->fp must be a string
} else {
if (!$this->_parseString($this->fp, true)) {
$error = &$this->raiseError();
$this->free();
return $error;
}
}
$this->free();
return true;
}
/**
* XML_Parser::_parseString()
*
* @param string $data
* @param boolean $eof
* @return bool
* @access private
* @see parseString()
**/
function _parseString($data, $eof = false)
{
return xml_parse($this->parser, $data, $eof);
}
// }}}
// {{{ parseString()
/**
* XML_Parser::parseString()
*
* Parses a string.
*
* @param string $data XML data
* @param boolean $eof If set and TRUE, data is the last piece of data sent in this parser
* @throws XML_Parser_Error
* @return Pear Error|true true on success or a PEAR Error
* @see _parseString()
*/
function parseString($data, $eof = false)
{
if (!isset($this->parser) || !is_resource($this->parser)) {
$this->reset();
}
if (!$this->_parseString($data, $eof)) {
$error = &$this->raiseError();
$this->free();
return $error;
}
if ($eof === true) {
$this->free();
}
return true;
}
/**
* XML_Parser::free()
*
* Free the internal resources associated with the parser
*
* @return null
**/
function free()
{
if (isset($this->parser) && is_resource($this->parser)) {
xml_parser_free($this->parser);
unset( $this->parser );
}
if (isset($this->fp) && is_resource($this->fp)) {
fclose($this->fp);
}
unset($this->fp);
return null;
}
/**
* XML_Parser::raiseError()
*
* Throws a XML_Parser_Error
*
* @param string $msg the error message
* @param integer $ecode the error message code
* @return XML_Parser_Error
**/
function raiseError($msg = null, $ecode = 0)
{
$msg = !is_null($msg) ? $msg : $this->parser;
$err = &new XML_Parser_Error($msg, $ecode);
return parent::raiseError($err);
}
// }}}
// {{{ funcStartHandler()
function funcStartHandler($xp, $elem, $attribs)
{
$func = 'xmltag_' . $elem;
$func = str_replace(array('.', '-', ':'), '_', $func);
if (method_exists($this->_handlerObj, $func)) {
call_user_func(array(&$this->_handlerObj, $func), $xp, $elem, $attribs);
} elseif (method_exists($this->_handlerObj, 'xmltag')) {
call_user_func(array(&$this->_handlerObj, 'xmltag'), $xp, $elem, $attribs);
}
}
// }}}
// {{{ funcEndHandler()
function funcEndHandler($xp, $elem)
{
$func = 'xmltag_' . $elem . '_';
$func = str_replace(array('.', '-', ':'), '_', $func);
if (method_exists($this->_handlerObj, $func)) {
call_user_func(array(&$this->_handlerObj, $func), $xp, $elem);
} elseif (method_exists($this->_handlerObj, 'xmltag_')) {
call_user_func(array(&$this->_handlerObj, 'xmltag_'), $xp, $elem);
}
}
// }}}
// {{{ startHandler()
/**
*
* @abstract
*/
function startHandler($xp, $elem, &$attribs)
{
return NULL;
}
// }}}
// {{{ endHandler()
/**
*
* @abstract
*/
function endHandler($xp, $elem)
{
return NULL;
}
// }}}me
}
/**
* error class, replaces PEAR_Error
*
* An instance of this class will be returned
* if an error occurs inside XML_Parser.
*
* There are three advantages over using the standard PEAR_Error:
* - All messages will be prefixed
* - check for XML_Parser error, using is_a( $error, 'XML_Parser_Error' )
* - messages can be generated from the xml_parser resource
*
* @package XML_Parser
* @access public
* @see PEAR_Error
*/
class XML_Parser_Error extends PEAR_Error
{
// {{{ properties
/**
* prefix for all messages
*
* @var string
*/
var $error_message_prefix = 'XML_Parser: ';
// }}}
// {{{ constructor()
/**
* construct a new error instance
*
* You may either pass a message or an xml_parser resource as first
* parameter. If a resource has been passed, the last error that
* happened will be retrieved and returned.
*
* @access public
* @param string|resource message or parser resource
* @param integer error code
* @param integer error handling
* @param integer error level
*/
function XML_Parser_Error($msgorparser = 'unknown error', $code = 0, $mode = PEAR_ERROR_RETURN, $level = E_USER_NOTICE)
{
if (is_resource($msgorparser)) {
$code = xml_get_error_code($msgorparser);
$msgorparser = sprintf('%s at XML input line %d:%d',
xml_error_string($code),
xml_get_current_line_number($msgorparser),
xml_get_current_column_number($msgorparser));
}
$this->PEAR_Error($msgorparser, $code, $mode, $level);
}
// }}}
}
?>