/trunk/api/pear/A_LIRE.txt |
---|
1,16 → 1,17 |
Liste des packages PEAR : |
============================== |
Package Version State |
Auth 1.4.3 stable |
Calendar 0.5.2 beta |
DB 1.7.6 stable |
HTML_Common 1.2.1 stable |
HTML_QuickForm 3.2.5 stable |
HTML_Table 1.5 stable |
HTTP 1.3.5 stable |
Net_FTP 1.3.0 stable |
Net_SMTP 1.2.6 stable |
Net_Socket 1.0.6 stable |
Net_URL 1.0.14 stable |
PEAR 1.4.11 stable |
Text_Wiki 1.0.0 stable |
Package Version State |
Auth 1.4.3 stable |
Calendar 0.5.2 beta |
DB 1.7.6 stable |
HTML_Common 1.2.1 stable |
HTML_QuickForm 3.2.5 stable |
HTML_Table 1.5 stable |
HTTP 1.3.5 stable |
Net_FTP 1.3.0 stable |
Net_SMTP 1.2.6 stable |
Net_Socket 1.0.6 stable |
Net_URL 1.0.14 stable |
PEAR 1.4.11 stable |
Text_Wiki 1.0.0 stable |
XML_Feed_Parser 1.0.2 stable |
/trunk/api/pear/XML/Parser.php |
---|
File deleted |
\ No newline at end of file |
/trunk/api/pear/XML/Feed/Parser/Atom.php |
---|
New file |
0,0 → 1,365 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* Atom feed class for XML_Feed_Parser |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
* @version CVS: $Id: Atom.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/** |
* This is the class that determines how we manage Atom 1.0 feeds |
* |
* How we deal with constructs: |
* date - return as unix datetime for use with the 'date' function unless specified otherwise |
* text - return as is. optional parameter will give access to attributes |
* person - defaults to name, but parameter based access |
* |
* @author James Stewart <james@jystewart.net> |
* @version Release: 1.0.2 |
* @package XML_Feed_Parser |
*/ |
class XML_Feed_Parser_Atom extends XML_Feed_Parser_Type |
{ |
/** |
* The URI of the RelaxNG schema used to (optionally) validate the feed |
* @var string |
*/ |
private $relax = 'atom.rnc'; |
/** |
* We're likely to use XPath, so let's keep it global |
* @var DOMXPath |
*/ |
public $xpath; |
/** |
* When performing XPath queries we will use this prefix |
* @var string |
*/ |
private $xpathPrefix = '//'; |
/** |
* The feed type we are parsing |
* @var string |
*/ |
public $version = 'Atom 1.0'; |
/** |
* The class used to represent individual items |
* @var string |
*/ |
protected $itemClass = 'XML_Feed_Parser_AtomElement'; |
/** |
* The element containing entries |
* @var string |
*/ |
protected $itemElement = 'entry'; |
/** |
* Here we map those elements we're not going to handle individually |
* to the constructs they are. The optional second parameter in the array |
* tells the parser whether to 'fall back' (not apt. at the feed level) or |
* fail if the element is missing. If the parameter is not set, the function |
* will simply return false and leave it to the client to decide what to do. |
* @var array |
*/ |
protected $map = array( |
'author' => array('Person'), |
'contributor' => array('Person'), |
'icon' => array('Text'), |
'logo' => array('Text'), |
'id' => array('Text', 'fail'), |
'rights' => array('Text'), |
'subtitle' => array('Text'), |
'title' => array('Text', 'fail'), |
'updated' => array('Date', 'fail'), |
'link' => array('Link'), |
'generator' => array('Text'), |
'category' => array('Category')); |
/** |
* Here we provide a few mappings for those very special circumstances in |
* which it makes sense to map back to the RSS2 spec. Key is RSS2 version |
* value is an array consisting of the equivalent in atom and any attributes |
* needed to make the mapping. |
* @var array |
*/ |
protected $compatMap = array( |
'guid' => array('id'), |
'links' => array('link'), |
'tags' => array('category'), |
'contributors' => array('contributor')); |
/**
 * Wrap an already-parsed Atom 1.0 document.
 *
 * Stores the document, optionally validates it, prepares an XPath context
 * with the "atom" prefix bound to the Atom namespace and caches the number
 * of entry elements.
 *
 * NOTE(review): validation passes the schema *name* held in $relax to
 * DOMDocument::relaxNGValidateSource(), which expects the schema source
 * text -- confirm whether relaxNGValidate() with a schema path was meant.
 *
 * @param DOMDocument $model  A DOM object representing the feed
 * @param bool        $strict (optional) Whether or not to validate this feed
 * @throws XML_Feed_Parser_Exception if $strict is set and validation fails
 */
function __construct(DOMDocument $model, $strict = false)
{
    $this->model = $model;

    if ($strict && ! $this->model->relaxNGValidateSource($this->relax)) {
        throw new XML_Feed_Parser_Exception('Failed required validation');
    }

    $xpath = new DOMXPath($this->model);
    $xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom');
    $this->xpath = $xpath;

    $this->numberEntries = $this->count('entry');
}
/**
 * Implement retrieval of an entry based on its ID for atom feeds.
 *
 * The entry is located with XPath. When DOMXPath::evaluate is available the
 * entry's position is also computed so that the object can be stored in the
 * array used by getEntryByOffset, sparing that method a second lookup.
 * Entries found once are remembered in idMappings, keyed by ID.
 *
 * @param string $id any valid Atom ID.
 * @return XML_Feed_Parser_AtomElement|false the entry, or false if no entry
 *         carries that ID
 */
function getEntryById($id)
{
    /* idMappings holds the entry objects themselves (that is what is
       written to it below), so a cached hit is returned directly */
    if (isset($this->idMappings[$id])) {
        return $this->idMappings[$id];
    }

    /* XPath 1.0 string literals have no escape syntax, so choose a quoting
       style that can hold the ID verbatim */
    $needle = (string) $id;
    if (strpos($needle, "'") === false) {
        $literal = "'" . $needle . "'";
    } else if (strpos($needle, '"') === false) {
        $literal = '"' . $needle . '"';
    } else {
        $literal = "concat('" . str_replace("'", "', \"'\", '", $needle) . "')";
    }

    $entries = $this->xpath->query('//atom:entry[atom:id=' . $literal . ']');
    if (! $entries || $entries->length == 0) {
        return false;
    }

    $node = $entries->item(0);
    /* itemClass names the entry class; itemElement is only the tag name */
    $entry = new $this->itemClass($node, $this, $node->baseURI);

    if (method_exists($this->xpath, 'evaluate')) {
        /* count() comes back as a float; entries is indexed by integer offset */
        $offset = (int) $this->xpath->evaluate(
            'count(preceding-sibling::atom:entry)', $node);
        $this->entries[$offset] = $entry;
    }

    $this->idMappings[$id] = $entry;
    return $entry;
}
/**
 * Retrieves data from a person construct.
 *
 * Looks up the requested child of the person element found at the given
 * offset. The child defaults to 'name'; a request for 'url' is answered
 * from the 'uri' element.
 *
 * @param string $method    The name of the person construct we want
 * @param array  $arguments [0] offset of the construct, [1]['param'] the
 *                          child element to read
 * @return string|false the child's text, or false if the construct or the
 *         child is missing
 */
protected function getPerson($method, $arguments)
{
    $offset = 0;
    if (! empty($arguments[0])) {
        $offset = $arguments[0];
    }

    $parameter = 'name';
    if (! empty($arguments[1]['param'])) {
        $parameter = $arguments[1]['param'];
    }
    if ($parameter == 'url') {
        $parameter = 'uri';
    }

    $people = $this->model->getElementsByTagName($method);
    if ($people->length <= $offset) {
        return false;
    }

    $matches = $people->item($offset)->getElementsByTagName($parameter);
    return ($matches->length == 0) ? false : $matches->item(0)->nodeValue;
}
/**
 * Retrieves an element's content where that content is a text construct.
 *
 * The two arguments allowed are 'offset' and 'attribute', so
 * $parser->subtitle() returns the content of the element, while
 * $parser->subtitle(false, 'type') returns the value of its type attribute.
 * A request for 'href' falls back to a 'uri' attribute, and asking the
 * generator element for 'name' returns its content.
 *
 * Side effect: an element without a type attribute is given type="text".
 *
 * @todo Clarify overlap with getContent()
 * @param string $method    The name of the text construct we want
 * @param array  $arguments [0] offset, [1] attribute name (optional)
 * @return mixed the result of parseTextConstruct(), an attribute value, or
 *         false if the element or attribute is missing
 */
protected function getText($method, $arguments)
{
    $offset = empty($arguments[0]) ? 0 : $arguments[0];
    $attribute = empty($arguments[1]) ? false : $arguments[1];

    $tags = $this->model->getElementsByTagName($method);
    if ($tags->length <= $offset) {
        return false;
    }
    $content = $tags->item($offset);

    /* Make the default type explicit on the node itself */
    if (! $content->hasAttribute('type')) {
        $content->setAttribute('type', 'text');
    }

    /* generator's "name" is the element content, not an attribute */
    $wantsContent = empty($attribute) ||
        ($method == 'generator' && $attribute == 'name');
    if ($wantsContent) {
        return $this->parseTextConstruct($content);
    }

    if ($content->hasAttribute($attribute)) {
        return $content->getAttribute($attribute);
    }
    if ($attribute == 'href' && $content->hasAttribute('uri')) {
        return $content->getAttribute('uri');
    }
    return false;
}
/**
 * Extract content appropriately from atom text constructs
 *
 * The content element and the other text constructs are handled by separate
 * methods but share this core step, which applies the rules for the
 * different types in order to extract the content:
 *  - text (or any text/* type): the node value as is
 *  - html: the node value with any remaining "&lt;" turned back into "<"
 *  - xhtml: the serialised children of the first div, with xml:base applied
 *  - any other XML media type: the node itself
 *  - anything else: the node value, base64-decoded
 *
 * @param DOMNode $content the text construct node to be parsed
 * @return string|DOMNode|false false if an xhtml construct has no div or
 *         the div is empty
 * @author James Stewart
 **/
protected function parseTextConstruct(DOMNode $content)
{
    /* getAttribute() yields '' for a missing attribute; a missing or empty
       type is treated as "text" */
    $type = $content->getAttribute('type');
    if ($type === '' || strpos($type, 'text/') === 0) {
        $type = 'text';
    }

    if ($type === 'text') {
        return $content->nodeValue;
    }

    if ($type === 'html') {
        /* nodeValue has already decoded one level of entities; undo a
           second level of escaping on the tag openers */
        return str_replace('&lt;', '<', $content->nodeValue);
    }

    if ($type === 'xhtml') {
        $container = $content->getElementsByTagName('div');
        if ($container->length == 0) {
            return false;
        }
        $contents = $container->item(0);
        if (! $contents->hasChildNodes()) {
            return false;
        }
        /* Iterate through, applying xml:base and store the result */
        $result = '';
        foreach ($contents->childNodes as $node) {
            $result .= $this->traverseNode($node);
        }
        /* NOTE(review): utf8_decode() is deprecated as of PHP 8.2; kept
           because callers receive ISO-8859-1 output today */
        return utf8_decode($result);
    }

    /* Tested explicitly: as a switch case this comparison would be a loose
       string-to-boolean match */
    if (preg_match('@^[a-zA-Z]+/[a-zA-Z+]*xml@i', $type) > 0) {
        return $content;
    }

    /* application/octet-stream and every remaining type */
    return base64_decode(trim($content->nodeValue));
}
/**
 * Get a category from the entry.
 *
 * A feed or entry can have any number of categories. A category can have the
 * attributes term, scheme and label.
 *
 * @param string $method    The name of the construct (unused, kept for the
 *                          __call dispatch signature)
 * @param array  $arguments [0] offset of the category, [1] attribute to
 *                          read (defaults to 'term')
 * @return string|false the attribute value, or false if there is no category
 *         at that offset or it lacks the attribute
 */
function getCategory($method, $arguments)
{
    $offset = empty($arguments[0]) ? 0 : $arguments[0];
    $attribute = empty($arguments[1]) ? 'term' : $arguments[1];

    $categories = $this->model->getElementsByTagName('category');

    /* Only read the node when the offset is inside the list */
    if ($categories->length > $offset) {
        $category = $categories->item($offset);
        if ($category->hasAttribute($attribute)) {
            return $category->getAttribute($attribute);
        }
    }
    return false;
}
/**
 * Read an attribute of one of the feed's link elements.
 *
 * Without search parameters every link element is considered. With
 * parameters an XPath query is built from them; rel="alternate" also
 * matches links that carry no rel attribute. If no 'rel' is present and we
 * are asked for one, we follow the example of the Universal Feed Parser and
 * presume 'alternate'. An href value is resolved against xml:base.
 *
 * @param int    $offset    the position of the link within the matches
 * @param string $attribute the attribute name required
 * @param array  $params    an array of attributes to search by
 * @return string|false the value of the attribute, or false if unavailable
 */
function getLink($offset = 0, $attribute = 'href', $params = false)
{
    if (! is_array($params) || empty($params)) {
        $links = $this->model->getElementsByTagName('link');
    } else {
        $altPredicate = '';
        $conditions = array();
        foreach ($params as $name => $wanted) {
            if ($name == 'rel' && $wanted == 'alternate') {
                $altPredicate = '[not(@rel) or @rel="alternate"]';
                continue;
            }
            $conditions[] = "@$name='$wanted'";
        }
        $otherPredicate = empty($conditions)
            ? ''
            : '[' . join(' and ', $conditions) . ']';
        $links = $this->xpath->query(
            $this->xpathPrefix . 'atom:link' . $altPredicate . $otherPredicate);
    }

    if ($links->length <= $offset) {
        return false;
    }

    $link = $links->item($offset);
    if ($link->hasAttribute($attribute)) {
        $value = $link->getAttribute($attribute);
        return ($attribute == 'href') ? $this->addBase($value, $link) : $value;
    }

    return ($attribute == 'rel') ? 'alternate' : false;
}
} |
?> |
/trunk/api/pear/XML/Feed/Parser/RSS09.php |
---|
New file |
0,0 → 1,214 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* RSS0.9 class for XML_Feed_Parser |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
* @version CVS: $Id: RSS09.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/** |
* This class handles RSS0.9 feeds. |
* |
* @author James Stewart <james@jystewart.net> |
* @version Release: 1.0.2 |
* @package XML_Feed_Parser |
* @todo Find a Relax NG URI we can use |
*/ |
class XML_Feed_Parser_RSS09 extends XML_Feed_Parser_Type |
{ |
/** |
* The URI of the RelaxNG schema used to (optionally) validate the feed |
* @var string |
*/ |
private $relax = ''; |
/** |
* We're likely to use XPath, so let's keep it global |
* @var DOMXPath |
*/ |
protected $xpath; |
/** |
* The feed type we are parsing |
* @var string |
*/ |
public $version = 'RSS 0.9'; |
/** |
* The class used to represent individual items |
* @var string |
*/ |
protected $itemClass = 'XML_Feed_Parser_RSS09Element'; |
/** |
* The element containing entries |
* @var string |
*/ |
protected $itemElement = 'item'; |
/** |
* Here we map those elements we're not going to handle individually |
* to the constructs they are. The optional second parameter in the array |
* tells the parser whether to 'fall back' (not apt. at the feed level) or |
* fail if the element is missing. If the parameter is not set, the function |
* will simply return false and leave it to the client to decide what to do. |
* @var array |
*/ |
protected $map = array( |
'title' => array('Text'), |
'link' => array('Text'), |
'description' => array('Text'), |
'image' => array('Image'), |
'textinput' => array('TextInput')); |
/** |
* Here we map some elements to their atom equivalents. This is going to be |
* quite tricky to pull off effectively (and some users' methods may vary) |
* but is worth trying. The key is the atom version, the value is RSS2. |
* @var array |
*/ |
protected $compatMap = array( |
'title' => array('title'), |
'link' => array('link'), |
'subtitle' => array('description')); |
/** |
* We will be working with multiple namespaces and it is useful to |
* keep them together |
* @var array |
*/ |
protected $namespaces = array( |
'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'); |
/**
 * Wrap an already-parsed RSS 0.9 document.
 *
 * Stores the document, prepares an XPath context with this class's
 * namespaces registered and caches the number of item elements.
 *
 * @todo RelaxNG validation
 * @param DOMDocument $model  A DOM object representing the feed
 * @param bool        $strict (optional) Whether or not to validate this
 *                            feed; currently ignored
 */
function __construct(DOMDocument $model, $strict = false)
{
    $this->model = $model;

    $xpath = new DOMXPath($model);
    foreach ($this->namespaces as $prefix => $uri) {
        $xpath->registerNamespace($prefix, $uri);
    }
    $this->xpath = $xpath;

    $this->numberEntries = $this->count('item');
}
/**
 * Present for interface compatibility only; always fails for RSS 0.9.
 *
 * RSS 0.9 places no clear restrictions on the global uniqueness of IDs, so
 * entries cannot be looked up this way.
 *
 * @param string $id any valid ID (ignored).
 * @return false
 */
function getEntryById($id)
{
    return false;
}
/**
 * Get details of the image associated with the feed.
 *
 * When the image element has children, its title, link and url are read
 * from them; otherwise the url is taken from the element's resource
 * attribute. Anything that cannot be found is reported as false, as are
 * description, height and width.
 *
 * @return array|false keys title, link, url, description, height and width,
 *         or false if the feed has no image element
 */
protected function getImage()
{
    $images = $this->model->getElementsByTagName('image');
    if ($images->length == 0) {
        return false;
    }
    $image = $images->item(0);

    $details = array('title' => false, 'link' => false, 'url' => false);
    if ($image->hasChildNodes()) {
        foreach (array_keys($details) as $name) {
            $nodes = $image->getElementsByTagName($name);
            if ($nodes->length > 0) {
                /* DOMElement exposes its text as nodeValue, not value */
                $details[$name] = $nodes->item(0)->nodeValue;
            }
        }
    } else {
        $resource = $image->attributes->getNamedItem('resource');
        if ($resource) {
            $details['url'] = $resource->nodeValue;
        }
    }

    return array_merge($details,
        array('description' => false, 'height' => false, 'width' => false));
}
/**
 * The textinput element is little used, but in the interests of
 * completeness we will support it.
 *
 * Reads title, description, name and link from the first textinput element;
 * a child that is absent is reported as null. When no link child yields a
 * value the element's resource attribute is used instead, if present.
 *
 * @return array|false keys title, description, name and link, or false if
 *         the feed has no textinput element
 */
protected function getTextInput()
{
    $inputs = $this->model->getElementsByTagName('textinput');
    if ($inputs->length == 0) {
        return false;
    }
    $input = $inputs->item(0);

    $results = array();
    foreach (array('title', 'description', 'name', 'link') as $name) {
        $nodes = $input->getElementsByTagName($name);
        /* DOMElement exposes its text as nodeValue, not value */
        $results[$name] = ($nodes->length > 0) ? $nodes->item(0)->nodeValue : null;
    }

    if (empty($results['link'])) {
        $resource = $input->attributes->getNamedItem('resource');
        if ($resource) {
            $results['link'] = $resource->nodeValue;
        }
    }

    return $results;
}
/**
 * Get details of a link from the feed.
 *
 * Here a link is a plain text element, but it gets a method of its own so
 * that its value is resolved against xml:base. Only $offset is used; the
 * other parameters exist to match the signature used by the other feed
 * types.
 *
 * @param int    $offset    position of the link element in the document
 * @param string $attribute unused
 * @param array  $params    unused
 * @return string|false the resolved link, or false if there is no link at
 *         that offset
 */
function getLink($offset = 0, $attribute = 'href', $params = false)
{
    $links = $this->model->getElementsByTagName('link');
    if ($links->length > $offset) {
        $link = $links->item($offset);
        return $this->addBase($link->nodeValue, $link);
    }
    return false;
}
} |
?> |
/trunk/api/pear/XML/Feed/Parser/Type.php |
---|
New file |
0,0 → 1,441 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* Abstract class providing common methods for XML_Feed_Parser feeds. |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
* @version CVS: $Id: Type.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/** |
* This abstract class provides some general methods that are likely to be |
* implemented exactly the same way for all feed types. |
* |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @version Release: 1.0.2 |
*/ |
abstract class XML_Feed_Parser_Type |
{ |
/** |
* Where we store our DOM object for this feed |
* @var DOMDocument |
*/ |
public $model; |
/** |
* For iteration we'll want a count of the number of entries |
* @var int |
*/ |
public $numberEntries; |
/** |
* Where we store our entry objects once instantiated |
* @var array |
*/ |
public $entries = array(); |
/**
 * Proxy to allow use of element names as method names
 *
 * We are not going to provide methods for every entry type so this
 * function will allow for a lot of mapping. We rely pretty heavily
 * on this to handle our mappings between other feed types and atom.
 *
 * The call is first translated through $compatMap (the last value of the
 * mapping is the real element name, any preceding values are appended to
 * the arguments), then looked up in $map, case-insensitively if need be,
 * and finally dispatched to the matching get<Construct>() method.
 *
 * @param string $call      - the method attempted
 * @param array  $arguments - arguments to that method
 * @return mixed false if the element is not mapped or no handler exists
 */
function __call($call, $arguments = array())
{
    if (! is_array($arguments)) {
        $arguments = array();
    }

    if (isset($this->compatMap[$call])) {
        $mapping = $this->compatMap[$call];
        $target = array_pop($mapping);
        /* Whatever is left of this mapping is extra arguments for the
           target; test that remainder rather than the whole compat map */
        if (! empty($mapping)) {
            $arguments = array_merge($arguments, $mapping);
        }
        $call = $target;
    }

    /* To be helpful, we allow a case-insensitive search for this method */
    if (! isset($this->map[$call])) {
        foreach (array_keys($this->map) as $key) {
            if (strtoupper($key) == strtoupper($call)) {
                $call = $key;
                break;
            }
        }
    }

    if (empty($this->map[$call])) {
        return false;
    }

    $method = 'get' . $this->map[$call][0];

    /* getLink takes positional parameters rather than (name, arguments) */
    if ($method == 'getLink') {
        $offset = empty($arguments[0]) ? 0 : $arguments[0];
        $attribute = empty($arguments[1]) ? 'href' : $arguments[1];
        $params = isset($arguments[2]) ? $arguments[2] : array();
        return $this->getLink($offset, $attribute, $params);
    }

    if (method_exists($this, $method)) {
        return $this->$method($call, $arguments);
    }
    return false;
}
/**
 * Proxy to allow use of element names as attribute names
 *
 * Variable-style access is answered by forwarding the name to __call()
 * with an empty argument list.
 *
 * @param string $value - the variable required
 * @return mixed whatever __call() returns for that name
 */
function __get($value)
{
    $noArguments = array();
    return $this->__call($value, $noArguments);
}
/**
 * Utility function to help us resolve xml:base values
 *
 * Combines a base URI with a link:
 *  - a link that starts with a protocol is returned untouched;
 *  - a link that starts with "/" replaces the whole path of the base, i.e.
 *    it is appended to the base's "scheme://host" part;
 *  - a link that starts with one or more "../" steps up from the base's
 *    directory, never above the host of an absolute base;
 *  - any other link replaces the last path segment of the base.
 *
 * @param string $base - the base to add the link to
 * @param string $link
 * @return string the combined URI
 */
function combineBases($base, $link)
{
    /* baseURI may be null when a document has no base at all */
    $base = (string) $base;
    $link = (string) $link;

    if (preg_match('/^[A-Za-z]+:\/\//', $link)) {
        return $link;
    }

    /* "scheme://host" of the base, or '' when the base is itself relative */
    $root = '';
    if (preg_match('/^[A-Za-z]+:\/\/[^\/]*/', $base, $results)) {
        $root = $results[0];
    }

    if (preg_match('/^\//', $link)) {
        /* Extract domain and suffix link to that */
        return $root . $link;
    }

    if (preg_match('/^((\.\.\/)+)(.*)$/', $link, $bases)) {
        /* Step up link to find place to be */
        $suffix = $bases[3];
        $count = preg_match_all('/\.\.\//', $bases[1], $steps);
        $url = explode("/", $base);
        /* "scheme:", "" and "host" are the three segments that must stay */
        $floor = ($root === '') ? 0 : 3;
        /* One pop drops the document name, then one more per "../" */
        for ($i = 0; $i <= $count && count($url) > $floor; $i++) {
            array_pop($url);
        }
        return implode("/", $url) . "/" . $suffix;
    }

    if ($root !== '' && $base === $root) {
        /* Bare "scheme://host": there is no document name to strip */
        return $root . '/' . $link;
    }

    if ($base !== '/') {
        /* Drop the document name so the link lands in the same directory */
        $base = preg_replace('/(.*\/).*$/', '$1', $base);
    }

    return $base . $link;
}
/**
 * Determine whether we need to apply our xml:base rules
 *
 * A link that already starts with a protocol is returned as is; any other
 * link is combined with the base URI in effect at the given node.
 *
 * @param string  $link    the link as found in the feed
 * @param DOMNode $element the node whose baseURI applies to the link
 * @return string
 */
function addBase($link, $element)
{
    $isAbsolute = preg_match('/^[A-Za-z]+:\/\//', $link);
    return $isAbsolute ? $link : $this->combineBases($element->baseURI, $link);
}
/**
 * Get an entry by its position in the feed, starting from zero
 *
 * As well as allowing the items to be iterated over we want to allow
 * users to be able to access a specific entry. This is one of two ways of
 * doing that, the other being by ID. Entries are created on first request,
 * kept in $entries and, if they report an id, also recorded in idMappings.
 *
 * @param int $offset
 * @return object an instance of the class named by $itemClass
 * @throws XML_Feed_Parser_Exception if there is no entry at that offset
 */
function getEntryByOffset($offset)
{
    if (isset($this->entries[$offset])) {
        return $this->entries[$offset];
    }

    $nodes = $this->model->getElementsByTagName($this->itemElement);
    if ($nodes->length <= $offset) {
        throw new XML_Feed_Parser_Exception('No entries found');
    }

    $node = $nodes->item($offset);
    $entry = new $this->itemClass($node, $this, $node->baseURI);
    $this->entries[$offset] = $entry;

    $id = $entry->id;
    if ($id) {
        $this->idMappings[$id] = $entry;
    }

    return $this->entries[$offset];
}
/**
 * Return a date in seconds since epoch.
 *
 * The first element with the given name is passed through PHP's strtotime,
 * which yields a unix timestamp, i.e. the number of seconds since
 * 1970-01-01 00:00:00.
 *
 * @link http://php.net/strtotime
 * @param string $method    The name of the date construct we want
 * @param array  $arguments Included for compatibility with our __call usage
 * @return int|false timestamp, or false if the element is missing or
 *         strtotime cannot parse it
 */
protected function getDate($method, $arguments)
{
    $nodes = $this->model->getElementsByTagName($method);
    return ($nodes->length == 0)
        ? false
        : strtotime($nodes->item(0)->nodeValue);
}
/**
 * Get a text construct.
 *
 * Returns the text of the first element with the given name.
 *
 * @param string $method    The name of the text construct we want
 * @param array  $arguments Included for compatibility with our __call usage
 * @return string|false false if no such element exists
 */
protected function getText($method, $arguments = array())
{
    $nodes = $this->model->getElementsByTagName($method);
    return ($nodes->length > 0) ? $nodes->item(0)->nodeValue : false;
}
/**
 * Apply various rules to retrieve category data.
 *
 * There is no single way of declaring a category in RSS1/1.1 as there is in
 * RSS2 and Atom. Instead the usual approach is to use the dublin core
 * namespace to declare categories. For example delicious use both:
 * <dc:subject>PEAR</dc:subject> and: <taxo:topics><rdf:Bag>
 * <rdf:li resource="http://del.icio.us/tag/PEAR" /></rdf:Bag></taxo:topics>
 * to declare a categorisation of 'PEAR'.
 *
 * This implementation reads the subject elements only.
 *
 * @param string $call      for compatibility with our overloading
 * @param array  $arguments - arg 0 is the offset, arg 1 is whether to
 *                          return every subject as an array
 * @return string|array|false false if there is no subject at the offset
 */
protected function getCategory($call, $arguments)
{
    $subjects = $this->model->getElementsByTagName('subject');
    $offset = empty($arguments[0]) ? 0 : $arguments[0];
    $asArray = ! empty($arguments[1]);

    if ($subjects->length <= $offset) {
        return false;
    }

    if (! $asArray) {
        return $subjects->item($offset)->nodeValue;
    }

    $list = array();
    foreach ($subjects as $subject) {
        $list[] = $subject->nodeValue;
    }
    return $list;
}
/**
 * Count occurrences of an element
 *
 * Reports how many elements named $type getElementsByTagName() finds in
 * the model.
 *
 * @param string $type the element we want to get a count of
 * @return int
 */
protected function count($type)
{
    $tags = $this->model->getElementsByTagName($type);
    return $tags ? $tags->length : 0;
}
/**
 * Part of our xml:base processing code
 *
 * We need a couple of methods to access XHTML content stored in feeds.
 * This is because we dereference all xml:base references before returning
 * the element. This method handles the attributes: src and href values are
 * resolved (and written back to the node), the base attribute is dropped
 * and everything else is serialised as name="value" pairs.
 *
 * @param DOMElement $node The DOM node we are iterating over
 * @return string the attributes with a leading space, or '' if none remain
 */
function processXHTMLAttributes($node) {
    $return = '';
    foreach ($node->attributes as $attribute) {
        if ($attribute->name == 'src' or $attribute->name == 'href') {
            $attribute->value = $this->addBase($attribute->value, $attribute);
        }
        if ($attribute->name == 'base') {
            continue;
        }
        /* The DOM hands back decoded text; re-escape it so that quotes,
           ampersands and angle brackets cannot break out of the attribute */
        $return .= $attribute->name . '="' .
            htmlspecialchars($attribute->value, ENT_COMPAT, 'UTF-8') . '" ';
    }
    if (! empty($return)) {
        return ' ' . trim($return);
    }
    return '';
}
/**
 * Part of our xml:base processing code
 *
 * We need a couple of methods to access XHTML content stored in feeds.
 * This is because we dereference all xml:base references before returning
 * the element. This method recurses through the tree below the node and
 * builds the markup: elements become an opening tag (with processed
 * attributes), their serialised children and a closing tag; text nodes
 * become their value passed through htmlentities(); any other node
 * contributes only its children.
 *
 * @param DOMNode $node The DOM node we are processing
 * @return string
 */
function traverseNode($node)
{
    $isElement = $node instanceof DOMElement;
    $pieces = array();

    if ($isElement) {
        $pieces[] = '<' . $node->tagName .
            $this->processXHTMLAttributes($node) . '>';
    }

    if ($node->hasChildNodes()) {
        foreach ($node->childNodes as $child) {
            $pieces[] = $this->traverseNode($child);
        }
    }

    if ($node instanceof DOMText) {
        $pieces[] = htmlentities($node->nodeValue);
    }

    if ($isElement) {
        $pieces[] = '</' . $node->tagName . '>';
    }

    return implode('', $pieces);
}
/**
 * Get content from RSS feeds (atom has its own implementation)
 *
 * The official way to include full content in an RSS1 entry is to use
 * the content module's element 'encoded', and RSS2 feeds often duplicate
 * that. Often, however, the 'description' element is used instead, so it is
 * tried second. For the first of the two that exists, text children are
 * concatenated as is and any other child is serialised as XML.
 *
 * @return string|false false if neither element is present
 */
protected function getContent()
{
    foreach (array('encoded', 'description') as $element) {
        $found = $this->model->getElementsByTagName($element);
        if ($found->length == 0) {
            continue;
        }

        $first = $found->item(0);
        if (! $first->hasChildNodes()) {
            return $first->nodeValue;
        }

        $value = '';
        foreach ($first->childNodes as $child) {
            if ($child instanceof DOMText) {
                $value .= $child->nodeValue;
                continue;
            }
            $simple = simplexml_import_dom($child);
            $value .= $simple->asXML();
        }
        return $value;
    }
    return false;
}
/**
 * Checks whether an element with the given name exists at the given offset.
 *
 * @param string $name   element name to look for
 * @param int    $offset zero-based position among the elements of that name
 * @return bool true if more than $offset such elements were found
 **/
function hasKey($name, $offset = 0)
{
    return $this->model->getElementsByTagName($name)->length > $offset;
}
/**
 * Return an XML serialization of the model, should it be required. Most
 * users however, will already have a serialization that they used when
 * instantiating the object.
 *
 * @return string XML serialization of element
 */
function __toString()
{
    return simplexml_import_dom($this->model)->asXML();
}
/**
 * Get directory holding RNG schemas. Method is based on that
 * found in Contact_AddressBook.
 *
 * Loads PEAR/Config.php and appends '/XML_Feed_Parser/schemas' to the
 * configured data_dir.
 *
 * @return string schema directory below the PEAR data directory.
 * @access public
 * @static
 */
static function getSchemaDir()
{
    require_once 'PEAR/Config.php';
    $pearConfig = new PEAR_Config;
    $dataDir = $pearConfig->get('data_dir');
    return $dataDir . '/XML_Feed_Parser/schemas';
}
} |
?> |
/trunk/api/pear/XML/Feed/Parser/RSS1Element.php |
---|
New file |
0,0 → 1,116 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* RSS1 Element class for XML_Feed_Parser |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
* @version CVS: $Id: RSS1Element.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/**
* This class provides support for RSS 1.0 entries. It will usually be called by |
* XML_Feed_Parser_RSS1 with which it shares many methods. |
* |
* @author James Stewart <james@jystewart.net> |
* @version Release: 1.0.2 |
* @package XML_Feed_Parser |
*/ |
class XML_Feed_Parser_RSS1Element extends XML_Feed_Parser_RSS1 |
{ |
/** |
* This will be a reference to the parent object for when we want |
* to use a 'fallback' rule |
* @var XML_Feed_Parser_RSS1 |
*/ |
protected $parent; |
/** |
* Our specific element map |
* @var array |
*/ |
protected $map = array( |
'id' => array('Id'), |
'title' => array('Text'), |
'link' => array('Link'), |
'description' => array('Text'), # or dc:description |
'category' => array('Category'), |
'rights' => array('Text'), # dc:rights |
'creator' => array('Text'), # dc:creator |
'publisher' => array('Text'), # dc:publisher |
'contributor' => array('Text'), # dc:contributor |
'date' => array('Date'), # dc:date |
'content' => array('Content') |
); |
/** |
* Here we map some elements to their atom equivalents. This is going to be |
* quite tricky to pull off effectively (and some users' methods may vary) |
* but is worth trying. The key is the atom version, the value is RSS1. |
* @var array |
*/ |
protected $compatMap = array( |
'content' => array('content'), |
'updated' => array('lastBuildDate'), |
'published' => array('pubdate'), |
'subtitle' => array('description'), |
'updated' => array('date'), |
'author' => array('creator'), |
'contributor' => array('contributor') |
); |
/** |
* Store useful information for later. |
* |
* @param DOMElement $element - this item as a DOM element |
* @param XML_Feed_Parser_RSS1 $parent - the feed of which this is a member |
*/ |
function __construct(DOMElement $element, $parent, $xmlBase = '') |
{ |
$this->model = $element; |
$this->parent = $parent; |
} |
/** |
* If an rdf:about attribute is specified, return it as an ID |
* |
* There is no established way of showing an ID for an RSS1 entry. We will |
* simulate it using the rdf:about attribute of the entry element. This cannot |
* be relied upon for unique IDs but may prove useful. |
* |
* @return string|false |
*/ |
function getId() |
{ |
if ($this->model->attributes->getNamedItem('about')) { |
return $this->model->attributes->getNamedItem('about')->nodeValue; |
} |
return false; |
} |
/** |
* How RSS1 should support for enclosures is not clear. For now we will return |
* false. |
* |
* @return false |
*/ |
function getEnclosure() |
{ |
return false; |
} |
} |
?> |
/trunk/api/pear/XML/Feed/Parser/RSS11Element.php |
---|
New file |
0,0 → 1,151 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* RSS 1.1 Element class for XML_Feed_Parser |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
* @version CVS: $Id: RSS11Element.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/**
 * Entry-level support for RSS 1.1 feeds.
 *
 * Instances will usually be created by XML_Feed_Parser_RSS11, which this
 * class extends and with which it therefore shares many methods.
 *
 * @author    James Stewart <james@jystewart.net>
 * @version    Release: 1.0.2
 * @package XML_Feed_Parser
 */
class XML_Feed_Parser_RSS11Element extends XML_Feed_Parser_RSS11
{
    /**
     * The feed this entry belongs to, kept for when we want to use a
     * 'fallback' rule.
     * @var XML_Feed_Parser_RSS11
     */
    protected $parent;

    /**
     * Our specific element map: element name => construct type.
     * @var array
     */
    protected $map = array(
        'id' => array('Id'),
        'title' => array('Text'),
        'link' => array('Link'),
        'description' => array('Text'), # or dc:description
        'category' => array('Category'),
        'rights' => array('Text'), # dc:rights
        'creator' => array('Text'), # dc:creator
        'publisher' => array('Text'), # dc:publisher
        'contributor' => array('Text'), # dc:contributor
        'date' => array('Date'), # dc:date
        'content' => array('Content')
    );

    /**
     * Maps atom element names (keys) to their RSS1 equivalents (values).
     *
     * Each key appears exactly once. A PHP array literal keeps only the last
     * value given for a repeated key, so 'updated' is declared a single time
     * with the value that was effective all along ('date'), in the position
     * the key has always occupied.
     * @var array
     */
    protected $compatMap = array(
        'content' => array('content'),
        'updated' => array('date'),
        'published' => array('pubdate'),
        'subtitle' => array('description'),
        'author' => array('creator'),
        'contributor' => array('contributor')
    );

    /**
     * Store useful information for later.
     *
     * @param   DOMElement  $element - this item as a DOM element
     * @param   XML_Feed_Parser_RSS11 $parent - the feed of which this is a member
     * @param   string  $xmlBase - accepted for signature compatibility; this
     *                             constructor does not use it
     */
    function __construct(DOMElement $element, $parent, $xmlBase = '')
    {
        $this->model = $element;
        $this->parent = $parent;
    }

    /**
     * If an rdf:about attribute is specified, return that as an ID
     *
     * There is no established way of showing an ID for an RSS1 entry. We
     * simulate it using the 'about' attribute of the entry element. This
     * cannot be relied upon for unique IDs but may prove useful.
     *
     * @return  string|false    the attribute value, or false when absent
     */
    function getId()
    {
        // Look the attribute up once and reuse the node.
        $about = $this->model->attributes->getNamedItem('about');
        if ($about) {
            return $about->nodeValue;
        }
        return false;
    }

    /**
     * Return the entry's content
     *
     * The official way to include full content in an RSS1 entry is to use
     * the content module's element 'encoded'. Often, however, the
     * 'description' element is used instead, so it is tried as a fallback.
     * The first of the two that exists in the entry decides the result:
     * text children contribute their text, every other child contributes
     * its XML serialization.
     *
     * @return  string|false    the content, or false when neither element exists
     */
    function getContent()
    {
        foreach (array('encoded', 'description') as $tagName) {
            $matches = $this->model->getElementsByTagName($tagName);
            if ($matches->length == 0) {
                continue;
            }

            $node = $matches->item(0);
            if (! $node->hasChildNodes()) {
                return $node->nodeValue;
            }

            $content = '';
            foreach ($node->childNodes as $child) {
                if ($child instanceof DOMText) {
                    $content .= $child->nodeValue;
                } else if ($child instanceof DOMElement) {
                    $content .= simplexml_import_dom($child)->asXML();
                } else {
                    // Comments, processing instructions and the like cannot
                    // be imported into SimpleXML; serialize them via DOM so
                    // they do not abort the whole call.
                    $content .= $child->ownerDocument->saveXML($child);
                }
            }
            return $content;
        }
        return false;
    }

    /**
     * How RSS1.1 should support enclosures is not clear. For now we always
     * return false.
     *
     * @return  false
     */
    function getEnclosure()
    {
        return false;
    }
}
?> |
/trunk/api/pear/XML/Feed/Parser/RSS2Element.php |
---|
New file |
0,0 → 1,171 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* Class representing entries in an RSS2 feed. |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
* @version CVS: $Id: RSS2Element.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/**
 * This class provides support for RSS 2.0 entries. It will usually be
 * called by XML_Feed_Parser_RSS2, which it extends and with which it
 * therefore shares many methods.
 *
 * @author    James Stewart <james@jystewart.net>
 * @version    Release: 1.0.2
 * @package XML_Feed_Parser
 */
class XML_Feed_Parser_RSS2Element extends XML_Feed_Parser_RSS2
{
    /**
     * The feed this entry belongs to, kept for when we want to use a
     * 'fallback' rule.
     * @var XML_Feed_Parser_RSS2
     */
    protected $parent;

    /**
     * Our specific element map: element name => construct type.
     * @var array
     */
    protected $map = array(
        'title' => array('Text'),
        'guid' => array('Guid'),
        'description' => array('Text'),
        'author' => array('Text'),
        'comments' => array('Text'),
        'enclosure' => array('Enclosure'),
        'pubDate' => array('Date'),
        'source' => array('Source'),
        'link' => array('Text'),
        'content' => array('Content'));

    /**
     * Maps atom element names (keys) to their RSS2 equivalents (values).
     *
     * NOTE(review): 'published' points at 'pubdate' while the element map
     * above is keyed 'pubDate'; confirm the dispatcher that consumes these
     * maps is case-insensitive.
     * @var array
     */
    protected $compatMap = array(
        'id' => array('guid'),
        'updated' => array('lastBuildDate'),
        'published' => array('pubdate'),
        'guidislink' => array('guid', 'ispermalink'),
        'summary' => array('description'));

    /**
     * Store useful information for later.
     *
     * @param   DOMElement  $element - this item as a DOM element
     * @param   XML_Feed_Parser_RSS2 $parent - the feed of which this is a member
     * @param   string  $xmlBase - accepted for signature compatibility; this
     *                             constructor does not use it
     */
    function __construct(DOMElement $element, $parent, $xmlBase = '')
    {
        $this->model = $element;
        $this->parent = $parent;
    }

    /**
     * Get the value of the guid element, if specified
     *
     * guid is the closest RSS2 has to atom's ID. It is usually but not always
     * a URI. Its one attribute says whether the guid is itself
     * dereferencable. To get the guid you would call $item->id() (for atom
     * compatibility) or $item->guid(). To check if this guid is a permalink
     * call $item->guid("ispermalink").
     *
     * The attribute is read from the first guid element. The spec's
     * 'isPermaLink' spelling is tried first, then the all-lowercase form.
     * When the attribute is requested but absent the guid text is returned,
     * as the method has always been written to do.
     *
     * @param   string  $method - the method name being called
     * @param   array   $params - parameters required
     * @return  string|false    the guid or value of ispermalink; false when
     *                          the entry has no guid element
     */
    protected function getGuid($method, $params)
    {
        $wantsPermalink = isset($params[0]) && $params[0] == 'ispermalink';

        $guids = $this->model->getElementsByTagName('guid');
        if ($guids->length == 0) {
            return false;
        }

        // Attributes live on the element, not on the node list that
        // getElementsByTagName() returns.
        $guid = $guids->item(0);
        if ($wantsPermalink) {
            foreach (array('isPermaLink', 'ispermalink') as $attributeName) {
                if ($guid->hasAttribute($attributeName)) {
                    return $guid->getAttribute($attributeName);
                }
            }
        }
        return $guid->nodeValue;
    }

    /**
     * Access details of file enclosures
     *
     * The RSS2 spec is ambiguous as to whether an enclosure element must be
     * unique in a given entry. For now we will assume it needn't, and allow
     * for an offset.
     *
     * @param   string  $method - the method being called
     * @param   array   $parameters - we expect the first of these to be our offset
     * @return  array|false     'url', 'length' and 'type' of the enclosure
     *                          ('length'/'type' are null when the attribute
     *                          is missing); false when there is no enclosure
     *                          at the offset or it has no url
     */
    protected function getEnclosure($method, $parameters)
    {
        $enclosures = $this->model->getElementsByTagName('enclosure');
        $offset = isset($parameters[0]) ? (int) $parameters[0] : 0;

        // Reject offsets outside the list before touching item().
        if ($offset < 0 || $enclosures->length <= $offset) {
            return false;
        }

        $enclosure = $enclosures->item($offset);
        if (! $enclosure->hasAttribute('url')) {
            return false;
        }

        $details = array('url' => $enclosure->getAttribute('url'));
        foreach (array('length', 'type') as $attributeName) {
            // A missing attribute is reported as null rather than being
            // dereferenced.
            $details[$attributeName] = $enclosure->hasAttribute($attributeName)
                ? $enclosure->getAttribute($attributeName)
                : null;
        }
        return $details;
    }

    /**
     * Get the entry source if specified
     *
     * source is an optional sub-element of item. Like atom:source it tells
     * us about where the entry came from (eg. if it's been copied from
     * another feed). It is not a rich source of metadata in the same way as
     * atom:source, so the result is a plain array: the element text under
     * 'content' plus one entry per attribute of the element, keyed by
     * attribute name.
     *
     * @return  array|false     false when the entry has no source element
     */
    protected function getSource()
    {
        $sources = $this->model->getElementsByTagName('source');
        if (! $sources->length) {
            return false;
        }

        $source = $sources->item(0);
        $details = array('content' => $source->nodeValue);
        foreach ($source->attributes as $attribute) {
            $details[$attribute->name] = $attribute->value;
        }
        return $details;
    }
}
?> |
/trunk/api/pear/XML/Feed/Parser/RSS1.php |
---|
New file |
0,0 → 1,277 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* RSS1 class for XML_Feed_Parser |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
* @version CVS: $Id: RSS1.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/**
 * This class handles RSS1.0 feeds.
 *
 * @author    James Stewart <james@jystewart.net>
 * @version    Release: 1.0.2
 * @package XML_Feed_Parser
 * @todo    Find a Relax NG URI we can use
 */
class XML_Feed_Parser_RSS1 extends XML_Feed_Parser_Type
{
    /**
     * File name of the RelaxNG schema used to (optionally) validate the
     * feed; the constructor looks for it inside the schema directory.
     * @var string
     */
    private $relax = 'rss10.rnc';

    /**
     * We're likely to use XPath, so let's keep it global
     * @var DOMXPath
     */
    protected $xpath;

    /**
     * The feed type we are parsing
     * @var string
     */
    public $version = 'RSS 1.0';

    /**
     * The class used to represent individual items
     * @var string
     */
    protected $itemClass = 'XML_Feed_Parser_RSS1Element';

    /**
     * The element containing entries
     * @var string
     */
    protected $itemElement = 'item';

    /**
     * Here we map those elements we're not going to handle individually
     * to the constructs they are. The optional second parameter in the array
     * tells the parser whether to 'fall back' (not apt. at the feed level) or
     * fail if the element is missing. If the parameter is not set, the function
     * will simply return false and leave it to the client to decide what to do.
     *
     * Each key is listed once; 'description' serves both the RSS element and
     * dc:description.
     * @var array
     */
    protected $map = array(
        'title' => array('Text'),
        'link' => array('Text'),
        'description' => array('Text'), # also dc:description
        'image' => array('Image'),
        'textinput' => array('TextInput'),
        'updatePeriod' => array('Text'),
        'updateFrequency' => array('Text'),
        'updateBase' => array('Date'),
        'rights' => array('Text'), # dc:rights
        'creator' => array('Text'), # dc:creator
        'publisher' => array('Text'), # dc:publisher
        'contributor' => array('Text'), # dc:contributor
        'date' => array('Date') # dc:date
    );

    /**
     * Here we map some elements to their atom equivalents. This is going to be
     * quite tricky to pull off effectively (and some users' methods may vary)
     * but is worth trying. The key is the atom version, the value is the
     * RSS 1.0 name.
     * @var array
     */
    protected $compatMap = array(
        'title' => array('title'),
        'link' => array('link'),
        'subtitle' => array('description'),
        'author' => array('creator'),
        'updated' => array('date'));

    /**
     * We will be working with multiple namespaces and it is useful to
     * keep them together. Every prefix here is registered on the XPath
     * object by the constructor.
     *
     * NOTE(review): the Dublin Core element namespace is normally
     * http://purl.org/dc/elements/1.1/; confirm whether the 'dc' URI below
     * is intentional before relying on dc: prefixed XPath queries.
     * @var array
     */
    protected $namespaces = array(
        'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'rss' => 'http://purl.org/rss/1.0/',
        'dc' => 'http://purl.org/rss/1.0/modules/dc/',
        'content' => 'http://purl.org/rss/1.0/modules/content/',
        'sy' => 'http://web.resource.org/rss/1.0/modules/syndication/');

    /**
     * Store the feed model, optionally validate it, and prepare XPath access.
     *
     * @param    DOMDocument    $model  A DOM object representing the feed
     * @param    bool   $strict (optional) Whether or not to validate this feed
     * @throws   XML_Feed_Parser_Exception when $strict is set and the feed
     *           does not validate
     */
    function __construct(DOMDocument $model, $strict = false)
    {
        $this->model = $model;

        if ($strict) {
            // getSchemaDir() must be *called*; without the parentheses PHP
            // looks for a class constant of that name and aborts.
            $schema = self::getSchemaDir() . DIRECTORY_SEPARATOR . $this->relax;
            if (! $this->model->relaxNGValidate($schema)) {
                throw new XML_Feed_Parser_Exception('Failed required validation');
            }
        }

        $this->xpath = new DOMXPath($model);
        foreach ($this->namespaces as $prefix => $uri) {
            $this->xpath->registerNamespace($prefix, $uri);
        }
        $this->numberEntries = $this->count('item');
    }

    /**
     * Allows retrieval of an entry by ID where the rdf:about attribute is used
     *
     * This is not really something that will work with RSS1 as it does not have
     * clear restrictions on the global uniqueness of IDs. We will employ the
     * _very_ hit and miss method of selecting entries based on the rdf:about
     * attribute. If DOMXPath::evaluate is available, the entry is also stored
     * in $this->entries under its offset and the id is remembered in
     * $this->idMappings, so later lookups do not have to seek it out again.
     *
     * @param    string    $id    any valid ID.
     * @return    XML_Feed_Parser_RSS1Element|false  false when no item matches
     */
    function getEntryById($id)
    {
        if (isset($this->idMappings[$id])) {
            return $this->entries[$this->idMappings[$id]];
        }

        $entries = $this->xpath->query(
            '//rss:item[@rdf:about=' . $this->xpathLiteral($id) . ']');
        if (! $entries || $entries->length == 0) {
            return false;
        }

        $node = $entries->item(0);
        $classname = $this->itemClass;
        $entry = new $classname($node, $this);

        if (method_exists($this->xpath, 'evaluate')) {
            // count() comes back as a float; the cache is keyed by integer
            // offset. The mapping stores the offset (not the entry) because
            // the lookup above uses it as an index into $this->entries.
            $offset = (int) $this->xpath->evaluate(
                "count(preceding-sibling::rss:item)", $node);
            $this->entries[$offset] = $entry;
            $this->idMappings[$id] = $offset;
        }
        return $entry;
    }

    /**
     * Get details of the image associated with the feed.
     *
     * Only the first image element in the document is considered. When it
     * has child nodes, 'title', 'link' and 'url' are read from the first
     * matching descendant; otherwise 'url' comes from its 'resource'
     * attribute. Anything that cannot be found is reported as false, and
     * 'description', 'height' and 'width' are always false.
     *
     * @return  array|false     false when the feed has no image element
     */
    protected function getImage()
    {
        $images = $this->model->getElementsByTagName('image');
        if ($images->length == 0) {
            return false;
        }

        $image = $images->item(0);
        if ($image->hasChildNodes()) {
            $details = array(
                'title' => $this->firstTagText($image, 'title', false),
                'link' => $this->firstTagText($image, 'link', false),
                'url' => $this->firstTagText($image, 'url', false));
        } else {
            $resource = $image->attributes->getNamedItem('resource');
            $details = array(
                'title' => false,
                'link' => false,
                'url' => $resource ? $resource->nodeValue : false);
        }

        return array_merge($details,
            array('description' => false, 'height' => false, 'width' => false));
    }

    /**
     * The textinput element is little used, but in the interests of
     * completeness we will support it.
     *
     * 'title', 'description', 'name' and 'link' are read from the first
     * textinput element; a missing one is reported as null. When no link
     * text is found, the element's 'resource' attribute is used if present.
     *
     * @return  array|false     false when the feed has no textinput element
     */
    protected function getTextInput()
    {
        $inputs = $this->model->getElementsByTagName('textinput');
        if ($inputs->length == 0) {
            return false;
        }

        $input = $inputs->item(0);
        $results = array();
        foreach (array('title', 'description', 'name', 'link') as $field) {
            $results[$field] = $this->firstTagText($input, $field, null);
        }

        if (empty($results['link'])) {
            $resource = $input->attributes->getNamedItem('resource');
            if ($resource) {
                $results['link'] = $resource->nodeValue;
            }
        }
        return $results;
    }

    /**
     * Employs various techniques to identify the author
     *
     * Dublin Core provides the dc:creator, dc:contributor, and dc:publisher
     * elements for defining authorship in RSS1. We will try each of those in
     * turn in order to simulate the atom author element and will return it
     * as text.
     *
     * @return  string|false    text of the first element found, else false
     */
    function getAuthor()
    {
        foreach (array('creator', 'contributor', 'publisher') as $tagName) {
            $text = $this->firstTagText($this->model, $tagName, false);
            if ($text !== false) {
                return $text;
            }
        }
        return false;
    }

    /**
     * Retrieve a link
     *
     * In RSS1 a link is a text element but in order to ensure that we resolve
     * URLs properly we have a special function for them.
     *
     * @param   int     $offset     which link element to use, in document order
     * @param   string  $attribute  not used by this implementation
     * @param   mixed   $params     not used by this implementation
     * @return  string|false    false when there is no link at the offset
     */
    function getLink($offset = 0, $attribute = 'href', $params = false)
    {
        $links = $this->model->getElementsByTagName('link');
        if ($links->length <= $offset) {
            return false;
        }
        $link = $links->item($offset);
        return $this->addBase($link->nodeValue, $link);
    }

    /**
     * Text content of the first descendant element with the given tag name.
     *
     * Element text is exposed as 'nodeValue'; the 'value' property exists
     * only on attribute nodes.
     *
     * @param   DOMNode $context    document or element to search below
     * @param   string  $tagName    local tag name to look for
     * @param   mixed   $default    what to return when no such element exists
     * @return  mixed   the element text, or $default
     */
    private function firstTagText($context, $tagName, $default)
    {
        $matches = $context->getElementsByTagName($tagName);
        if ($matches->length == 0) {
            return $default;
        }
        return $matches->item(0)->nodeValue;
    }

    /**
     * Turn an arbitrary string into an XPath 1.0 string literal.
     *
     * XPath has no escape character, so a value holding both quote styles is
     * expressed through concat().
     *
     * @param   string  $value
     * @return  string  a quoted literal safe to embed in an expression
     */
    private function xpathLiteral($value)
    {
        $value = (string) $value;
        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }
        return "concat('" . str_replace("'", "', \"'\", '", $value) . "')";
    }
}
?> |
/trunk/api/pear/XML/Feed/Parser/RSS11.php |
---|
New file |
0,0 → 1,276 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* RSS1.1 class for XML_Feed_Parser |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
* @version CVS: $Id: RSS11.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/**
 * This class handles RSS1.1 feeds. RSS1.1 is documented at:
 * http://inamidst.com/rss1.1/
 *
 * @author    James Stewart <james@jystewart.net>
 * @version    Release: 1.0.2
 * @package XML_Feed_Parser
 * @todo    Support for RDF:List
 * @todo    Ensure xml:lang is accessible to users
 */
class XML_Feed_Parser_RSS11 extends XML_Feed_Parser_Type
{
    /**
     * File name of the RelaxNG schema used to (optionally) validate the
     * feed; the constructor looks for it inside the schema directory.
     * @var string
     */
    private $relax = 'rss11.rnc';

    /**
     * We're likely to use XPath, so let's keep it global
     * @var DOMXPath
     */
    protected $xpath;

    /**
     * The feed type we are parsing
     *
     * NOTE(review): this reads 'RSS 1.0' although the class handles RSS 1.1;
     * left unchanged because callers may compare against it. Confirm.
     * @var string
     */
    public $version = 'RSS 1.0';

    /**
     * The class used to represent individual items
     *
     * NOTE(review): this names the RSS 1.0 element class even though an
     * XML_Feed_Parser_RSS11Element class exists; left unchanged because it
     * determines the type of object handed to callers. Confirm.
     * @var string
     */
    protected $itemClass = 'XML_Feed_Parser_RSS1Element';

    /**
     * The element containing entries
     * @var string
     */
    protected $itemElement = 'item';

    /**
     * Here we map those elements we're not going to handle individually
     * to the constructs they are. The optional second parameter in the array
     * tells the parser whether to 'fall back' (not apt. at the feed level) or
     * fail if the element is missing. If the parameter is not set, the function
     * will simply return false and leave it to the client to decide what to do.
     *
     * Each key is listed once; 'description' serves both the RSS element and
     * dc:description.
     * @var array
     */
    protected $map = array(
        'title' => array('Text'),
        'link' => array('Text'),
        'description' => array('Text'), # also dc:description
        'image' => array('Image'),
        'updatePeriod' => array('Text'),
        'updateFrequency' => array('Text'),
        'updateBase' => array('Date'),
        'rights' => array('Text'), # dc:rights
        'creator' => array('Text'), # dc:creator
        'publisher' => array('Text'), # dc:publisher
        'contributor' => array('Text'), # dc:contributor
        'date' => array('Date') # dc:date
    );

    /**
     * Here we map some elements to their atom equivalents. This is going to be
     * quite tricky to pull off effectively (and some users' methods may vary)
     * but is worth trying. The key is the atom version, the value is the
     * RSS 1.1 name.
     * @var array
     */
    protected $compatMap = array(
        'title' => array('title'),
        'link' => array('link'),
        'subtitle' => array('description'),
        'author' => array('creator'),
        'updated' => array('date'));

    /**
     * We will be working with multiple namespaces and it is useful to
     * keep them together. We will retain support for some common RSS1.0
     * modules. Every prefix here is registered on the XPath object by the
     * constructor.
     *
     * NOTE(review): the Dublin Core element namespace is normally
     * http://purl.org/dc/elements/1.1/; confirm whether the 'dc' URI below
     * is intentional before relying on dc: prefixed XPath queries.
     * @var array
     */
    protected $namespaces = array(
        'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'rss' => 'http://purl.org/net/rss1.1#',
        'dc' => 'http://purl.org/rss/1.0/modules/dc/',
        'content' => 'http://purl.org/rss/1.0/modules/content/',
        'sy' => 'http://web.resource.org/rss/1.0/modules/syndication/');

    /**
     * Store the feed model, optionally validate it, and prepare XPath access.
     *
     * @param    DOMDocument    $model  A DOM object representing the feed
     * @param    bool   $strict (optional) Whether or not to validate this feed
     * @throws   XML_Feed_Parser_Exception when $strict is set and the feed
     *           does not validate
     */
    function __construct(DOMDocument $model, $strict = false)
    {
        $this->model = $model;

        if ($strict) {
            // getSchemaDir() must be *called*; without the parentheses PHP
            // looks for a class constant of that name and aborts.
            $schema = self::getSchemaDir() . DIRECTORY_SEPARATOR . $this->relax;
            if (! $this->model->relaxNGValidate($schema)) {
                throw new XML_Feed_Parser_Exception('Failed required validation');
            }
        }

        $this->xpath = new DOMXPath($model);
        foreach ($this->namespaces as $prefix => $uri) {
            $this->xpath->registerNamespace($prefix, $uri);
        }
        $this->numberEntries = $this->count('item');
    }

    /**
     * Attempts to identify an element by ID given by the rdf:about attribute
     *
     * This is not really something that will work with RSS1.1 as it does not have
     * clear restrictions on the global uniqueness of IDs. We will employ the
     * _very_ hit and miss method of selecting entries based on the rdf:about
     * attribute. Please note that this is even more hit and miss with RSS1.1 than
     * with RSS1.0 since RSS1.1 does not require the rdf:about attribute for items.
     *
     * An id already present in $this->idMappings is served from
     * $this->entries; this method itself does not add to either.
     *
     * @param    string    $id    any valid ID.
     * @return    XML_Feed_Parser_RSS1Element|false  false when no item matches
     */
    function getEntryById($id)
    {
        if (isset($this->idMappings[$id])) {
            return $this->entries[$this->idMappings[$id]];
        }

        $entries = $this->xpath->query(
            '//rss:item[@rdf:about=' . $this->xpathLiteral($id) . ']');
        if (! $entries || $entries->length == 0) {
            return false;
        }

        $classname = $this->itemClass;
        return new $classname($entries->item(0), $this);
    }

    /**
     * Get details of the image associated with the feed.
     *
     * Only the first image element in the document is considered. When it
     * has child nodes, 'title' and 'url' are read from the first matching
     * descendant (false when missing) and 'link' is included only if a link
     * element exists; otherwise 'url' comes from its 'resource' attribute
     * and 'title'/'link' are false. 'description', 'height' and 'width' are
     * always false.
     *
     * @return  array|false     false when the feed has no image element
     */
    protected function getImage()
    {
        $images = $this->model->getElementsByTagName('image');
        if ($images->length == 0) {
            return false;
        }

        $image = $images->item(0);
        if ($image->hasChildNodes()) {
            $details = array(
                'title' => $this->firstTagText($image, 'title', false),
                'url' => $this->firstTagText($image, 'url', false));
            if ($image->getElementsByTagName('link')->length > 0) {
                $details['link'] = $this->firstTagText($image, 'link', false);
            }
        } else {
            $resource = $image->attributes->getNamedItem('resource');
            $details = array(
                'title' => false,
                'link' => false,
                'url' => $resource ? $resource->nodeValue : false);
        }

        return array_merge($details,
            array('description' => false, 'height' => false, 'width' => false));
    }

    /**
     * The textinput element is little used, but in the interests of
     * completeness we will support it.
     *
     * 'title', 'description', 'name' and 'link' are read from the first
     * textinput element; a missing one is reported as null. When no link
     * text is found, the element's 'resource' attribute is used if present.
     *
     * @return  array|false     false when the feed has no textinput element
     */
    protected function getTextInput()
    {
        $inputs = $this->model->getElementsByTagName('textinput');
        if ($inputs->length == 0) {
            return false;
        }

        $input = $inputs->item(0);
        $results = array();
        foreach (array('title', 'description', 'name', 'link') as $field) {
            $results[$field] = $this->firstTagText($input, $field, null);
        }

        if (empty($results['link'])) {
            $resource = $input->attributes->getNamedItem('resource');
            if ($resource) {
                $results['link'] = $resource->nodeValue;
            }
        }
        return $results;
    }

    /**
     * Attempts to discern authorship
     *
     * Dublin Core provides the dc:creator, dc:contributor, and dc:publisher
     * elements for defining authorship in RSS1. We will try each of those in
     * turn in order to simulate the atom author element and will return it
     * as text.
     *
     * @return  string|false    text of the first element found, else false
     */
    function getAuthor()
    {
        foreach (array('creator', 'contributor', 'publisher') as $tagName) {
            $text = $this->firstTagText($this->model, $tagName, false);
            if ($text !== false) {
                return $text;
            }
        }
        return false;
    }

    /**
     * Retrieve a link
     *
     * In RSS1 a link is a text element but in order to ensure that we resolve
     * URLs properly we have a special function for them.
     *
     * @param   int     $offset     which link element to use, in document order
     * @param   string  $attribute  not used by this implementation
     * @param   mixed   $params     not used by this implementation
     * @return  string|false    false when there is no link at the offset
     */
    function getLink($offset = 0, $attribute = 'href', $params = false)
    {
        $links = $this->model->getElementsByTagName('link');
        if ($links->length <= $offset) {
            return false;
        }
        $link = $links->item($offset);
        return $this->addBase($link->nodeValue, $link);
    }

    /**
     * Text content of the first descendant element with the given tag name.
     *
     * Element text is exposed as 'nodeValue'; the 'value' property exists
     * only on attribute nodes.
     *
     * @param   DOMNode $context    document or element to search below
     * @param   string  $tagName    local tag name to look for
     * @param   mixed   $default    what to return when no such element exists
     * @return  mixed   the element text, or $default
     */
    private function firstTagText($context, $tagName, $default)
    {
        $matches = $context->getElementsByTagName($tagName);
        if ($matches->length == 0) {
            return $default;
        }
        return $matches->item(0)->nodeValue;
    }

    /**
     * Turn an arbitrary string into an XPath 1.0 string literal.
     *
     * XPath has no escape character, so a value holding both quote styles is
     * expressed through concat().
     *
     * @param   string  $value
     * @return  string  a quoted literal safe to embed in an expression
     */
    private function xpathLiteral($value)
    {
        $value = (string) $value;
        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }
        return "concat('" . str_replace("'", "', \"'\", '", $value) . "')";
    }
}
?> |
/trunk/api/pear/XML/Feed/Parser/RSS2.php |
---|
New file |
0,0 → 1,334 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* Class representing feed-level data for an RSS2 feed |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
* @version CVS: $Id: RSS2.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/** |
* This class handles RSS2 feeds. |
* |
* @author James Stewart <james@jystewart.net> |
* @version Release: 1.0.2 |
* @package XML_Feed_Parser |
*/ |
class XML_Feed_Parser_RSS2 extends XML_Feed_Parser_Type
{
    /**
     * Schema path handed to DOMDocument::relaxNGValidate() when strict
     * validation is requested.
     * NOTE(review): the value is a relative '.rnc' path; confirm the file is
     * resolvable (and in a syntax libxml accepts) from the caller's working dir.
     * @var string
     */
    private $relax = 'rss20.rnc';

    /**
     * XPath evaluator bound to the feed document
     * @var DOMXPath
     */
    protected $xpath;

    /**
     * The feed type we are parsing
     * @var string
     */
    public $version = 'RSS 2.0';

    /**
     * The class used to represent individual items
     * @var string
     */
    protected $itemClass = 'XML_Feed_Parser_RSS2Element';

    /**
     * The name of the element containing entries
     * @var string
     */
    protected $itemElement = 'item';

    /**
     * Elements that are not handled by a dedicated method, mapped to the
     * construct used to read them. An optional second array member tells the
     * parser whether to 'fall back' or fail when the element is missing.
     * @var array
     */
    protected $map = array(
        'ttl' => array('Text'),
        'pubDate' => array('Date'),
        'lastBuildDate' => array('Date'),
        'title' => array('Text'),
        'link' => array('Link'),
        'description' => array('Text'),
        'language' => array('Text'),
        'copyright' => array('Text'),
        'managingEditor' => array('Text'),
        'webMaster' => array('Text'),
        'category' => array('Text'),
        'generator' => array('Text'),
        'docs' => array('Text'),
        'image' => array('Image'),
        'skipDays' => array('skipDays'),
        'skipHours' => array('skipHours'));

    /**
     * Atom names mapped to their closest RSS2 equivalents. The key is the
     * atom version, the value is RSS2.
     * @var array
     */
    protected $compatMap = array(
        'title' => array('title'),
        'rights' => array('copyright'),
        'updated' => array('lastBuildDate'),
        'subtitle' => array('description'),
        'date' => array('pubDate'),
        'author' => array('managingEditor'));

    /**
     * Namespace prefixes registered on the XPath evaluator
     * @var array
     */
    protected $namespaces = array(
        'dc' => 'http://purl.org/rss/1.0/modules/dc/',
        'content' => 'http://purl.org/rss/1.0/modules/content/');

    /**
     * Stores the document, optionally validates it, prepares the XPath
     * evaluator and counts the items.
     *
     * @param DOMDocument $model  A DOM object representing the feed
     * @param bool        $strict (optional) Whether or not to validate this feed
     * @throws XML_Feed_Parser_Exception when $strict is set and validation fails
     */
    public function __construct(DOMDocument $model, $strict = false)
    {
        $this->model = $model;

        if ($strict && ! $this->model->relaxNGValidate($this->relax)) {
            throw new XML_Feed_Parser_Exception('Failed required validation');
        }

        $this->xpath = new DOMXPath($this->model);
        foreach ($this->namespaces as $prefix => $uri) {
            $this->xpath->registerNamespace($prefix, $uri);
        }

        $this->numberEntries = $this->count('item');
    }

    /**
     * Retrieves an entry by ID, if the ID is specified with the guid element
     *
     * RSS2 has no globally unique entry IDs, so access is emulated through
     * the 'guid' element. When DOMXPath::evaluate is available the entry is
     * also stored in the offset-indexed entry list so that it does not have
     * to be built again when requested by offset.
     *
     * @param string $id any valid ID.
     * @return XML_Feed_Parser_RSS2Element|false false when no item carries that guid
     */
    public function getEntryById($id)
    {
        if (isset($this->idMappings[$id])) {
            $known = $this->idMappings[$id];
            /* The map may hold either the entry itself or its offset */
            if (is_object($known)) {
                return $known;
            }
            if (isset($this->entries[$known])) {
                return $this->entries[$known];
            }
        }

        $matches = $this->xpath->query(
            '//item[guid=' . $this->quoteForXPath($id) . ']');
        if ($matches === false || $matches->length == 0) {
            return false;
        }

        $node = $matches->item(0);
        /* Instantiate the item class, not the item tag name */
        $class = $this->itemClass;
        $entry = new $class($node, $this);

        if (method_exists($this->xpath, 'evaluate')) {
            /* XPath count() yields a float; array offsets are integers */
            $offset = (int) $this->xpath->evaluate(
                'count(preceding-sibling::item)', $node);
            $this->entries[$offset] = $entry;
        }

        $this->idMappings[$id] = $entry;
        return $entry;
    }

    /**
     * Turns an arbitrary string into a valid XPath 1.0 string literal
     *
     * XPath 1.0 has no escape character, so a value containing both quote
     * styles has to be assembled with concat().
     *
     * @param string $value
     * @return string
     */
    private function quoteForXPath($value)
    {
        $value = (string) $value;
        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }
        return "concat('" . str_replace("'", "', \"'\", '", $value) . "')";
    }

    /**
     * Returns the text of the first descendant with the given tag name
     *
     * @param DOMElement $context element to search below
     * @param string     $tagName
     * @return string|false false when there is no such descendant
     */
    private function descendantValue(DOMElement $context, $tagName)
    {
        $found = $context->getElementsByTagName($tagName);
        return $found->length > 0 ? $found->item(0)->nodeValue : false;
    }

    /**
     * Get a category from the element
     *
     * The category element is a simple text construct which can occur any
     * number of times. Access is by offset, or as an array of all values.
     *
     * @param string $call for compatibility with our overloading
     * @param array $arguments - arg 0 is the offset, arg 1 is whether to return as array
     * @return string|array|false
     */
    public function getCategory($call, $arguments = array())
    {
        $categories = $this->model->getElementsByTagName('category');
        $offset = empty($arguments[0]) ? 0 : $arguments[0];
        $asArray = ! empty($arguments[1]);

        if ($categories->length <= $offset) {
            return false;
        }
        if (! $asArray) {
            return $categories->item($offset)->nodeValue;
        }

        $list = array();
        foreach ($categories as $category) {
            $list[] = $category->nodeValue;
        }
        return $list;
    }

    /**
     * Get details of the image associated with the feed.
     *
     * Each member is false when the corresponding child element is absent.
     *
     * @return array|false an array simply containing the child elements
     */
    protected function getImage()
    {
        $images = $this->model->getElementsByTagName('image');
        if ($images->length == 0) {
            return false;
        }

        $image = $images->item(0);
        return array(
            'title' => $this->descendantValue($image, 'title'),
            'link' => $this->descendantValue($image, 'link'),
            'url' => $this->descendantValue($image, 'url'),
            'description' => $this->descendantValue($image, 'description'),
            'height' => $this->descendantValue($image, 'height'),
            'width' => $this->descendantValue($image, 'width'));
    }

    /**
     * The textInput element is little used, but in the interests of
     * completeness...
     *
     * The element is named 'textInput' in RSS 2.0 and 'textinput' in older
     * versions; both spellings are tried. Missing children yield false.
     *
     * @return array|false
     */
    public function getTextInput()
    {
        foreach (array('textInput', 'textinput') as $tagName) {
            $inputs = $this->model->getElementsByTagName($tagName);
            if ($inputs->length > 0) {
                $input = $inputs->item(0);
                return array(
                    'title' => $this->descendantValue($input, 'title'),
                    'description' => $this->descendantValue($input, 'description'),
                    'name' => $this->descendantValue($input, 'name'),
                    'link' => $this->descendantValue($input, 'link'));
            }
        }
        return false;
    }

    /**
     * Utility function for getSkipDays and getSkipHours
     *
     * Returns the values of the element children of the first element with
     * the given tag name.
     *
     * @param string $tagName The tag name (skipDays or skipHours)
     * @return array|false false when the feed has no such element
     */
    protected function getSkips($tagName)
    {
        $containers = $this->model->getElementsByTagName($tagName);
        if ($containers->length == 0) {
            return false;
        }

        $values = array();
        foreach ($containers->item(0)->childNodes as $child) {
            /* Skip whitespace text nodes and comments */
            if ($child instanceof DOMElement) {
                $values[] = $child->nodeValue;
            }
        }
        return $values;
    }

    /**
     * Retrieve skipHours data
     *
     * The skipHours element provides a list of hours in which this feed
     * should not be checked.
     *
     * @return array|false
     */
    public function getSkipHours()
    {
        return $this->getSkips('skipHours');
    }

    /**
     * Retrieve skipDays data
     *
     * The skipDays element provides a list of days on which this feed should
     * not be checked.
     *
     * @return array|false
     */
    public function getSkipDays()
    {
        return $this->getSkips('skipDays');
    }

    /**
     * Return content of the little-used 'cloud' element
     *
     * @return array|false the attributes of the element keyed by name, or
     *                     false when the feed has no cloud element
     */
    public function getCloud()
    {
        $clouds = $this->model->getElementsByTagName('cloud');
        if ($clouds->length == 0) {
            return false;
        }

        $cloudData = array();
        foreach ($clouds->item(0)->attributes as $attribute) {
            $cloudData[$attribute->name] = $attribute->value;
        }
        return $cloudData;
    }

    /**
     * Get link URL
     *
     * In RSS2 a link is a text element, but it is read through a dedicated
     * method so that the URL can be passed through addBase(). The signature
     * mirrors the atom getLink method, though only the offset is used.
     *
     * @param int $offset The position of the link within the feed. Starts from 0
     * @param string $attribute The attribute of the link element required. Not used.
     * @param array $params An array of other parameters. Not used.
     * @return string|false false when there is no link at that offset
     */
    public function getLink($offset, $attribute = 'href', $params = array())
    {
        $offset = (int) $offset;
        $links = $this->model->getElementsByTagName('link');

        /* A negative offset would make item() return null */
        if ($offset < 0 || $links->length <= $offset) {
            return false;
        }

        $link = $links->item($offset);
        return $this->addBase($link->nodeValue, $link);
    }
}
?> |
/trunk/api/pear/XML/Feed/Parser/AtomElement.php |
---|
New file |
0,0 → 1,261 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* AtomElement class for XML_Feed_Parser package |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
* @version CVS: $Id: AtomElement.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/** |
* This class provides support for atom entries. It will usually be called by |
* XML_Feed_Parser_Atom with which it shares many methods. |
* |
* @author James Stewart <james@jystewart.net> |
* @version Release: 1.0.2 |
* @package XML_Feed_Parser |
*/ |
class XML_Feed_Parser_AtomElement extends XML_Feed_Parser_Atom
{
    /**
     * This will be a reference to the parent object for when we want
     * to use a 'fallback' rule
     * @var XML_Feed_Parser_Atom
     */
    protected $parent;

    /**
     * Absolute XPath prefix selecting this entry by its atom:id
     * @var string
     */
    private $xpathPrefix = '';

    /**
     * xml:base values inherited by the element
     * @var string
     */
    protected $xmlBase;

    /**
     * Here we provide a few mappings for those very special circumstances in
     * which it makes sense to map back to the RSS2 spec or to manage other
     * compatibilities (eg. with the Universal Feed Parser). Key is the other
     * version's name for the command, value is an array consisting of the
     * equivalent in our atom api and any attributes needed to make the mapping.
     * @var array
     */
    protected $compatMap = array(
        'guid' => array('id'),
        'links' => array('link'),
        'tags' => array('category'),
        'contributors' => array('contributor'));

    /**
     * Our specific element map
     * @var array
     */
    protected $map = array(
        'author' => array('Person', 'fallback'),
        'contributor' => array('Person'),
        'id' => array('Text', 'fail'),
        'published' => array('Date'),
        'updated' => array('Date', 'fail'),
        'title' => array('Text', 'fail'),
        'rights' => array('Text', 'fallback'),
        'summary' => array('Text'),
        'content' => array('Content'),
        'link' => array('Link'),
        'enclosure' => array('Enclosure'),
        'category' => array('Category'));

    /**
     * Store useful information for later.
     *
     * @param DOMElement $element - this item as a DOM element
     * @param XML_Feed_Parser_Atom $parent - the feed of which this is a member
     * @param string $xmlBase - xml:base value inherited by this element
     */
    public function __construct(DOMElement $element, $parent, $xmlBase = '')
    {
        $this->model = $element;
        $this->parent = $parent;
        $this->xmlBase = $xmlBase;
        $this->xpathPrefix = "//atom:entry[atom:id='" . $this->id . "']/";
        /* Share the feed's evaluator rather than building one per entry */
        $this->xpath = $this->parent->xpath;
    }

    /**
     * Returns the first direct child element with the given local name
     *
     * @param DOMNode $node    element whose children are inspected
     * @param string  $tagName local name to look for
     * @return DOMElement|null
     */
    private function findChildElement(DOMNode $node, $tagName)
    {
        foreach ($node->childNodes as $child) {
            if ($child instanceof DOMElement && $child->localName === $tagName) {
                return $child;
            }
        }
        return null;
    }

    /**
     * Provides access to specific aspects of the author data for an atom entry
     *
     * Author data at the entry level is more complex than at the feed level.
     * The entry's own atom:author is used when present; otherwise the
     * atom:author of an atom:source child; otherwise the request is passed
     * to the parent feed.
     *
     * @param array $arguments 'param' selects the part of the author
     *                         construct to return (defaults to 'name')
     * @return string|false false when an author exists but lacks that part
     */
    public function getAuthor($arguments)
    {
        /* Find out which part of the author data we're looking for */
        $parameter = isset($arguments['param']) ? $arguments['param'] : 'name';

        $author = $this->findChildElement($this->model, 'author');
        if ($author === null) {
            $source = $this->findChildElement($this->model, 'source');
            if ($source !== null) {
                $author = $this->findChildElement($source, 'author');
            }
        }

        if ($author === null) {
            return $this->parent->getAuthor($arguments);
        }

        $details = $author->getElementsByTagName($parameter);
        return $details->length > 0 ? $details->item(0)->nodeValue : false;
    }

    /**
     * Returns the content of the content element or info on a specific attribute
     *
     * The element may or may not be present, and cannot occur more than once.
     * With an attribute name as first argument the value of that attribute is
     * returned (eg. $item->content("type") gives the type of the content).
     * Without one, the 'src' attribute is returned when present, otherwise the
     * parsed content. Users are advised to check the type before reading the
     * content (it can be 'text', 'html', 'xhtml' or any standard MIME type).
     *
     * @param string $method - for compatibility with our __call usage
     * @param array $arguments - arg 0 is the optional attribute name
     * @return string|false false when the entry has no content element
     */
    protected function getContent($method, $arguments = array())
    {
        $attribute = empty($arguments[0]) ? false : $arguments[0];

        $tags = $this->model->getElementsByTagName('content');
        if ($tags->length == 0) {
            return false;
        }

        $content = $tags->item(0);
        /* A missing type is written back to the DOM as 'text' */
        if (! $content->hasAttribute('type')) {
            $content->setAttribute('type', 'text');
        }

        if ($attribute !== false) {
            return $content->getAttribute($attribute);
        }
        if ($content->hasAttribute('src')) {
            return $content->getAttribute('src');
        }
        return $this->parseTextConstruct($content);
    }

    /**
     * For compatibility, this method provides a mapping to access enclosures.
     *
     * The Atom spec doesn't provide for an enclosure element, but it is
     * generally supported using the link element with rel='enclosure'. The
     * links are selected relative to this entry's own node, so the lookup
     * does not depend on the content of atom:id.
     *
     * @param string $method - for compatibility with our __call usage
     * @param array $arguments - arg 0 is the offset of the enclosure (default 0)
     * @return array|false 'url', 'type' and 'length'; 'type' and 'length' are
     *                     false when the attribute is absent
     */
    public function getEnclosure($method, $arguments = array())
    {
        $offset = isset($arguments[0]) ? (int) $arguments[0] : 0;
        if ($offset < 0) {
            return false;
        }

        $encs = $this->parent->xpath->query(
            "atom:link[@rel='enclosure']", $this->model);
        if ($encs === false || $encs->length <= $offset) {
            return false;
        }

        $link = $encs->item($offset);
        if (! $link->hasAttribute('href')) {
            return false;
        }

        return array(
            'url' => $link->getAttribute('href'),
            'type' => $link->hasAttribute('type') ?
                $link->getAttribute('type') : false,
            'length' => $link->hasAttribute('length') ?
                $link->getAttribute('length') : false);
    }

    /**
     * Get details of this entry's source, if available/relevant
     *
     * Where an atom:entry is taken from another feed the aggregator is
     * supposed to include an atom:source element replicating metadata from
     * the original feed. If one is found it is wrapped in an
     * XML_Feed_Parser_Atom object.
     * NOTE(review): this passes a DOMElement to the XML_Feed_Parser_Atom
     * constructor - confirm that constructor accepts one.
     *
     * @return XML_Feed_Parser_Atom|false
     */
    public function getSource()
    {
        $test = $this->model->getElementsByTagName('source');
        if ($test->length == 0) {
            return false;
        }
        return new XML_Feed_Parser_Atom($test->item(0));
    }

    /**
     * Get the entry as an XML string
     *
     * @return string XML serialization of element
     */
    public function __toString()
    {
        $simple = simplexml_import_dom($this->model);
        return $simple->asXML();
    }
}
?> |
/trunk/api/pear/XML/Feed/Parser/RSS09Element.php |
---|
New file |
0,0 → 1,62 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* RSS0.9 Element class for XML_Feed_Parser |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 |
* @version CVS: $Id: RSS09Element.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/**
* This class provides support for RSS 0.9 entries. It will usually be called by |
* XML_Feed_Parser_RSS09 with which it shares many methods. |
* |
* @author James Stewart <james@jystewart.net> |
* @version Release: 1.0.2 |
* @package XML_Feed_Parser |
*/ |
class XML_Feed_Parser_RSS09Element extends XML_Feed_Parser_RSS09
{
    /**
     * The feed this item belongs to, kept for 'fallback' rules
     * @var XML_Feed_Parser_RSS09
     */
    protected $parent;

    /**
     * The only elements read from an RSS 0.9 item
     * @var array
     */
    protected $map = array(
        'title' => array('Text'),
        'link' => array('Link'),
    );

    /**
     * Remembers the item node and the feed that owns it.
     *
     * @param DOMElement $element - this item as a DOM element
     * @param XML_Feed_Parser_RSS09 $parent - the feed of which this is a member
     * @param string $xmlBase - accepted for signature compatibility with the
     *                          other element classes; not used here
     */
    public function __construct(DOMElement $element, $parent, $xmlBase = '')
    {
        $this->parent = $parent;
        $this->model = $element;
    }
}
?> |
/trunk/api/pear/XML/Feed/Parser/Exception.php |
---|
New file |
0,0 → 1,42 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* Keeps the exception class for XML_Feed_Parser. |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL |
* @version CVS: $Id: Exception.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/** |
* We are extending PEAR_Exception |
*/ |
require_once 'PEAR/Exception.php'; |
/** |
* XML_Feed_Parser_Exception is a simple extension of PEAR_Exception, existing |
* to help with identification of the source of exceptions. |
* |
* @author James Stewart <james@jystewart.net> |
* @version Release: 1.0.2 |
* @package XML_Feed_Parser |
*/ |
class XML_Feed_Parser_Exception extends PEAR_Exception
{
    /* Intentionally empty: the class only gives the package's errors a
       distinct type to catch; all behaviour comes from PEAR_Exception. */
}
?> |
/trunk/api/pear/XML/Feed/Parser.php |
---|
New file |
0,0 → 1,351 |
<?php |
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */ |
/** |
* Key gateway class for XML_Feed_Parser package |
* |
* PHP versions 5 |
* |
* LICENSE: This source file is subject to version 3.0 of the PHP license |
* that is available through the world-wide-web at the following URI: |
* http://www.php.net/license/3_0.txt. If you did not receive a copy of |
* the PHP License and are unable to obtain it through the web, please |
* send a note to license@php.net so we can mail you a copy immediately. |
* |
* @category XML |
* @package XML_Feed_Parser |
* @author James Stewart <james@jystewart.net> |
* @copyright 2005 James Stewart <james@jystewart.net> |
* @license http://www.gnu.org/copyleft/lesser.html GNU LGPL |
* @version CVS: $Id: Parser.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $ |
* @link http://pear.php.net/package/XML_Feed_Parser/ |
*/ |
/** |
* XML_Feed_Parser_Type is an abstract class required by all of our |
* feed types. It makes sense to load it here to keep the other files |
* clean. |
*/ |
require_once 'XML/Feed/Parser/Type.php'; |
/** |
* We will throw exceptions when errors occur. |
*/ |
require_once 'XML/Feed/Parser/Exception.php'; |
/** |
* This is the core of the XML_Feed_Parser package. It identifies feed types |
* and abstracts access to them. It is an iterator, allowing for easy access |
* to the entire feed. |
* |
* @author James Stewart <james@jystewart.net> |
* @version Release: 1.0.2 |
* @package XML_Feed_Parser |
*/ |
class XML_Feed_Parser implements Iterator
{
    /**
     * This is where we hold the feed object
     * @var Object
     */
    private $feed;

    /**
     * To allow for extensions, we make a public reference to the feed model
     * @var DOMDocument
     */
    public $model;

    /**
     * A map between entry ID and offset
     * @var array
     */
    protected $idMappings = array();

    /**
     * Offset of the entry the iterator currently points at; null until
     * rewind() or next() has positioned it. Declared explicitly so that
     * reads are not routed through __get() to the feed object.
     * @var int|null
     */
    protected $current_item = null;

    /**
     * A storage space for Namespace URIs.
     * @var array
     */
    private $feedNamespaces = array(
        'rss2' => array(
            'http://backend.userland.com/rss',
            'http://backend.userland.com/rss2',
            'http://blogs.law.harvard.edu/tech/rss'));

    /**
     * Detects feed types and instantiate appropriate objects.
     *
     * The feed is loaded into a DOM document (optionally after a repair pass
     * through the tidy extension), its type is derived from the namespace,
     * root tag name and version attribute, and the matching type class is
     * loaded and instantiated. Atom 0.3 is handled by the Atom 1.0 parser
     * and RSS 0.91/0.92 by the RSS 2.0 parser, each with a warning.
     *
     * @param string $feed XML serialization of the feed
     * @param bool $strict Whether or not to validate the feed
     * @param bool $suppressWarnings Set to true to silence the warnings
     *                               raised for deprecated feed types
     * @param bool $tidy Whether or not to try and use the tidy library on input
     * @throws XML_Feed_Parser_Exception when the input is not valid XML or
     *                                   the feed type is unknown
     */
    function __construct($feed, $strict = false, $suppressWarnings = false, $tidy = false)
    {
        /* DOMDocument::loadXML() raises a ValueError for an empty string on
           PHP 8; report it the same way as any other unparsable input */
        if ($feed === '' || $feed === null || $feed === false) {
            throw new XML_Feed_Parser_Exception('Invalid input: this is not valid XML');
        }

        $this->model = new DOMDocument;
        if (! $this->model->loadXML($feed)) {
            if (! (extension_loaded('tidy') && $tidy)) {
                throw new XML_Feed_Parser_Exception('Invalid input: this is not valid XML');
            }
            $cleaner = new tidy;
            $cleaner->parseString($feed,
                array('input-xml' => true, 'output-xml' => true));
            $cleaner->cleanRepair();
            $repaired = (string) $cleaner;
            if ($repaired === '' || ! $this->model->loadXML($repaired)) {
                throw new XML_Feed_Parser_Exception('Invalid input: this is not ' .
                    'valid XML');
            }
        }

        /* detect feed type */
        $doc_element = $this->model->documentElement;
        $rootNs = $doc_element->namespaceURI;
        /* Namespace of the second child node of the root, when there is one */
        $childNs = null;
        if ($doc_element->hasChildNodes() && $doc_element->childNodes->length > 1) {
            $childNs = $doc_element->childNodes->item(1)->namespaceURI;
        }
        $isRssTag = ($doc_element->tagName == 'rss');
        $version = $doc_element->hasAttribute('version') ?
            $doc_element->getAttribute('version') : null;

        $error = false;
        if ($rootNs === 'http://www.w3.org/2005/Atom') {
            $type = 'Atom';
        } else if ($rootNs === 'http://purl.org/atom/ns#') {
            $type = 'Atom';
            $error = 'Atom 0.3 deprecated, using 1.0 parser which won\'t provide ' .
                'all options';
        } else if ($rootNs === 'http://purl.org/rss/1.0/' ||
            $childNs === 'http://purl.org/rss/1.0/') {
            $type = 'RSS1';
        } else if ($rootNs === 'http://purl.org/rss/1.1/' ||
            $childNs === 'http://purl.org/rss/1.1/') {
            $type = 'RSS11';
        } else if ($childNs === 'http://my.netscape.com/rdf/simple/0.9/' ||
            $rootNs === 'http://my.netscape.com/rdf/simple/0.9/') {
            $type = 'RSS09';
        } else if ($isRssTag && $version !== null && $version == 0.91) {
            $type = 'RSS2';
            $error = 'RSS 0.91 has been superceded by RSS2.0. Using RSS2.0 parser.';
        } else if ($isRssTag && $version !== null && $version == 0.92) {
            $type = 'RSS2';
            $error = 'RSS 0.92 has been superceded by RSS2.0. Using RSS2.0 parser.';
        } else if (in_array($rootNs, $this->feedNamespaces['rss2'], true) || $isRssTag) {
            $type = 'RSS2';
            if ($version === null || $version != 2) {
                $error = 'RSS version not specified. Parsing as RSS2.0';
            }
        } else {
            throw new XML_Feed_Parser_Exception('Feed type unknown');
        }

        /* $type is always one of the literals assigned above */
        require_once 'XML/Feed/Parser/' . $type . '.php';
        require_once 'XML/Feed/Parser/' . $type . 'Element.php';
        $class = 'XML_Feed_Parser_' . $type;

        if (! $suppressWarnings && ! empty($error)) {
            trigger_error($error, E_USER_WARNING);
        }

        /* Instantiate feed object */
        $this->feed = new $class($this->model, $strict);
    }

    /**
     * Proxy to allow feed element names to be used as method names
     *
     * Passes the call on to the feed type object. The argument list is
     * padded with false to exactly five entries, so the feed object always
     * receives five arguments.
     *
     * @param string $call - the method being called
     * @param array $attributes - the arguments of the call
     * @return mixed whatever the feed object returns
     */
    function __call($call, $attributes)
    {
        $attributes = array_pad($attributes, 5, false);
        list($a, $b, $c, $d, $e) = $attributes;
        return $this->feed->$call($a, $b, $c, $d, $e);
    }

    /**
     * Proxy to allow feed element names to be used as attribute names
     *
     * Reads the property of the same name from the feed type object.
     *
     * @param string $val - the name of the variable required
     * @return mixed
     */
    function __get($val)
    {
        return $this->feed->$val;
    }

    /**
     * Provides iteration functionality.
     *
     * Moves the internal counter forward by one, or to 0 when the iterator
     * has not been positioned yet.
     *
     * @return null|false false once the counter is past the last entry
     */
    #[\ReturnTypeWillChange]
    function next()
    {
        if ($this->current_item === null) {
            $this->current_item = 0;
        } else if ($this->current_item <= $this->feed->numberEntries - 1) {
            ++$this->current_item;
        } else {
            return false;
        }
    }

    /**
     * Return the entry object for the current position
     *
     * @return XML_Feed_Parser_Type|false
     */
    #[\ReturnTypeWillChange]
    function current()
    {
        return $this->getEntryByOffset($this->current_item);
    }

    /**
     * For iteration -- returns the key for the current stage in the array.
     *
     * @return int|null null while the iterator has not been positioned
     */
    #[\ReturnTypeWillChange]
    function key()
    {
        return $this->current_item;
    }

    /**
     * For iteration -- tells whether we have reached the
     * end.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    function valid()
    {
        return $this->current_item < $this->feed->numberEntries;
    }

    /**
     * For iteration -- resets the internal counter to the beginning.
     */
    #[\ReturnTypeWillChange]
    function rewind()
    {
        $this->current_item = 0;
    }

    /**
     * Provides access to entries by ID if one is specified in the source feed.
     *
     * IDs already seen through getEntryByOffset() are resolved from the local
     * map; anything else is delegated to the feed type object.
     *
     * @param string $id Valid ID for the given feed format
     * @return XML_Feed_Parser_Type|false
     */
    function getEntryById($id)
    {
        if (isset($this->idMappings[$id])) {
            return $this->getEntryByOffset($this->idMappings[$id]);
        }
        /* Not seen yet: let the feed type object look for it */
        return $this->feed->getEntryById($id);
    }

    /**
     * Retrieve entry by numeric offset, starting from zero.
     *
     * As well as allowing the items to be iterated over we want to allow
     * users to be able to access a specific entry. This is one of two ways of
     * doing that, the other being by ID.
     *
     * @param int $offset The position of the entry within the feed, starting from 0
     * @return XML_Feed_Parser_Type|false false when the offset is out of
     *                                    range or the entry cannot be built
     */
    function getEntryByOffset($offset)
    {
        if ($offset < 0 || $offset >= $this->feed->numberEntries) {
            return false;
        }
        if (isset($this->feed->entries[$offset])) {
            return $this->feed->entries[$offset];
        }

        try {
            $built = $this->feed->getEntryByOffset($offset);
        } catch (Exception $e) {
            return false;
        }

        /* Prefer the entry the feed object stored; fall back to the one it
           returned. Either may be missing, which must not be dereferenced. */
        $loaded = $this->feed->entries;
        if (isset($loaded[$offset])) {
            $entry = $loaded[$offset];
        } else if (is_object($built)) {
            $entry = $built;
        } else {
            return false;
        }

        if (is_object($entry)) {
            $id = $entry->getID();
            /* Only usable array keys are remembered */
            if ((is_string($id) && $id !== '') || is_int($id)) {
                $this->idMappings[$id] = $offset;
            }
        }
        return $entry;
    }

    /**
     * Retrieve version details from feed type class.
     *
     * @return string the 'version' property of the feed type object
     * @author James Stewart
     */
    function version()
    {
        return $this->feed->version;
    }

    /**
     * Returns a string representation of the feed.
     *
     * @return String
     **/
    function __toString()
    {
        return $this->feed->__toString();
    }
}
?> |