<?php
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */

/**
 * Abstract class providing common methods for XML_Feed_Parser feeds.
 *
 * PHP version 5
 *
 * LICENSE: This source file is subject to version 3.0 of the PHP license
 * that is available through the world-wide-web at the following URI:
 * http://www.php.net/license/3_0.txt. If you did not receive a copy of
 * the PHP License and are unable to obtain it through the web, please
 * send a note to license@php.net so we can mail you a copy immediately.
 *
 * @category XML
 * @package XML_Feed_Parser
 * @author James Stewart <james@jystewart.net>
 * @copyright 2005 James Stewart <james@jystewart.net>
 * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1
 * @version CVS: $Id: Type.php 304308 2010-10-11 12:05:50Z clockwerx $
 * @link http://pear.php.net/package/XML_Feed_Parser/
 */

/**
 * This abstract class provides some general methods that are likely to be
 * implemented exactly the same way for all feed types.
 *
 * The methods below read several members that this class does not declare
 * ($map, $compatMap, $itemElement, $itemClass, $relax) and call getLink().
 * NOTE(review): presumably the concrete feed classes define all of these;
 * verify against the subclasses.
 *
 * @package XML_Feed_Parser
 * @author James Stewart <james@jystewart.net>
 * @version Release: @package_version@
 */
abstract class XmlFeedParserType {
    /**
     * Where we store our DOM object for this feed
     * @var DOMDocument
     */
    public $model;

    /**
     * For iteration we'll want a count of the number of entries
     * @var int
     */
    public $numberEntries;

    /**
     * Where we store our entry objects once instantiated
     * @var array
     */
    public $entries = array();

    /**
     * Entry objects keyed by their ID, filled in by getEntryByOffset()
     * as entries are instantiated.
     * @var array
     */
    public $idMappings = array();

    /**
     * Proxy to allow use of element names as method names
     *
     * We are not going to provide methods for every entry type so this
     * function will allow for a lot of mapping. We rely pretty heavily
     * on this to handle our mappings between other feed types and atom.
     *
     * Resolution order:
     *  1. If $compatMap has an entry for $call, its last element becomes the
     *     effective call and any preceding elements are appended to the
     *     arguments.
     *  2. The call is looked up in $map, falling back to a case-insensitive
     *     match on the map keys.
     *  3. The first element of the map entry, prefixed with 'get', names the
     *     method to run. getLink is special-cased so that it receives
     *     (offset, attribute, params); every other getter receives
     *     ($call, $arguments).
     *
     * @param string $call - the method attempted
     * @param array $arguments - arguments to that method
     * @return mixed the getter's result, or false when nothing handles $call
     */
    public function __call($call, $arguments = array()) {
        if (! is_array($arguments)) {
            $arguments = array();
        }

        if (isset($this->compatMap[$call])) {
            $compat = $this->compatMap[$call];
            $target = array_pop($compat);
            /* Whatever is left after removing the target is passed as extra arguments */
            if (! empty($compat)) {
                $arguments = array_merge($arguments, $compat);
            }
            $call = $target;
        }

        /* To be helpful, we allow a case-insensitive search for this method */
        if (! isset($this->map[$call])) {
            foreach (array_keys($this->map) as $key) {
                if (strtoupper($key) === strtoupper($call)) {
                    $call = $key;
                    break;
                }
            }
        }

        if (empty($this->map[$call])) {
            return false;
        }

        $method = 'get' . $this->map[$call][0];
        if ($method === 'getLink') {
            $offset = empty($arguments[0]) ? 0 : $arguments[0];
            $attribute = empty($arguments[1]) ? 'href' : $arguments[1];
            $params = isset($arguments[2]) ? $arguments[2] : array();
            return $this->getLink($offset, $attribute, $params);
        }
        if (method_exists($this, $method)) {
            return $this->$method($call, $arguments);
        }

        return false;
    }

    /**
     * Proxy to allow use of element names as attribute names
     *
     * For many elements variable-style access will be desirable. This function
     * provides for that by delegating to __call() with no arguments.
     *
     * @param string $value - the variable required
     * @return mixed
     */
    public function __get($value) {
        return $this->__call($value, array());
    }

    /**
     * Utility function to help us resolve xml:base values
     *
     * Combines a base URI with a link found beneath it:
     *  - a link that starts with a protocol is returned untouched;
     *  - a link that starts with "/" is appended to the protocol and host
     *    of the base (or returned as is when the base has no protocol);
     *  - a link that starts with one or more "../" climbs that many
     *    directories up from the base, never going above the host;
     *  - any other link replaces everything after the last "/" of the base.
     *
     * @param string $base - the base to add the link to
     * @param string $link
     * @return string the combined URI
     */
    public function combineBases($base, $link) {
        $base = (string) $base;
        $link = (string) $link;
        $scheme = '/^[A-Za-z]+:\/\//';

        if (preg_match($scheme, $link)) {
            return $link;
        }

        if (preg_match('/^\//', $link)) {
            /* Extract protocol and host only, then suffix the link to that */
            if (preg_match('/^[A-Za-z]+:\/\/[^\/]*/', $base, $results)) {
                return $results[0] . $link;
            }
            return $link;
        }

        if (preg_match('/^((\.\.\/)+)(.*)$/', $link, $bases)) {
            /* Step up from the base to find the place to be */
            $suffix = $bases[3];
            $count = preg_match_all('/\.\.\//', $bases[1], $steps);
            $url = explode("/", $base);
            /* "scheme:", "" and the host are the first three pieces: keep them */
            $floor = preg_match($scheme, $base) ? 3 : 0;
            /* One pop drops the file name, the others climb one directory each */
            for ($i = 0; $i <= $count && count($url) > $floor; $i++) {
                array_pop($url);
            }
            return implode("/", $url) . "/" . $suffix;
        }

        if (preg_match('/^[A-Za-z]+:\/\/[^\/]*$/', $base)) {
            /* The base is just protocol and host with no path at all */
            return $base . "/" . $link;
        }

        if (preg_match('/^(?!\/$)/', $base)) {
            /* Drop whatever follows the last slash of the base */
            $base = preg_replace('/(.*\/).*$/', '$1', $base);
            return $base . $link;
        }

        /* Just stick it on the end */
        return $base . $link;
    }

    /**
     * Determine whether we need to apply our xml:base rules
     *
     * Links that already start with a protocol are returned unchanged.
     * Anything else is combined with the baseURI of the given node.
     *
     * @param string $link
     * @param DOMNode $element node whose baseURI property is used as the base
     * @return string
     */
    public function addBase($link, $element) {
        if (preg_match('/^[A-Za-z]+:\/\//', $link)) {
            return $link;
        }

        return $this->combineBases($element->baseURI, $link);
    }

    /**
     * Get an entry by its position in the feed, starting from zero
     *
     * As well as allowing the items to be iterated over we want to allow
     * users to be able to access a specific entry. This is one of two ways of
     * doing that, the other being by ID.
     *
     * Entries are instantiated on first access, cached in $entries and,
     * when they report an id, registered in $idMappings.
     *
     * @param int $offset
     * @return object an instance of the class named by $this->itemClass
     * @throws XML_Feed_Parser_Exception when no entry exists at $offset
     */
    public function getEntryByOffset($offset) {
        if (! isset($this->entries[$offset])) {
            $entries = $this->model->getElementsByTagName($this->itemElement);
            /* A negative offset would make item() return null below */
            if ($offset < 0 || $entries->length <= $offset) {
                throw new XML_Feed_Parser_Exception('No entries found');
            }

            $xmlBase = $entries->item($offset)->baseURI;
            $this->entries[$offset] = new $this->itemClass(
                $entries->item($offset), $this, $xmlBase);
            if ($id = $this->entries[$offset]->id) {
                $this->idMappings[$id] = $this->entries[$offset];
            }
        }

        return $this->entries[$offset];
    }

    /**
     * Return a date in seconds since epoch.
     *
     * Get a date construct. We use PHP's strtotime to return it as a unix datetime, which
     * is the number of seconds since 1970-01-01 00:00:00.
     *
     * @link http://php.net/strtotime
     * @param string $method The name of the date construct we want
     * @param array $arguments Included for compatibility with our __call usage
     * @return int|false datetime, or false when the element is absent or empty
     */
    protected function getDate($method, $arguments) {
        $time = $this->model->getElementsByTagName($method);
        if ($time->length == 0 || empty($time->item(0)->nodeValue)) {
            return false;
        }
        return strtotime($time->item(0)->nodeValue);
    }

    /**
     * Get a text construct.
     *
     * @param string $method The name of the text construct we want
     * @param array $arguments Included for compatibility with our __call usage
     * @return string|false value of the first matching element, false if none
     */
    protected function getText($method, $arguments = array()) {
        $tags = $this->model->getElementsByTagName($method);
        if ($tags->length > 0) {
            return $tags->item(0)->nodeValue;
        }
        return false;
    }

    /**
     * Apply various rules to retrieve category data.
     *
     * There is no single way of declaring a category in RSS1/1.1 as there is in RSS2
     * and Atom. Instead the usual approach is to use the dublin core namespace to
     * declare categories. For example delicious use both:
     * <dc:subject>PEAR</dc:subject> and: <taxo:topics><rdf:Bag>
     * <rdf:li resource="http://del.icio.us/tag/PEAR" /></rdf:Bag></taxo:topics>
     * to declare a categorisation of 'PEAR'.
     *
     * We need to be sensitive to this where possible. This implementation
     * only looks at 'subject' elements.
     *
     * @param string $call for compatibility with our overloading
     * @param array $arguments - arg 0 is the offset, arg 1 is whether to return as array
     * @return string|array|false
     */
    protected function getCategory($call, $arguments) {
        $categories = $this->model->getElementsByTagName('subject');
        $offset = empty($arguments[0]) ? 0 : $arguments[0];
        $array = ! empty($arguments[1]);
        if ($categories->length <= $offset) {
            return false;
        }
        if ($array) {
            $list = array();
            foreach ($categories as $category) {
                $list[] = $category->nodeValue;
            }
            return $list;
        }
        return $categories->item($offset)->nodeValue;
    }

    /**
     * Count occurrences of an element
     *
     * This function will tell us how many times the element $type
     * appears at this level of the feed.
     *
     * @param string $type the element we want to get a count of
     * @return int
     */
    protected function count($type) {
        if ($tags = $this->model->getElementsByTagName($type)) {
            return $tags->length;
        }
        return 0;
    }

    /**
     * Part of our xml:base processing code
     *
     * We need a couple of methods to access XHTML content stored in feeds.
     * This is because we dereference all xml:base references before returning
     * the element. This method handles the attributes.
     *
     * 'src' and 'href' values are resolved through addBase() and written
     * back to the node. Attributes named 'base' are left out of the result.
     * Values are escaped for use inside double quotes.
     *
     * @param DOMElement $node The DOM node we are iterating over
     * @return string the attributes as ' name="value" ...' or '' when none
     */
    public function processXHTMLAttributes($node) {
        $return = '';
        foreach ($node->attributes as $attribute) {
            if ($attribute->name == 'src' or $attribute->name == 'href') {
                $attribute->value = $this->addBase(
                    htmlentities($attribute->value, ENT_NOQUOTES, 'utf-8'), $attribute);
            }
            if ($attribute->name == 'base') {
                continue;
            }
            /* ENT_COMPAT so that a double quote cannot end the attribute early */
            $return .= $attribute->name . '="' .
                htmlentities($attribute->value, ENT_COMPAT, 'utf-8') . '" ';
        }
        if (! empty($return)) {
            return ' ' . trim($return);
        }
        return '';
    }

    /**
     * Convert HTML entities based on the current character set.
     *
     * The node value is converted from the encoding declared by the owning
     * document to UTF-8 (with iconv when available, otherwise utf8_encode
     * for iso-8859-1 only), then entity-decoded and re-encoded so that the
     * result is consistently escaped. Quotes are left untouched. When the
     * document declares no encoding, or the conversion fails, the value is
     * used as it is.
     *
     * NOTE(review): PHP's DOM extension is documented as returning UTF-8
     * strings whatever the document encoding is, so this conversion may be
     * unnecessary -- confirm before relying on it for non UTF-8 feeds.
     *
     * @param DOMNode $node the text node to process
     * @return string
     */
    public function processEntitiesForNodeValue($node) {
        $current_encoding = $node->ownerDocument->encoding;
        $value = $node->nodeValue;

        if (! empty($current_encoding)) {
            if (function_exists('iconv')) {
                $converted = iconv($current_encoding, 'UTF-8', $value);
                if ($converted !== false) {
                    $value = $converted;
                }
            } else if (strcasecmp($current_encoding, 'iso-8859-1') === 0) {
                $value = utf8_encode($value);
            }
        }

        $decoded = html_entity_decode($value, ENT_NOQUOTES, 'UTF-8');
        return htmlentities($decoded, ENT_NOQUOTES, 'UTF-8');
    }

    /**
     * Part of our xml:base processing code
     *
     * We need a couple of methods to access XHTML content stored in feeds.
     * This is because we dereference all xml:base references before returning
     * the element. This method recurs through the tree descending from the node
     * and builds our string.
     *
     * @param DOMNode $node The DOM node we are processing
     * @return string
     */
    public function traverseNode($node) {
        $content = '';

        /* Add the opening of this node to the content */
        if ($node instanceof DOMElement) {
            $content .= '<' . $node->tagName .
                $this->processXHTMLAttributes($node) . '>';
        }

        /* Process children */
        if ($node->hasChildNodes()) {
            foreach ($node->childNodes as $child) {
                $content .= $this->traverseNode($child);
            }
        }

        if ($node instanceof DOMText) {
            $content .= $this->processEntitiesForNodeValue($node);
        }

        /* Add the closing of this node to the content */
        if ($node instanceof DOMElement) {
            $content .= '</' . $node->tagName . '>';
        }

        return $content;
    }

    /**
     * Get content from RSS feeds (atom has its own implementation)
     *
     * The official way to include full content in an RSS1 entry is to use
     * the content module's element 'encoded', and RSS2 feeds often duplicate that.
     * Often, however, the 'description' element is used instead. We will offer that
     * as a fallback. Atom uses its own approach and overrides this method.
     *
     * Text children are appended as they are and every other child is
     * appended as serialized XML.
     *
     * @return string|false false when neither element is present
     */
    protected function getContent() {
        $options = array('encoded', 'description');
        foreach ($options as $element) {
            $test = $this->model->getElementsByTagName($element);
            if ($test->length == 0) {
                continue;
            }

            $node = $test->item(0);
            if (! $node->hasChildNodes()) {
                return $node->nodeValue;
            }

            $value = '';
            foreach ($node->childNodes as $child) {
                if ($child instanceof DOMText) {
                    $value .= $child->nodeValue;
                } else if ($child instanceof DOMElement) {
                    $simple = simplexml_import_dom($child);
                    $value .= $simple->asXML();
                } else {
                    /* simplexml can only import elements (not comments etc.) */
                    $value .= $child->ownerDocument->saveXML($child);
                }
            }
            return $value;
        }
        return false;
    }

    /**
     * Checks if this element has a particular child element.
     *
     * @param string $name tag name to look for
     * @param int $offset zero-based occurrence that must exist
     * @return bool true when more than $offset matching elements are found
     **/
    public function hasKey($name, $offset = 0) {
        $search = $this->model->getElementsByTagName($name);
        return $search->length > $offset;
    }

    /**
     * Return an XML serialization of the feed, should it be required. Most
     * users however, will already have a serialization that they used when
     * instantiating the object.
     *
     * @return string XML serialization of element
     */
    public function __toString() {
        $simple = simplexml_import_dom($this->model);
        return $simple->asXML();
    }

    /**
     * Get directory holding RNG schemas. Method is based on that
     * found in Contact_AddressBook.
     *
     * @return string the 'schemas' directory next to this file's directory
     * @access public
     * @static
     */
    public static function getSchemaDir() {
        return dirname(__FILE__).'/../schemas';
    }

    /**
     * Validate the feed against its RelaxNG schema.
     *
     * The schema file is named by $this->relax and looked up in the
     * directory returned by getSchemaDir().
     *
     * @return mixed whatever relaxNGValidate() on the model returns
     */
    public function relaxNGValidate() {
        $dir = self::getSchemaDir();
        $path = $dir . '/' . $this->relax;
        return $this->model->relaxNGValidate($path);
    }
}

?>