Subversion Repositories Applications.papyrus

Rev

Rev 1919 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2150 mathias 1
<?php
2
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
3
 
4
/**
5
 * Abstract class providing common methods for XML_Feed_Parser feeds.
6
 *
7
 * PHP version 5
8
 *
9
 * LICENSE: This source file is subject to version 3.0 of the PHP license
10
 * that is available through the world-wide-web at the following URI:
11
 * http://www.php.net/license/3_0.txt.  If you did not receive a copy of
12
 * the PHP License and are unable to obtain it through the web, please
13
 * send a note to license@php.net so we can mail you a copy immediately.
14
 *
15
 * @category   XML
16
 * @package    XML_Feed_Parser
17
 * @author     James Stewart <james@jystewart.net>
18
 * @copyright  2005 James Stewart <james@jystewart.net>
19
 * @license    http://www.gnu.org/copyleft/lesser.html  GNU LGPL 2.1
20
 * @version    CVS: $Id: Type.php,v 1.2 2007-07-25 15:05:34 jp_milcent Exp $
21
 * @link       http://pear.php.net/package/XML_Feed_Parser/
22
 */
23
 
24
/**
 * Base class holding the behaviour shared by every feed type handled by
 * XML_Feed_Parser.
 *
 * Concrete subclasses are expected to supply the lookup tables and settings
 * this class reads but does not declare itself: $map, $compatMap,
 * $itemElement, $itemClass, $idMappings and (for link elements) a getLink()
 * method.
 *
 * @package XML_Feed_Parser
 * @author  James Stewart <james@jystewart.net>
 * @version Release: 1.0.2
 */
abstract class XML_Feed_Parser_Type
{
    /**
     * Where we store our DOM object for this feed
     * @var DOMDocument
     */
    public $model;

    /**
     * For iteration we'll want a count of the number of entries
     * @var int
     */
    public $numberEntries;

    /**
     * Where we store our entry objects once instantiated, keyed by offset
     * @var array
     */
    public $entries = array();

    /**
     * Proxy to allow use of element names as method names
     *
     * Resolution happens in three steps:
     *  1. If $call is a key of $this->compatMap, the last value of that entry
     *     becomes the real element name and the remaining values are appended
     *     to the caller's arguments.
     *  2. The (possibly rewritten) name is looked up in $this->map, first
     *     exactly and then case-insensitively.
     *  3. The first value of the map entry is prefixed with 'get' to form the
     *     method to run. 'getLink' is called with (offset, attribute, params)
     *     unpacked from the arguments; every other getter receives
     *     ($call, $arguments).
     *
     * @param   string  $call - the method attempted
     * @param   array   $arguments - arguments to that method
     * @return  mixed   the getter's result, or false when nothing matches
     */
    public function __call($call, $arguments = array())
    {
        if (! is_array($arguments)) {
            $arguments = array();
        }

        if (isset($this->compatMap[$call])) {
            /* Work on a copy so the compatibility map itself is untouched */
            $mapped = $this->compatMap[$call];
            $target = array_pop($mapped);
            if (! empty($mapped)) {
                $arguments = array_merge($arguments, $mapped);
            }
            $call = $target;
        }

        /* To be helpful, we allow a case-insensitive search for this method */
        if (! isset($this->map[$call])) {
            foreach (array_keys($this->map) as $key) {
                if (strtoupper($key) == strtoupper($call)) {
                    $call = $key;
                    break;
                }
            }
        }

        if (empty($this->map[$call])) {
            return false;
        }

        $method = 'get' . $this->map[$call][0];
        if ($method == 'getLink') {
            $offset = empty($arguments[0]) ? 0 : $arguments[0];
            $attribute = empty($arguments[1]) ? 'href' : $arguments[1];
            $params = isset($arguments[2]) ? $arguments[2] : array();
            return $this->getLink($offset, $attribute, $params);
        }
        if (method_exists($this, $method)) {
            return $this->$method($call, $arguments);
        }

        return false;
    }

    /**
     * Proxy to allow use of element names as attribute names
     *
     * Reading an undeclared property is forwarded to __call() with no
     * arguments, so $feed->title behaves like $feed->title().
     *
     * @param   string  $value - the variable required
     * @return  mixed
     */
    public function __get($value)
    {
        return $this->__call($value, array());
    }

    /**
     * Utility function to help us resolve xml:base values
     *
     * Combines a base URI with a link found underneath it:
     *  - a link that carries its own scheme ("http://...", "mailto:...") is
     *    returned unchanged;
     *  - a link starting with "/" replaces the whole path of the base, keeping
     *    only its "scheme://authority" prefix (if the base has one);
     *  - a link starting with one or more "../" climbs that many directories
     *    from the directory of the base, never climbing above the authority
     *    of an absolute base;
     *  - anything else is appended to the directory of the base (everything
     *    up to and including its last "/").
     *
     * @param   string  $base - the base to add the link to
     * @param   string  $link
     * @return  string  the combined URI
     */
    public function combineBases($base, $link)
    {
        /* A link with its own scheme never needs a base */
        if (preg_match('/^[A-Za-z][A-Za-z0-9+.\-]*:/', $link)) {
            return $link;
        }

        /* Split the base into "scheme://authority" (may be empty) and path */
        $root = '';
        $path = (string) $base;
        if (preg_match('/^[A-Za-z][A-Za-z0-9+.\-]*:\/\/[^\/]*/', $path, $matches)) {
            $root = $matches[0];
            /* substr() returns false on PHP < 8 when nothing is left */
            $path = (string) substr($path, strlen($root));
        }

        /* Root-relative link: keep only the domain part of the base */
        if (preg_match('/^\//', $link)) {
            return $root . $link;
        }

        /* Directory of the base: everything up to and including the last "/" */
        $lastSlash = strrpos($path, '/');
        $directory = ($lastSlash === false) ? '' : substr($path, 0, $lastSlash + 1);
        if ($root !== '' && $directory === '') {
            /* "http://example.com" has an implicit root directory */
            $directory = '/';
        }

        if (preg_match('/^((\.\.\/)+)(.*)$/', $link, $parts)) {
            /* Step up from the base directory once per leading "../" */
            $steps = substr_count($parts[1], '../');
            $absolute = ($directory !== '' && $directory[0] === '/');
            $trimmed = trim($directory, '/');
            $segments = ($trimmed === '') ? array() : explode('/', $trimmed);

            for ($i = 0; $i < $steps; $i++) {
                if (count($segments) > 0 && end($segments) !== '..') {
                    array_pop($segments);
                } else if (! $absolute) {
                    /* Relative base with nothing left to remove: keep climbing */
                    $segments[] = '..';
                }
                /* Absolute base already at its root: stay there */
            }

            $directory = ($absolute ? '/' : '')
                . (count($segments) > 0 ? implode('/', $segments) . '/' : '');
            return $root . $directory . $parts[3];
        }

        return $root . $directory . $link;
    }

    /**
     * Determine whether we need to apply our xml:base rules
     *
     * Links that already carry a scheme are returned untouched; all others
     * are combined with the baseURI reported by the given DOM node.
     *
     * @param   string  $link     the link to resolve
     * @param   DOMNode $element  node whose baseURI is used as the base
     * @return  string
     */
    public function addBase($link, $element)
    {
        if (preg_match('/^[A-Za-z][A-Za-z0-9+.\-]*:/', $link)) {
            return $link;
        }

        return $this->combineBases($element->baseURI, $link);
    }

    /**
     * Get an entry by its position in the feed, starting from zero
     *
     * As well as allowing the items to be iterated over we want to allow
     * users to be able to access a specific entry. This is one of two ways of
     * doing that, the other being by ID.
     *
     * Entries are instantiated lazily from the elements named by
     * $this->itemElement, using the class named by $this->itemClass, and are
     * cached in $this->entries. When the new entry reports a truthy id it is
     * also recorded in $this->idMappings.
     *
     * @param   int $offset
     * @return  object  an instance of $this->itemClass
     * @throws  XML_Feed_Parser_Exception when no entry exists at $offset
     */
    public function getEntryByOffset($offset)
    {
        if (! isset($this->entries[$offset])) {
            $entries = $this->model->getElementsByTagName($this->itemElement);
            /* A negative offset would make item() return null */
            if ($offset < 0 || $entries->length <= $offset) {
                throw new XML_Feed_Parser_Exception('No entries found');
            }

            $item = $entries->item($offset);
            $xmlBase = $item->baseURI;
            $this->entries[$offset] = new $this->itemClass($item, $this, $xmlBase);

            if ($id = $this->entries[$offset]->id) {
                /* Copy, modify and write back rather than editing in place,
                 * as the original code did; presumably so this keeps working
                 * if idMappings is not a plain declared property (TODO
                 * confirm against the subclasses). */
                $id_mappings = $this->idMappings;
                $id_mappings[$id] = $this->entries[$offset];
                $this->idMappings = $id_mappings;
            }
        }

        return $this->entries[$offset];
    }

    /**
     * Return a date in seconds since epoch.
     *
     * Reads the first element named $method anywhere below the model and
     * passes its text to PHP's strtotime, which returns a unix timestamp
     * (seconds since 1970-01-01 00:00:00 UTC).
     *
     * @link    http://php.net/strtotime
     * @param    string    $method        The name of the date construct we want
     * @param    array     $arguments    Included for compatibility with our __call usage
     * @return    int|false timestamp, or false if the element is missing or
     *                      strtotime cannot parse it
     */
    protected function getDate($method, $arguments)
    {
        $time = $this->model->getElementsByTagName($method);
        if ($time->length == 0) {
            return false;
        }
        return strtotime($time->item(0)->nodeValue);
    }

    /**
     * Get a text construct.
     *
     * @param    string    $method    The name of the text construct we want
     * @param    array     $arguments    Included for compatibility with our __call usage
     * @return    string|false    text of the first matching element, or false
     *                            when there is none
     */
    protected function getText($method, $arguments = array())
    {
        $tags = $this->model->getElementsByTagName($method);
        if ($tags->length > 0) {
            return $tags->item(0)->nodeValue;
        }
        return false;
    }

    /**
     * Apply various rules to retrieve category data.
     *
     * There is no single way of declaring a category in RSS1/1.1 as there is in RSS2
     * and Atom. Instead the usual approach is to use the dublin core namespace to
     * declare categories. For example delicious use both:
     * <dc:subject>PEAR</dc:subject> and: <taxo:topics><rdf:Bag>
     * <rdf:li resource="http://del.icio.us/tag/PEAR" /></rdf:Bag></taxo:topics>
     * to declare a categorisation of 'PEAR'.
     *
     * This implementation only looks at 'subject' elements.
     *
     * @param    string    $call    for compatibility with our overloading
     * @param   array $arguments - arg 0 is the offset, arg 1 is whether to return as array
     * @return  string|array|false  the category at the offset, every category
     *                              when arg 1 is truthy, or false when fewer
     *                              than offset + 1 categories exist
     */
    protected function getCategory($call, $arguments)
    {
        $categories = $this->model->getElementsByTagName('subject');
        $offset = empty($arguments[0]) ? 0 : $arguments[0];
        $array = empty($arguments[1]) ? false : true;
        if ($categories->length <= $offset) {
            return false;
        }
        if ($array) {
            $list = array();
            foreach ($categories as $category) {
                array_push($list, $category->nodeValue);
            }
            return $list;
        }
        return $categories->item($offset)->nodeValue;
    }

    /**
     * Count occurrences of an element
     *
     * Counts every element named $type found below the model.
     *
     * @param    string    $type    the element we want to get a count of
     * @return    int
     */
    protected function count($type)
    {
        if ($tags = $this->model->getElementsByTagName($type)) {
            return $tags->length;
        }
        return 0;
    }

    /**
     * Part of our xml:base processing code
     *
     * Serialises the attributes of an element for traverseNode(). 'src' and
     * 'href' values are resolved through addBase(), attributes named 'base'
     * are dropped, and every value is entity-escaped so that quotes,
     * ampersands and angle brackets coming from the feed cannot break out of
     * the attribute. The DOM itself is left unmodified.
     *
     * @param   DOMElement $node    The DOM node we are iterating over
     * @return  string  '' or a space followed by name="value" pairs
     */
    public function processXHTMLAttributes($node)
    {
        if (! $node->hasAttributes()) {
            return '';
        }

        $return = '';
        foreach ($node->attributes as $attribute) {
            if ($attribute->name == 'base') {
                continue;
            }
            $value = $attribute->value;
            if ($attribute->name == 'src' or $attribute->name == 'href') {
                $value = $this->addBase($value, $attribute);
            }
            /* DOM strings are UTF-8; name the charset and flags explicitly
             * because htmlentities() defaults differ between PHP versions */
            $return .= $attribute->name . '="'
                . htmlentities($value, ENT_COMPAT, 'UTF-8') . '" ';
        }
        if (! empty($return)) {
            return ' ' . trim($return);
        }
        return '';
    }

    /**
     * Part of our xml:base processing code
     *
     * Recursively rebuilds markup for the given node: elements become an
     * opening tag (with attributes from processXHTMLAttributes()), their
     * serialised children and a closing tag; text nodes become entity-escaped
     * text. Other node types contribute only their children, if any.
     *
     * @param   DOMNode $node    The DOM node we are processing
     * @return   string
     */
    public function traverseNode($node)
    {
        $content = '';

        /* Add the opening of this node to the content */
        if ($node instanceof DOMElement) {
            $content .= '<' . $node->tagName .
                $this->processXHTMLAttributes($node) . '>';
        }

        /* Process children */
        if ($node->hasChildNodes()) {
            foreach ($node->childNodes as $child) {
                $content .= $this->traverseNode($child);
            }
        }

        if ($node instanceof DOMText) {
            /* DOM strings are UTF-8; see processXHTMLAttributes() */
            $content .= htmlentities($node->nodeValue, ENT_COMPAT, 'UTF-8');
        }

        /* Add the closing of this node to the content */
        if ($node instanceof DOMElement) {
            $content .= '</' . $node->tagName . '>';
        }

        return $content;
    }

    /**
     * Get content from RSS feeds (atom has its own implementation)
     *
     * The official way to include full content in an RSS1 entry is to use
     * the content module's element 'encoded', and RSS2 feeds often duplicate that.
     * Often, however, the 'description' element is used instead. We will offer that
     * as a fallback.
     *
     * Text children are returned as their decoded text, element children as
     * their XML serialisation, and any other child node (comment, processing
     * instruction, ...) as serialised by the owning document.
     *
     * @return  string|false    false when neither element is present
     */
    protected function getContent()
    {
        $options = array('encoded', 'description');
        foreach ($options as $element) {
            $test = $this->model->getElementsByTagName($element);
            if ($test->length == 0) {
                continue;
            }

            $node = $test->item(0);
            if (! $node->hasChildNodes()) {
                return $node->nodeValue;
            }

            $value = '';
            foreach ($node->childNodes as $child) {
                if ($child instanceof DOMText) {
                    $value .= $child->nodeValue;
                } else if ($child instanceof DOMElement) {
                    $simple = simplexml_import_dom($child);
                    $value .= $simple->asXML();
                } else {
                    /* simplexml_import_dom() only accepts elements */
                    $value .= $child->ownerDocument->saveXML($child);
                }
            }
            return $value;
        }
        return false;
    }

    /**
     * Checks if this element has a particular child element.
     *
     * @param   string  $name    element name to look for
     * @param   int     $offset  zero-based occurrence that must exist
     * @return  bool    true when more than $offset matching elements exist
     **/
    public function hasKey($name, $offset = 0)
    {
        $search = $this->model->getElementsByTagName($name);
        return $search->length > $offset;
    }

    /**
     * Return an XML serialization of the feed, should it be required. Most
     * users however, will already have a serialization that they used when
     * instantiating the object.
     *
     * @return    string    XML serialization of element
     */
    public function __toString()
    {
        $simple = simplexml_import_dom($this->model);
        return $simple->asXML();
    }

    /**
     * Get directory holding RNG schemas. Method is based on that
     * found in Contact_AddressBook.
     *
     * @return string 'XML_Feed_Parser/schemas' below the PEAR data directory.
     * @access public
     * @static
     */
    public static function getSchemaDir()
    {
        require_once 'PEAR/Config.php';
        $config = new PEAR_Config;
        return $config->get('data_dir') . '/XML_Feed_Parser/schemas';
    }
}
442
 
1527 jp_milcent 443
?>