Subversion Repositories Applications.papyrus

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
770 florian 1
<?php
2
/*
3
 * Project:     MagpieRSS: a simple RSS integration tool
4
 * File:        rss_fetch.inc, a simple functional interface
5
                to fetching and parsing RSS files, via the
6
                function fetch_rss()
7
 * Author:      Kellan Elliott-McCrea <kellan@protest.net>
8
 * License:     GPL
9
 *
10
 * The latest version of MagpieRSS can be obtained from:
11
 * http://magpierss.sourceforge.net
12
 *
13
 * For questions, help, comments, discussion, etc., please join the
14
 * Magpie mailing list:
15
 * magpierss-general@lists.sourceforge.net
16
 *
17
 */
18
 
19
// Setup MAGPIE_DIR for use on hosts that don't include
20
// the current path in include_path.
21
// with thanks to rajiv and smarty
22
if (!defined('DIR_SEP')) {
    define('DIR_SEP', DIRECTORY_SEPARATOR);
}

if (!defined('MAGPIE_DIR')) {
    define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);
}

// Local default: caching is switched off unless the including script has
// already chosen a value.  The guard avoids a "constant already defined"
// diagnostic when the caller defined MAGPIE_CACHE_ON before including
// this file (the caller's value wins either way).
if (!defined('MAGPIE_CACHE_ON')) {
    define('MAGPIE_CACHE_ON', 0);
}

require_once( MAGPIE_DIR . 'rss_parse.inc' );
require_once( MAGPIE_DIR . 'rss_cache.inc' );

// for including 3rd party libraries
// (guarded for the same reason as the constants above)
if (!defined('MAGPIE_EXTLIB')) {
    define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
}
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');
38
 
39
 
40
/*
41
 * CONSTANTS - redefine these in your script to change the
42
 * behaviour of fetch_rss(). Currently, most options affect the cache
43
 *
44
 * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects?
45
 * For me a built in cache was essential to creating a "PHP-like"
46
 * feel to Magpie, see rss_cache.inc for rationale
47
 *
48
 *
49
 * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
50
 * This should be a location that the webserver can write to.   If this
51
 * directory does not already exist Magpie will try to be smart and create
52
 * it.  This will often fail for permissions reasons.
53
 *
54
 *
55
 * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds.
56
 *
57
 *
58
 * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error
59
 * instead of returning stale object?
60
 *
61
 * MAGPIE_DEBUG - Display debugging notices?
62
 *
63
*/
64
 
65
 
66
/*=======================================================================*\
67
    Function: fetch_rss:
68
    Purpose:  return RSS object for the given url
69
              maintain the cache
70
    Input:    url of RSS file
71
    Output:   parsed RSS object (see rss_parse.inc)
72
 
73
    NOTES ON CACHEING:
74
    If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.
75
 
76
    NOTES ON RETRIEVING REMOTE FILES:
77
    If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
78
    return a cached object, and touch the cache object upon receiving a
79
    304.
80
 
81
    NOTES ON FAILED REQUESTS:
82
    If there is an HTTP error while fetching an RSS object, the cached
83
    version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off)
84
\*=======================================================================*/
85
 
86
// Release identifier; init() embeds it in the default User-Agent string.
define('MAGPIE_VERSION', '0.72');

// Most recent error message: written by error(), read through magpie_error().
$MAGPIE_ERROR = '';
89
 
90
/**
 * Fetch and parse the RSS feed at $url, consulting and maintaining the
 * on-disk cache when MAGPIE_CACHE_ON is truthy.
 *
 * @param  string $url  address of the RSS file
 * @return mixed        parsed RSS object (see rss_parse.inc), or false on
 *                      failure; the failure text is available through
 *                      magpie_error()
 */
function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // cache disabled: fetch the file and parse it, nothing else to try
    if ( !MAGPIE_CACHE_ON ) {
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }

        error("Failed to fetch $url and cache is off");
        return false;
    }

    // cache is ON
    // Flow
    // 1. check cache
    // 2. if there is a hit, make sure its fresh
    // 3. if cached obj fails freshness check, fetch remote
    // 4. if remote fails, return stale object, or error

    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if (MAGPIE_DEBUG and $cache->ERROR) {
        debug($cache->ERROR, E_USER_WARNING);
    }

    $cache_status    = '';      // response of check_cache
    $request_headers = array(); // HTTP headers to send with fetch
    $rss             = 0;       // cached RSS object (possibly stale)
    $errormsg        = '';      // errors, if any

    // store parsed XML by desired output encoding
    // as character munging happens at parse time
    $cache_key       = $url . MAGPIE_OUTPUT_ENCODING;

    if (!$cache->ERROR) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $cache_key );
    }

    // Strict comparison on purpose: with a loose ==, an unset status of 0
    // equals any non-numeric string on PHP < 8, so a broken cache would be
    // treated as both HIT and STALE.

    // if object cached, and cache is fresh, return cached obj
    if ( $cache_status === 'HIT' ) {
        $rss = $cache->get( $cache_key );
        if ( isset($rss) and $rss ) {
            // should be cache age
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1) {
                debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get

    // setup headers; each validator is sent whenever it is available
    if ( $cache_status === 'STALE' ) {
        $rss = $cache->get( $cache_key );
        if ( $rss ) {
            if ( !empty($rss->etag) ) {
                $request_headers['If-None-Match'] = $rss->etag;
            }
            if ( !empty($rss->last_modified) ) {
                // the request-side counterpart of Last-Modified
                $request_headers['If-Modified-Since'] = $rss->last_modified;
            }
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if (isset($resp) and $resp) {
        // a 304 is only useful when there is a cached copy to hand back
        if ( $resp->status == '304' and $rss ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1) {
                debug("Got 304 for $url");
            }
            // reset cache on 304 (at minutillo insistent prodding)
            $cache->set($cache_key, $rss);
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            // keep the result separate so a failed parse does not discard
            // the stale copy held in $rss
            $fresh = _response_to_rss( $resp );
            if ( $fresh ) {
                if (MAGPIE_DEBUG > 1) {
                    debug("Fetch successful");
                }
                // add object to cache
                $cache->set( $cache_key, $fresh );
                return $fresh;
            }
            // parse failed: _response_to_rss() has already reported it
        }
        else {
            $errormsg = "Failed to fetch $url ";
            if ( $resp->status == '-100' ) {
                $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
            }
            elseif ( $resp->error ) {
                # compensate for Snoopy's annoying habit of tacking
                # on '\n'
                $http_error = substr($resp->error, 0, -2);
                $errormsg .= "(HTTP Error: $http_error)";
            }
            else {
                $errormsg .=  "(HTTP Response: " . $resp->response_code .')';
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // else fetch failed

    // attempt to return cached object, unless the caller asked for
    // fresh data only
    if ( $rss and !MAGPIE_CACHE_FRESH_ONLY ) {
        if ( MAGPIE_DEBUG ) {
            debug("Returning STALE object for $url");
        }
        return $rss;
    }

    // else we totally failed
    error( $errormsg );

    return false;
} // end fetch_rss()
224
 
225
/*=======================================================================*\
226
    Function:   error
227
    Purpose:    set MAGPIE_ERROR, and trigger error
228
\*=======================================================================*/
229
 
230
/**
 * Record $errormsg in the global $MAGPIE_ERROR (prefixed with
 * "MagpieRSS: ") and raise it through trigger_error().
 *
 * An empty/falsy message is ignored: nothing is stored or triggered.
 *
 * @param string $errormsg  human readable description of the failure
 * @param int    $lvl       E_USER_* level handed to trigger_error()
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    global $MAGPIE_ERROR;

    // The former "append $php_errormsg" step was removed: that variable
    // only exists in the scope where an error was raised, so it could
    // never be set inside this function, and PHP 8 dropped it entirely.
    if ( !$errormsg ) {
        return;
    }

    $errormsg = "MagpieRSS: $errormsg";
    $MAGPIE_ERROR = $errormsg;
    trigger_error( $errormsg, $lvl);
}
243
 
244
// Emit a diagnostic through PHP's error machinery, tagged so it can be
// told apart from real MagpieRSS errors.  $lvl is an E_USER_* level.
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    $tagged = 'MagpieRSS [debug] ' . $debugmsg;
    trigger_error($tagged, $lvl);
}
247
 
248
/*=======================================================================*\
249
    Function:   magpie_error
250
    Purpose:    accessor for the magpie error variable
251
\*=======================================================================*/
252
// Read the last recorded error message; when a non-empty $errormsg is
// passed, store it first and return that new value.
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    if ( !empty($errormsg) ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}
261
 
262
/*=======================================================================*\
263
    Function:   _fetch_remote_file
264
    Purpose:    retrieve an arbitrary remote file
265
    Input:      url of the remote file
266
                headers to send along with the request (optional)
267
    Output:     an HTTP response object (see Snoopy.class.inc)
268
\*=======================================================================*/
269
// Perform the HTTP request for $url with a Snoopy client configured from
// the MAGPIE_* constants and hand back the client, which carries the
// response.  $headers is forwarded as raw request headers only when it
// is an array.
function _fetch_remote_file ($url, $headers = "" ) {
    $snoopy = new Snoopy();

    $snoopy->agent        = MAGPIE_USER_AGENT;
    $snoopy->read_timeout = MAGPIE_FETCH_TIME_OUT;
    $snoopy->use_gzip     = MAGPIE_USE_GZIP;

    $has_extra_headers = is_array($headers);
    if ( $has_extra_headers ) {
        $snoopy->rawheaders = $headers;
    }

    // diagnostics from the fetch are silenced; callers inspect the
    // returned object instead
    @$snoopy->fetch($url);

    return $snoopy;
}
283
 
284
/*=======================================================================*\
285
    Function:   _response_to_rss
286
    Purpose:    parse an HTTP response object into an RSS object
287
    Input:      an HTTP response object (see Snoopy)
288
    Output:     parsed RSS object (see rss_parse)
289
\*=======================================================================*/
290
/**
 * Parse the body of an HTTP response into an RSS object and copy the
 * response's ETag / Last-Modified values onto it.
 *
 * @param  object $resp  HTTP response object; ->results (body) and
 *                       ->headers (list of raw header lines) are read
 * @return mixed         the parsed RSS object, or false when parsing
 *                       failed (the reason is reported through error())
 */
function _response_to_rss ($resp) {
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // parse failure: build the error message and bail out
    if ( !$rss or $rss->ERROR ) {
        $errormsg = "Failed to parse RSS file.";

        if ($rss) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }
        error($errormsg);

        return false;
    }

    // find Etag, and Last-Modified
    foreach ($resp->headers as $h) {
        // Lines without a colon (e.g. the status line) carry no value.
        // Splitting on the first ':' and trimming also accepts headers
        // written without a space after the colon.
        if ( strpos($h, ':') !== false ) {
            list($field, $val) = explode(':', $h, 2);
        }
        else {
            $field = $h;
            $val   = "";
        }

        // Header names are case-insensitive, so compare in lower case.
        // Trimming strips any line terminator still attached to the raw
        // line, which must not be echoed back in a later request header.
        $field = strtolower(trim($field));
        $val   = trim($val);

        if ( $field === 'etag' ) {
            $rss->etag = $val;
        }
        elseif ( $field === 'last-modified' ) {
            $rss->last_modified = $val;
        }
    }

    return $rss;
}
329
 
330
/*=======================================================================*\
331
    Function:   init
332
    Purpose:    setup constants with default values
333
                check for user overrides
334
\*=======================================================================*/
335
// Give every MAGPIE_* option that the including script has not already
// defined its default value.  Runs its body once only; later calls
// return immediately.
function init () {
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    define('MAGPIE_INITALIZED', true);

    // option name => default used when the option is still undefined
    $defaults = array(
        'MAGPIE_CACHE_ON'         => true,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 3600,   // one hour, in seconds
        'MAGPIE_CACHE_FRESH_ONLY' => false,
        'MAGPIE_OUTPUT_ENCODING'  => 'ISO-8859-1',
        'MAGPIE_INPUT_ENCODING'   => null,
        'MAGPIE_DETECT_ENCODING'  => true,
        'MAGPIE_DEBUG'            => 0,
        'MAGPIE_FETCH_TIME_OUT'   => 5,      // 5 second timeout
        'MAGPIE_USE_GZIP'         => true,   // use gzip encoding if supported
    );

    foreach ( $defaults as $option => $fallback ) {
        if ( !defined($option) ) {
            define($option, $fallback);
        }
    }

    // The user agent advertises whether caching is in effect, so it is
    // built after MAGPIE_CACHE_ON has been settled above.
    if ( !defined('MAGPIE_USER_AGENT') ) {
        $suffix = MAGPIE_CACHE_ON ? ')' : '; No cache)';
        define(
            'MAGPIE_USER_AGENT',
            'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net' . $suffix
        );
    }
}
397
 
398
// NOTE: the following code should really be in Snoopy, or at least
399
// somewhere other than rss_fetch!
400
 
401
/*=======================================================================*\
402
    HTTP STATUS CODE PREDICATES
403
    These functions attempt to classify an HTTP status code
404
    based on RFC 2616 and RFC 2518.
405
 
406
    All of them take an HTTP status code as input, and return true or false
407
 
408
    All this code is adapted from LWP's HTTP::Status.
409
\*=======================================================================*/
410
 
411
 
412
/*=======================================================================*\
413
    Function:   is_info
414
    Purpose:    return true if Informational status code
415
\*=======================================================================*/
416
function is_info ($sc) {
    // Informational responses occupy the 1xx range.
    $reaches_floor = (100 <= $sc);
    $under_ceiling = ($sc < 200);

    return $reaches_floor && $under_ceiling;
}
419
 
420
/*=======================================================================*\
421
    Function:   is_success
422
    Purpose:    return true if Successful status code
423
\*=======================================================================*/
424
function is_success ($sc) {
    // Successful responses occupy the 2xx range.
    $reaches_floor = (200 <= $sc);
    $under_ceiling = ($sc < 300);

    return $reaches_floor && $under_ceiling;
}
427
 
428
/*=======================================================================*\
429
    Function:   is_redirect
430
    Purpose:    return true if Redirection status code
431
\*=======================================================================*/
432
function is_redirect ($sc) {
    // Redirection responses occupy the 3xx range.
    $reaches_floor = (300 <= $sc);
    $under_ceiling = ($sc < 400);

    return $reaches_floor && $under_ceiling;
}
435
 
436
/*=======================================================================*\
437
    Function:   is_error
438
    Purpose:    return true if Error status code
439
\*=======================================================================*/
440
function is_error ($sc) {
    // Error responses span both the 4xx and 5xx ranges.
    $reaches_floor = (400 <= $sc);
    $under_ceiling = ($sc < 600);

    return $reaches_floor && $under_ceiling;
}
443
 
444
/*=======================================================================*\
445
    Function:   is_client_error
446
    Purpose:    return true if Error status code, and its a client error
447
\*=======================================================================*/
448
function is_client_error ($sc) {
    // Client errors occupy the 4xx range.
    $reaches_floor = (400 <= $sc);
    $under_ceiling = ($sc < 500);

    return $reaches_floor && $under_ceiling;
}
451
 
452
/*=======================================================================*\
453
    Function:   is_server_error
454
    Purpose:    return true if Error status code, and its a server error
455
\*=======================================================================*/
456
function is_server_error ($sc) {
    // Server errors occupy the 5xx range.
    $reaches_floor = (500 <= $sc);
    $under_ceiling = ($sc < 600);

    return $reaches_floor && $under_ceiling;
}
459
 
460
?>