Subversion Repositories Applications.papyrus

Compare Revisions

Ignore whitespace Rev 769 → Rev 770

/trunk/api/syndication_rss/magpierss/NEWS
New file
0,0 → 1,53
MagpieRSS News
 
MAGPIERSS 0.51 RELEASED
* important bugfix!
* fix "silent failure" when PHP doesn't have zlib
 
FEED ON FEEDS USES MAGPIE
* web-based RSS aggregator built with Magpie
* easy to install, easy to use.
http://minutillo.com/steve/feedonfeeds/
MAGPIERSS 0.5 RELEASED
* supports transparent HTTP gzip content negotiation for reduced bandwidth usage
* quashed some undefined index notices
 
MAGPIERSS 0.46 RELEASED
* minor release, more error handling clean up
* documentation fixes, simpler example
* new trouble shooting guide for installation and usage problems
http://magpierss.sourceforge.net/TROUBLESHOOTING
 
MAGPIE NEWS AS RSS
* releases, bug fixes, related stories in RSS
MAGPIERSS COOKBOOK: SIMPLE PHP RSS HOW TOS
* answers some of the most frequently asked Magpie questions
* feedback, suggestions, requests, recipes welcome
http://magpierss.sourceforge.net/cookbook.html
 
MAGPIERSS 0.4 RELEASED!
* improved error handling, more flexibility for script authors, backwards compatible
* new and better examples! including using MagpieRSS and Smarty
* new Smarty plugin for RSS date parsing
http://smarty.php.net
 
INFINITE PENGUIN NOW SUPPORTS MAGPIE 0.3
* simple, sophisticated RSS viewer
* includes auto-generated javascript ticker from RSS feed
http://www.infinitepenguins.net/rss/
 
TRAUMWIND RELEASES REX BACKEND FOR MAGPIERSS
* drop in support using regex based XML parser
* parses improperly formed XML that chokes expat
http://traumwind.de/blog/magpie/magpie_alike.php
 
MAGPIERSS 0.3 RELEASED!
* Support added for HTTP Conditional GETs.
http://fishbowl.pastiche.org/archives/001132.html
 
MAGPIERSS 0.2!
* Major clean up of the code. Easier to use.
* Simpler install on shared hosts.
* Better documentation and comments.
/trunk/api/syndication_rss/magpierss/README
New file
0,0 → 1,48
NAME
 
MagpieRSS - a simple RSS integration tool
 
SYNOPSIS
 
require_once('rss_fetch.inc');
$url = $_GET['url'];
$rss = fetch_rss( $url );
echo "Channel Title: " . $rss->channel['title'] . "<p>";
echo "<ul>";
foreach ($rss->items as $item) {
$href = $item['link'];
$title = $item['title'];
echo "<li><a href=$href>$title</a></li>";
}
echo "</ul>";
 
DESCRIPTION
 
MagpieRSS is an XML-based RSS parser in PHP. It attempts to be "PHP-like",
and simple to use.
Some features include:
* supports RSS 0.9 - 1.0, with limited RSS 2.0 support
* supports namespaces, and modules, including mod_content and mod_event
* open minded [1]
* simple, functional interface, to object oriented backend parser
* automatic caching of parsed RSS objects makes it easy to integrate
* supports conditional GET with Last-Modified, and ETag
* uses constants for easy override of default behaviour
* heavily commented
 
 
1. By open minded I mean Magpie will accept any tag it finds in good faith that
it was supposed to be here. For strict validation, look elsewhere.
 
 
GETTING STARTED
 
 
COPYRIGHT:
Copyright(c) 2002 kellan@protest.net. All rights reserved.
This software is released under the GNU General Public License.
Please read the disclaimer at the top of the Snoopy.class.inc file.
/trunk/api/syndication_rss/magpierss/rss_utils.inc
New file
0,0 → 1,67
<?php
/*
* Project: MagpieRSS: a simple RSS integration tool
* File: rss_utils.inc, utility methods for working with RSS
* Author: Kellan Elliott-McCrea <kellan@protest.net>
* Version: 0.51
* License: GPL
*
* The latest version of MagpieRSS can be obtained from:
* http://magpierss.sourceforge.net
*
* For questions, help, comments, discussion, etc., please join the
* Magpie mailing list:
* magpierss-general@lists.sourceforge.net
*/
 
 
/*======================================================================*\
    Function:   parse_w3cdtf
    Purpose:    parse a W3CDTF date into unix epoch
    Input:      a date-time string such as 2002-06-01T11:00:00-05:00,
                2002-06-01T11:00:30Z or 2002-06-01T11:00 (seconds and
                timezone designator are optional)
    Output:     seconds since the unix epoch (UTC), or -1 if the string
                does not contain a W3CDTF date-time

    NOTE: http://www.w3.org/TR/NOTE-datetime
\*======================================================================*/

function parse_w3cdtf ( $date_str ) {
    # regex to match w3cdtf; capture groups are:
    #   1 year, 2 month, 3 day, 4 hours, 5 minutes,
    #   6 ":ss" (including the colon), 7 seconds,
    #   8 timezone sign, 9 timezone hours, 10 timezone minutes, 11 "Z"
    $pat = "/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/";

    if ( ! preg_match( $pat, $date_str, $match ) ) {
        return -1;
    }

    # preg_match() drops trailing groups that did not participate in the
    # match, so every optional group has to be checked before it is read
    $seconds = ( isset( $match[7] ) && $match[7] !== '' ) ? (int) $match[7] : 0;

    # calc epoch for the given date assuming GMT
    $epoch = gmmktime(
        (int) $match[4], (int) $match[5], $seconds,
        (int) $match[2], (int) $match[3], (int) $match[1]
    );

    $offset  = 0;
    $is_zulu = ( isset( $match[11] ) && $match[11] == 'Z' );
    $has_tz  = ( isset( $match[8] ) && $match[8] !== '' );

    # zulu time (aka GMT) or no designator at all: nothing to adjust
    if ( ! $is_zulu && $has_tz ) {
        $tz_hour = isset( $match[9] )  ? (int) $match[9]  : 0;
        $tz_min  = isset( $match[10] ) ? (int) $match[10] : 0;

        $offset_secs = ( ( $tz_hour * 60 ) + $tz_min ) * 60;

        # is timezone ahead of GMT? then subtract offset
        if ( $match[8] == '+' ) {
            $offset_secs = $offset_secs * -1;
        }
        $offset = $offset_secs;
    }

    return $epoch + $offset;
}
 
?>
/trunk/api/syndication_rss/magpierss/rss_cache.inc
New file
0,0 → 1,200
<?php
/*
* Project: MagpieRSS: a simple RSS integration tool
* File: rss_cache.inc, a simple, rolling(no GC), cache
* for RSS objects, keyed on URL.
* Author: Kellan Elliott-McCrea <kellan@protest.net>
* Version: 0.51
* License: GPL
*
* The latest version of MagpieRSS can be obtained from:
* http://magpierss.sourceforge.net
*
* For questions, help, comments, discussion, etc., please join the
* Magpie mailing list:
* http://lists.sourceforge.net/lists/listinfo/magpierss-general
*
*/
 
/*
 * RSSCache: a simple file-based cache for parsed RSS objects.
 *
 * Each object is serialized into one file inside BASE_CACHE; the file name
 * is the md5 hash of the URL the feed was fetched from. Freshness is judged
 * from the file's modification time against MAX_AGE. Nothing is ever
 * deleted by this class (no garbage collection).
 */
class RSSCache {
    var $BASE_CACHE = './cache';    // where the cache files are stored
    var $MAX_AGE    = 3600;         // when are files stale, default one hour
    var $ERROR      = "";           // most recent error message ("" if none)

/*=======================================================================*\
    Function:   __construct
    Purpose:    configure the cache and create the cache directory if it
                does not exist yet
    Input:      base - cache directory (empty keeps the default)
                age  - max age in seconds (empty keeps the default)
\*=======================================================================*/
    function __construct ($base='', $age='') {
        if ( $base ) {
            $this->BASE_CACHE = $base;
        }
        if ( $age ) {
            $this->MAX_AGE = $age;
        }

        // attempt to make the cache directory
        if ( ! file_exists( $this->BASE_CACHE ) ) {
            $status = @mkdir( $this->BASE_CACHE, 0755 );

            // if make failed
            if ( ! $status ) {
                $this->error(
                    "Cache couldn't make dir '" . $this->BASE_CACHE . "'."
                );
            }
        }
    }

/*=======================================================================*\
    Function:   RSSCache
    Purpose:    old-style (class-named) constructor, kept so existing
                callers and subclasses that invoke it keep working.
                PHP 8 no longer treats a class-named method as the
                constructor, hence the real work lives in __construct().
\*=======================================================================*/
    function RSSCache ($base='', $age='') {
        $this->__construct( $base, $age );
    }

/*=======================================================================*\
    Function:   set
    Purpose:    add an item to the cache, keyed on url
    Input:      url from which the rss file was fetched, object to store
    Output:     path of the cache file on success, 0 on failure
\*=======================================================================*/
    function set ($url, $rss) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );
        $fp = @fopen( $cache_file, 'w' );

        if ( ! $fp ) {
            $this->error(
                "Cache unable to open file for writing: $cache_file"
            );
            return 0;
        }

        $data = $this->serialize( $rss );
        fwrite( $fp, $data );
        fclose( $fp );

        return $cache_file;
    }

/*=======================================================================*\
    Function:   get
    Purpose:    fetch an item from the cache
    Input:      url from which the rss file was fetched
    Output:     cached object on HIT, 0 on MISS (missing, unreadable or
                empty cache file)
\*=======================================================================*/
    function get ($url) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );

        if ( ! file_exists( $cache_file ) ) {
            $this->debug(
                "Cache doesn't contain: $url (cache file: $cache_file)"
            );
            return 0;
        }

        $fp = @fopen($cache_file, 'r');
        if ( ! $fp ) {
            $this->error(
                "Failed to open cache file for reading: $cache_file"
            );
            return 0;
        }

        // read the whole file, then always release the handle
        $rss = 0;
        $filesize = filesize( $cache_file );
        if ( $filesize ) {
            $data = fread( $fp, $filesize );
            $rss = $this->unserialize( $data );
        }
        fclose( $fp );

        return $rss;
    }

/*=======================================================================*\
    Function:   check_cache
    Purpose:    check a url for membership in the cache
                and whether the object is older than MAX_AGE (ie. STALE)
    Input:      url from which the rss file was fetched
    Output:     'HIT' if cached and current, 'STALE' if cached but older
                than MAX_AGE, 'MISS' if not cached
\*=======================================================================*/
    function check_cache ( $url ) {
        $this->ERROR = "";
        $filename = $this->file_name( $url );

        if ( file_exists( $filename ) ) {
            // find how long ago the file was added to the cache
            // and whether that is longer than MAX_AGE
            $mtime = filemtime( $filename );
            $age = time() - $mtime;
            if ( $this->MAX_AGE > $age ) {
                // object exists and is current
                return 'HIT';
            }
            else {
                // object exists but is old
                return 'STALE';
            }
        }
        else {
            // object does not exist
            return 'MISS';
        }
    }

/*=======================================================================*\
    Function:   cache_age
    Purpose:    report how long ago an item was written to the cache
    Input:      cache key, i.e. the url from which the rss file was fetched
    Output:     age in seconds, or -1 if the item is not cached
\*=======================================================================*/
    function cache_age( $cache_key ) {
        $filename = $this->file_name( $cache_key );
        if ( file_exists( $filename ) ) {
            $mtime = filemtime( $filename );
            $age = time() - $mtime;
            return $age;
        }
        else {
            return -1;
        }
    }

/*=======================================================================*\
    Function:   serialize
    Purpose:    turn an object into the string stored in the cache file
\*=======================================================================*/
    function serialize ( $rss ) {
        return serialize( $rss );
    }

/*=======================================================================*\
    Function:   unserialize
    Purpose:    rebuild an object from the contents of a cache file
    NOTE:       unserialize() must only see trusted data; keep BASE_CACHE
                in a directory that untrusted users cannot write to.
\*=======================================================================*/
    function unserialize ( $data ) {
        return unserialize( $data );
    }

/*=======================================================================*\
    Function:   file_name
    Purpose:    map url to location in cache
    Input:      url from which the rss file was fetched
    Output:     a file name
\*=======================================================================*/
    function file_name ($url) {
        $filename = md5( $url );
        return join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) );
    }

/*=======================================================================*\
    Function:   error
    Purpose:    register error
    Input:      error message, error level used when MAGPIE_DEBUG is on
\*=======================================================================*/
    function error ($errormsg, $lvl=E_USER_WARNING) {
        // append PHP's error message if track_errors enabled
        // NOTE(review): $php_errormsg is local to the scope where the error
        // happened, so this branch presumably never fires from in here.
        if ( isset($php_errormsg) ) {
            $errormsg .= " ($php_errormsg)";
        }
        $this->ERROR = $errormsg;

        // MAGPIE_DEBUG is defined outside this file; treat "not defined"
        // as debugging switched off instead of failing on the constant
        if ( defined('MAGPIE_DEBUG') && MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl);
        }
        else {
            error_log( $errormsg, 0);
        }
    }

/*=======================================================================*\
    Function:   debug
    Purpose:    report a diagnostic message, only when MAGPIE_DEBUG is on
\*=======================================================================*/
    function debug ($debugmsg, $lvl=E_USER_NOTICE) {
        if ( defined('MAGPIE_DEBUG') && MAGPIE_DEBUG ) {
            $this->error("MagpieRSS [debug] $debugmsg", $lvl);
        }
    }

}
 
?>
/trunk/api/syndication_rss/magpierss/htdocs/cookbook.html
New file
0,0 → 1,237
<html>
<head>
<title>Magpie RSS Recipes: Simple PHP RSS How To</title>
<style>
body {
font-family:trebuchet MS, trebuchet, verdana, arial, sans-serif;
font-size: 11px;
}
pre { font-family: "Courier New", monospace;
padding: 1em;
margin: 0.2em 2.5em 0.2em 3em;
background-color: #efeff5;
border: 1px solid #cfcfcf;
white-space: pre;
}
 
</style>
</head>
<body>
<p>
<h1>MagpieRSS Recipes: Cooking with Corbies</h1>
 
<div align="center"><h3><em>"Four and twenty blackbirds baked in a
pie."</em></h3></div>
</p>
<p>
<ol>
<li><a href="#limit">Limit the Number of Headlines(aka Items) Returned</a></li>
<li><a href="#error_message">Display a Custom Error Message if Something Goes
Wrong</a></li>
<li><a href="#write_rss">Generate a New RSS Feed</a></li>
<li><a href="#by_date">Display Headlines More Recent than X Date</a></li>
<li><a href="#from_file">Parse a Local File Containing RSS</a></li>
 
</ol>
</p>
 
<a name="limit"></a><h2>1. Limit the Number of Headlines(aka Items) Returned.</h2>
 
<h3>Problem:</h3>
 
You want to display the 10 (or 3 or whatever) most recent headlines, but the RSS feed
contains 15.
 
<h3>Solution:</h3>
 
<pre>
$num_items = 10;
$rss = fetch_rss($url);
 
$items = array_slice($rss->items, 0, $num_items);
 
foreach ( $items as $item ) {
</pre>
<h3>Discussion:</h3>
 
Rather than trying to limit the number of items Magpie parses, a much simpler,
and more flexible approach is to take a "slice" of the array of items. And
array_slice() is smart enough to do the right thing if the feed has fewer items
than $num_items.
 
<h3>See:</h3> <a href="http://www.php.net/array_slice">http://www.php.net/array_slice</a>
</p>
 
<a name="error_message"></a><h2>2. Display a Custom Error Message if Something Goes Wrong</h2>
 
<h3>Problem:</h3>
 
You don't want Magpie's error messages showing up if something goes wrong.
 
<h3>Solution:</h3>
<pre>
# Magpie throws USER_WARNINGS only
# so you can cloak these, by only showing ERRORs
error_reporting(E_ERROR);
 
# check the return value of fetch_rss()
 
$rss = fetch_rss($url);
 
if ( $rss ) {
...display rss feed...
}
else {
echo "An error occurred! " .
"Consider donating more $$$ for restoration of services." .
"&lt;br&gt;Error Message: " . magpie_error();
}
</pre>
<h3>Discussion:</h3>
 
MagpieRSS triggers a warning in a number of circumstances. The 2 most common
circumstances are: if the specified RSS file isn't properly formed (usually
because it includes illegal HTML), or if Magpie can't download the remote RSS
file, and there is no cached version.
 
If you don't want your users to see these warnings change your error_reporting
settings to only display ERRORs.<br />
Another option is to turn off display_errors,
so that WARNINGs, and NOTICEs still go to the error_log but not to the webpages.
 
You can do this with:
 
<pre>
# you can also do this in your php.ini file
ini_set('display_errors', 0);
</pre>
 
<h3>See:</h3>
<a
href="http://www.php.net/error_reporting">http://www.php.net/error_reporting</a>,<br
/>
<a href="http://www.php.net/ini_set">http://www.php.net/ini_set</a>, <br />
<a
href="http://www.php.net/manual/en/ref.errorfunc.php">http://www.php.net/manual/en/ref.errorfunc.php</a><br
/>
 
<a name="write_rss"></a><h2>3. Generate a New RSS Feed</h2>
 
<h3>Problem:</h3>
 
Create an RSS feed for other people to use.
 
<h3>Solution:</h3>
 
Use Useful Inc's <a href="http://usefulinc.com/rss/rsswriter/">RSSWriter</a>.
 
<h3>Discussion:</h3>
 
An example of turning a Magpie parsed RSS object back into an RSS file is
forthcoming. In the meantime RSSWriter is well documented.
 
<a name="by_date"></a><h2>4. Display Headlines More Recent than X Date</h2>
 
<h3>Problem:</h3>
 
You only want to display headlines that were published on, or after a certain
date.
 
 
<h3>Solution:</h3>
<pre>
require_once('rss_utils.inc');
 
# get all headlines published today
$today = getdate();
 
# today, 12AM
$date = mktime(0,0,0,$today['mon'], $today['mday'], $today['year']);
 
$rss = fetch_rss($url);
 
foreach ( $rss->items as $item ) {
$published = parse_w3cdtf($item['dc']['date']);
if ( $published &gt;= $date ) {
echo "Title: " . $item['title'];
echo "Published: " . date("h:i:s A", $published);
echo "&lt;p&gt;";
}
}
</pre>
<h3>Discussion:</h3>
 
This recipe only works for RSS 1.0 feeds that include the &lt;dc:date&gt; field.
(which is very good RSS style) <br />
<code>parse_w3cdtf()</code> is defined in
<code>rss_utils.inc</code>, and parses RSS style dates into Unix epoch
seconds.
 
<h3>See: </h3>
<a
href="http://www.php.net/manual/en/ref.datetime.php">http://www.php.net/manual/en/ref.datetime.php</a>
 
<a name="from_file"></a>
<h2>5. Parse a Local File Containing RSS</h2>
<h3>Problem:</h3>
MagpieRSS provides <code>fetch_rss()</code> which takes a URL and returns a
parsed RSS object, but what if you want to parse a file stored locally that
doesn't have a URL?
 
<h3>Solution</h3>
<pre>
require_once('rss_parse.inc');
 
$rss_file = 'some_rss_file.rdf';
$rss_string = read_file($rss_file);
$rss = new MagpieRSS( $rss_string );
 
if ( $rss and !$rss->ERROR) {
...display rss...
}
else {
echo "Error: " . $rss->ERROR;
}
 
# efficiently read a file into a string
# in php >= 4.3.0 you can simply use file_get_contents()
#
function read_file($filename) {
$fh = fopen($filename, 'r') or die($php_errormsg);
$rss_string = fread($fh, filesize($filename) );
fclose($fh);
return $rss_string;
}
</pre>
 
<h3>Discussion</h3>
Here we are using MagpieRSS's RSS parser directly without the convenience wrapper
of <code>fetch_rss()</code>. We read the contents of the RSS file into a
string, and pass it to the parser constructor. Notice also that error handling
is subtly different.
 
<h3>See: </h3>
<a
href="http://www.php.net/manual/en/ref.filesystem.php">http://www.php.net/manual/en/ref.filesystem.php</a>,<br
/>
<a
href="http://www.php.net/manual/en/language.oop.php">http://www.php.net/manual/en/language.oop.php</a>
 
<!--
<a name="link"></a><h2>#. Recipe</h2>
<h3>Problem:</h3>
Problem description
<h3>Solution</h3>
<pre>
code
</pre>
<h3>Discussion</h3>
Discuss code
<h3>See: </h3>
Documentation links:
-->
 
</body>
</html>
/trunk/api/syndication_rss/magpierss/htdocs/index.html
New file
0,0 → 1,419
<html>
<head>
<title>Magpie RSS - PHP RSS Parser</title>
<link rel="alternate" type="application/rss+xml" title="RSS"
href="http://laughingmeme.org/magpierss.rdf" />
<style>
body {
font-family:trebuchet MS, trebuchet, verdana, arial, sans-serif;
font-size: 11px;
}
pre { font-family: "Courier New", monospace;
padding: 1em;
margin: 0.2em 2.5em 0.2em 3em;
background-color: #efeff5;
border: 1px solid #cfcfcf;
white-space: pre;
}
 
li.news {
padding-bottom:15px;
}
 
a.nav { color: #FFFFFF; }
div.nav {
width: 2in;
float: right;
border: 2px solid #cfcfcf;
padding: 5px;
background-color: #996699;
}
</style>
</head>
<body>
<img src="magpie-photo.jpg">
<h1>MagpieRSS</h1>
<p>
<h2>MagpieRSS provides an XML-based (expat) RSS parser in PHP.</h2>
<p>
MagpieRSS is compatible with RSS .9 through RSS 1.0, and supports the
RSS 1.0's modules. (with a few exceptions)
<p>
<div class="nav">
<center><h3>Project Info</h3></center>
<ul>
<li><a class="nav"
href="http://sourceforge.net/project/showfiles.php?group_id=55691">Download
Magpie</a></li>
<li><a class="nav"
href="http://sourceforge.net/mail/?group_id=55691">Mailing
Lists</a></li>
<li><a class="nav" href="#news">News!</a></li>
<li><a class="nav" href="#why">Why?</a></li>
<li><a class="nav" href="#features">Features</a></li>
<li><a class="nav" href="#philosophy">Philosophy</a></li>
<li><a class="nav" href="#usage">Usage Examples</a></li>
<li><a class="nav" href="/cookbook.html">Cookbook</a></li>
<li><a class="nav" href="#todo">Todo</a></li>
<li style="list-style: none; padding-top: 5px;"><a title="Keep up on MagpieRSS news via RSS" href="http://laughingmeme.org/magpierss.rdf"><img
src="http://magpierss.sf.net/black_grey_magpie_news.gif" border="0"></a></li>
</ul>
</div>
<a name="news"></a>
<h3>News!</h3>
<ul>
 
<li class="news">
<a
href="http://sourceforge.net/project/showfiles.php?group_id=55691">MagpieRSS
0.51 Released</a>
<ul>
<li> important bugfix!</li>
<li> fix <a href="http://laughingmeme.org/archives/000811.html">"silent failure"</a> when PHP doesn't have zlib</li>
</ul>
 
</li>
 
<li class="news">
<a href="http://minutillo.com/steve/feedonfeeds/">Feed On Feeds Uses Magpie</a>
<ul>
<li> server based PHP RSS aggregator built with MagpieRSS</li>
<li> easy to install, easy to use.</li>
</ul>
 
</li>
 
 
<li class="news">
<a
href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=158897">MagpieRSS
0.5 Released</a>
<ul>
<li> supports transparent HTTP gzip content negotiation for reduced bandwidth usage</li>
<li> quashed some undefined index notices</li>
</ul>
 
</li>
 
 
<li class="news">
<a
href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=139643">MagpieRSS
0.46 Released</a>
<ul>
<li> minor release, more error handling clean up</li>
<li> documentation fixes, simpler example</li>
<li> new <a href="/TROUBLESHOOTING">trouble shooting</a> guide for installation and usage problems</li>
</ul>
 
</li>
 
<li class="news">
<a
href="http://laughingmeme.org/magpierss.rdf">Magpie News as RSS</a>
<ul>
<li> releases, bug fixes, related stories as an RSS feed</li>
</ul>
 
</li>
 
 
<li class="news">
<a
href="http://magpierss.sourceforge.net/cookbook.html">MagpieRSS
Cookbook: Simple PHP RSS How Tos</a>
<ul>
<li> answers some of the most frequently asked Magpie questions</li>
<li> feedback, suggestions, requests, recipes welcome</li>
</ul>
 
</li>
 
<li class="news">
<a href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=134850">MagpieRSS 0.4 Released!</a>
<ul>
<li> improved error handling, more flexibility for script authors,
backwards compatible</li>
<li> new and better examples! including using MagpieRSS and <a
href="http://smarty.php.net">Smarty</a></li>
<li> new Smarty plugin for RSS date parsing</li>
</ul>
<br />
</li>
<!--
<li class="news">
<a href="http://www.infinitepenguins.net/rss/">Infinite Penguin now
supports Magpie 0.3</a>
<ul>
<li> simple, sophisticated RSS viewer</li>
<li> includes auto-generated javascript ticker from RSS feed</li>
</ul>
 
</li>
 
 
<li class="news">
<a
href="http://traumwind.tierpfad.de/blog/magpie/magpie_alike.php">Traumwind
releases REX backend for MagpieRSS</a>
<ul>
<li>drop in support using regex based XML parser</li>
<li>parses improperly formed XML that chokes expat</li>
</ul>
 
</li>
 
<li class="news">
<a
href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=118652">
MagpieRSS 0.3 Released!</a>
<ul>
<li>Support added for
<a href="http://fishbowl.pastiche.org/archives/001132.html">HTTP
Conditional GETs</a>.</li>
<li>See <a href="http://sourceforge.net/project/shownotes.php?group_id=55691&release_id=118652">ChangeLog</a>
for more info.</li>
</ul>
</li>
<li class="news">MagpieRSS 0.2!
<ul>
<li>Major clean up of the code. Easier to use.</li>
<li>Simpler install on shared hosts.</li>
<li>Better documentation and comments.</li>
</ul>
</li>
<li class="news">We've <a href="http://sourceforge.net/projects/magpierss/">moved to
Sourceforge!</a></li>
-->
</ul>
</p>
<p>
<a name="why"></a>
<h3>Why?</h3>
I wrote MagpieRSS out of a frustration with the limitations of existing
solutions. In particular many of the existing PHP solutions seemed to:
<ul>
<li>use a parser based on regular expressions, making for an inherently
fragile solution
<li>only support early versions of RSS
<li>discard all the interesting information besides item title, description,
and link.
<li>not build proper separation between parsing the RSS and displaying it.
</ul>
In particular I failed to find any PHP RSS parsers that could sufficiently
parse RSS 1.0 feeds, to be useful on the RSS based event feeds we generate
at <a href="http://protest.net">Protest.net</a>.
</p>
<p>
<a name="features"></a>
<h3>Features</h3>
 
<ul>
<li class="toplevel">
<h4>Easy to Use</h4>
As simple as:
<pre>
require('rss_fetch.inc');
$rss = fetch_rss($url);
</pre>
 
</li>
<li class="toplevel">
<h4>Parses RSS 0.9 - RSS 1.0</h4>
 
Parses most RSS formats, including support for
<a href="http://www.purl.org/rss/1.0/modules/">1.0 modules</a> and limited
namespace support. RSS is packed into convenient data structures; easy to
use in PHP, and appropriate for passing to a templating system, like
<a href="http://smarty.php.net">Smarty</a>.
</li>
<li>
<h4>Integrated Object Cache</h4>
Caching the parsed RSS means that the 2nd request is fast, and that
including the rss_fetch call in your PHP page won't destroy your performance,
and force you to rely on an external cron job. And it happens transparently.
 
</li>
<li>
<h4>HTTP Conditional GETs</h4>
Save bandwidth and speed up download times with intelligent use of
Last-Modified and ETag.<br /> See <a
href="http://fishbowl.pastiche.org/archives/001132.html">HTTP Conditional Get for RSS Hackers</a>
</li>
<li><h4>Configurable</h4>
Makes extensive use of constants to allow overriding default behaviour, and
installation on shared hosts.
</li>
<li><h4>Modular</h4>
<ul>
<li>rss_fetch.inc - wraps a simple interface (<code>fetch_rss()</code>)
around the library.
<li>rss_parse.inc - provides the RSS parser, and the RSS object
<li>rss_cache.inc - a simple (no GC) object cache, optimized for RSS objects
<li>rss_utils.inc - utility functions for working with RSS. currently
provides <code>parse_w3cdtf()</code>, for parsing <a
href="http://www.w3.org/TR/NOTE-datetime">W3CDTF</a> into epoch seconds.
</ul>
</ul>
 
</p>
<p>
<a name="philosophy"></a>
<h3>Magpie's approach to parsing RSS</h3>
 
Magpie takes a naive, and inclusive approach. Absolutely
non-validating, as long as the RSS feed is well formed, Magpie will
cheerfully parse new, and never before seen tags in your RSS feeds.
</p>
<p>
This makes it very simple to support the varied versions of RSS, but
forces the consumer of a RSS feed to be cognizant of how it is
structured.(at least if you want to do something fancy)
</p>
<p>
Magpie parses a RSS feed into a simple object, with 4 fields:
<code>channel</code>, <code>items</code>, <code>image</code>, and
<code>textinput</code>.
</p>
<p>
<h4>channel</h4>
<code>$rss->channel</code> contains key-value pairs of all tags, without
nested tags, found between the root tag (&lt;rdf:RDF&gt;, or &lt;rss&gt;)
and the end of the document.
</p>
<p>
<h4>items</h4>
<code>$rss->items</code> is an array of associative arrays, each one
describing a single item. An example that looks like:
<pre>
&lt;item rdf:about="http://protest.net/NorthEast/calendrome.cgi?span=event&#38;ID=210257"&gt;
&lt;title&gt;Weekly Peace Vigil&lt;/title&gt;
&lt;link&gt;http://protest.net/NorthEast/calendrome.cgi?span=event&#38;ID=210257&lt;/link&gt;
&lt;description&gt;Wear a white ribbon&lt;/description&gt;
&lt;dc:subject&gt;Peace&lt;/dc:subject&gt;
&lt;ev:startdate&gt;2002-06-01T11:00:00&lt;/ev:startdate&gt;
&lt;ev:location&gt;Northampton, MA&lt;/ev:location&gt;
&lt;ev:enddate&gt;2002-06-01T12:00:00&lt;/ev:enddate&gt;
&lt;ev:type&gt;Protest&lt;/ev:type&gt;
&lt;/item&gt;
</pre><p>
Is parsed, and pushed on the <code>$rss-&gt;items</code> array as:
<p><pre>
array(
title =&gt; 'Weekly Peace Vigil',
link =&gt; 'http://protest.net/NorthEast/calendrome.cgi?span=event&#38;ID=210257',
description =&gt; 'Wear a white ribbon',
dc =&gt; array (
subject =&gt; 'Peace'
),
ev =&gt; array (
startdate =&gt; '2002-06-01T11:00:00',
enddate =&gt; '2002-06-01T12:00:00',
type =&gt; 'Protest',
location =&gt; 'Northampton, MA'
)
);
</pre>
</p>
<p>
<h4>image and textinput</h4>
<code>$rss->image</code> and <code>$rss->textinput</code> are associative arrays
including name-value pairs for anything found between the respective parent
tags.
</p>
<p>
<a name="usage"></a>
<h3>Usage Examples:</h3>
 
A very simple example would be:
<pre>
require_once 'rss_fetch.inc';
 
$url = 'http://magpie.sf.net/samples/imc.1-0.rdf';
$rss = fetch_rss($url);
 
echo "Site: ", $rss->channel['title'], "&lt;br&gt;\n";
foreach ($rss->items as $item ) {
$title = $item['title'];
$url = $item['link'];
echo "&lt;a href=\"$url\"&gt;$title&lt;/a&gt;&lt;br&gt;\n";
}
</pre>
More soon....in the meantime you can check out a
<a href="http://www.infinitepenguins.net/rss/">cool tool built with
MagpieRSS</a>, version 0.1.
</p>
<p>
<a name="todo"></a>
<h3>Todos</h3>
<h4>RSS Parser</h4>
<ul>
<li>Swap in a smarter parser that includes optional
support for validation, and required fields.</li>
<li>Support RSS 2.0 (as much as I'm annoyed by it)</li>
<li>Improve support for modules that rely on attributes</li>
</ul>
<h4>RSS Cache</h4>
<ul>
<li>Light-weight garbage collection
</ul>
<h4>Fetch RSS</h4>
<ul>
<li>Attempt to <a
href="http://diveintomark.org/archives/2002/08/15.html">auto-detect an
RSS feed</a>, given a URL, much like <a
href="http://diveintomark.org/projects/misc/rssfinder.py.txt">rssfinder.py</a> does.
</li>
</ul>
<h4>Misc</h4>
<ul>
<li>More examples</li>
<li>A test suite</li>
<li>RSS generation, perhaps with <a
href="http://usefulinc.com/rss/rsswriter/">RSSwriter</a>?
</li>
</ul>
</p>
<p>
<h3>RSS Resources</h3>
<ul>
<li><a href="http://mnot.net/rss/tutorial/">RSS Tutorial for Content Publishers
and Webmasters</a> is a great place to start.
<li><a href="http://gils.utah.gov/rss/">RSS Workshop: Publish and Syndicate
Your News to the Web</a> is also a good introduction</li>
<li><a href="http://www.disobey.com/amphetadesk/finding_more.html">Finding
More Channels</a> on how to find RSS feeds.
<li>Hammersley's <a href="http://rss.benhammersley.com/">Content Syndication
with XML and RSS</a> is a blog covering RSS current events.
<li><a href="http://groups.yahoo.com/group/rss-dev/">RSS-DEV mailing
list</a> is generally a very helpful, informative space, with the occasional
heated debate
<li><a href="http://feeds.archive.org/validator/">RSS Validator</a>
</ul>
</p>
<h3>License and Contact Info</h3>
Magpie is distributed under the GPL license...
<p>
coded by: kellan (at) protest.net, feedback is always appreciated.
<p>
<a href="http://sourceforge.net"><img
src="http://sourceforge.net/sflogo.php?group_id=55691&amp;type=3"
width="125" height="37" border="0" alt="SourceForge.net Logo"></a>
<img src="http://laughingmeme.org/magpie_views.gif">
</body>
</html>
/trunk/api/syndication_rss/magpierss/AUTHORS
New file
0,0 → 1,0
kellan <kellan@protest.net>
/trunk/api/syndication_rss/magpierss/INSTALL
New file
0,0 → 1,143
REQUIREMENTS
 
MagpieRSS requires a recent PHP 4+ (developed with 4.2.0)
with xml (expat) support.
Optionally:
* PHP5 with libxml2 support.
* cURL for SSL support
* iconv (preferred) or mb_string for expanded character set support
QUICK START
 
Magpie consists of 4 files (rss_fetch.inc, rss_parse.inc, rss_cache.inc,
and rss_utils.inc), and the directory extlib (which contains a modified
version of the Snoopy HTTP client)
Copy these 5 resources to a directory named 'magpierss' in the same
directory as your PHP script.
At the top of your script add the following line:
require_once('magpierss/rss_fetch.inc');
Now you can use the fetch_rss() method:
$rss = fetch_rss($url);
Done. That's it. See README for more details on using MagpieRSS.
 
NEXT STEPS
 
Important: you'll probably want to get the cache directory working in
order to speed up your application, and not abuse the webserver you're
downloading the RSS from.
Optionally you can install MagpieRSS in your PHP include path in order to
make it available server wide.
Lastly you might want to look through the constants in rss_fetch.inc to see if
there is anything you want to override (the defaults are pretty good)
 
For more info, or if you have trouble, see TROUBLESHOOTING
 
SETTING UP CACHING
 
Magpie has built-in transparent caching. With caching Magpie will only
fetch and parse RSS feeds when there is new content. Without this feature
your pages will be slow, and the sites serving the RSS feed will be annoyed
with you.
** Simple and Automatic **
By default Magpie will try to create a cache directory named 'cache' in the
same directory as your PHP script.
** Creating a Local Cache Directory **
Often this will fail, because your webserver doesn't have sufficient
permissions to create the directory.
Exact instructions for how to do this will vary from install to install and
platform to platform. The steps are:
1. Make a directory named 'cache'
2. Give the web server write access to that directory.
An example of how to do this on Debian would be:
1. mkdir /path/to/script/cache
2. chgrp www-data /path/to/script/cache
3. chmod 775 /path/to/script/cache
On other Unixes you'll need to change 'www-data' to what ever user Apache
runs as. (on MacOS X the user would be 'www')
** Cache in /tmp **
Sometimes you won't be able to create a local cache directory. Some reasons
might be:
1. No shell account
2. Insufficient permissions to change ownership of a directory
3. Webserver runs as 'nobody'
In these situations using a cache directory in /tmp can often be a good
option.
The drawback is /tmp is public, so anyone on the box can read the cache
files. Usually RSS feeds are public information, so you'll have to decide
how much of an issue that is.
 
To use /tmp as your cache directory you need to add the following line to
your script:
define('MAGPIE_CACHE_DIR', '/tmp/magpie_cache');
** Global Cache **
 
If you have several applications using Magpie, you can create a single
shared cache directory, either using the /tmp cache, or somewhere else on
the system.
The upside is that you'll distribute fetching and parsing feeds across
several applications.
INSTALLING MAGPIE SERVER WIDE
 
Rather than following the Quickstart instructions, which require you to have
a copy of Magpie per application, alternately you can place it in some
shared location.
** Adding Magpie to Your Include Path **
 
Copy the 5 resources (rss_fetch.inc, rss_parse.inc, rss_cache.inc,
rss_utils.inc, and extlib) to a directory named 'magpierss' in your include
path. Now any PHP file on your system can use Magpie with:
require_once('magpierss/rss_fetch.inc');
 
Different installs have different include paths, and you'll have to figure
out what your include_path is.
From shell you can try:
php -i | grep 'include_path'
 
Alternately you can create a phpinfo.php file which contains:
<?php phpinfo(); ?>
Debian's default is:
/usr/share/php
(though a more ideologically pure location would be /usr/local/share/php)
Apple's default include path is:
/usr/lib/php
While the Entropy PHP build seems to use:
/usr/local/php/lib/php
/trunk/api/syndication_rss/magpierss/cookbook
New file
0,0 → 1,125
MAGPIERSS RECIPES: Cooking with Corbies
 
"Four and twenty blackbirds baked in a pie."
 
1. LIMIT THE NUMBER OF HEADLINES(AKA ITEMS) RETURNED.
 
PROBLEM:
 
You want to display the 10 (or 3) most recent headlines, but the RSS feed
contains 15.
 
SOLUTION:
 
$num_items = 10;
$rss = fetch_rss($url);
 
$items = array_slice($rss->items, 0, $num_items);
 
DISCUSSION:
 
Rather than trying to limit the number of items Magpie parses, a much simpler,
and more flexible approach is to take a "slice" of the array of items. And
array_slice() is smart enough to do the right thing if the feed has fewer items
than $num_items.
 
See: http://www.php.net/array_slice
 
 
2. DISPLAY A CUSTOM ERROR MESSAGE IF SOMETHING GOES WRONG
 
PROBLEM:
 
You don't want Magpie's error messages showing up if something goes wrong.
 
SOLUTION:
 
# Magpie throws USER_WARNINGS only
# so you can cloak these, by only showing ERRORs
error_reporting(E_ERROR);
 
# check the return value of fetch_rss()
 
$rss = fetch_rss($url);
 
if ( $rss ) {
...display rss feed...
}
else {
echo "An error occurred! " .
"Consider donating more $$$ for restoration of services." .
"<br>Error Message: " . magpie_error();
}
 
DISCUSSION:
 
MagpieRSS triggers a warning in a number of circumstances. The 2 most common
circumstances are: if the specified RSS file isn't properly formed (usually
because it includes illegal HTML), or if Magpie can't download the remote RSS
file, and there is no cached version.
 
If you don't want your users to see these warnings change your error_reporting
settings to only display ERRORs. Another option is to turn off display_errors,
so that WARNINGs, and NOTICEs still go to the error_log but not to the webpages.
 
You can do this with:
 
ini_set('display_errors', 0);
 
See: http://www.php.net/error_reporting,
http://www.php.net/ini_set,
http://www.php.net/manual/en/ref.errorfunc.php
 
3. GENERATE A NEW RSS FEED
 
PROBLEM:
 
Create an RSS feed for other people to use.
 
SOLUTION:
 
Use Useful Inc's RSSWriter (http://usefulinc.com/rss/rsswriter/)
 
DISCUSSION:
 
An example of turning a Magpie parsed RSS object back into an RSS file is
forthcoming. In the meantime RSSWriter has great documentation.
 
4. DISPLAY HEADLINES MORE RECENT THAN X DATE
 
PROBLEM:
 
You only want to display headlines that were published on, or after a certain
date.
 
 
SOLUTION:
 
require 'rss_utils.inc';
 
# get all headlines published today
$today = getdate();
 
# today, 12AM
$date = mktime(0,0,0,$today['mon'], $today['mday'], $today['year']);
 
$rss = fetch_rss($url);
 
foreach ( $rss->items as $item ) {
$published = parse_w3cdtf($item['dc']['date']);
if ( $published >= $date ) {
echo "Title: " . $item['title'];
echo "Published: " . date("h:i:s A", $published);
echo "<p>";
}
}
 
DISCUSSION:
 
This recipe only works for RSS 1.0 feeds that include the <dc:date> field.
(which is very good RSS style)
 
parse_w3cdtf is defined in rss_utils.inc, and parses RSS style dates into Unix
epoch seconds.
 
See: http://www.php.net/manual/en/ref.datetime.php
/trunk/api/syndication_rss/magpierss/ChangeLog
New file
0,0 → 1,405
2005-10-28 14:11 kellan
 
* extlib/Snoopy.class.inc: a better solution
 
2005-10-28 11:51 kellan
 
* extlib/Snoopy.class.inc: fix arbtriary code execution
vulnerability when using curl+ssl
 
http://www.sec-consult.com/216.html
 
2005-03-08 10:46 kellan
 
* rss_parse.inc: fix bug w/ atom and date normalization
 
2005-02-09 14:59 kellan
 
* rss_fetch.inc: fix stale cache bug
 
2005-01-28 02:27 kellan
 
* rss_parse.inc: support php w/o array_change_case
 
2005-01-23 20:02 kellan
 
* rss_fetch.inc: fix cache bug introduced by charset encoding
 
2005-01-12 09:14 kellan
 
* rss_cache.inc, rss_fetch.inc: more sanity checks for when things
go wrong
 
2004-12-12 13:44 kellan
 
* INSTALL, rss_cache.inc, rss_utils.inc: detab
 
2004-11-23 20:15 kellan
 
* rss_parse.inc: fix calling iconv instead of mb_convert_encoding
 
2004-11-22 02:11 kellan
 
* CHANGES, ChangeLog, rss_parse.inc, scripts/magpie_debug.php: last
bit of tidying
 
2004-11-22 01:45 kellan
 
* rss_fetch.inc: detab, bump version
 
2004-11-22 01:43 kellan
 
* rss_parse.inc: was filtering too much
 
2004-11-22 00:03 kellan
 
* rss_fetch.inc, rss_parse.inc: cache on $url . $output_encoding
otherwise we can get munged output
 
2004-11-21 23:52 kellan
 
* rss_parse.inc: add WARNING
 
2004-11-21 23:45 kellan
 
* rss_parse.inc: don't set ERROR on notice or warning (rss_fetch
dies on parse errors)
 
2004-11-21 23:44 kellan
 
* rss_fetch.inc: add encoding defines (fix timeout error reporting)
 
2004-11-21 20:21 kellan
 
* rss_parse.inc: incorporate steve's patch
 
2004-11-21 19:26 kellan
 
* rss_parse.inc: remove old debugging functions, totally
arbitrarily. might break stuff. can't really explain why i'm
doing this.
 
2004-10-28 15:52 kellan
 
* rss_parse.inc: fixed '=' instead of '=='
 
2004-10-26 00:48 kellan
 
* rss_parse.inc: chance epoch to timestamp to conform w/ php naming
conventions
 
2004-06-15 12:00 kellan
 
* rss_parse.inc: [no log message]
 
2004-04-26 14:16 kellan
 
* rss_fetch.inc: bump version
 
2004-04-26 12:36 kellan
 
* rss_parse.inc: fix field doubling
 
2004-04-24 17:47 kellan
 
* CHANGES, ChangeLog: updated
 
2004-04-24 17:35 kellan
 
* rss_fetch.inc: bumped version
 
2004-04-24 16:52 kellan
 
* rss_parse.inc: support arbitrary atom content constructs
 
some refactoring
 
2004-04-24 16:15 kellan
 
* rss_parse.inc: support summary content contstruct. add normalize
function
 
2004-03-27 16:29 kellan
 
* extlib/Snoopy.class.inc: accept self-signed certs
 
2004-03-27 12:53 kellan
 
* extlib/Snoopy.class.inc: fixed SSL support * set status * set
error on bad curl
 
(also ripped out big chunks of dead weight (submit_form) which
were getting in my way
 
2004-01-25 02:25 kellan
 
* rss_parse.inc: make RSS 1.0's rdf:about available
 
2004-01-25 02:07 kellan
 
* rss_parse.inc: clean up text, and line formats. add support item
rdf:about
 
2004-01-24 23:40 kellan
 
* CHANGES, ChangeLog: update changes
 
2004-01-24 23:37 kellan
 
* rss_fetch.inc: updated version
 
2004-01-24 23:35 kellan
 
* rss_parse.inc: whitespace
 
2004-01-24 23:23 kellan
 
* extlib/Snoopy.class.inc: support badly formatted http headers
 
2004-01-24 23:20 kellan
 
* rss_parse.inc: added alpha atom parsing support
 
2003-06-25 22:34 kellan
 
* extlib/Snoopy.class.inc: fixed fread 4.3.2 compatibility problems
 
2003-06-13 11:31 kellan
 
* rss_fetch.inc: reset cache on 304
 
2003-06-12 21:37 kellan
 
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc:
bumped up version numbers
 
2003-06-12 21:32 kellan
 
* htdocs/index.html: updated news
 
2003-06-12 21:27 kellan
 
* NEWS: a manual blog :)
 
2003-06-12 21:22 kellan
 
* htdocs/index.html: fully qualified img
 
2003-06-12 21:20 kellan
 
* htdocs/index.html: clean up. added badge.
 
2003-06-12 21:04 kellan
 
* rss_utils.inc: clean up regex
 
2003-06-12 21:02 kellan
 
* rss_cache.inc: suppress some warnings
 
2003-05-30 20:44 kellan
 
* extlib/Snoopy.class.inc: more comments, cleaned up notice
 
2003-05-30 15:14 kellan
 
* extlib/Snoopy.class.inc: don't advertise gzip support if the user
hasn't built php with gzinflate support
 
2003-05-12 22:32 kellan
 
* ChangeLog: changes
 
2003-05-12 22:11 kellan
 
* htdocs/index.html: announce 0.5
 
2003-05-12 21:42 kellan
 
* htdocs/index.html: change
 
2003-05-12 21:39 kellan
 
* rss_fetch.inc: use gzip
 
2003-05-12 21:37 kellan
 
* extlib/Snoopy.class.inc: added support gzip encoded content
negoiation
 
2003-05-12 21:32 kellan
 
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: fixed
typoes
 
2003-04-26 21:44 kellan
 
* rss_parse.inc: fix minor typo
 
2003-04-18 08:19 kellan
 
* htdocs/cookbook.html: updated cookbook to show more code for
limiting items
 
2003-03-03 16:02 kellan
 
* rss_parse.inc, scripts/magpie_slashbox.php: committed (or
adpated) patch from Nicola (www.technick.com) to quell 'Undefined
Indexes' notices
 
2003-03-03 15:59 kellan
 
* rss_fetch.inc: commited patch from nicola (www.technick.com) to
quell 'undefined indexes' notices.
 
* Magpie now automatically includes its version in the
user-agent, & whether cacheing is turned on.
 
2003-02-12 01:22 kellan
 
* CHANGES, ChangeLog: ChangeLog now auto-generated by cvs2cl
 
2003-02-12 00:21 kellan
 
* rss_fetch.inc: better errors, hopefully stomped on pesky notices
 
2003-02-12 00:19 kellan
 
* rss_parse.inc: check to see is xml is supported, if not die
 
also throw better xml errors
 
2003-02-12 00:18 kellan
 
* rss_cache.inc: hopefully cleared up some notices that were being
thrown into the log
 
fixed a debug statement that was being called as an error
 
2003-02-12 00:15 kellan
 
* scripts/: magpie_simple.php, magpie_slashbox.php: moved
magpie_simple to magpie_slashbox, and replaced it with a simpler
demo.
 
2003-02-12 00:02 kellan
 
* INSTALL, README, TROUBLESHOOTING: Improved documentation. Better
install instructions.
 
TROUBLESHOOTING cover common installation and usage problems
 
2003-01-22 14:40 kellan
 
* htdocs/cookbook.html: added cookbook.html
 
2003-01-21 23:47 kellan
 
* cookbook: a magpie cookbook
 
2003-01-20 10:09 kellan
 
* ChangeLog: updated
 
2003-01-20 09:23 kellan
 
* scripts/simple_smarty.php: minor clean up
 
2003-01-20 09:15 kellan
 
* scripts/README: added smarty url
 
2003-01-20 09:14 kellan
 
* magpie_simple.php, htdocs/index.html, scripts/README,
scripts/magpie_debug.php, scripts/magpie_simple.php,
scripts/simple_smarty.php,
scripts/smarty_plugin/modifier.rss_date_parse.php,
scripts/templates/simple.smarty: Added scripts directory for
examples on how to use MagpieRSS
 
magpie_simple - is a simple example magpie_debug - spew all the
information from a parsed RSS feed simple_smary - example of
using magpie with Smarty template system
smarty_plugin/modifier.rss_date_parse.php - support file for the
smarty demo templates/simple.smary - template for the smarty demo
 
2003-01-20 09:11 kellan
 
* rss_fetch.inc, rss_parse.inc: changes to error handling to give
script authors more access to magpie's errors.
 
added method magpie_error() to retrieve global MAGPIE_ERROR
variable for when fetch_rss() returns false
 
2002-10-26 19:02 kellan
 
* htdocs/index.html: putting the website under source control
 
2002-10-26 18:43 kellan
 
* AUTHORS, ChangeLog, INSTALL, README: some documentation to make
it all look official :)
 
2002-10-25 23:04 kellan
 
* magpie_simple.php: quxx
 
2002-10-25 23:04 kellan
 
* rss_parse.inc: added support for textinput and image
 
2002-10-25 19:23 kellan
 
* magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc,
rss_utils.inc: switched to using Snoopy for fetching remote RSS
files.
 
added support for conditional gets
 
2002-10-25 19:22 kellan
 
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc:
Change comment style to slavishly imitate the phpinsider style
found in Smarty and Snoopy :)
 
2002-10-25 19:18 kellan
 
* extlib/Snoopy.class.inc: added Snoopy in order to support
conditional gets
 
2002-10-23 23:19 kellan
 
* magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc:
MAJOR CLEANUP!
 
* rss_fetch got rid of the options array, replaced it with a more
PHP-like solution of using defines. constants are setup, with
defaults, in the function init()
 
got rid of the idiom of passing back an array, its was awkward to
deal with in PHP, and unusual (and consquently confusing to
people). now i return true/false values, and try to setup error
string where appropiate (rss_cache has the most complete example
of this)
 
change the logic for interacting with the cache
 
* rss_cache major re-working of how error are handled. tried to
make the code more resillient. the cache is now much more aware
of MAX_AGE, where before this was being driven out of rss_fetch
(which was silly)
 
* rss_parse properly handles xml parse errors. used to sail
along blithely unaware.
 
2002-09-11 11:11 kellan
 
* rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc,
rss_utils.inc: Initial revision
 
2002-09-11 11:11 kellan
 
* rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc,
rss_utils.inc: initial import
 
/trunk/api/syndication_rss/magpierss/TROUBLESHOOTING
New file
0,0 → 1,152
TROUBLESHOOTING
 
 
Trouble Installing MagpieRSS:
 
1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc'
(include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear')
 
2. Cache couldn't make dir './cache'.
 
3. Fatal error: Failed to load PHP's XML Extension.
http://www.php.net/manual/en/ref.xml.php
 
Trouble Using MagpieRSS
 
4. Warning: MagpieRSS: Failed to fetch example.com/index.rdf.
(HTTP Error: Invalid protocol "")
 
5. Warning: MagpieRSS: Failed to parse RSS file.
(not well-formed (invalid token) at line 19, column 98)
 
6. Warning: MagpieRSS: Failed to fetch http://localhost/rss/features.1-0.rss.
(HTTP Response: HTTP/1.1 404 Not Found)
 
If you would rather provide a custom error, see the COOKBOOK
(http://magpierss.sf.net/cookbook.html) recipe 2.
 
*************************************************************************
1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc'
(include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear')
This could mean that:
a) PHP can't find the MagpieRSS files.
b) PHP found the MagpieRSS files, but can't read them.
a. Telling PHP where to look for MagpieRSS file.
This might mean your PHP program can't find the MagpieRSS libraries.
Magpie relies on 4 include files, rss_fetch.inc, rss_parse.inc,
rss_cache.inc, rss_utils.inc, and for normal use you'll need all 4 (see the
cookbook for exceptions).
 
This can be fixed by making sure the MagpieRSS files are in your include
path.
If you can't edit your include path (for example you're on a shared host) then
you need to replace:
require_once('rss_fetch.inc');
-with-
define('MAGPIE_DIR', '/path/to/magpierss/');
require_once(MAGPIE_DIR.'rss_fetch.inc');
 
b. PHP can't read the MagpieRSS files
All PHP libraries need to be readable by your webserver.
On Unix you can accomplish this with:
chmod 755 rss_fetch.inc rss_parse.inc rss_cache.inc rss_utils.inc
*************************************************************************
2. Cache couldn't make dir './cache'.
 
MagpieRSS caches the results of fetched and parsed RSS to reduce the load on
both your server, and the remote server providing the RSS. It does this by
writing files to a cache directory.
 
This error means the webserver doesn't have write access to the current
directory.
a. Make a webserver writeable cache directory
Find the webserver's group. (on my system it is 'www')
mkdir ./cache
chgrp www directory_name
chmod g+w directory_name
(this is the best, and desired solution)
b. Tell MagpieRSS to create the cache directory somewhere the webserver can
write to.
define('MAGPIE_CACHE_DIR', '/tmp/magpierss');
 
(this is not a great solution, and might have security considerations)
c. Turn off cacheing.
Magpie can work fine without cacheing, but it will be slower, and you might
become a nuisance to the RSS provider, but it is an option.
define('MAGPIE_CACHE_ON', 0);
d. And lastly, do NOT
chmod 777 ./cache
Any of the above solutions are better than this.
 
NOTE: If none of this works for you, let me know. I've got root, and a
custom compiled Apache on almost any box I ever touch, so I can be a little
out of touch with reality. But I won't know that if I don't get feedback.
 
*************************************************************************
3. Fatal error: Failed to load PHP's XML Extension.
http://www.php.net/manual/en/ref.xml.php
 
-or-
Fatal error: Failed to create an instance of PHP's XML parser.
http://www.php.net/manual/en/ref.xml.php
Make sure your PHP was built with --with-xml
This has been turned on by default for several versions of PHP, but it might
be turned off in your build.
See php.net for details on building and configuring PHP.
 
 
*************************************************************************
4. Warning: MagpieRSS: Failed to fetch index.rdf.
(HTTP Error: Invalid protocol "")
You need to put http:// in front of the URL of your RSS feed
 
*************************************************************************
5. Warning: MagpieRSS: Failed to parse RSS file.
(not well-formed (invalid token) at line 19, column 98)
 
There is a problem with the RSS feed you are trying to read.
MagpieRSS is an XML parser, and therefore can't parse RSS feed with invalid
characters. Some RSS parsers are based on regular expressions, and can
parse invalid RSS but they have their own problems.
 
You could try contacting the author of the RSS feed, and pointing them to
the online RSS validator at:
http://feeds.archive.org/validator/
 
*************************************************************************
6. Warning: MagpieRSS: Failed to fetch http://example.com/index.rdf
(HTTP Response: HTTP/1.1 404 Not Found)
 
It's a 404! The RSS file ain't there.
/trunk/api/syndication_rss/magpierss/extlib/Snoopy.class.inc
New file
0,0 → 1,900
<?php
 
/*************************************************
 
Snoopy - the PHP net client
Author: Monte Ohrt <monte@ispi.net>
Copyright (c): 1999-2000 ispi, all rights reserved
Version: 1.0
 
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 
You may contact the author of Snoopy by e-mail at:
monte@ispi.net
 
Or, write to:
Monte Ohrt
CTO, ispi
237 S. 70th suite 220
Lincoln, NE 68510
 
The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.net
 
*************************************************/
 
class Snoopy
{
/**** Public variables ****/
/* user definable vars */
 
// Connection target. fetch() overwrites $host from the URI on every call,
// but only overwrites $port when the URI carries an explicit port, so a
// port set by an earlier fetch stays in effect for later port-less URIs.
var $host = "www.php.net"; // host name we are connecting to
var $port = 80; // port we are connecting to
var $proxy_host = ""; // proxy host to use
var $proxy_port = ""; // proxy port to use
var $agent = "Snoopy v1.0"; // agent we masquerade as
var $referer = ""; // referer info to pass
var $cookies = array(); // array of cookies to pass
// $cookies["username"]="joe";
var $rawheaders = array(); // array of raw headers to send
// $rawheaders["Content-type"]="text/html";
 
// Redirect / frame policy. fetch() follows a redirect only while
// $maxredirs is greater than the current redirect depth, and follows
// frames only while the current frame depth is below $maxframes.
var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
var $lastredirectaddr = ""; // contains address of last redirected address
var $offsiteok = true; // allows redirection off-site
var $maxframes = 0; // frame content depth maximum. 0 = disallow
var $expandlinks = true; // expand links to fully qualified URLs.
// this only applies to fetchlinks()
// or submitlinks()
var $passcookies = true; // pass set cookies back through redirects
// NOTE: this currently does not respect
// dates, domains or paths.
// Credentials. fetch() replaces these with any user:pass found in the URI.
var $user = ""; // user for http authentication
var $pass = ""; // password for http authentication
// http accept types
var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
var $results = ""; // where the content is put
// fetch() sets $error for an unsupported scheme or an unusable cURL binary
var $error = ""; // error messages sent here
var $response_code = ""; // response code returned from server
var $headers = array(); // headers returned from server sent here
var $maxlength = 500000; // max return data length (body)
var $read_timeout = 0; // timeout on read operations, in seconds
// supported only since PHP 4 Beta 4
// set to 0 to disallow timeouts
var $timed_out = false; // if a read operation timed out
var $status = 0; // http request status
// fetch() refuses https URIs unless this is non-empty and is_executable()
var $curl_path = "/usr/bin/curl";
// Snoopy will use cURL for fetching
// SSL content if a full system path to
// the cURL binary is supplied here.
// set to false if you do not have
// cURL installed. See http://curl.haxx.se
// for details on installing cURL.
// Snoopy does *not* use the cURL
// library functions built into php,
// as these functions are not stable
// as of this Snoopy release.
// send Accept-encoding: gzip?
var $use_gzip = true;
/**** Private variables ****/
var $_maxlinelen = 4096; // max line length (headers)
var $_httpmethod = "GET"; // default http request method
var $_httpversion = "HTTP/1.0"; // default http request version
var $_submit_method = "POST"; // default submit method
var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
var $_redirectaddr = false; // will be set if page fetched is a redirect
var $_redirectdepth = 0; // increments on an http redirect
var $_frameurls = array(); // frame src urls
var $_framedepth = 0; // increments on frame depth
// when set, fetch() sends the full URI in the request instead of only the path
var $_isproxy = false; // set if using a proxy server
var $_fp_timeout = 30; // timeout for socket connection
 
/*======================================================================*\
Function: fetch
Purpose: fetch the contents of a web page
(and possibly other protocols in the
future like ftp, nntp, gopher, etc.)
Input: $URI the location of the page to fetch
Output: $this->results the output text from the fetch
\*======================================================================*/
 
function fetch($URI)
{
    // Fetch $URI over http (socket helpers) or https (external cURL binary),
    // then follow any redirect and nested frames recorded on the object.
    //
    // Input:   $URI    absolute URL to fetch; user:pass credentials embedded
    //                  in it replace $this->user / $this->pass
    // Output:  true    once the request has been issued
    //          false   if the scheme is not http/https, _connect() fails, or
    //                  (https) $this->curl_path is not an executable;
    //                  $this->error is set here for the scheme and cURL cases

    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs; treat that the
    // same as a URL with no components instead of indexing into a boolean.
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    // Every component is optional in parse_url() output, so guard each lookup
    // (a scheme-less URL such as "example.com/index.rdf" must reach the
    // "Invalid protocol" branch without raising undefined-index notices).
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
    $host   = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
    $path   = (isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "")
            . (isset($URI_PARTS["query"]) ? "?".$URI_PARTS["query"] : "");

    switch ($scheme)
    {
        case "http":
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            if (!$this->_connect($fp))
                return false;
            // a proxy needs the entire URI; an origin server only the path
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
            break;

        case "https":
            if (!$this->curl_path || !is_executable($this->curl_path)) {
                $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
                return false;
            }
            $this->host = $host;
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            // a proxy needs the entire URI; an origin server only the path
            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
            break;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
            return false;
    }

    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
    {
        // only follow redirect if it's on this site, or offsiteok is true.
        // "On this site" means http or https to exactly this host: the
        // lookahead stops "example.com.evil.org" passing for "example.com".
        $onsite = "~^https?://".preg_quote($this->host, "~")."(?=[:/?#]|$)~i";
        if ($this->offsiteok || preg_match($onsite, $this->_redirectaddr))
        {
            /* follow the redirect */
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // work on a copy and clear the list so the nested fetch() calls
        // collect their own frame urls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl)
        {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
 
 
 
/*======================================================================*\
Private functions
\*======================================================================*/
/*======================================================================*\
Function: _striplinks
Purpose: strip the hyperlinks from an html document
Input: $document document to strip.
Output: $match an array of the links
\*======================================================================*/
 
/**
 * Collect the href targets of the anchor tags found in an HTML document.
 *
 * @param string $document HTML to scan.
 * @return array The non-empty href values: all quoted hrefs first (in document
 *               order), then all unquoted ones. Empty array when nothing matches.
 */
function _striplinks($document)
{
    // ".*?" must stay lazy: with the "s" modifier a greedy ".*" would run from
    // the first <a to the LAST href in the document and yield a single match.
    preg_match_all("'<\s*a\s+.*?href\s*=\s*     # find <a href=
                    ([\"\'])?                   # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                # quote, otherwise match up to next space
                    'isx",$document,$links);

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted values, group 3 holds unquoted values
    $match = array();
    foreach(array($links[2], $links[3]) as $candidates)
    {
        foreach($candidates as $val)
        {
            if(!empty($val))
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
 
/*======================================================================*\
Function: _stripform
Purpose: strip the form elements from an html document
Input: $document document to strip.
Output: $match an array of the links
\*======================================================================*/
 
/**
 * Reduce an HTML document to its form markup.
 *
 * Keeps every FORM, INPUT, SELECT, TEXTAREA and OPTION tag (opening or
 * closing); for OPTION the pattern also keeps the text up to the next
 * option/select tag.
 *
 * @param string $document HTML to scan.
 * @return string The matched fragments joined with CRLF ("" when none match).
 */
function _stripform($document)
{
    $form_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($form_pattern, $document, $found);

    // index 0 holds the full text of each match
    return implode("\r\n", $found[0]);
}
 
/*======================================================================*\
Function: _striptext
Purpose: strip the text from an html document
Input: $document document to strip.
Output: $text the resulting text
\*======================================================================*/
 
/**
 * Reduce an HTML document to plain text.
 *
 * Removes <script> blocks and all tags, collapses whitespace that follows a
 * line break, and decodes a short list of common entities.
 *
 * @param string $document HTML to convert.
 * @return string The resulting text.
 */
function _striptext($document)
{
    // I didn't use preg eval (//e) since that is only available in PHP 4.0.
    // so, list your entities one by one here. I included some of the
    // more common ones.
    // The patterns are applied in order, each on the result of the previous one.
    $search = array("'<script[^>]*?>.*?</script>'si",   // strip out javascript
                    "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
                    "'([\r\n])[\s]+'",                  // strip out white space
                    "'&(quote?|#34);'i",                // replace html entities; the real entity
                                                        // is &quot; ("quote" kept for old input)
                    "'&(amp|#38);'i",
                    "'&(lt|#60);'i",
                    "'&(gt|#62);'i",
                    "'&(nbsp|#160);'i",
                    "'&(iexcl|#161);'i",
                    "'&(cent|#162);'i",
                    "'&(pound|#163);'i",
                    "'&(copy|#169);'i"
                    );
    $replace = array(   "",
                        "",
                        "\\1",
                        "\"",
                        "&",
                        "<",
                        ">",
                        " ",
                        chr(161),
                        chr(162),
                        chr(163),
                        chr(169));

    $text = preg_replace($search,$replace,$document);

    return $text;
}
 
/*======================================================================*\
Function: _expandlinks
Purpose: expand each link into a fully qualified URL
Input: $links the links to qualify
$URI the full URI to get the base from
Output: $expandedLinks the expanded links
\*======================================================================*/
 
/**
 * Expand links into fully qualified URLs.
 *
 * @param string|array $links The link(s) to qualify (handed to preg_replace as-is).
 * @param string       $URI   The full URI the base is derived from.
 * @return string|array The expanded link(s).
 */
function _expandlinks($links,$URI)
{
    // base = URI without its query string ...
    preg_match("/^[^\?]+/",$URI,$match);

    // ... and without a trailing "/name.ext" component
    $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);

    $search = array(    "|^http://".preg_quote($this->host)."|i",   // drop "http://<current host>"
                        // prefix the base onto anything that is not already absolute;
                        // https:// links must be skipped too, otherwise they end up
                        // as "<base>/https://..."
                        "|^(?!https?://)(\/)?(?!mailto:)|i",
                        "|/\./|",                                   // collapse "/./"
                        "|/[^\/]+/\.\./|"                           // collapse "/dir/../"
                    );

    $replace = array(   "",
                        $match."/",
                        "/",
                        "/"
                    );

    $expandedLinks = preg_replace($search,$replace,$links);

    return $expandedLinks;
}
 
/*======================================================================*\
Function: _httprequest
Purpose: go get the http data from the server
Input: $url the url to fetch
$fp the current open file pointer
$URI the full URI
$body body contents to send if any (POST)
Output:
\*======================================================================*/
/**
 * Send one HTTP request over an open socket and read the response.
 *
 * Fills $this->headers, $this->status, $this->response_code, $this->results,
 * $this->_redirectaddr and $this->_frameurls from the reply.
 *
 * @param string   $url          Request target (path, or full URI when proxying).
 * @param resource $fp           The open socket.
 * @param string   $URI          The full URI being fetched.
 * @param string   $http_method  Request method.
 * @param string   $content_type Content type of $body, if any.
 * @param string   $body         Body to send, if any (POST).
 * @return bool false when the read timed out (status is set to -100), true otherwise.
 */
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";

    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if(!empty($this->host) && !isset($this->rawheaders['Host']))
        $headers .= "Host: ".$this->host."\r\n";
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";

    if($this->use_gzip) {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        // (the function name must be a string; a bare word is an undefined constant)
        if ( function_exists('gzinflate') ) {
            $headers .= "Accept-encoding: gzip\r\n";
        }
        else {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support.".
                "  Requesting file(s) without gzip encoding.",
                E_USER_NOTICE);
        }
    }

    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";

    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        reset($this->cookies);
        if ( count($this->cookies) > 0 ) {
            $cookie_headers = 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers,0,-2) . "\r\n";
        }
    }

    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach always starts at the first element, so the raw headers are
        // sent again on follow-up requests (redirects, frames)
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }

    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";

    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    $this->_redirectaddr = false;
    unset($this->headers);

    // content was returned gzip encoded?
    $is_gzipped = false;

    while($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // a blank line ends the header section
        if(preg_match("/^\r?\n$/", $currentHeader) )
            break;

        // if a header begins with Location: or URI:, set the redirect
        if(preg_match("/^(Location:|URI:)/i",$currentHeader))
        {
            // get URL portion of the redirect; header names are case-insensitive,
            // so this match needs /i just like the test above
            preg_match("/^(Location:|URI:)\s*(.*)/i",chop($currentHeader),$matches);
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|",$currentHeader))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        if (preg_match("/Content-Encoding: gzip/i", $currentHeader) ) {
            $is_gzipped = true;
        }

        $this->headers[] = $currentHeader;
    }

    # $results = fread($fp, $this->maxlength);
    $results = "";
    while ( $data = fread($fp, $this->maxlength) ) {
        $results .= $data;
        if (
            strlen($results) > $this->maxlength ) {
            break;
        }
    }

    // gunzip
    if ( $is_gzipped ) {
        // per http://www.php.net/manual/en/function.gzencode.php
        // skip the 10-byte gzip header before inflating
        $results = substr($results, 10);
        $results = gzinflate($results);
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
 
/*======================================================================*\
Function: _httpsrequest
Purpose: go get the https data from the server using curl
Input: $url the url to fetch
$URI the full URI
$body body contents to send if any (POST)
Output:
\*======================================================================*/
/**
 * Fetch an HTTPS document by running the external curl binary.
 *
 * Fills $this->headers, $this->status, $this->response_code, $this->results,
 * $this->_redirectaddr and $this->_frameurls from the reply.
 *
 * @param string $url          Request target (unused by curl beyond defaulting).
 * @param string $URI          The full URI handed to curl.
 * @param string $http_method  Request method (not passed to curl here).
 * @param string $content_type Content type of $body, if any.
 * @param string $body         Body to send, if any (POST).
 * @return bool false when curl fails or no header file can be created, true otherwise.
 */
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";
    // GET ... header not needed for curl
    //$headers[] = $http_method." ".$url." ".$this->_httpversion;
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host))
        $headers[] = "Host: ".$this->host;
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        reset($this->cookies);
        if ( count($this->cookies) > 0 ) {
            $cookie_str = 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers[] = substr($cookie_str,0,-2);
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }
    if(!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    // Every value goes through escapeshellarg() as its own argument, so header,
    // body and URI content can neither break out of the command line nor be
    // mangled by backslash escaping inside hand-written quotes.
    $cmdline_params = "";
    foreach($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

    // tempnam() creates the file itself under an unpredictable name
    $headerfile = tempnam("/tmp", "sno");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    # accept self-signed certs
    $cmdline_params .= " -k";
    exec($this->curl_path." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

    if($return)
    {
        // do not leave the header file behind on failure
        @unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    $results = implode("\r\n",$results);

    $result_headers = file($headerfile);
    // the file is no longer needed once it has been read
    unlink($headerfile);
    if($result_headers === false)
        $result_headers = array();

    $this->_redirectaddr = false;
    unset($this->headers);

    for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
    {
        // if a header begins with Location: or URI:, set the redirect
        if(preg_match("/^(Location:|URI:)/i",$result_headers[$currentHeader]))
        {
            // get URL portion of the redirect (case-insensitive, like the test above)
            preg_match("/^(Location:|URI:)\s*(.*)/i",chop($result_headers[$currentHeader]),$matches);
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
        {
            $this->response_code = $result_headers[$currentHeader];
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match))
            {
                $this->status= $match[1];
            }
        }
        $this->headers[] = $result_headers[$currentHeader];
    }

    // check if there is a a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
 
/*======================================================================*\
Function: setcookies()
Purpose: set cookies for a redirection
\*======================================================================*/
/**
 * Copy the cookies announced by the stored response headers into
 * $this->cookies so they can be sent along with a follow-up request.
 * Only the name=value part of each Set-Cookie header is kept.
 */
function setcookies()
{
    $total = count($this->headers);
    $idx = 0;
    while($idx < $total)
    {
        $found = preg_match("/^set-cookie:[\s]+([^=]+)=([^;]+)/i", $this->headers[$idx], $parts);
        if($found)
            $this->cookies[$parts[1]] = $parts[2];
        $idx++;
    }
}
 
/*======================================================================*\
Function: _check_timeout
Purpose: checks whether timeout has occurred
Input: $fp file pointer
\*======================================================================*/
 
/**
 * Report whether the last read on the socket timed out.
 *
 * @param resource $fp The socket to inspect.
 * @return bool true (and $this->timed_out is set) when a read timeout is
 *              configured and the socket reports it; false otherwise.
 */
function _check_timeout($fp)
{
    // no read timeout configured: nothing to check
    if (!($this->read_timeout > 0))
        return false;

    $socket_info = socket_get_status($fp);
    if (!$socket_info["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
 
/*======================================================================*\
Function: _connect
Purpose: make a socket connection
Input: $fp file pointer
\*======================================================================*/
/**
 * Open the socket for a request, through the proxy when one is configured.
 *
 * @param resource $fp Receives the opened socket (by reference).
 * @return bool true on success; false on failure, with $this->status set to
 *              the socket error number and $this->error to a description.
 */
function _connect(&$fp)
{
    if(!empty($this->proxy_host) && !empty($this->proxy_port))
    {
        $this->_isproxy = true;
        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else
    {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    if($fp = fsockopen(
                $host,
                $port,
                $errno,
                $errstr,
                $this->_fp_timeout
                ))
    {
        // socket connection succeeded

        return true;
    }
    else
    {
        // socket connection failed
        $this->status = $errno;
        // each case needs its break, otherwise execution falls through and the
        // default message overwrites the specific one
        switch($errno)
        {
            case -3:
                $this->error="socket creation failed (-3)";
                break;
            case -4:
                $this->error="dns lookup failure (-4)";
                break;
            case -5:
                $this->error="connection refused or timed out (-5)";
                break;
            default:
                $this->error="connection failed (".$errno.")";
                break;
        }
        return false;
    }
}
/*======================================================================*\
Function: _disconnect
Purpose: disconnect a socket connection
Input: $fp file pointer
\*======================================================================*/
/**
 * Close a socket connection.
 *
 * @param resource $fp The handle to close.
 * @return bool The result of fclose().
 */
function _disconnect($fp)
{
    $closed = fclose($fp);
    return $closed;
}
 
/*======================================================================*\
Function: _prepare_post_body
Purpose: Prepare post body according to encoding type
Input: $formvars - form variables
$formfiles - form upload files
Output: post body
\*======================================================================*/
/**
 * Build the POST body for the configured submit type ($this->_submit_type).
 *
 * For multipart/form-data a fresh boundary is stored in $this->_mime_boundary.
 *
 * @param mixed $formvars  Form variables (name => value, or name => list of values).
 * @param mixed $formfiles Upload files (field name => file name or list of file names).
 * @return string|null The encoded body; null when there is nothing to send.
 */
function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");

    if (count($formvars) == 0 && count($formfiles) == 0)
        return;

    $postdata = "";

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // built by concatenation: "\[" inside a double-quoted string
                        // would put literal backslashes into the field name
                        $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // reads the file as-is and also copes with empty files
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;
                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
}
 
?>
/trunk/api/syndication_rss/magpierss/scripts/README
New file
0,0 → 1,27
Some examples of how to use Magpie:
 
* magpie_simple.php *
Simple example of fetching and parsing an RSS file. Expects to be
called with a query param 'rss_url=http://<some rss file>'
 
* simple_smarty.php *
Similar to magpie_simple, but using the Smarty template engine to do
display. Also demonstrates using rss_utils.inc and a smarty plugin to
parse and display when each RSS item was published.
 
* magpie_debug.php *
Displays all the information available from a parsed feed.
 
* smarty_plugin/modifier.rss_date_parse.php *
A Smarty plugin for parsing RSS style dates. You must include rss_utils.inc
for this plugin to work. It also must be installed in the Smarty plugin
directory, see the Smarty docs for details.
 
* templates/simple.smarty
A Smarty template used by simple_smarty.php which demonstrates
displaying an RSS feed and using the date parse plugin.
The Smarty template engine and documentation on how to use it are available from
http://smarty.php.net
/trunk/api/syndication_rss/magpierss/scripts/smarty_plugin/modifier.rss_date_parse.php
New file
0,0 → 1,31
<?php
 
/*
* Smarty plugin
* -------------------------------------------------------------
* Type: modifier
* Name: rss_date_parse
* Purpose: parse rss date into unix epoch
* Input: string: rss date
* default_date: default date if $rss_date is empty
*
* NOTE!!! parse_w3cdtf provided by MagpieRSS's rss_utils.inc
* this file needs to be included somewhere in your script
* -------------------------------------------------------------
*/
/**
 * Smarty modifier: run an RSS date through parse_w3cdtf().
 *
 * @param string      $rss_date     Date string taken from the feed.
 * @param string|null $default_date Date string used when $rss_date is empty.
 * @return mixed Whatever parse_w3cdtf() returns for the chosen string, or
 *               nothing (null) when both inputs are empty.
 */
function smarty_modifier_rss_date_parse ($rss_date, $default_date=null)
{
    // the feed's own date wins whenever it is not empty
    if ($rss_date != '') {
        return parse_w3cdtf( $rss_date );
    }

    // no usable fallback either
    if (!isset($default_date) || $default_date == '') {
        return;
    }

    return parse_w3cdtf( $default_date );
}
 
 
 
 
?>
/trunk/api/syndication_rss/magpierss/scripts/magpie_debug.php
New file
0,0 → 1,80
<?php

// Debug page: fetches one feed, prints a short listing and dumps the parsed object.
ini_set('display_errors', 1);
ini_set('error_reporting', E_ALL);
define('MAGPIE_OUTPUT_ENCODING', 'UTF-8');
define('MAGPIE_DIR', '../');
define('MAGPIE_DEBUG', 1);

// flush cache quickly for debugging purposes, don't do this on a live site
define('MAGPIE_CACHE_AGE', 10);

require_once(MAGPIE_DIR.'rss_fetch.inc');


if ( isset($_GET['url']) ) {
    $url = $_GET['url'];
}
else {
    $url = 'http://magpierss.sf.net/test.rss';
}


test_library_support();

$rss = fetch_rss( $url );
if ($rss) {
    // feed content and the requested URL are untrusted: escape them before
    // they are written into the page
    echo "<h3>Example Output</h3>";
    echo "Channel: " . htmlspecialchars(isset($rss->channel['title']) ? $rss->channel['title'] : '', ENT_QUOTES) . "<p>";
    echo "<ul>";
    foreach ($rss->items as $item) {
        $href = htmlspecialchars(isset($item['link']) ? $item['link'] : '', ENT_QUOTES);
        $title = htmlspecialchars(isset($item['title']) ? $item['title'] : '', ENT_QUOTES);
        echo "<li><a href=\"$href\">$title</a></li>";
    }
    echo "</ul>";
}
else {
    echo "Error: " . htmlspecialchars(magpie_error(), ENT_QUOTES);
}
?>

<form>
    RSS URL: <input type="text" size="30" name="url" value="<?php echo htmlspecialchars($url, ENT_QUOTES) ?>"><br />
    <input type="submit" value="Parse RSS">
</form>

<h3>Parsed Results (var_dump'ed)</h3>
<pre>
<?php var_dump($rss); ?>
</pre>
 
<?php
 
/**
 * Print which optional PHP features are available.
 *
 * Stops the script when there is no XML parser; the zlib and character-set
 * checks only print a warning.
 */
function test_library_support() {
    if (!function_exists('xml_parser_create')) {
        echo "<b>Error:</b> PHP compiled without XML support (--with-xml), Mapgie won't work without PHP support for XML.<br />\n";
        exit;
    }
    else {
        echo "<b>OK:</b> Found an XML parser. <br />\n";
    }

    if ( ! function_exists('gzinflate') ) {
        echo "<b>Warning:</b> PHP compiled without Zlib support (--with-zlib). No support for GZIP encoding.<br />\n";
    }
    else {
        echo "<b>OK:</b> Support for GZIP encoding.<br />\n";
    }

    // either extension is enough for character-set conversion, so warn only
    // when BOTH are missing
    if ( ! (function_exists('iconv') or function_exists('mb_convert_encoding') ) ) {
        echo "<b>Warning:</b> No support for iconv (--with-iconv) or multi-byte strings (--enable-mbstring)." .
            "No support character set munging.<br />\n";
    }
    else {
        echo "<b>OK:</b> Support for character munging.<br />\n";
    }
}
 
?>
/trunk/api/syndication_rss/magpierss/scripts/simple_smarty.php
New file
0,0 → 1,58
<?php
 
// Define path to Smarty files (don't forget trailing slash)
// and load library.  (you'll want to change this value)
//
// NOTE:  you can also simply add Smarty to your include path
define('SMARTY_DIR', '/home/kellan/projs/magpierss/scripts/Smarty/');
require_once(SMARTY_DIR.'Smarty.class.php');

// define path to Magpie files and load library
// (you'll want to change this value)
//
// NOTE: you can also simply add MagpieRSS to your include path
define('MAGPIE_DIR', '/home/kellan/projs/magpierss/');
require_once(MAGPIE_DIR.'rss_fetch.inc');
require_once(MAGPIE_DIR.'rss_utils.inc');


// optionally show lots of debugging info
# define('MAGPIE_DEBUG', 2);

// optionally flush cache quickly for debugging purposes,
// don't do this on a live site
# define('MAGPIE_CACHE_AGE', 10);

// use cache?  default is yes.  see rss_fetch for other Magpie options
# define('MAGPIE_CACHE_ON', 1)

// setup template object
$smarty = new Smarty;
$smarty->compile_check = true;

// url of an rss file (empty on the first visit, when no query param is sent)
$url = isset($_GET['rss_url']) ? $_GET['rss_url'] : '';


if ( $url ) {
    // assign a variable to smarty for use in the template
    $smarty->assign('rss_url', $url);

    // use MagpieRSS to fetch remote RSS file, and parse it
    $rss = fetch_rss( $url );

    // if fetch_rss returned false, we encountered an error
    if ( !$rss ) {
        $smarty->assign( 'error', magpie_error() );
    }
    $smarty->assign('rss', $rss );

    // only look at the first item when there is a parsed feed that has one
    if ( $rss && !empty($rss->items) ) {
        $item = $rss->items[0];
        $date = parse_w3cdtf( isset($item['dc']['date']) ? $item['dc']['date'] : '' );
        $smarty->assign( 'date', $date );
    }
}

// parse smarty template, and display using the variables we assigned
$smarty->display('simple.smarty');
 
?>
/trunk/api/syndication_rss/magpierss/scripts/templates/simple.smarty
New file
0,0 → 1,46
<html>
<head>
<title>A Simple RSS Box: I'm not a designer</title>
</head>
<body>
 
<form>
<b>RSS File:</b>
<input type="text" name="rss_url" value="{$rss_url}" size="50">
<input type="submit">
</form>
 
<b>Displaying:</b> {$rss_url}
<p>
 
{* if $error display the error
elseif parsed RSS object display the RSS
else solicit user for a URL
*}
 
{if $error }
<b>Error:</b> {$error}
{elseif $rss}
<table border=1>
<tr>
<th colspan=2>
<a href="{$rss->channel.link}">{$rss->channel.title}</a>
</th>
</tr>
{foreach from=$rss->items item=item}
<tr>
<td>
<a href="{$item.link}">{$item.title}</a>
</td>
<td>
{$item.dc.date|rss_date_parse|date_format:"%A, %B %e, %Y"}
</td>
</tr>
{/foreach}
</table>
{else}
Enter the URL of an RSS file to display.
{/if}
 
</body>
</html>
/trunk/api/syndication_rss/magpierss/scripts/magpie_simple.php
New file
0,0 → 1,29
<?php

define('MAGPIE_DIR', '../');
require_once(MAGPIE_DIR.'rss_fetch.inc');

// empty on the first visit, when no query param is sent
$url = isset($_GET['url']) ? $_GET['url'] : '';

if ( $url ) {
    $rss = fetch_rss( $url );

    if ( $rss ) {
        // feed content is untrusted: escape it before writing it into the page
        echo "Channel: " . htmlspecialchars(isset($rss->channel['title']) ? $rss->channel['title'] : '', ENT_QUOTES) . "<p>";
        echo "<ul>";
        foreach ($rss->items as $item) {
            $href = htmlspecialchars(isset($item['link']) ? $item['link'] : '', ENT_QUOTES);
            $title = htmlspecialchars(isset($item['title']) ? $item['title'] : '', ENT_QUOTES);
            echo "<li><a href=\"$href\">$title</a></li>";
        }
        echo "</ul>";
    }
    else {
        // fetch_rss returned false: show the recorded error instead of
        // dereferencing it
        echo "Error: " . htmlspecialchars(magpie_error(), ENT_QUOTES);
    }
}
?>

<form>
    RSS URL: <input type="text" size="30" name="url" value="<?php echo htmlspecialchars($url, ENT_QUOTES) ?>"><br />
    <input type="submit" value="Parse RSS">
</form>
<p>
<h2>Security Note:</h2>
This is a simple <b>example</b> script. If this was a <b>real</b> script we probably wouldn't allow strangers to submit random URLs, and we certainly wouldn't simply echo anything passed in the URL. Additionally, it's a bad idea to leave this example script lying around.
</p>
/trunk/api/syndication_rss/magpierss/scripts/magpie_slashbox.php
New file
0,0 → 1,66
<?php

define('MAGPIE_DIR', '../');
require_once(MAGPIE_DIR.'rss_fetch.inc');

// empty on the first visit, when no query param is sent
$url = isset($_GET['rss_url']) ? $_GET['rss_url'] : '';
$rss = false;

?>

<html>
<body LINK="#999999" VLINK="#000000">

<form>
<input type="text" name="rss_url" size="40" value="<?php echo htmlspecialchars($url, ENT_QUOTES) ?>"><input type="Submit">
</form>

<?php

if ( $url ) {
    echo "displaying: " . htmlspecialchars($url, ENT_QUOTES) . "<p>";
    $rss = fetch_rss( $url );
    if ( $rss ) {
        // slashbox() prints the table itself
        slashbox ($rss);
    }
    else {
        echo "Error: " . htmlspecialchars(magpie_error(), ENT_QUOTES);
    }
}

echo "<pre>";
print_r($rss);
echo "</pre>";
?>

</body>
</html>
 
<?php
 
# just some quick and ugly php to generate html
#
#
/**
 * Print a small HTML table: the channel title as a linked header row,
 * followed by one linked row per item.
 *
 * @param object|false $rss Parsed feed exposing ->channel and ->items;
 *                          nothing is printed when it is empty/false.
 * @return void
 */
function slashbox ($rss) {
    // a failed fetch hands us false: there is nothing to render
    if ( !$rss ) {
        return;
    }

    // get the channel title and link properties off of the rss object;
    // feed content is untrusted, so it is escaped and the href is quoted
    $title = htmlspecialchars(isset($rss->channel['title']) ? $rss->channel['title'] : '', ENT_QUOTES);
    $link = htmlspecialchars(isset($rss->channel['link']) ? $rss->channel['link'] : '', ENT_QUOTES);

    echo "<table cellpadding=2 cellspacing=0><tr>";
    echo "<td bgcolor=#006666>";
    echo "<a href=\"$link\"><font color=#FFFFFF><b>$title</b></font></a>";
    echo "</td></tr>";

    # foreach over each item in the array.
    # displaying simple links
    #
    # we could be doing all sorts of neat things with the dublin core
    # info, or the event info, or what not, but keeping it simple for now.
    #
    foreach ($rss->items as $item ) {
        $item_link = htmlspecialchars(isset($item['link']) ? $item['link'] : '', ENT_QUOTES);
        $item_title = htmlspecialchars(isset($item['title']) ? $item['title'] : '', ENT_QUOTES);
        echo "<tr><td bgcolor=#cccccc>";
        echo "<a href=\"$item_link\">";
        echo $item_title;
        echo "</a></td></tr>";
    }

    echo "</table>";
}
 
?>
/trunk/api/syndication_rss/magpierss/CHANGES
New file
0,0 → 1,41
Version 0.72
-----------
- fix security exploit: http://www.sec-consult.com/216.html
Version 0.7
-----------
- support for input and output charset encoding
based on the work in FoF, uses iconv or mbstring if available
-
 
Version 0.6
-----------
- basic support for Atom syndication format
including support for Atom content constructs
- fixed support for private feeds (HTTP Auth and SSL)
(thanks to silverorange.com for providing test feeds)
- support for some broken webservers
 
Version 0.52
-----------
- support GZIP content negotiation
- PHP 4.3.2 support
 
Version 0.4
-----------
- improved error handling, better access for script authors
- included example scripts of working with MagpieRSS
- new Smarty plugin for RSS date parsing
 
Version 0.3
-----------
- added support for conditional gets (Last-Modified, ETag)
- now use Snoopy to handle fetching RSS files
Version 0.2
-----------
- MAJOR CLEAN UP
- removed kludgy $options array in favour of constants
- phased out returning arrays
- added better error handling
- re-worked comments
/trunk/api/syndication_rss/magpierss/rss_fetch.inc
New file
0,0 → 1,458
<?php
/*
* Project: MagpieRSS: a simple RSS integration tool
* File: rss_fetch.inc, a simple functional interface
to fetching and parsing RSS files, via the
function fetch_rss()
* Author: Kellan Elliott-McCrea <kellan@protest.net>
* License: GPL
*
* The latest version of MagpieRSS can be obtained from:
* http://magpierss.sourceforge.net
*
* For questions, help, comments, discussion, etc., please join the
* Magpie mailing list:
* magpierss-general@lists.sourceforge.net
*
*/
// Setup MAGPIE_DIR for use on hosts that don't include
// the current path in include_path.
// with thanks to rajiv and smarty
// DIR_SEP: shorthand for DIRECTORY_SEPARATOR, unless the including script
// has already defined it.
if (!defined('DIR_SEP')) {
define('DIR_SEP', DIRECTORY_SEPARATOR);
}

// MAGPIE_DIR: defaults to the directory holding this file (with a trailing
// separator), unless the including script has already defined it.
if (!defined('MAGPIE_DIR')) {
define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);
}

// load the rest of Magpie relative to MAGPIE_DIR
require_once( MAGPIE_DIR . 'rss_parse.inc' );
require_once( MAGPIE_DIR . 'rss_cache.inc' );

// for including 3rd party libraries
// (MAGPIE_EXTLIB is defined unconditionally, unlike the two constants above)
define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');
 
 
/*
* CONSTANTS - redefine these in your script to change the
* behaviour of fetch_rss(); currently, most options affect the cache
*
* MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects?
* For me a built in cache was essential to creating a "PHP-like"
* feel to Magpie, see rss_cache.inc for rationale
*
*
* MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
* This should be a location that the webserver can write to. If this
* directory does not already exist Magpie will try to be smart and create
* it. This will often fail for permissions reasons.
*
*
* MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds.
*
*
* MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error
* instead of returning stale object?
*
* MAGPIE_DEBUG - Display debugging notices?
*
*/
 
 
/*=======================================================================*\
Function: fetch_rss:
Purpose: return RSS object for the given url
maintain the cache
Input: url of RSS file
Output: parsed RSS object (see rss_parse.inc)
 
NOTES ON CACHEING:
If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.
NOTES ON RETRIEVING REMOTE FILES:
If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
return a cached object, and touch the cache object upon receiving a
304.
NOTES ON FAILED REQUESTS:
If there is an HTTP error while fetching an RSS object, the cached
version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off)
\*=======================================================================*/
 
// version string of this MagpieRSS release
define('MAGPIE_VERSION', '0.72');

// last error message recorded by error(); read it through magpie_error()
$MAGPIE_ERROR = "";
 
/**
 * Return the parsed RSS object for a URL, maintaining the cache on the way.
 *
 * @param string $url URL of the RSS file.
 * @return object|false The parsed RSS object, or false on failure (the
 *                      message is then available through magpie_error()).
 */
function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // if cache is disabled
    if ( !MAGPIE_CACHE_ON ) {
        // fetch file, and parse it
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }
        else {
            error("Failed to fetch $url and cache is off");
            return false;
        }
    }
    // else cache is ON
    else {
        // Flow
        // 1. check cache
        // 2. if there is a hit, make sure its fresh
        // 3. if cached obj fails freshness check, fetch remote
        // 4. if remote fails, return stale object, or error

        $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

        if (MAGPIE_DEBUG and $cache->ERROR) {
            debug($cache->ERROR, E_USER_WARNING);
        }


        $cache_status    = 0;       // response of check_cache
        $request_headers = array(); // HTTP headers to send with fetch
        $rss             = 0;       // parsed RSS object
        $errormsg        = 0;       // errors, if any

        // store parsed XML by desired output encoding
        // as character munging happens at parse time
        $cache_key       = $url . MAGPIE_OUTPUT_ENCODING;

        if (!$cache->ERROR) {
            // return cache HIT, MISS, or STALE
            $cache_status = $cache->check_cache( $cache_key);
        }

        // if object cached, and cache is fresh, return cached obj
        // (strict comparison: before PHP 8 the initial 0 compared loosely
        //  equal to 'HIT' and 'STALE')
        if ( $cache_status === 'HIT' ) {
            $rss = $cache->get( $cache_key );
            if ( isset($rss) and $rss ) {
                // should be cache age
                $rss->from_cache = 1;
                if ( MAGPIE_DEBUG > 1) {
                    debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
                }
                return $rss;
            }
        }

        // else attempt a conditional get

        // setup headers
        if ( $cache_status === 'STALE' ) {
            $rss = $cache->get( $cache_key );
            if ( $rss and $rss->etag and $rss->last_modified ) {
                $request_headers['If-None-Match'] = $rss->etag;
                // the conditional-GET date header is If-Modified-Since
                $request_headers['If-Modified-Since'] = $rss->last_modified;
            }
        }

        $resp = _fetch_remote_file( $url, $request_headers );

        if (isset($resp) and $resp) {
            if ($resp->status == '304' ) {
                // we have the most current copy
                if ( MAGPIE_DEBUG > 1) {
                    debug("Got 304 for $url");
                }
                // reset cache on 304 (at minutillo insistent prodding)
                $cache->set($cache_key, $rss);
                return $rss;
            }
            elseif ( is_success( $resp->status ) ) {
                $rss = _response_to_rss( $resp );
                if ( $rss ) {
                    if (MAGPIE_DEBUG > 1) {
                        debug("Fetch successful");
                    }
                    // add object to cache
                    $cache->set( $cache_key, $rss );
                    return $rss;
                }
            }
            else {
                $errormsg = "Failed to fetch $url ";
                if ( $resp->status == '-100' ) {
                    $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
                }
                elseif ( $resp->error ) {
                    # compensate for Snoopy's annoying habit of tacking
                    # on '\n'; only strip what is really there (whitespace or a
                    # literal backslash-n), since some messages have no such
                    # suffix and must not lose their last two characters
                    $http_error = preg_replace('/(?:\s|\\\\n)+$/', '', $resp->error);
                    $errormsg .= "(HTTP Error: $http_error)";
                }
                else {
                    $errormsg .=  "(HTTP Response: " . $resp->response_code .')';
                }
            }
        }
        else {
            $errormsg = "Unable to retrieve RSS file for unknown reasons.";
        }

        // else fetch failed

        // attempt to return cached object
        if ($rss) {
            if ( MAGPIE_DEBUG ) {
                debug("Returning STALE object for $url");
            }
            return $rss;
        }

        // else we totally failed
        error( $errormsg );

        return false;

    } // end if ( !MAGPIE_CACHE_ON ) {
} // end fetch_rss()
 
/*=======================================================================*\
Function: error
Purpose: set MAGPIE_ERROR, and trigger error
\*=======================================================================*/
 
/**
 * Record an error message in the global $MAGPIE_ERROR and raise it as a PHP
 * error. An empty message is ignored.
 *
 * @param string $errormsg The message (gets a "MagpieRSS: " prefix).
 * @param int    $lvl      Error level handed to trigger_error().
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    global $MAGPIE_ERROR;

    // append PHP's own message when $php_errormsg is set in this scope
    if ( isset($php_errormsg) ) {
        $errormsg = $errormsg . " ($php_errormsg)";
    }

    // nothing to report
    if ( !$errormsg ) {
        return;
    }

    $MAGPIE_ERROR = "MagpieRSS: $errormsg";
    trigger_error( $MAGPIE_ERROR, $lvl );
}
 
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    // Raise the message through PHP's error system, tagged as Magpie debug output.
    $line = 'MagpieRSS [debug] ' . $debugmsg;
    trigger_error( $line, $lvl );
}
/*=======================================================================*\
Function: magpie_error
Purpose: accessor for the magpie error variable
\*=======================================================================*/
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    // A non-empty argument overwrites the stored error; an empty one
    // ("", "0", 0, null, false) turns the call into a plain read.
    if ( !empty($errormsg) ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}
 
/*=======================================================================*\
Function: _fetch_remote_file
Purpose: retrieve an arbitrary remote file
Input: url of the remote file
headers to send along with the request (optional)
Output: an HTTP response object (see Snoopy.class.inc)
\*=======================================================================*/
function _fetch_remote_file ($url, $headers = "" ) {
    // Snoopy is the HTTP client; the configured instance doubles as the response object.
    $snoopy = new Snoopy();

    // Extra request headers are only honoured when supplied as an array.
    if ( is_array($headers) ) {
        $snoopy->rawheaders = $headers;
    }

    $snoopy->use_gzip     = MAGPIE_USE_GZIP;
    $snoopy->read_timeout = MAGPIE_FETCH_TIME_OUT;
    $snoopy->agent        = MAGPIE_USER_AGENT;

    // PHP-level warnings from the fetch are suppressed; the caller inspects
    // the status/error fields on the returned client instead.
    @$snoopy->fetch($url);

    return $snoopy;
}
 
/*=======================================================================*\
Function: _response_to_rss
Purpose: parse an HTTP response object into an RSS object
Input: an HTTP response object (see Snoopy)
Output: parsed RSS object (see rss_parse)
\*=======================================================================*/
function _response_to_rss ($resp) {
    // Parse the response body; the encoding behaviour comes from the MAGPIE_* constants.
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // Parse failed: report through error() and signal failure with false.
    if ( !$rss or $rss->ERROR ) {
        $errormsg = "Failed to parse RSS file.";
        if ($rss) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }
        error($errormsg);
        return false;
    }

    // Parse succeeded: copy the cache validators (ETag, Last-Modified) from
    // the response headers onto the RSS object.
    foreach ($resp->headers as $h) {
        // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
        if (strpos($h, ": ")) {
            list($field, $val) = explode(": ", $h, 2);
        }
        else {
            $field = $h;
            $val = "";
        }

        // HTTP field names are case-insensitive, so "Etag:" or "etag:"
        // must be recognised as well as "ETag:".
        $field = strtolower($field);

        if ( $field == 'etag' ) {
            $rss->etag = $val;
        }
        if ( $field == 'last-modified' ) {
            $rss->last_modified = $val;
        }
    }

    return $rss;
}
 
/*=======================================================================*\
Function: init
Purpose: setup constants with default values
check for user overrides
\*=======================================================================*/
function init () {
    // Only ever run once; the marker constant records that defaults are in place.
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    define('MAGPIE_INITALIZED', true);

    // Plain defaults: each applies only when the user has not defined the constant.
    $defaults = array(
        'MAGPIE_CACHE_ON'         => true,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 60*60,        // one hour
        'MAGPIE_CACHE_FRESH_ONLY' => false,
        'MAGPIE_OUTPUT_ENCODING'  => 'ISO-8859-1',
        'MAGPIE_INPUT_ENCODING'   => null,
        'MAGPIE_DETECT_ENCODING'  => true,
        'MAGPIE_DEBUG'            => 0,
    );
    foreach ( $defaults as $name => $value ) {
        if ( !defined($name) ) {
            define($name, $value);
        }
    }

    // The default user agent advertises whether caching is switched off.
    if ( !defined('MAGPIE_USER_AGENT') ) {
        $suffix = MAGPIE_CACHE_ON ? ')' : '; No cache)';
        define('MAGPIE_USER_AGENT',
            'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net' . $suffix);
    }

    if ( !defined('MAGPIE_FETCH_TIME_OUT') ) {
        define('MAGPIE_FETCH_TIME_OUT', 5); // 5 second timeout
    }

    // use gzip encoding to fetch rss files if supported?
    if ( !defined('MAGPIE_USE_GZIP') ) {
        define('MAGPIE_USE_GZIP', true);
    }
}
 
// NOTE: the following code should really be in Snoopy, or at least
// somewhere other than rss_fetch!
 
/*=======================================================================*\
HTTP STATUS CODE PREDICATES
These functions attempt to classify an HTTP status code
based on RFC 2616 and RFC 2518.
All of them take an HTTP status code as input, and return true or false
 
All this code is adapted from LWP's HTTP::Status.
\*=======================================================================*/
 
 
/*=======================================================================*\
Function: is_info
Purpose: return true if Informational status code
\*=======================================================================*/
function is_info ($sc) {
    // 1xx: at or above 100 and strictly below 200
    $reached_floor = ( $sc >= 100 );
    $under_ceiling = ( $sc < 200 );
    return $reached_floor && $under_ceiling;
}
 
/*=======================================================================*\
Function: is_success
Purpose: return true if Successful status code
\*=======================================================================*/
function is_success ($sc) {
    // 2xx: at or above 200 and strictly below 300
    $reached_floor = ( $sc >= 200 );
    $under_ceiling = ( $sc < 300 );
    return $reached_floor && $under_ceiling;
}
 
/*=======================================================================*\
Function: is_redirect
Purpose: return true if Redirection status code
\*=======================================================================*/
function is_redirect ($sc) {
    // 3xx: at or above 300 and strictly below 400
    $reached_floor = ( $sc >= 300 );
    $under_ceiling = ( $sc < 400 );
    return $reached_floor && $under_ceiling;
}
 
/*=======================================================================*\
Function: is_error
Purpose: return true if Error status code
\*=======================================================================*/
function is_error ($sc) {
    // 4xx and 5xx together: at or above 400 and strictly below 600
    $reached_floor = ( $sc >= 400 );
    $under_ceiling = ( $sc < 600 );
    return $reached_floor && $under_ceiling;
}
 
/*=======================================================================*\
Function: is_client_error
Purpose: return true if Error status code, and its a client error
\*=======================================================================*/
function is_client_error ($sc) {
    // 4xx: at or above 400 and strictly below 500
    $reached_floor = ( $sc >= 400 );
    $under_ceiling = ( $sc < 500 );
    return $reached_floor && $under_ceiling;
}
 
/*=======================================================================*\
Function: is_server_error
Purpose: return true if Error status code, and its a server error
\*=======================================================================*/
function is_server_error ($sc) {
    // 5xx: at or above 500 and strictly below 600
    $reached_floor = ( $sc >= 500 );
    $under_ceiling = ( $sc < 600 );
    return $reached_floor && $under_ceiling;
}
 
?>
/trunk/api/syndication_rss/magpierss/rss_parse.inc
New file
0,0 → 1,605
<?php
 
/**
* Project: MagpieRSS: a simple RSS integration tool
* File: rss_parse.inc - parse an RSS or Atom feed
* return as a simple object.
*
* Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3
*
* The latest version of MagpieRSS can be obtained from:
* http://magpierss.sourceforge.net
*
* For questions, help, comments, discussion, etc., please join the
* Magpie mailing list:
* magpierss-general@lists.sourceforge.net
*
* @author Kellan Elliott-McCrea <kellan@protest.net>
* @version 0.7a
* @license GPL
*
*/
 
// Feed-type markers; MagpieRSS stores one of these in $feed_type once the
// root element of the feed has been seen.
define('RSS', 'RSS');
define('ATOM', 'Atom');

// MAGPIE_DIR must already be defined by the including script.
// NOTE(review): normalize() calls parse_w3cdtf(), presumably provided by
// rss_utils.inc - confirm.
require_once (MAGPIE_DIR . 'rss_utils.inc');
 
/**
* Hybrid parser, and object, takes RSS as a string and returns a simple object.
*
* see: rss_fetch.inc for a simpler interface with integrated caching support
*
*/
class MagpieRSS {
var $parser;
var $current_item = array(); // item currently being parsed
var $items = array(); // collection of parsed items
var $channel = array(); // hash of channel fields
var $textinput = array();
var $image = array();
var $feed_type;
var $feed_version;
var $encoding = ''; // output encoding of parsed rss
var $_source_encoding = ''; // only set if we have to parse xml prolog
var $ERROR = "";
var $WARNING = "";
// define some constants
var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');
var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1');
 
// parser variables, useless if you're not a parser, treat as private
var $stack = array(); // parser stack
var $inchannel = false;
var $initem = false;
var $incontent = false; // if in Atom <content mode="xml"> field
var $intextinput = false;
var $inimage = false;
var $current_namespace = false;
 
/**
* Set up XML parser, parse source, and return populated RSS object..
*
* @param string $source string containing the RSS to be parsed
*
* NOTE: Probably a good idea to leave the encoding options alone unless
* you know what you're doing as PHP's character set support is
* a little weird.
*
* NOTE: A lot of this is unnecessary but harmless with PHP5
*
*
* @param string $output_encoding output the parsed RSS in this character
* set defaults to ISO-8859-1 as this is PHP's
* default.
*
* NOTE: might be changed to UTF-8 in future
* versions.
*
* @param string $input_encoding the character set of the incoming RSS source.
* Leave blank and Magpie will try to figure it
* out.
*
*
* @param bool $detect_encoding if false Magpie won't attempt to detect
* source encoding. (caveat emptor)
*
*/
function __construct ($source, $output_encoding='ISO-8859-1',
                      $input_encoding=null, $detect_encoding=true)
{
    // PHP 8 no longer treats a method named after the class as the
    // constructor, so the real work lives in __construct(); the old-style
    // MagpieRSS() method below is kept as a thin wrapper for PHP 4 and for
    // code that calls it explicitly.

    # if PHP xml isn't compiled in, there is nothing we can do
    #
    if (!function_exists('xml_parser_create')) {
        $this->error( "Failed to load PHP's XML Extension. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                      E_USER_ERROR );
        // error() only halts when MAGPIE_DEBUG is on; stop here so we do
        // not go on to call undefined xml_* functions. $this->ERROR is set.
        return;
    }

    list($parser, $source) = $this->create_parser($source,
        $output_encoding, $input_encoding, $detect_encoding);

    // xml_parser_create() yields a resource before PHP 8 and an XMLParser
    // object from PHP 8 on; accept both.
    if (!is_resource($parser) and !is_object($parser)) {
        $this->error( "Failed to create an instance of PHP's XML parser. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                      E_USER_ERROR );
        // same reasoning as above: never hand an invalid parser to xml_*
        return;
    }

    $this->parser = $parser;

    # pass in parser, and a reference to this object
    # setup handlers
    #
    xml_set_object( $this->parser, $this );
    xml_set_element_handler($this->parser,
        'feed_start_element', 'feed_end_element' );
    xml_set_character_data_handler( $this->parser, 'feed_cdata' );

    $status = xml_parse( $this->parser, $source );

    // on failure record "<message> at line L, column C" in $this->ERROR
    if (! $status ) {
        $errorcode = xml_get_error_code( $this->parser );
        if ( $errorcode != XML_ERROR_NONE ) {
            $xml_error = xml_error_string( $errorcode );
            $error_line = xml_get_current_line_number($this->parser);
            $error_col = xml_get_current_column_number($this->parser);
            $errormsg = "$xml_error at line $error_line, column $error_col";

            $this->error( $errormsg );
        }
    }

    xml_parser_free( $this->parser );

    // mirror RSS fields onto Atom names and vice versa
    $this->normalize();
}

/**
 * Old-style (PHP 4) constructor, kept for backwards compatibility.
 * Takes the same arguments as __construct() and simply delegates to it.
 */
function MagpieRSS ($source, $output_encoding='ISO-8859-1',
                    $input_encoding=null, $detect_encoding=true)
{
    $this->__construct($source, $output_encoding, $input_encoding, $detect_encoding);
}
/**
 * XML parser callback for an opening tag.
 *
 * Identifies the feed type from the root element, tracks which container
 * (channel, item, textinput, image, Atom content construct) is currently
 * open, and pushes ordinary elements onto the parser stack.
 *
 * @param mixed  $p        the XML parser (unused)
 * @param string $element  tag name as reported by the parser
 * @param array  $attrs    tag attributes; keys are lower-cased in place
 */
function feed_start_element($p, $element, &$attrs) {
    $el = $element = strtolower($element);
    $attrs = array_change_key_case($attrs, CASE_LOWER);

    // check for a namespace prefix, and split it off if found
    // (explode() replaces split(), which was removed in PHP 7; the
    // delimiter is a literal ':' so the result is identical)
    $ns = false;
    if ( strpos( $element, ':' ) ) {
        list($ns, $el) = explode( ':', $element, 2);
    }
    if ( $ns and $ns != 'rdf' ) {
        $this->current_namespace = $ns;
    }

    # if feed type isn't set, then this is first element of feed
    # identify feed from root element
    #
    if (!isset($this->feed_type) ) {
        // the version attribute is optional; a missing one is stored as null
        $version = isset($attrs['version']) ? $attrs['version'] : null;

        if ( $el == 'rdf' ) {
            $this->feed_type = RSS;
            $this->feed_version = '1.0';
        }
        elseif ( $el == 'rss' ) {
            $this->feed_type = RSS;
            $this->feed_version = $version;
        }
        elseif ( $el == 'feed' ) {
            $this->feed_type = ATOM;
            $this->feed_version = $version;
            // Atom has no <channel>; the feed element plays that role
            $this->inchannel = true;
        }
        return;
    }

    if ( $el == 'channel' )
    {
        $this->inchannel = true;
    }
    elseif ($el == 'item' or $el == 'entry' )
    {
        $this->initem = true;
        if ( isset($attrs['rdf:about']) ) {
            $this->current_item['about'] = $attrs['rdf:about'];
        }
    }
    // if we're in the default namespace of an RSS feed,
    // record textinput or image fields
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'textinput' )
    {
        $this->intextinput = true;
    }
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'image' )
    {
        $this->inimage = true;
    }
    # handle atom content constructs
    elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
    {
        // avoid clashing w/ RSS mod_content
        if ($el == 'content' ) {
            $el = 'atom_content';
        }
        $this->incontent = $el;
    }
    // if inside an Atom content construct (e.g. content or summary) field treat tags as text
    elseif ($this->feed_type == ATOM and $this->incontent )
    {
        // if tags are inlined, then flatten
        $attrs_str = join(' ',
            array_map('map_attrs',
                array_keys($attrs),
                array_values($attrs) ) );
        $this->append_content( "<$element $attrs_str>" );
        array_unshift( $this->stack, $el );
    }
    // Atom supports many links per containing element.
    // Magpie treats link elements of type rel='alternate'
    // as being equivalent to RSS's simple link element.
    //
    elseif ($this->feed_type == ATOM and $el == 'link' )
    {
        // rel and href may be absent; fall back to '' rather than reading
        // an undefined index (a missing rel still yields the key 'link_')
        $rel  = isset($attrs['rel'])  ? $attrs['rel']  : '';
        $href = isset($attrs['href']) ? $attrs['href'] : '';

        if ( $rel == 'alternate' ) {
            $link_el = 'link';
        }
        else {
            $link_el = 'link_' . $rel;
        }
        $this->append($link_el, $href);
    }
    // set stack[0] to current element
    else {
        array_unshift($this->stack, $el);
    }
}
 
/**
 * XML parser callback for character data.
 *
 * @param mixed  $p     the XML parser (unused)
 * @param string $text  chunk of text reported by the parser
 */
function feed_cdata ($p, $text) {
    // Inside an Atom content construct the text belongs to that construct.
    if ( $this->feed_type == ATOM and $this->incontent ) {
        $this->append_content( $text );
        return;
    }

    // Otherwise the field name is the path of open elements, outermost
    // first, joined with underscores.
    $path = array_reverse($this->stack);
    $this->append( implode('_', $path), $text );
}
/**
 * XML parser callback for a closing tag.
 *
 * Undoes whatever feed_start_element() did for the matching opening tag:
 * stores a finished item, clears the relevant "in..." flag, or pops the
 * parser stack. The namespace tracker is reset on every closing tag.
 *
 * Note that, unlike in feed_start_element(), the tag name is compared with
 * any namespace prefix still attached.
 *
 * @param mixed  $p   the XML parser (unused)
 * @param string $el  tag name as reported by the parser
 */
function feed_end_element ($p, $el) {
$el = strtolower($el);
if ( $el == 'item' or $el == 'entry' )
{
// item complete: file it and start with a clean slate
$this->items[] = $this->current_item;
$this->current_item = array();
$this->initem = false;
}
elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' )
{
$this->intextinput = false;
}
elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' )
{
$this->inimage = false;
}
elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
{
// leaving an Atom content construct; nested tags are no longer flattened
$this->incontent = false;
}
elseif ($el == 'channel' or $el == 'feed' )
{
$this->inchannel = false;
}
elseif ($this->feed_type == ATOM and $this->incontent ) {
// balance tags properly
// note: i don't think this is actually neccessary
// (a tag nested in an Atom content construct is written back out as text)
if ( $this->stack[0] == $el )
{
$this->append_content("</$el>");
}
else {
$this->append_content("<$el />");
}

array_shift( $this->stack );
}
else {
// ordinary element: pop it off the parser stack
array_shift( $this->stack );
}
// the namespace only applies to the element that just closed
$this->current_namespace = false;
}
/**
 * Append $str2 to $str1 in place, treating an unset/null $str1 as "".
 *
 * @param string $str1  target, modified by reference
 * @param string $str2  text to append
 */
function concat (&$str1, $str2="") {
    $base = isset($str1) ? $str1 : "";
    $str1 = $base . $str2;
}
/**
 * Append text to the Atom content construct currently being read.
 * The field name is whatever $this->incontent holds; the text goes to the
 * current item when inside one, otherwise to the channel.
 *
 * @param string $text  text (or flattened markup) to append
 */
function append_content($text) {
    $field = $this->incontent;

    if ( $this->initem ) {
        $this->concat( $this->current_item[$field], $text );
    }
    elseif ( $this->inchannel ) {
        $this->concat( $this->channel[$field], $text );
    }
    // outside both item and channel the text is dropped
}
// smart append - field and namespace aware
/**
 * Append text to field $el of whichever container is currently open.
 * With a namespace active the text is stored under [namespace][field],
 * otherwise directly under [field].
 *
 * @param string $el    field name; an empty name is ignored
 * @param string $text  text to append
 */
function append($el, $text) {
    if (!$el) {
        return;
    }

    $ns = $this->current_namespace;

    if ( $ns ) {
        // namespaced: item first, then channel, textinput, image
        if ( $this->initem ) {
            $bucket =& $this->current_item;
        }
        elseif ( $this->inchannel ) {
            $bucket =& $this->channel;
        }
        elseif ( $this->intextinput ) {
            $bucket =& $this->textinput;
        }
        elseif ( $this->inimage ) {
            $bucket =& $this->image;
        }
        else {
            return;
        }
        $this->concat( $bucket[$ns][$el], $text );
        return;
    }

    // default namespace: item first, then textinput, image, and channel last
    if ( $this->initem ) {
        $bucket =& $this->current_item;
    }
    elseif ( $this->intextinput ) {
        $bucket =& $this->textinput;
    }
    elseif ( $this->inimage ) {
        $bucket =& $this->image;
    }
    elseif ( $this->inchannel ) {
        $bucket =& $this->channel;
    }
    else {
        return;
    }
    $this->concat( $bucket[$el], $text );
}
/**
 * Mirror equivalent fields between the RSS and Atom vocabularies so callers
 * can use either naming, and derive $item['date_timestamp'] where a date
 * can be parsed.
 *
 * Atom feeds: tagline -> description, summary -> description,
 *             atom_content -> content[encoded], issued/modified -> timestamp.
 * RSS feeds:  description -> tagline/summary, content[encoded] -> atom_content,
 *             dc:date (RSS 1.0) or pubdate -> timestamp.
 */
function normalize () {
    $item_count = count($this->items);

    // if atom populate rss fields
    if ( $this->is_atom() ) {
        // missing source field: keep the mirrored key present but null,
        // without reading an undefined index
        $this->channel['description'] =
            isset($this->channel['tagline']) ? $this->channel['tagline'] : null;

        for ( $i = 0; $i < $item_count; $i++) {
            $item = $this->items[$i];

            if ( isset($item['summary']) ) {
                $item['description'] = $item['summary'];
            }
            if ( isset($item['atom_content']) ) {
                $item['content']['encoded'] = $item['atom_content'];
            }

            // prefer the issued date, fall back to modified; either may be absent
            $atom_date = null;
            if ( isset($item['issued']) ) {
                $atom_date = $item['issued'];
            }
            elseif ( isset($item['modified']) ) {
                $atom_date = $item['modified'];
            }

            if ( $atom_date ) {
                $epoch = @parse_w3cdtf($atom_date);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
    elseif ( $this->is_rss() ) {
        $this->channel['tagline'] =
            isset($this->channel['description']) ? $this->channel['description'] : null;

        // loop-invariant: whether this is an RSS 1.0 (RDF) feed
        $is_rss_1 = ( $this->is_rss() == '1.0' );

        for ( $i = 0; $i < $item_count; $i++) {
            $item = $this->items[$i];

            if ( isset($item['description']) ) {
                $item['summary'] = $item['description'];
            }
            if ( isset($item['content']['encoded']) ) {
                $item['atom_content'] = $item['content']['encoded'];
            }

            if ( $is_rss_1 and isset($item['dc']['date']) ) {
                $epoch = @parse_w3cdtf($item['dc']['date']);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }
            elseif ( isset($item['pubdate']) ) {
                $epoch = @strtotime($item['pubdate']);
                if ($epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
}
/**
 * @return mixed the feed version when the feed was identified as RSS, false otherwise
 */
function is_rss () {
    return ( $this->feed_type == RSS ) ? $this->feed_version : false;
}
/**
 * @return mixed the feed version when the feed was identified as Atom, false otherwise
 */
function is_atom() {
    return ( $this->feed_type == ATOM ) ? $this->feed_version : false;
}
 
/**
* return XML parser, and possibly re-encoded source
*
*/
function create_parser($source, $out_enc, $in_enc, $detect) {
    // Compare the full version number: looking only at the first character
    // of phpversion() sent PHP 7, 8 and 10+ down the PHP 4 code path.
    if ( version_compare(phpversion(), '5.0.0', '>=') ) {
        $parser = $this->php5_create_parser($in_enc, $detect);
    }
    else {
        // the PHP 4 path may hand back a re-encoded copy of the source
        list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
    }

    // remember the output encoding and tell the parser to produce it
    if ($out_enc) {
        $this->encoding = $out_enc;
        xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
    }

    return array($parser, $source);
}
/**
* Instantiate an XML parser under PHP5
*
* PHP5 will do a fine job of detecting input encoding
* if passed an empty string as the encoding.
*
* All hail libxml2!
*
*/
function php5_create_parser($in_enc, $detect) {
    // An explicit input encoding is only used when detection is switched
    // off; in every other case pass '' and let the parser work it out.
    $encoding = ( $detect || !$in_enc ) ? '' : $in_enc;
    return xml_parser_create($encoding);
}
/**
* Instaniate an XML parser under PHP4
*
* Unfortunately PHP4's support for character encodings
* and especially XML and character encodings sucks. As
* long as the documents you parse only contain characters
* from the ISO-8859-1 character set (a superset of ASCII,
* and a subset of UTF-8) you're fine. However once you
* step out of that comfy little world things get mad, bad,
* and dangerous to know.
*
* The following code is based on SJM's work with FoF
* @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
*
*/
function php4_create_parser($source, $in_enc, $detect) {
    // detection switched off: trust the caller's encoding as-is
    if ( !$detect ) {
        return array(xml_parser_create($in_enc), $source);
    }

    // no encoding given: read it from the XML prolog, defaulting to UTF-8
    if (!$in_enc) {
        // "<\?xml" matches the literal prolog opener; unescaped, "<?" only
        // made the "<" optional, so any "xml ... encoding=" text on a line
        // could be mistaken for the prolog.
        if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) {
            $in_enc = strtoupper($m[1]);
            // the class declares $_source_encoding for this value
            $this->_source_encoding = $in_enc;
            // historical (undeclared) property name, kept for any code that reads it
            $this->source_encoding = $in_enc;
        }
        else {
            $in_enc = 'UTF-8';
        }
    }

    if ($this->known_encoding($in_enc)) {
        return array(xml_parser_create($in_enc), $source);
    }

    // the detected encoding is not one of the simple encodings PHP knows
    // attempt to use the iconv extension to
    // cast the XML to a known encoding
    // @see http://php.net/iconv
    if (function_exists('iconv')) {
        $encoded_source = iconv($in_enc,'UTF-8', $source);
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // iconv didn't work, try mb_convert_encoding
    // @see http://php.net/mbstring
    if (function_exists('mb_convert_encoding')) {
        $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // nothing could convert the source: warn and parse it unchanged
    $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
                 "You may see strange artifacts, and mangled characters.",
                 E_USER_NOTICE);
    return array(xml_parser_create(), $source);
}
/**
 * @param string $enc  encoding name, any case
 * @return mixed the upper-cased name if it is listed in $_KNOWN_ENCODINGS, false otherwise
 */
function known_encoding($enc) {
    $upper = strtoupper($enc);
    return in_array($upper, $this->_KNOWN_ENCODINGS) ? $upper : false;
}
 
/**
 * Report a parser problem and remember it on the object.
 * The message is raised via trigger_error() when MAGPIE_DEBUG is on and
 * written with error_log() otherwise; notice-level messages are stored in
 * $this->WARNING, everything else in $this->ERROR.
 *
 * @param string $errormsg  message text
 * @param int    $lvl       PHP error level
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    // if a $php_errormsg is visible in this scope, tack it on
    if ( isset($php_errormsg) ) {
        $errormsg = $errormsg . " ($php_errormsg)";
    }

    if ( !MAGPIE_DEBUG ) {
        error_log( $errormsg, 0);
    }
    else {
        trigger_error( $errormsg, $lvl);
    }

    // pick the slot by severity
    $is_notice = ( $lvl & (E_USER_NOTICE|E_NOTICE) );
    if ( $is_notice ) {
        $this->WARNING = $errormsg;
    }
    else {
        $this->ERROR = $errormsg;
    }
}
} // end class RSS
 
/**
 * Format one attribute as name="value" for re-serialising inline markup.
 *
 * The XML parser hands over attribute values with entities already decoded,
 * so the markup-significant characters are escaped again here; otherwise a
 * double quote inside a value would end the attribute early. str_replace()
 * is used instead of htmlspecialchars() because it works byte-wise and so
 * does not depend on the output character set.
 *
 * @param string $k  attribute name
 * @param string $v  attribute value (decoded)
 * @return string
 */
function map_attrs($k, $v) {
    // '&' must be replaced first so the entities added afterwards stay intact
    $escaped = str_replace(
        array('&', '"', '<', '>'),
        array('&amp;', '&quot;', '&lt;', '&gt;'),
        $v
    );
    return "$k=\"$escaped\"";
}
 
// patch to support medieval versions of PHP4.1.x,
// courtesy, Ryan Currie, ryan@digibliss.com
 
if (!function_exists('array_change_key_case')) {
    if (!defined('CASE_UPPER')) {
        define("CASE_UPPER",1);
    }
    if (!defined('CASE_LOWER')) {
        define("CASE_LOWER",0);
    }

    /**
     * Fallback for PHP versions without array_change_key_case().
     *
     * @param array $array  input array
     * @param int   $case   CASE_LOWER (default) or CASE_UPPER
     * @return array copy of $array with its string keys re-cased
     */
    function array_change_key_case($array,$case=CASE_LOWER) {
        // Start from an empty array so empty input returns array(), not null.
        $output = array();
        foreach ($array as $key=>$value) {
            // Compare with ==, not =: the old assignment made every call
            // upper-case the keys. Integer keys are left untouched.
            if (is_string($key)) {
                $key = ($case == CASE_UPPER) ? strtoupper($key) : strtolower($key);
            }
            $output[$key] = $value;
        }
        return $output;
    }

}
 
?>
/trunk/api/syndication_rss/syndication-rss.php
New file
0,0 → 1,50
<?php
 
// Directory holding the bundled MagpieRSS library; Magpie's own files also
// build their require paths from this constant.
define('MAGPIE_DIR', 'magpierss/');
// Defined before Magpie is loaded so that it takes precedence over the
// './cache' default Magpie would otherwise pick in init().
define('MAGPIE_CACHE_DIR', '/tmp/magpie_cache');
require_once(MAGPIE_DIR.'rss_fetch.inc');
 
/**
 * Render a feed as an HTML heading followed by a list of links.
 *
 * @param string $titre             heading text; '' uses the channel title,
 *                                  '0' suppresses the heading
 * @param string $url               feed URL; '' returns an empty string
 * @param int    $nb                maximum number of items, 0 for all
 * @param int    $nouvelle_fenetre  1 to open links in a new window
 * @param string $formatdate        'jm', 'jma', 'jmh' or 'jmah' to prefix
 *                                  each item with its date; as before, dates
 *                                  are only shown when the list is truncated
 * @return string HTML fragment
 */
function voir_rss($titre='', $url='', $nb=0, $nouvelle_fenetre=1, $formatdate='') {
    $res = '';
    if ( $url == '' ) {
        return $res;
    }

    $rss = fetch_rss( $url );

    // fetch_rss() returns false on failure: degrade to an empty list
    // instead of dereferencing a non-object.
    $items = ( $rss and isset($rss->items) and is_array($rss->items) ) ? $rss->items : array();
    $channel_title = ( $rss and isset($rss->channel['title']) ) ? $rss->channel['title'] : '';

    if ( $titre == '' ) {
        $res .= '<h2>'.$channel_title.'</h2>'."\n";
    }
    elseif ( $titre != '0' ) {
        $res .= '<h2>'.$titre.'</h2>'."\n";
    }
    $res .= '<ul class="liste_rss">'."\n";

    // date() equivalents of the numeric strftime() patterns used before
    // (strftime() is deprecated since PHP 8.1)
    $date_formats = array(
        'jm'   => 'd.m',
        'jma'  => 'd.m.Y',
        'jmh'  => 'd.m H:i',
        'jmah' => 'd.m.Y H:i',
    );
    $date_format = ( is_string($formatdate) and isset($date_formats[$formatdate]) )
        ? $date_formats[$formatdate]
        : '';

    // NOTE(review): dates have only ever been printed when the list is
    // truncated to $nb items; kept as-is, but confirm this is intended.
    $truncate = ( $nb != 0 and count($items) > $nb );

    $shown = 0;
    foreach ( $items as $item ) {
        $href  = isset($item['link'])  ? $item['link']  : '';
        $title = isset($item['title']) ? $item['title'] : '';

        // The link comes from a remote feed and lands inside an attribute:
        // escape it so it cannot close the attribute or the tag.
        // NOTE(review): this does not filter javascript: URLs.
        $href = str_replace(
            array('&', '"', '<', '>'),
            array('&amp;', '&quot;', '&lt;', '&gt;'),
            $href
        );

        $res .= '<li class="titre_rss">';

        if ( $truncate and $date_format != '' ) {
            // pubdate is a date string; date_timestamp is already an epoch
            // and must not go through strtotime(). The decision is made per
            // item, so one undated item no longer disables later dates.
            $timestamp = false;
            if ( isset($item['pubdate']) ) {
                $timestamp = strtotime($item['pubdate']);
            }
            elseif ( isset($item['date_timestamp']) ) {
                $timestamp = (int) $item['date_timestamp'];
            }
            if ( $timestamp !== false and $timestamp > 0 ) {
                $res .= date($date_format, $timestamp).': ';
            }
        }

        $res .= '<a class="lien_rss" href="'.$href;
        if ( $nouvelle_fenetre == 1 ) {
            $res .= '" onclick="window.open(this.href); return false;';
        }
        // NOTE(review): the title is emitted as received because feeds often
        // carry HTML entities in titles; it is untrusted markup all the same.
        $res .= '">'.$title.'</a></li>'."\n";

        $shown++;
        if ( $truncate and $shown >= $nb ) {
            break;
        }
    }

    $res .= '</ul>'."\n";
    return $res;
}
?>