User:Sylvain Schmitz~metawiki/Watchlist RSS feed in PHP
It's a hack, it's a kludge, it's localized for the French Wikipedia, it's an ugly tiny script written in PHP. But it does generate an RSS feed from the Special:Watchlist recent changes.
Feel free to modify and improve this GFDL-licensed code! The syntax highlighting is generated using the .phps extension.
Installation
Copy this code into a .php file on a server; a cookie file named .htcookie.$wp_domain must be readable and writable by the server. Point your feed aggregator to the PHP script; you can append a different watchlist path as an extra parameter, for instance to hide your own entries.
The Code
<?php
/*
 * Example call (everything after the script name is used as the watchlist
 * path on the wiki):
 *   fr.wikipedia.rss/w/index.php?title=Special:Watchlist&hideOwn=1
 */

/* ---------------------------------------------------------------- Setup */

// Credentials of the account whose watchlist is turned into a feed.
$wp_name     = "login";
$wp_password = "password";

// Wiki host, and the watchlist path used when the request supplies none.
$wp_domain    = "fr.wikipedia.org";
$wp_watchlist = "/wiki/Special:Watchlist";

// Upper bound on the number of feed items.
$max_entries = 20;

// Time zone offset appended to every scraped time.
$wp_tmz = '+01:00';

// Localized month names mapped to their two-digit month number.
$months = array (
    'janvier'   => '01',
    'février'   => '02',
    'mars'      => '03',
    'avril'     => '04',
    'mai'       => '05',
    'juin'      => '06',
    'juillet'   => '07',
    'août'      => '08',
    'septembre' => '09',
    'octobre'   => '10',
    'novembre'  => '11',
    'décembre'  => '12'
);

// Localized namespace prefix of user pages.
$wp_userpage = 'Utilisateur:';

// Localized feed title, and the description derived from it.
$wp_title       = 'Liste de suivi';
$wp_description = "$wp_title de $wp_name";

/* --------------------------------------------------------- End of setup */
// process the script request: whatever follows the script name in the
// requested URI replaces the default watchlist path
$ruri = '';
if (isset ($_SERVER['REQUEST_URI']) && isset ($_SERVER['SCRIPT_NAME']))
{
    $request_uri = $_SERVER['REQUEST_URI'];
    $script_name = $_SERVER['SCRIPT_NAME'];
    // Only strip the script name when the URI really starts with it;
    // cutting by length alone yields a meaningless fragment when the two
    // differ (directory index, URL rewriting, ...).
    if (substr ($request_uri, 0, strlen ($script_name)) === $script_name)
    {
        // older PHP versions return FALSE instead of "" here
        $ruri = (string) substr ($request_uri, strlen ($script_name));
    }
}
if (strlen ($ruri) != 0)
{
    $wp_watchlist = $ruri;
}
// The cookie jar lives next to the script, one file per wiki domain.
$cookie_file = ".htcookie.".$wp_domain;

// Look up the expiry timestamp of the stored login cookie; 0 means that no
// such cookie was found.
$time = 0;
$jar = fopen ($cookie_file, "r");
if ($jar)
{
    while (!feof ($jar))
    {
        $line = fgets ($jar, 4096);
        if (strpos ($line, "wikiUserID") === FALSE)
        {
            continue;
        }
        // the jar is tab separated; the fifth field is taken as the expiry
        $fields = explode ("\t", $line);
        $time = $fields[4];
        break;
    }
    fclose ($jar);
}
// check whether a new login is needed: log in again when no login cookie
// was found ($time is 0) or when it expires within the next minute
if (((int) $time - 60) < time ())
{
    // login URL
    $wp_login = '/w/index.php?title=Special:Userlogin'
        .'&action=submitlogin&type=login';
    // fields of the login form
    $postdata = array ();
    $postdata['wpName'] = $wp_name;
    $postdata['wpPassword'] = $wp_password;
    $postdata['wpRemember'] = '1';
    $postdata['wpLoginattempt'] = 'true';
    // url-encoded request body; empty fields are left out
    $post = '';
    foreach ($postdata as $key => $value)
    {
        if ($key && $value)
        {
            $post .= $key."=".urlencode ($value)."&";
        }
    }
    // login connection
    $login = curl_init ();
    curl_setopt ($login, CURLOPT_URL, $wp_domain.$wp_login);
    curl_setopt ($login, CURLOPT_POST, TRUE);
    curl_setopt ($login, CURLOPT_POSTFIELDS, $post);
    curl_setopt ($login, CURLOPT_COOKIEJAR, $cookie_file);
    // Without this option curl_exec() prints the response body, i.e. the
    // login result page would be written into the feed ahead of the HTTP
    // header and the XML declaration.
    curl_setopt ($login, CURLOPT_RETURNTRANSFER, TRUE);
    curl_exec ($login);
    curl_close ($login);
    // Since PHP 8.0 curl_close() has no effect; dropping the last reference
    // destroys the handle, which is when the cookie jar gets written, so the
    // watchlist request that follows can read the fresh session cookies.
    unset ($login);
}
// Download the watchlist page. The cookie jar is both read (to send the
// session cookies) and written back (to keep refreshed cookies); the page
// body is returned as a string instead of being printed.
$content = curl_init ($wp_domain.$wp_watchlist);
curl_setopt ($content, CURLOPT_COOKIEJAR, $cookie_file);
curl_setopt ($content, CURLOPT_COOKIEFILE, $cookie_file);
curl_setopt ($content, CURLOPT_RETURNTRANSFER, TRUE);
$watchlist = curl_exec ($content);
curl_close ($content);
/**
 * Converts a localized "day month year" heading into the date part of an
 * ISO 8601 timestamp.
 *
 * The input is split on single spaces: the first field is the day, the
 * second the month name (looked up in the global $months table) and the
 * third the year. A one-character day is left-padded with a zero.
 *
 * Missing fields and unknown month names produce empty components instead
 * of PHP notices, which would otherwise be written into the feed output.
 *
 * @param string $date_str date heading, e.g. "5 janvier 2006"
 * @return string "YYYY-MM-DDT", ready to have a time appended
 */
function to_iso8601 ($date_str)
{
    global $months;
    $date_fields = explode (" ", $date_str);
    $day = isset ($date_fields[0]) ? $date_fields[0] : "";
    $month = isset ($date_fields[1]) ? $date_fields[1] : "";
    $year = isset ($date_fields[2]) ? $date_fields[2] : "";
    if (strlen ($day) == 1)
    {
        $day = "0".$day;
    }
    $month_number = isset ($months[$month]) ? $months[$month] : "";
    return $year."-".$month_number."-".$day."T";
}
// explode the contents by days: each "<h4>" heading opens one day of changes
$days = explode ("<h4>", (string) $watchlist);
$entries = array ();
$times = array ();
$authors = array ();
$nentries = 0;
$ndays = sizeof ($days);
// chunk 0 is whatever precedes the first day heading, so start at 1
for ($i = 1; $i < $ndays && $nentries < $max_entries; $i++)
{
    $heading_end = strpos ($days[$i], "</h4>");
    if ($heading_end === FALSE)
    {
        // no closing heading tag, hence no usable date for this chunk
        continue;
    }
    $the_date = to_iso8601 (substr ($days[$i], 0, $heading_end));
    $tmp = explode ("<li>", $days[$i]);
    $nitems = sizeof ($tmp);
    // chunk 0 holds the heading itself; the list items follow
    for ($j = 1; $j < $nitems && $nentries < $max_entries; $j++)
    {
        $item = $tmp[$j];
        // page title: value of the first title attribute of the item
        $title_start = strpos ($item, ' title="');
        if ($title_start === FALSE)
        {
            continue;
        }
        $title_start += 8;
        $title_end = strpos ($item, '"', $title_start);
        if ($title_end === FALSE)
        {
            continue;
        }
        // Time of the change: the first "; " that is really followed by
        // HH:MM. Taking the first "; " blindly also matches the tail of an
        // entity such as "&amp; " inside the title.
        if (!preg_match ('/; (\d\d:\d\d)/', $item, $time_match))
        {
            continue;
        }
        // Author: title attribute of the first link to a user page. Items
        // without such a link get an empty author; a failed strpos() must
        // not be used as offset 0, which would cut out unrelated markup.
        $author = "";
        $author_start = strpos ($item, ' title="'.$wp_userpage);
        if ($author_start !== FALSE)
        {
            $author_start += 8 + strlen ($wp_userpage);
            $author_end = strpos ($item, '"', $author_start);
            if ($author_end !== FALSE)
            {
                $author = substr ($item, $author_start,
                                  $author_end - $author_start);
            }
        }
        $entries[$nentries] = (string) substr ($item, $title_start,
                                               $title_end - $title_start);
        $times[$nentries] = $the_date.$time_match[1].$wp_tmz;
        $authors[$nentries] = (string) $author;
        $nentries++;
    }
}
/********************************************************* RSS generation. */
// Characters that must not appear raw in XML text or attribute values, and
// the predefined entities that replace them. "&" has to come first so that
// the ampersands introduced by the later replacements are not escaped again.
$disallowed_xml = array ("&", "<", ">", "\"");
$replacements_xml = array ("&amp;", "&lt;", "&gt;", "&quot;");
// XML-escaped absolute URL of the watchlist; the path may come straight from
// the request and typically carries a query string with "&" separators
$channel_url = "http://".$wp_domain
    .str_replace ($disallowed_xml, $replacements_xml, $wp_watchlist);
// URL of every entry; it is used both in the channel's item list and as the
// item's own rdf:about, and the two have to match
// NOTE(review): the titles were cut out of HTML attribute values, so they are
// presumably entity-escaped already; if so, urlencode() encodes the entity
// text rather than the character - confirm against real watchlist markup.
$item_urls = array ();
for ($i = 0; $i < $nentries; $i++)
{
    $item_urls[$i] = "http://$wp_domain/wiki/"
        .urlencode (str_replace (" ", "_", $entries[$i]));
}
// header
header ("Content-Type: application/xml; charset=utf-8");
print ("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
print ("<!DOCTYPE rdf:RDF [\n");
print ("<!ENTITY % HTMLlat1 PUBLIC\n");
print (" \"-//W3C//ENTITIES Latin 1 for XHTML//EN\"\n");
print (" \"http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent\">\n");
print ("]>\n");
print ("<rdf:RDF\n");
print (" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" \n");
print (" xmlns:sy=\"http://purl.org/rss/1.0/modules/syndication/\"\n");
print (" xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n");
//print (" xmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n");
print (" xmlns=\"http://purl.org/rss/1.0/\"\n");
print (">\n");
// channel summary
print (" <channel rdf:about=\"".$channel_url."\">\n");
print (" <title>$wp_title</title>\n");
print (" <link>".$channel_url."</link>\n");
print (" <description>$wp_description</description>\n");
print (" <dc:source>".$channel_url."</dc:source>\n");
// "P" prints the UTC offset with a colon (+01:00), the form used by the
// other dates in this feed; "O" would print +0100
print (" <dc:date>".date ("Y-m-d\TH:iP")."</dc:date>\n");
print (" <sy:updatePeriod>hourly</sy:updatePeriod>\n");
print (" <sy:updateFrequency>4</sy:updateFrequency>\n");
print (" <sy:updateBase>1970-01-01T00:00+00:00</sy:updateBase>\n");
print (" <items>\n");
print (" <rdf:Seq>\n");
for ($i = 0; $i < $nentries; $i++)
{
    print (" <rdf:li resource=\"".$item_urls[$i]."\" />\n");
}
print (" </rdf:Seq>\n");
print (" </items>\n");
print ("\n");
print (" </channel>\n");
// items
// NOTE(review): titles and authors are printed exactly as scraped, on the
// assumption that they are still entity-escaped from the HTML source -
// escaping them again here would double-escape; confirm.
for ($i = 0; $i < $nentries; $i++)
{
    print (" <item rdf:about=\"".$item_urls[$i]."\">\n");
    print (" <title>".$entries[$i]."</title>\n");
    print (" <dc:creator>".$authors[$i]."</dc:creator>\n");
    print (" <dc:date>".$times[$i]."</dc:date>\n");
    print (" </item>\n\n");
}
// footer
print ("</rdf:RDF>\n");
?>