Subversion Repositories eFlore/Projets.eflore-projets

Compare Revisions

Ignore whitespace Rev 274 → Rev 1027

/tags/v5.5-arbousiere/services/bibliotheque/robots/WikipediaBot.php
New file
0,0 → 1,158
<?php
/**
 * Small client for the MediaWiki web API of one Wikipedia language edition.
 *
 * It loads the wikitext of an article (chargerPage), gives access to parts of
 * that wikitext (taxobox, sections) and can ask the API to render a wikitext
 * fragment (rendre). Requests are sent as HTTP POST through PHP stream
 * wrappers; responses are requested in the API's "php" serialized format.
 */
class WikipediaBot {
    /** Argument separator used when building the POST body. */
    const HTTP_URL_REQUETE_SEPARATEUR = '&';

    /** Language code used as the Wikipedia sub-domain (lower-cased). */
    private $langue = 'fr';
    /** URL of the request being prepared. */
    private $url = '';
    /** Parameters of the request being prepared. */
    private $parametres = array();
    /** Decoded result of the last API call. */
    private $resultats = array();
    /** Title of the last page loaded by chargerPage(). */
    private $titre = '';
    /** Wikitext of the last page loaded by chargerPage(). */
    private $txt = '';
    /** Value sent in the User-Agent request header. */
    private $userAgent = 'eFloreBot v0.1';
    /** Response headers / stream metadata of the last HTTP request. */
    private $reponse_entetes = null;

    /**
     * @param array $options supported key: 'langue' (Wikipedia language code).
     */
    public function __construct($options = array()) {
        if (array_key_exists('langue', $options)) {
            $this->langue = strtolower($options['langue']);
        }
    }

    /**
     * Loads the title and wikitext of an article, following redirects.
     *
     * When the API response holds no page or the page has no revision
     * content, the title and/or text are left as empty strings.
     *
     * @param string $article title of the article to load.
     * @throws Exception when the HTTP request fails or the API reports an error.
     */
    public function chargerPage($article) {
        $this->initialiserRequete();
        $this->url = $this->getBaseApiURL();
        $this->parametres = array(
            'action' => 'query',
            'prop' => 'revisions',
            'titles' => $article,
            'rvprop' => 'content',
            'redirects' => 1
        );
        $this->resultats = $this->consulterAPI();

        $this->titre = '';
        $this->txt = '';
        if (!isset($this->resultats['query']['pages']) || !is_array($this->resultats['query']['pages'])) {
            return;
        }
        // Only the first page of the result is used; a single title was requested.
        $pages = $this->resultats['query']['pages'];
        $page = array_shift($pages);
        if (isset($page['title'])) {
            $this->titre = $page['title'];
        }
        if (isset($page['revisions'][0]['*'])) {
            $this->txt = $page['revisions'][0]['*'];
        }
    }

    /**
     * @return string title of the last loaded page ('' if none).
     */
    public function getPageTitre() {
        return $this->titre;
    }

    /**
     * @return string wikitext of the last loaded page ('' if none).
     */
    public function getPageTxt() {
        return $this->txt;
    }

    /**
     * Returns the taxobox of the loaded page: everything from the
     * "{{Taxobox début" marker to the last "{{Taxobox fin}}" marker.
     *
     * @return string the taxobox wikitext, or '' when no taxobox is found.
     */
    public function getTaxobox() {
        $taxobox = '';
        if (preg_match('/([{]{2}Taxobox début.+[{]{2}Taxobox fin[}]{2})/s', $this->txt, $match)) {
            $taxobox = $match[1];
        }
        return $taxobox;
    }

    /**
     * Returns the taxobox and removes it from the stored page text.
     *
     * @return string the taxobox wikitext, or '' when no taxobox is found.
     */
    public function extraireTaxobox() {
        $taxobox = $this->getTaxobox();
        if ($taxobox !== '') {
            $this->txt = str_replace($taxobox, '', $this->txt);
        }
        return $taxobox;
    }

    /**
     * Returns the text located between section headings, by position.
     *
     * The page text is split on "==...==" headings; index 0 is the text
     * before the first heading.
     *
     * @param int $num index of the wanted chunk.
     * @return string the chunk, or '' when the index does not exist.
     */
    public function getSectionParNumero($num) {
        $sections = preg_split('/[=]{2}[^=]+[=]{2}/U', $this->txt);
        return isset($sections[$num]) ? $sections[$num] : '';
    }

    /**
     * Returns the text following the "== <titre> ==" heading, up to the
     * first blank line.
     *
     * @param string $titre literal heading text (not a regular expression).
     * @return string the section text, or '' when the heading is not found.
     */
    public function getSectionParTitre($titre) {
        $section = '';
        // The title is literal text: escape it so that characters such as
        // "(", ")", "." or "/" cannot alter or break the pattern.
        $titreEchappe = preg_quote($titre, '/');
        if (preg_match('/[=]{2} '.$titreEchappe.' [=]{2}(.*)\n\n/sU', $this->txt, $match)) {
            $section = $match[1];
        }
        return $section;
    }

    /**
     * Asks the API to render a wikitext fragment and rewrites the relative
     * links of the result into absolute Wikipedia links.
     *
     * @param string $wikitxt wikitext to render; "<references />" is appended.
     * @return string the rendered text ('' when the response holds none).
     * @throws Exception when the HTTP request fails or the API reports an error.
     */
    public function rendre($wikitxt) {
        $wikitxt .= '<references />';
        $this->initialiserRequete();
        $this->url = $this->getBaseApiURL();
        $this->parametres = array(
            'action' => 'parse',
            'prop' => 'text',
            'text' => $wikitxt
        );
        $this->resultats = $this->consulterAPI();
        $txt = isset($this->resultats['parse']['text']['*']) ? $this->resultats['parse']['text']['*'] : '';
        return $this->remplacerUrls($txt);
    }

    /**
     * Resets the state describing the current request and its result.
     */
    private function initialiserRequete() {
        $this->url = '';
        $this->parametres = array();
        $this->resultats = array();
    }

    /**
     * @return string base URL of the Wikipedia edition for the current language.
     */
    private function getBaseWpURL() {
        // NOTE(review): plain http is kept as in the original code; confirm
        // whether https should be used for the target servers.
        return "http://{$this->langue}.wikipedia.org";
    }

    /**
     * @return string URL of the API entry point.
     */
    private function getBaseApiURL() {
        return $this->getBaseWpURL().'/w/api.php';
    }

    /**
     * Sends the prepared request and decodes the serialized PHP response.
     *
     * @return array the decoded response.
     * @throws Exception when the request fails, the response cannot be
     *                   decoded, or the response contains an 'error' entry.
     */
    private function consulterAPI() {
        $this->parametres['format'] = 'php';
        $brut = $this->consulterEnPost();
        // NOTE(review): unserialize() is applied to data received from the
        // network. Consider a non-executable format (e.g. JSON) or
        // restricting allowed classes if the minimum PHP version permits.
        $resultat = unserialize($brut);

        if (!is_array($resultat)) {
            throw new Exception("La réponse de l'url '{$this->url}' n'a pas pu être décodée!");
        }
        if (isset($resultat['error'])) {
            $erreur = $resultat['error'];
            $message = isset($erreur['info']) ? $erreur['info'] : '';
            if (isset($erreur['code'])) {
                // Exception codes must be integers, so the API's error code
                // is carried in the message instead.
                $message = '['.$erreur['code'].'] '.$message;
            }
            throw new Exception($message);
        }
        return $resultat;
    }

    /**
     * @return string raw body of the response to a POST request.
     * @throws Exception when the URL cannot be opened.
     */
    private function consulterEnPost() {
        return $this->consulter('POST');
    }

    /**
     * Performs the HTTP request described by $this->url and $this->parametres.
     *
     * @param string $mode HTTP method to use.
     * @return string raw response body.
     * @throws Exception when the URL cannot be opened.
     */
    private function consulter($mode) {
        $entetes = array(
            'Content-type' => 'application/x-www-form-urlencoded',
            'User-Agent' => $this->userAgent);
        $contexte = array('http' => array(
            'method' => $mode,
            'header' => $this->getEnteteChaine($entetes),
            'content' => http_build_query($this->parametres, '', self::HTTP_URL_REQUETE_SEPARATEUR)));
        $contexteFlux = stream_context_create($contexte);
        $flux = fopen($this->url, 'r', false, $contexteFlux);

        if (!$flux) {
            // $http_response_header only exists when a response was received.
            $this->reponse_entetes = isset($http_response_header) ? $http_response_header : null;
            $e = "L'ouverture de l'url '{$this->url}' par la méthode HTTP '$mode' a échoué!";
            throw new Exception($e);
        }
        // Headers and metadata of the stream.
        $this->reponse_entetes = stream_get_meta_data($flux);
        // Body of the response.
        $contenu = stream_get_contents($flux);
        fclose($flux);
        return $contenu;
    }

    /**
     * Formats headers as "Name: value" lines joined by CRLF.
     *
     * @param array $entetes header names mapped to their values.
     * @return string the header block.
     */
    private function getEnteteChaine(Array $entetes) {
        $entetesCleVal = array();
        foreach ($entetes as $cle => $valeur) {
            $entetesCleVal[] = $cle.': '.$valeur;
        }
        return implode("\r\n", $entetesCleVal);
    }

    /**
     * Turns site-relative "/wiki/" and "/w/" links into absolute links.
     *
     * @param string $txt rendered text.
     * @return string the text with absolute links.
     */
    private function remplacerUrls($txt) {
        $base = $this->getBaseWpURL();
        $remplacements = array(
            'href="/wiki/' => 'href="'.$base.'/wiki/',
            'href="/w/' => 'href="'.$base.'/w/');
        return strtr($txt, $remplacements);
    }
}
?>