New file |
0,0 → 1,158 |
<?php |
/**
 * Small client for the MediaWiki web API of one Wikipedia language edition.
 *
 * It can load the wikitext of an article (chargerPage), cut pieces out of the
 * loaded text (taxobox, sections) and ask the API to render arbitrary wikitext
 * as HTML (rendre). All HTTP traffic goes through PHP stream contexts.
 */
class WikipediaBot {
    /** Argument separator used when URL-encoding the request parameters. */
    const HTTP_URL_REQUETE_SEPARATEUR = '&';

    /** @var string Language code used as the sub-domain of wikipedia.org. */
    private $langue = 'fr';

    /** @var string URL of the request currently being prepared. */
    private $url = '';

    /** @var array Parameters of the request currently being prepared. */
    private $parametres = array();

    /** @var array Decoded result of the last API call. */
    private $resultats = array();

    /** @var string Title of the last page loaded by chargerPage(). */
    private $titre = '';

    /** @var string Wikitext of the last page loaded by chargerPage(). */
    private $txt = '';

    /** @var string Value sent in the User-Agent request header. */
    private $userAgent = 'eFloreBot v0.1';

    /** @var array|null Response headers or stream metadata of the last request. */
    private $reponse_entetes = null;

    /**
     * @param array $options Supported key: 'langue', the language code of the
     *                       Wikipedia edition to query (lower-cased before use).
     */
    public function __construct($options = array()) {
        // isset() rather than array_key_exists(): an explicit null or empty
        // value keeps the default language instead of yielding an empty host label.
        if (isset($options['langue']) && $options['langue'] !== '') {
            $this->langue = strtolower($options['langue']);
        }
    }

    /**
     * Loads the content of the latest revision of an article, following redirects.
     *
     * The title and text are then available through getPageTitre() and
     * getPageTxt(). When the API returns no page or no revision (for instance
     * for a missing article) the corresponding value is set to an empty string.
     *
     * @param string $article Title of the article to load.
     * @throws Exception When the HTTP request fails or the API reports an error.
     */
    public function chargerPage($article) {
        $this->initialiserRequete();
        $this->url = $this->getBaseApiURL();
        $this->parametres = array(
            'action' => 'query',
            'prop' => 'revisions',
            'titles' => $article,
            'rvprop' => 'content',
            'redirects' => 1
        );
        $this->resultats = $this->consulterAPI();

        $pages = array();
        if (isset($this->resultats['query']['pages']) && is_array($this->resultats['query']['pages'])) {
            $pages = $this->resultats['query']['pages'];
        }
        // Only the first returned page is used; reset() yields false when the
        // list is empty, which the isset() checks below turn into ''.
        $page = reset($pages);
        $this->titre = isset($page['title']) ? $page['title'] : '';
        $this->txt = isset($page['revisions'][0]['*']) ? $page['revisions'][0]['*'] : '';
    }

    /**
     * @return string Title of the last loaded page ('' if none).
     */
    public function getPageTitre() {
        return $this->titre;
    }

    /**
     * @return string Wikitext of the last loaded page ('' if none).
     */
    public function getPageTxt() {
        return $this->txt;
    }

    /**
     * Returns the span of the loaded text running from the first
     * "{{Taxobox début" to the last "{{Taxobox fin}}" (the match is greedy).
     *
     * @return string The matched span, or '' when the text contains none.
     */
    public function getTaxobox() {
        $taxobox = '';
        if (preg_match('/([{]{2}Taxobox début.+[{]{2}Taxobox fin[}]{2})/s', $this->txt, $match)) {
            $taxobox = $match[1];
        }
        return $taxobox;
    }

    /**
     * Removes the taxobox from the loaded text and returns it.
     *
     * @return string The removed taxobox, or '' when there was none.
     */
    public function extraireTaxobox() {
        $taxobox = $this->getTaxobox();
        if ($taxobox !== '') {
            $this->txt = str_replace($taxobox, '', $this->txt);
        }
        return $taxobox;
    }

    /**
     * Returns the n-th chunk of the loaded text once it has been split on
     * "==...==" headings. Chunk 0 is the text located before the first heading.
     *
     * @param int $num Zero-based index of the chunk.
     * @return string The chunk, or '' when the index does not exist.
     */
    public function getSectionParNumero($num) {
        $sections = preg_split('/[=]{2}[^=]+[=]{2}/U', $this->txt);
        return isset($sections[$num]) ? $sections[$num] : '';
    }

    /**
     * Returns the text following the heading "== $titre ==" up to the first
     * blank line (two consecutive newlines).
     *
     * @param string $titre Heading text, matched literally.
     * @return string The section text, or '' when the heading is not found.
     */
    public function getSectionParTitre($titre) {
        $section = '';
        // The title is data, not a pattern: escape it so characters such as
        // "(", "+", "?" or "/" cannot break or alter the regular expression.
        $titreEchappe = preg_quote($titre, '/');
        if (preg_match('/[=]{2} '.$titreEchappe.' [=]{2}(.*)\n\n/sU', $this->txt, $match)) {
            $section = $match[1];
        }
        return $section;
    }

    /**
     * Asks the API to render wikitext as HTML. A "<references />" tag is
     * appended to the wikitext first, and site-relative links of the result
     * are made absolute.
     *
     * @param string $wikitxt Wikitext to render.
     * @return string The HTML returned by the API ('' if the response has none).
     * @throws Exception When the HTTP request fails or the API reports an error.
     */
    public function rendre($wikitxt) {
        $wikitxt .= '<references />';
        $this->initialiserRequete();
        $this->url = $this->getBaseApiURL();
        $this->parametres = array(
            'action' => 'parse',
            'prop' => 'text',
            'text' => $wikitxt
        );
        $this->resultats = $this->consulterAPI();
        $txt = isset($this->resultats['parse']['text']['*']) ? $this->resultats['parse']['text']['*'] : '';
        return $this->remplacerUrls($txt);
    }

    /**
     * Clears the URL, parameters and results left by the previous request.
     */
    private function initialiserRequete() {
        $this->url = '';
        $this->parametres = array();
        $this->resultats = array();
    }

    /**
     * @return string Base URL of the Wikipedia edition for the configured language.
     */
    private function getBaseWpURL() {
        // HTTPS: Wikimedia sites redirect plain HTTP, and PHP's HTTP wrapper
        // would presumably replay the redirected POST as a body-less GET.
        return "https://{$this->langue}.wikipedia.org";
    }

    /**
     * @return string URL of the API entry point (/w/api.php).
     */
    private function getBaseApiURL() {
        return $this->getBaseWpURL().'/w/api.php';
    }

    /**
     * Sends the prepared parameters to the API, asking for the serialized PHP
     * output format, and returns the decoded response.
     *
     * @return array The decoded response.
     * @throws Exception When the request fails, the response cannot be decoded
     *                   or the API reports an error.
     */
    private function consulterAPI() {
        $this->parametres['format'] = 'php';
        return $this->decoderReponse($this->consulterEnPost());
    }

    /**
     * Decodes a serialized API response and converts API errors to exceptions.
     *
     * @param string $reponse Raw response body.
     * @return array The decoded response.
     * @throws Exception When the body is not a serialized array or contains
     *                   an 'error' entry.
     */
    private function decoderReponse($reponse) {
        // NOTE(review): unserialize() on data received from the network can
        // instantiate arbitrary classes. Consider switching to format=json
        // with json_decode() after confirming the response structure.
        $resultat = unserialize($reponse);
        if (!is_array($resultat)) {
            throw new Exception("La réponse de l'API n'a pas pu être décodée.");
        }

        if (isset($resultat['error'])) {
            $erreur = $resultat['error'];
            $message = isset($erreur['info']) ? $erreur['info'] : '';
            if (isset($erreur['code']) && $erreur['code'] !== '') {
                $message = "[{$erreur['code']}] $message";
            }
            // The API error code is a string, so it goes into the message:
            // the second argument of Exception only accepts an integer.
            throw new Exception($message);
        }
        return $resultat;
    }

    /**
     * @return string Body of the response to a POST request.
     * @throws Exception When the request fails.
     */
    private function consulterEnPost() {
        return $this->consulter('POST');
    }

    /**
     * Sends the prepared parameters, URL-encoded in the request body, to the
     * prepared URL and returns the response body.
     *
     * @param string $mode HTTP method to use.
     * @return string The response body.
     * @throws Exception When the URL cannot be opened or the body cannot be read.
     */
    private function consulter($mode) {
        $entetes = array(
            'Content-type' => 'application/x-www-form-urlencoded',
            'User-Agent' => $this->userAgent);
        $contexte = array('http' => array(
            'method' => $mode,
            'header' => $this->getEnteteChaine($entetes),
            // '' instead of null: same result, without the PHP 8.1 deprecation.
            'content' => http_build_query($this->parametres, '', self::HTTP_URL_REQUETE_SEPARATEUR)));
        $contexteFlux = stream_context_create($contexte);
        $flux = fopen($this->url, 'r', false, $contexteFlux);

        if (!$flux) {
            // $http_response_header only exists when the server answered.
            $this->reponse_entetes = isset($http_response_header) ? $http_response_header : null;
            $e = "L'ouverture de l'url '{$this->url}' par la méthode HTTP '$mode' a échoué!";
            throw new Exception($e);
        }
        // Headers and metadata of the stream.
        $this->reponse_entetes = stream_get_meta_data($flux);
        // Current content of the URL.
        $contenu = stream_get_contents($flux);
        fclose($flux);

        if ($contenu === false) {
            throw new Exception("La lecture de la réponse de l'url '{$this->url}' a échoué!");
        }
        return $contenu;
    }

    /**
     * Formats headers as "Name: value" lines joined by CRLF.
     *
     * @param array $entetes Header values indexed by header name.
     * @return string The header block.
     */
    private function getEnteteChaine(array $entetes) {
        $entetesCleVal = array();
        foreach ($entetes as $cle => $valeur) {
            $entetesCleVal[] = $cle.': '.$valeur;
        }
        return implode("\r\n", $entetesCleVal);
    }

    /**
     * Turns the site-relative links (/wiki/ and /w/) of an HTML fragment into
     * absolute links pointing to the configured Wikipedia edition.
     *
     * @param string $txt HTML fragment.
     * @return string The fragment with absolute links.
     */
    private function remplacerUrls($txt) {
        $base = $this->getBaseWpURL();
        $remplacements = array(
            'href="/wiki/' => 'href="'.$base.'/wiki/',
            'href="/w/' => 'href="'.$base.'/w/');
        return strtr($txt, $remplacements);
    }
}
?> |