Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
<?php
/**
 * Small client for the MediaWiki API of one Wikipedia language edition.
 *
 * It can load the wikitext of an article (chargerPage), extract parts of the
 * loaded wikitext (taxobox, sections) and ask the API to render wikitext as
 * HTML (rendre).
 */
class WikipediaBot {

    /** Separator placed between parameters in the URL-encoded request body. */
    const HTTP_URL_REQUETE_SEPARATEUR = '&';

    /** Language code used as the Wikipedia sub-domain. */
    private $langue = 'fr';

    /** URL of the request being built. */
    private $url = '';

    /** Parameters of the request being built. */
    private $parametres = array();

    /**
     * Decoded result of the last API call.
     *
     * Kept public: it used to be created dynamically on first assignment,
     * which made it publicly reachable; declaring it avoids the
     * dynamic-property deprecation without hiding it from existing readers.
     */
    public $resultats = array();

    /** Title of the last loaded page. */
    private $titre = '';

    /** Wikitext of the last loaded page. */
    private $txt = '';

    /** Value sent in the User-Agent request header. */
    private $userAgent = 'eFloreBot v0.1';

    /** Response headers or stream metadata of the last HTTP request. */
    private $reponse_entetes = null;

    /**
     * @param array $options Supported key: 'langue' (language code, lower-cased before use).
     */
    public function __construct($options = array()) {
        if (array_key_exists('langue', $options)) {
            $this->langue = strtolower($options['langue']);
        }
    }

    /**
     * Loads the title and wikitext of an article, following redirects.
     *
     * When the API returns no page or no revision (e.g. missing article), the
     * corresponding value is set to an empty string.
     *
     * @param string $article Title of the article to load.
     * @throws Exception When the HTTP request fails or the API reports an error.
     */
    public function chargerPage($article) {
        $this->initialiserRequete();
        $this->url = $this->getBaseApiURL();
        $this->parametres = array(
            'action' => 'query',
            'prop' => 'revisions',
            'titles' => $article,
            'rvprop' => 'content',
            'redirects' => 1
        );
        $this->resultats = $this->consulterAPI();

        $pages = array();
        if (isset($this->resultats['query']['pages']) && is_array($this->resultats['query']['pages'])) {
            $pages = $this->resultats['query']['pages'];
        }
        // Only the first returned page is used.
        $page = array_shift($pages);

        $this->titre = isset($page['title']) ? $page['title'] : '';
        $this->txt = isset($page['revisions'][0]['*']) ? $page['revisions'][0]['*'] : '';
    }

    /**
     * @return string Title of the last loaded page.
     */
    public function getPageTitre() {
        return $this->titre;
    }

    /**
     * @return string Wikitext of the last loaded page.
     */
    public function getPageTxt() {
        return $this->txt;
    }

    /**
     * Returns the taxobox found in the loaded wikitext.
     *
     * The match is greedy: it runs from the first "{{Taxobox début" to the
     * last "{{Taxobox fin}}".
     *
     * @return string The taxobox wikitext, or '' when none is found.
     */
    public function getTaxobox() {
        $taxobox = '';
        if (preg_match('/([{]{2}Taxobox début.+[{]{2}Taxobox fin[}]{2})/s', $this->txt, $match)) {
            $taxobox = $match[1];
        }
        return $taxobox;
    }

    /**
     * Removes the taxobox from the loaded wikitext and returns it.
     *
     * @return string The removed taxobox wikitext, or '' when none is found.
     */
    public function extraireTaxobox() {
        $taxobox = $this->getTaxobox();
        $this->txt = str_replace($taxobox, '', $this->txt);
        return $taxobox;
    }

    /**
     * Returns the text located after the $num-th level-2 heading.
     *
     * The wikitext is split on "==...==" headings, so index 0 is the text
     * before the first heading.
     *
     * @param int $num Index of the wanted chunk.
     * @return string The chunk, or '' when the index does not exist.
     */
    public function getSectionParNumero($num) {
        $sections = preg_split('/[=]{2}[^=]+[=]{2}/U', $this->txt);
        return isset($sections[$num]) ? $sections[$num] : '';
    }

    /**
     * Returns the text following the heading "== $titre ==" up to the first
     * blank line.
     *
     * @param string $titre Literal heading title (not a regular expression).
     * @return string The section text, or '' when the heading is not found.
     */
    public function getSectionParTitre($titre) {
        $section = '';
        // The title is quoted so that characters such as ( ) / ? are matched
        // literally instead of being interpreted as regex syntax.
        $motif = '/[=]{2} '.preg_quote($titre, '/').' [=]{2}(.*)\n\n/sU';
        if (preg_match($motif, $this->txt, $match)) {
            $section = $match[1];
        }
        return $section;
    }

    /**
     * Asks the API to render wikitext as HTML.
     *
     * A "<references />" tag is appended to the wikitext before rendering and
     * site-relative links of the result are made absolute.
     *
     * @param string $wikitxt Wikitext to render.
     * @return string Rendered HTML.
     * @throws Exception When the HTTP request fails or the API reports an error.
     */
    public function rendre($wikitxt) {
        $wikitxt .= '<references />';
        $this->initialiserRequete();
        $this->url = $this->getBaseApiURL();
        $this->parametres = array(
            'action' => 'parse',
            'prop' => 'text',
            'text' => $wikitxt
        );
        $this->resultats = $this->consulterAPI();
        $txt = $this->resultats['parse']['text']['*'];
        return $this->remplacerUrls($txt);
    }

    /** Resets the state of the request being built. */
    private function initialiserRequete() {
        $this->url = '';
        $this->parametres = array();
        $this->resultats = array();
    }

    /**
     * Returns the base URL of the Wikipedia site for the configured language.
     *
     * HTTPS is used: a plain-HTTP request would be redirected, which is not
     * reliable for POST requests.
     *
     * @return string
     */
    private function getBaseWpURL() {
        return "https://{$this->langue}.wikipedia.org";
    }

    /**
     * @return string URL of the API entry point.
     */
    private function getBaseApiURL() {
        return $this->getBaseWpURL().'/w/api.php';
    }

    /**
     * Sends the current parameters to the API and returns the decoded result.
     *
     * @return array Decoded API result.
     * @throws Exception When the response cannot be decoded or contains an 'error' entry.
     */
    private function consulterAPI() {
        $this->parametres['format'] = 'php';
        $reponse = $this->consulterEnPost();

        // NOTE(review): unserialize() is applied to data received from the
        // network; consider moving to format=json + json_decode().
        $resultat = unserialize($reponse);
        if (!is_array($resultat)) {
            throw new Exception("La réponse de l'API '{$this->url}' n'a pas pu être désérialisée.");
        }
        if (isset($resultat['error'])) {
            // The exception code must be an integer, so only the message is passed.
            throw new Exception($resultat['error']['info']);
        }
        return $resultat;
    }

    /**
     * @return string Raw response body of the POST request.
     * @throws Exception When the URL cannot be opened.
     */
    private function consulterEnPost() {
        return $this->consulter('POST');
    }

    /**
     * Sends the current parameters to the current URL as a URL-encoded body.
     *
     * @param string $mode HTTP method to use.
     * @return string Raw response body.
     * @throws Exception When the URL cannot be opened.
     */
    private function consulter($mode) {
        $entetes = array(
            'Content-type' => 'application/x-www-form-urlencoded',
            'User-Agent' => $this->userAgent
        );
        $contexte = array('http' => array(
            'method' => $mode,
            'header' => $this->getEnteteChaine($entetes),
            // '' (not null) as numeric prefix: null is deprecated for this parameter.
            'content' => http_build_query($this->parametres, '', self::HTTP_URL_REQUETE_SEPARATEUR)
        ));
        $contexteFlux = stream_context_create($contexte);

        $flux = fopen($this->url, 'r', false, $contexteFlux);
        if (!$flux) {
            // $http_response_header only exists when a response was received.
            $this->reponse_entetes = isset($http_response_header) ? $http_response_header : null;
            $e = "L'ouverture de l'url '{$this->url}' par la méthode HTTP '$mode' a échoué!";
            throw new Exception($e);
        }

        // Headers and metadata of the stream.
        $this->reponse_entetes = stream_get_meta_data($flux);
        // Body of the response.
        $contenu = stream_get_contents($flux);
        fclose($flux);
        return $contenu;
    }

    /**
     * Formats headers as "Name: value" lines joined with CRLF.
     *
     * @param array $entetes Header values indexed by header name.
     * @return string
     */
    private function getEnteteChaine(array $entetes) {
        $entetesCleVal = array();
        foreach ($entetes as $cle => $valeur) {
            $entetesCleVal[] = $cle.': '.$valeur;
        }
        return implode("\r\n", $entetesCleVal);
    }

    /**
     * Makes site-relative "/wiki/" and "/w/" links absolute.
     *
     * @param string $txt HTML to process.
     * @return string HTML with absolute links.
     */
    private function remplacerUrls($txt) {
        $base = $this->getBaseWpURL();
        $remplacements = array(
            'href="/wiki/' => 'href="'.$base.'/wiki/',
            'href="/w/' => 'href="'.$base.'/w/'
        );
        return strtr($txt, $remplacements);
    }
}
?>