Subversion Repositories eFlore/Projets.eflore-projets

Rev

Rev 274 | Blame | Compare with Previous | Last modification | View Log | RSS feed

<?php
/**
 * Minimal client for the MediaWiki Action API of a Wikipedia language edition.
 *
 * It can load the wikitext of an article, extract parts of it (taxobox,
 * sections) and ask the API to render a piece of wikitext as HTML.
 * Requests are sent with PHP's HTTP stream wrapper; no extension beyond
 * the one providing HTTPS streams is used.
 */
class WikipediaBot {
        const HTTP_URL_REQUETE_SEPARATEUR = '&';
        private $langue = 'fr';
        private $url = '';
        private $parametres = array();
        // Decoded result of the last API call (declared explicitly: it was
        // previously created on the fly as a dynamic property).
        private $resultats = array();
        private $titre = '';
        private $txt = '';
        private $userAgent = 'eFloreBot v0.1';
        private $reponse_entetes = null;

        /**
         * @param array $options supported key: 'langue', the Wikipedia language
         *                       code used as sub-domain (lower-cased here).
         */
        public function __construct($options = array()) {
                if (array_key_exists('langue', $options)) {
                        $this->langue = strtolower($options['langue']);
                }
        }

        /**
         * Loads the title and the wikitext of an article, following redirects.
         *
         * When the API answers without a usable page or revision, the title
         * and/or the text are left as empty strings.
         *
         * @param string $article title of the article to load.
         * @throws Exception when the HTTP request fails or the API reports an error.
         */
        public function chargerPage($article) {
                $this->initialiserRequete();
                $this->url = $this->getBaseApiURL();
                $this->parametres = array(
                        'action' => 'query',
                        'prop' => 'revisions',
                        'titles' => $article,
                        'rvprop' => 'content',
                        'redirects' => 1
                );
                $this->resultats = $this->consulterAPI();

                $this->titre = '';
                $this->txt = '';
                if (isset($this->resultats['query']['pages']) && is_array($this->resultats['query']['pages'])) {
                        // Pages are keyed by page id: only the first entry is of interest.
                        $pages = $this->resultats['query']['pages'];
                        $page = array_shift($pages);
                        if (isset($page['title'])) {
                                $this->titre = $page['title'];
                        }
                        if (isset($page['revisions'][0]['*'])) {
                                $this->txt = $page['revisions'][0]['*'];
                        }
                }
        }

        /**
         * @return string title of the last loaded page ('' when none).
         */
        public function getPageTitre() {
                return $this->titre;
        }

        /**
         * @return string current wikitext of the loaded page ('' when none).
         */
        public function getPageTxt() {
                return $this->txt;
        }

        /**
         * Returns the taxobox of the loaded page, from the opening
         * "{{Taxobox début" to the closing "{{Taxobox fin}}".
         *
         * @return string the taxobox wikitext, or '' when the page has none.
         */
        public function getTaxobox() {
                $taxobox = '';
                if (preg_match('/([{]{2}Taxobox début.+[{]{2}Taxobox fin[}]{2})/s', $this->txt, $match)) {
                        $taxobox = $match[1];
                }
                return $taxobox;
        }

        /**
         * Returns the taxobox and removes it from the stored wikitext.
         *
         * @return string the taxobox wikitext, or '' when the page has none.
         */
        public function extraireTaxobox() {
                $taxobox = $this->getTaxobox();
                $this->txt = str_replace($taxobox, '', $this->txt);
                return $taxobox;
        }

        /**
         * Returns the text found between two "== ... ==" headings.
         *
         * @param int $num index of the chunk: 0 is the text before the first
         *                 heading, 1 the text after the first heading, and so on.
         * @return string the chunk, or '' when the index does not exist.
         */
        public function getSectionParNumero($num) {
                $sections = preg_split('/[=]{2}[^=]+[=]{2}/U', $this->txt);
                $sectionTxt = isset($sections[$num]) ? $sections[$num] : '';
                return $sectionTxt;
        }

        /**
         * Returns the text following the heading "== $titre ==" up to the
         * first blank line.
         *
         * @param string $titre literal heading title (not a regular expression).
         * @return string the section text, or '' when the heading is not found.
         */
        public function getSectionParTitre($titre) {
                $section = '';
                // The title is plain text: escape it so that characters such as
                // '/', '(' or '.' neither break nor alter the pattern.
                $titreEchappe = preg_quote($titre, '/');
                if (preg_match('/[=]{2} '.$titreEchappe.' [=]{2}(.*)\n\n/sU', $this->txt, $match)) {
                        $section = $match[1];
                }
                return $section;
        }

        /**
         * Asks the API to render wikitext as HTML. A "<references />" tag is
         * appended to the wikitext, and root-relative "/wiki/" and "/w/" links
         * in the result are made absolute.
         *
         * @param string $wikitxt wikitext to render.
         * @return string the rendered HTML ('' when the API returns no text).
         * @throws Exception when the HTTP request fails or the API reports an error.
         */
        public function rendre($wikitxt) {
                $wikitxt .= '<references />';
                $this->initialiserRequete();
                $this->url = $this->getBaseApiURL();
                $this->parametres = array(
                        'action' => 'parse',
                        'prop' => 'text',
                        'text' => $wikitxt
                );
                $this->resultats = $this->consulterAPI();
                $txt = isset($this->resultats['parse']['text']['*']) ? $this->resultats['parse']['text']['*'] : '';
                $txt = $this->remplacerUrls($txt);
                return $txt;
        }

        /**
         * Resets the state belonging to the previous request.
         */
        private function initialiserRequete() {
                $this->url = '';
                $this->parametres = array();
                $this->resultats = array();
        }

        /**
         * @return string base URL of the Wikipedia edition for the current language.
         */
        private function getBaseWpURL() {
                // HTTPS is used because Wikimedia redirects plain HTTP, and a
                // redirected POST is not reliably replayed with its body.
                $baseURL = "https://{$this->langue}.wikipedia.org";
                return $baseURL;
        }

        /**
         * @return string URL of the API entry point.
         */
        private function getBaseApiURL() {
                $baseURL = $this->getBaseWpURL().'/w/api.php';
                return $baseURL;
        }

        /**
         * Sends the current parameters to the API and returns the decoded answer.
         *
         * @return array the decoded API result.
         * @throws Exception when the request fails, the answer can not be
         *                   decoded or the API reports an error.
         */
        private function consulterAPI() {
                $this->parametres['format'] = 'php';
                return $this->decoderReponse($this->consulterEnPost());
        }

        /**
         * Decodes a raw answer in the API's "php" (serialized) format.
         *
         * @param string $brut raw body returned by the API.
         * @return array the decoded API result.
         * @throws Exception when the body is not a serialized array or when it
         *                   contains an 'error' entry (message: its 'info' text).
         */
        private function decoderReponse($brut) {
                // NOTE(review): unserialize() is applied to data received from the
                // network. Consider requesting format=json and using json_decode().
                $resultat = unserialize($brut);
                if (!is_array($resultat)) {
                        throw new Exception("La réponse de l'API n'a pas pu être désérialisée.");
                }

                if (isset($resultat['error'])) {
                        // The second argument of Exception must be an integer: the
                        // API information is only usable as the message.
                        $info = isset($resultat['error']['info']) ? $resultat['error']['info'] : '';
                        throw new Exception($info);
                }
                return $resultat;
        }

        /**
         * @return string raw body of the answer to a POST request.
         */
        private function consulterEnPost() {
                return $this->consulter('POST');
        }

        /**
         * Sends the current parameters, form-encoded, to the current URL.
         *
         * @param string $mode HTTP method to use.
         * @return string raw body of the answer.
         * @throws Exception when the URL can not be opened.
         */
        private function consulter($mode) {
                $entetes = array(
                        'Content-type' => 'application/x-www-form-urlencoded',
                        'User-Agent' => $this->userAgent);
                $contexte = array('http' => array(
                        'method' => $mode,
                        'header' => $this->getEnteteChaine($entetes),
                        // '' (not null) is the "no numeric prefix" value expected by
                        // http_build_query.
                        'content' => http_build_query($this->parametres, '', self::HTTP_URL_REQUETE_SEPARATEUR)));
                $contexteFlux = stream_context_create($contexte);
                $flux = fopen($this->url, 'r', false, $contexteFlux);

                if (!$flux) {
                        // The variable only exists when the server actually answered.
                        $this->reponse_entetes = isset($http_response_header) ? $http_response_header : null;
                        $e = "L'ouverture de l'url '{$this->url}' par la méthode HTTP '$mode' a échoué!";
                        throw new Exception($e);
                }
                // Headers and metadata of the stream.
                $this->reponse_entetes = stream_get_meta_data($flux);
                // Body of the answer.
                $contenu = stream_get_contents($flux);
                fclose($flux);
                return $contenu;
        }

        /**
         * Builds the header string of the request.
         *
         * @param array $entetes header names mapped to their values.
         * @return string "Name: value" lines joined with CRLF.
         */
        private function getEnteteChaine(array $entetes) {
                $entetesCleVal = array();
                foreach ($entetes as $cle => $valeur) {
                        $entetesCleVal[] = $cle.': '.$valeur;
                }
                return implode("\r\n", $entetesCleVal);
        }

        /**
         * Makes root-relative "/wiki/" and "/w/" links absolute.
         *
         * @param string $txt HTML in which links are rewritten.
         * @return string the HTML with absolute links.
         */
        private function remplacerUrls($txt) {
                $base = $this->getBaseWpURL();
                $remplacements = array(
                        'href="/wiki/' => 'href="'.$base.'/wiki/',
                        'href="/w/' => 'href="'.$base.'/w/');
                $txt = strtr($txt, $remplacements);
                return $txt;
        }
}
?>