<?php
// Listing residue kept from the original header: "274 | jpm" (presumably revision and author; unverified).
/**
 * Small client for the MediaWiki API of one Wikipedia language edition.
 *
 * It loads the wikitext of an article, extracts parts of that text (taxobox,
 * sections) and asks the API to render wikitext as HTML.
 */
class WikipediaBot {
    /** Separator placed between the parameters of the encoded request body. */
    const HTTP_URL_REQUETE_SEPARATEUR = '&';

    /** @var string Language code, used as the Wikipedia sub-domain. */
    private $langue = 'fr';

    /** @var string URL of the request being prepared. */
    private $url = '';

    /** @var array Parameters of the request being prepared. */
    private $parametres = array();

    /** @var string Title of the last loaded page. */
    private $titre = '';

    /** @var string Wikitext of the last loaded page. */
    private $txt = '';

    /** @var string Value sent in the User-Agent request header. */
    private $userAgent = 'eFloreBot v0.1';

    /** @var array|null Stream meta data of the last response, or the raw response headers after a failure. */
    private $reponse_entetes = null;

    /** @var array Decoded result of the last API call. */
    private $resultats = array();

    /**
     * @param array $options Supported key: 'langue' (language code, lower-cased before use).
     */
    public function __construct($options = array()) {
        if (array_key_exists('langue', $options)) {
            $this->langue = strtolower($options['langue']);
        }
    }

    /**
     * Loads the title and wikitext of an article, following redirects.
     *
     * When the response holds no page or no revision, the title and/or the
     * text are set to an empty string.
     *
     * @param string $article Title of the article to load.
     * @throws Exception When the request fails or the API reports an error.
     */
    public function chargerPage($article) {
        $this->initialiserRequete();
        $this->url = $this->getBaseApiURL();
        $this->parametres = array(
            'action' => 'query',
            'prop' => 'revisions',
            'titles' => $article,
            'rvprop' => 'content',
            'redirects' => 1
        );
        $this->resultats = $this->consulterAPI();

        $titre = '';
        $txt = '';
        if (isset($this->resultats['query']['pages']) && is_array($this->resultats['query']['pages'])) {
            // Pages are keyed by page id; only the first entry is used.
            $pages = $this->resultats['query']['pages'];
            $page = reset($pages);
            if (isset($page['title'])) {
                $titre = $page['title'];
            }
            if (isset($page['revisions'][0]['*'])) {
                $txt = $page['revisions'][0]['*'];
            }
        }
        $this->titre = $titre;
        $this->txt = $txt;
    }

    /**
     * @return string Title of the last loaded page.
     */
    public function getPageTitre() {
        return $this->titre;
    }

    /**
     * @return string Current wikitext (modified by extraireTaxobox()).
     */
    public function getPageTxt() {
        return $this->txt;
    }

    /**
     * Returns the taxobox of the current text, from the opening
     * "{{Taxobox début" up to the last "{{Taxobox fin}}".
     *
     * @return string The taxobox wikitext, or an empty string when none is found.
     */
    public function getTaxobox() {
        $taxobox = '';
        if (preg_match('/([{]{2}Taxobox début.+[{]{2}Taxobox fin[}]{2})/s', $this->txt, $match)) {
            $taxobox = $match[1];
        }
        return $taxobox;
    }

    /**
     * Removes the taxobox from the current text and returns it.
     *
     * @return string The removed taxobox, or an empty string when none is found.
     */
    public function extraireTaxobox() {
        $taxobox = $this->getTaxobox();
        if ($taxobox !== '') {
            $this->txt = str_replace($taxobox, '', $this->txt);
        }
        return $taxobox;
    }

    /**
     * Returns one chunk of the current text, split on every "==title==" marker.
     *
     * @param int $num Index of the chunk; 0 is the text before the first marker.
     * @return string The chunk, or an empty string when the index does not exist.
     */
    public function getSectionParNumero($num) {
        $sections = preg_split('/[=]{2}[^=]+[=]{2}/U', $this->txt);
        return isset($sections[$num]) ? $sections[$num] : '';
    }

    /**
     * Returns the text following the "== title ==" heading, up to the first blank line.
     *
     * @param string $titre Heading text, matched literally.
     * @return string The section text, or an empty string when the heading is not found.
     */
    public function getSectionParTitre($titre) {
        $section = '';
        // The title is plain text: escape it so that characters such as "(", "+" or "/"
        // are not interpreted as regular expression syntax.
        $motif = '/[=]{2} '.preg_quote($titre, '/').' [=]{2}(.*)\n\n/sU';
        if (preg_match($motif, $this->txt, $match)) {
            $section = $match[1];
        }
        return $section;
    }

    /**
     * Asks the API to render wikitext as HTML and makes the site-relative links absolute.
     *
     * @param string $wikitxt Wikitext to render; a "<references />" tag is appended to it.
     * @return string The rendered HTML, or an empty string when the response holds no text.
     * @throws Exception When the request fails or the API reports an error.
     */
    public function rendre($wikitxt) {
        $wikitxt .= '<references />';
        $this->initialiserRequete();
        $this->url = $this->getBaseApiURL();
        $this->parametres = array(
            'action' => 'parse',
            'prop' => 'text',
            'text' => $wikitxt
        );
        $this->resultats = $this->consulterAPI();
        $html = isset($this->resultats['parse']['text']['*']) ? $this->resultats['parse']['text']['*'] : '';
        return $this->remplacerUrls($html);
    }

    /**
     * Clears the URL, the parameters and the result of the previous request.
     */
    private function initialiserRequete() {
        $this->url = '';
        $this->parametres = array();
        $this->resultats = array();
    }

    /**
     * @return string Base URL of the Wikipedia edition for the configured language.
     */
    private function getBaseWpURL() {
        // NOTE(review): HTTPS is used on the understanding that Wikimedia no longer accepts
        // API requests over plain HTTP; confirm. The PHP build must provide HTTPS stream support.
        return "https://{$this->langue}.wikipedia.org";
    }

    /**
     * @return string URL of the API entry point.
     */
    private function getBaseApiURL() {
        return $this->getBaseWpURL().'/w/api.php';
    }

    /**
     * Sends the prepared request and decodes the answer.
     *
     * @return array The decoded API response.
     * @throws Exception When the response cannot be decoded or contains an 'error' entry.
     */
    private function consulterAPI() {
        $this->parametres['format'] = 'php';
        $reponse = $this->consulterEnPost();
        // NOTE(review): unserialize() is applied to data received from the network; a tampered
        // response could inject objects. Consider requesting format=json and using json_decode().
        $resultat = unserialize($reponse);

        if (!is_array($resultat)) {
            throw new Exception("La réponse de l'url '{$this->url}' n'a pas pu être désérialisée!");
        }
        if (isset($resultat['error'])) {
            // Only the message is passed on: the exception code must be an integer,
            // so the error text cannot be used for it.
            $info = isset($resultat['error']['info']) ? $resultat['error']['info'] : "Erreur inconnue de l'API";
            throw new Exception($info);
        }
        return $resultat;
    }

    /**
     * @return string Raw body of the response to the prepared request, sent with POST.
     * @throws Exception When the URL cannot be opened.
     */
    private function consulterEnPost() {
        return $this->consulter('POST');
    }

    /**
     * Sends the prepared parameters to the prepared URL and returns the response body.
     *
     * @param string $mode HTTP method to use.
     * @return string|false Raw response body as returned by stream_get_contents().
     * @throws Exception When the URL cannot be opened.
     */
    private function consulter($mode) {
        $entetes = array(
            'Content-type' => 'application/x-www-form-urlencoded',
            'User-Agent' => $this->userAgent
        );
        $contexte = array('http' => array(
            'method' => $mode,
            'header' => $this->getEnteteChaine($entetes),
            // The numeric prefix is unused; an empty string is passed because the parameter expects a string.
            'content' => http_build_query($this->parametres, '', self::HTTP_URL_REQUETE_SEPARATEUR)
        ));
        $flux = fopen($this->url, 'r', false, stream_context_create($contexte));

        if (!$flux) {
            // The variable only exists once the HTTP wrapper has received a response.
            $this->reponse_entetes = isset($http_response_header) ? $http_response_header : null;
            $e = "L'ouverture de l'url '{$this->url}' par la méthode HTTP '$mode' a échoué!";
            throw new Exception($e);
        }
        // Headers and meta data of the stream.
        $this->reponse_entetes = stream_get_meta_data($flux);
        // Body of the response.
        $contenu = stream_get_contents($flux);
        fclose($flux);
        return $contenu;
    }

    /**
     * Formats headers as "Name: value" lines joined with CRLF.
     *
     * @param array $entetes Header values indexed by header name.
     * @return string The header block, without a trailing line break.
     */
    private function getEnteteChaine(array $entetes) {
        $lignes = array();
        foreach ($entetes as $cle => $valeur) {
            $lignes[] = $cle.': '.$valeur;
        }
        return implode("\r\n", $lignes);
    }

    /**
     * Turns the site-relative "/wiki/" and "/w/" links of rendered HTML into absolute links.
     *
     * @param string $txt HTML returned by the API.
     * @return string The HTML with absolute links.
     */
    private function remplacerUrls($txt) {
        $base = $this->getBaseWpURL();
        $remplacements = array(
            'href="/wiki/' => 'href="'.$base.'/wiki/',
            'href="/w/' => 'href="'.$base.'/w/'
        );
        return strtr((string) $txt, $remplacements);
    }
}
?>