Subversion Repositories eFlore/Projets.eflore-projets

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
274 jpm 1
<?php
2
/**
 * Minimal client for the Wikipedia (MediaWiki) web API.
 *
 * It can load the wikitext of an article, extract pieces of it (taxobox,
 * sections) and ask the API to render arbitrary wikitext as HTML.
 *
 * State kept between calls: the title and wikitext of the last page loaded by
 * chargerPage(), the last decoded API response and the metadata of the last
 * HTTP stream.
 */
class WikipediaBot {
	/** Separator used between parameters when building the request body. */
	const HTTP_URL_REQUETE_SEPARATEUR = '&';

	/** @var string Language code used as the Wikipedia sub-domain (lower case). */
	private $langue = 'fr';
	/** @var string URL of the request being prepared. */
	private $url = '';
	/** @var array Parameters of the request being prepared. */
	private $parametres = array();
	/** @var array Decoded response of the last API call. */
	private $resultats = array();
	/** @var string Title of the last loaded page. */
	private $titre = '';
	/** @var string Wikitext of the last loaded page. */
	private $txt = '';
	/** @var string Value sent in the User-Agent header. */
	private $userAgent = 'eFloreBot v0.1';
	/** @var array|null Headers or stream metadata of the last HTTP exchange. */
	private $reponse_entetes = null;

	/**
	 * @param array $options Supported key: 'langue', the language code of the
	 *                       Wikipedia edition to query (stored in lower case).
	 */
	public function __construct($options = array()) {
		if (array_key_exists('langue', $options)) {
			$this->langue = strtolower($options['langue']);
		}
	}

	/**
	 * Loads the title and the wikitext of an article, following redirects.
	 *
	 * When the response holds no page or no revision (e.g. missing article),
	 * the title and/or text are reset to an empty string.
	 *
	 * @param string $article Title of the article to load.
	 * @throws Exception When the HTTP request fails or the API reports an error.
	 */
	public function chargerPage($article) {
		$this->initialiserRequete();
		$this->url = $this->getBaseApiURL();
		$this->parametres = array(
			'action' => 'query',
			'prop' => 'revisions',
			'titles' => $article,
			'rvprop' => 'content',
			'redirects' => 1
		);
		$this->resultats = $this->consulterAPI();

		$this->titre = '';
		$this->txt = '';
		if (isset($this->resultats['query']['pages']) && is_array($this->resultats['query']['pages'])) {
			// Pages are keyed by page id: only the first entry is of interest.
			$pages = $this->resultats['query']['pages'];
			$page = array_shift($pages);
			if (isset($page['title'])) {
				$this->titre = $page['title'];
			}
			if (isset($page['revisions'][0]['*'])) {
				$this->txt = $page['revisions'][0]['*'];
			}
		}
	}

	/**
	 * @return string Title of the last page loaded by chargerPage().
	 */
	public function getPageTitre() {
		return $this->titre;
	}

	/**
	 * @return string Wikitext of the last page loaded by chargerPage().
	 */
	public function getPageTxt() {
		return $this->txt;
	}

	/**
	 * Returns the taxobox of the loaded page, from the "Taxobox début" template
	 * to the "Taxobox fin" template (both included).
	 *
	 * @return string The taxobox wikitext, or an empty string when none is found.
	 */
	public function getTaxobox() {
		$taxobox = '';
		if (preg_match('/([{]{2}Taxobox début.+[{]{2}Taxobox fin[}]{2})/s', $this->txt, $match)) {
			$taxobox = $match[1];
		}
		return $taxobox;
	}

	/**
	 * Removes the taxobox from the loaded wikitext and returns it.
	 *
	 * @return string The removed taxobox, or an empty string when none is found.
	 */
	public function extraireTaxobox() {
		$taxobox = $this->getTaxobox();
		$this->txt = str_replace($taxobox, '', $this->txt);
		return $taxobox;
	}

	/**
	 * Returns a chunk of the loaded wikitext, split on "==...==" headings.
	 *
	 * @param int $num Index of the chunk; 0 is the text before the first heading.
	 * @return string The chunk, or an empty string when the index does not exist.
	 */
	public function getSectionParNumero($num) {
		$sections = preg_split('/[=]{2}[^=]+[=]{2}/U', $this->txt);
		return isset($sections[$num]) ? $sections[$num] : '';
	}

	/**
	 * Returns the text following the "== <titre> ==" heading, up to the first
	 * blank line.
	 *
	 * The title is matched literally: it is escaped before being inserted in
	 * the pattern so that characters such as "(", ")", "." or "/" are safe.
	 *
	 * @param string $titre Exact title of the section.
	 * @return string The section text, or an empty string when it is not found.
	 */
	public function getSectionParTitre($titre) {
		$section = '';
		$motif = '/[=]{2} '.preg_quote($titre, '/').' [=]{2}(.*)\n\n/sU';
		if (preg_match($motif, $this->txt, $match)) {
			$section = $match[1];
		}
		return $section;
	}

	/**
	 * Asks the API to render wikitext as HTML; a "<references />" tag is
	 * appended to the wikitext and relative wiki links are made absolute.
	 *
	 * @param string $wikitxt Wikitext to render.
	 * @return string The HTML, or an empty string when the response holds none.
	 * @throws Exception When the HTTP request fails or the API reports an error.
	 */
	public function rendre($wikitxt) {
		$wikitxt .= '<references />';
		$this->initialiserRequete();
		$this->url = $this->getBaseApiURL();
		$this->parametres = array(
			'action' => 'parse',
			'prop' => 'text',
			'text' => $wikitxt
		);
		$this->resultats = $this->consulterAPI();
		$txt = isset($this->resultats['parse']['text']['*']) ? $this->resultats['parse']['text']['*'] : '';
		return $this->remplacerUrls($txt);
	}

	/**
	 * Clears the URL, the parameters and the results of the previous request.
	 */
	private function initialiserRequete() {
		$this->url = '';
		$this->parametres = array();
		$this->resultats = array();
	}

	/**
	 * @return string Base URL of the Wikipedia edition for the current language.
	 */
	private function getBaseWpURL() {
		// HTTPS: a plain-HTTP request gets redirected and the POST body is not
		// re-sent when the redirect is followed.
		return "https://{$this->langue}.wikipedia.org";
	}

	/**
	 * @return string URL of the API entry point.
	 */
	private function getBaseApiURL() {
		return $this->getBaseWpURL().'/w/api.php';
	}

	/**
	 * Sends the prepared request, asking for the serialized PHP format, and
	 * decodes the response.
	 *
	 * @return array The decoded response.
	 * @throws Exception When the request fails, the response cannot be decoded
	 *                   or the API reports an error (message = error 'info').
	 */
	private function consulterAPI() {
		$this->parametres['format'] = 'php';
		$brut = $this->consulterEnPost();
		// NOTE(review): unserialize() on data received from the network can
		// instantiate arbitrary classes; consider format=json + json_decode().
		$resultat = unserialize($brut);

		if (!is_array($resultat)) {
			throw new Exception("La réponse de l'API '{$this->url}' n'a pas pu être décodée.");
		}
		if (isset($resultat['error'])) {
			// Exception codes must be integers: only the message is passed on.
			$info = isset($resultat['error']['info']) ? $resultat['error']['info'] : '';
			throw new Exception($info);
		}
		return $resultat;
	}

	/**
	 * @return string Raw body of the response to the prepared request sent by POST.
	 * @throws Exception When the URL cannot be opened.
	 */
	private function consulterEnPost() {
		return $this->consulter('POST');
	}

	/**
	 * Sends the prepared request (URL + parameters) and returns the raw body.
	 *
	 * @param string $mode HTTP method to use.
	 * @return string|false Raw response body, as given by stream_get_contents().
	 * @throws Exception When the URL cannot be opened.
	 */
	private function consulter($mode) {
		$entetes = array(
			'Content-type' => 'application/x-www-form-urlencoded',
			'User-Agent' => $this->userAgent
		);
		$contexte = array('http' => array(
			'method' => $mode,
			'header' => $this->getEnteteChaine($entetes),
			'content' => http_build_query($this->parametres, '', self::HTTP_URL_REQUETE_SEPARATEUR)
		));
		$flux = fopen($this->url, 'r', false, stream_context_create($contexte));

		if (!$flux) {
			// The variable only exists when an HTTP response was received.
			$this->reponse_entetes = isset($http_response_header) ? $http_response_header : null;
			$e = "L'ouverture de l'url '{$this->url}' par la méthode HTTP '$mode' a échoué!";
			throw new Exception($e);
		}
		// Headers and metadata of the stream.
		$this->reponse_entetes = stream_get_meta_data($flux);
		// Body of the response.
		$contenu = stream_get_contents($flux);
		fclose($flux);
		return $contenu;
	}

	/**
	 * Formats headers as a single string of "Name: value" lines joined by CRLF.
	 *
	 * @param array $entetes Header values indexed by header name.
	 * @return string
	 */
	private function getEnteteChaine(array $entetes) {
		$lignes = array();
		foreach ($entetes as $cle => $valeur) {
			$lignes[] = $cle.': '.$valeur;
		}
		return implode("\r\n", $lignes);
	}

	/**
	 * Turns the relative "/wiki/" and "/w/" links of rendered HTML into
	 * absolute links pointing to the current Wikipedia edition.
	 *
	 * @param string $txt HTML returned by the API.
	 * @return string
	 */
	private function remplacerUrls($txt) {
		$base = $this->getBaseWpURL();
		$remplacements = array(
			'href="/wiki/' => 'href="'.$base.'/wiki/',
			'href="/w/' => 'href="'.$base.'/w/'
		);
		return strtr($txt, $remplacements);
	}
}
158
?>