/trunk/services/modules/0.1/wikipedia/textes/PageTextes.php |
---|
New file |
0,0 → 1,132 |
<?php |
/**
 * "Page" sub-service of the Textes web service for the Wikipedia project.
 *
 * Loads one Wikipedia page (identified by the first URL resource) through a
 * WikipediaBot and returns its title, text (whole page, one section or the
 * taxobox), the MIME type of that text and the URL of the request.
 *
 * Supported request parameters, as read by this class:
 *  - retour               : MIME type of the response (only application/json).
 *  - txt.format           : 'txt' (wiki text, default) or 'htm' (rendered by the bot).
 *  - txt.section.position : index of the section to return.
 *  - txt.section.titre    : title of the section to return ('taxobox' extracts the taxobox).
 */
class PageTextes {

	const MIME_JSON = 'application/json';
	const PRESENCE_CHOROLOGIE = '1';

	private $parametres = array();
	private $ressources = array();
	private $retourFormatsSupportes = array(self::MIME_JSON);
	private $txtFormatsSupportes = array('txt', 'htm');
	private $serviceUrl = null;
	private $idPage = null;
	private $wpBot = null;
	private $infosPage = array();

	/**
	 * @param array     $ressources URL resources; index 0 is the page identifier.
	 * @param array     $parametres request parameters (see class description).
	 * @param Conteneur $conteneur  provides the Wikipedia bot, the 'url_service'
	 *                              parameter and the Url object used for 'href'.
	 */
	public function __construct($ressources, $parametres, Conteneur $conteneur) {
		$this->parametres = $parametres;
		$this->ressources = $ressources;
		$this->wpBot = $conteneur->getWikipediaBot();
		// A URL path separator is always '/', whatever the filesystem separator (DS) is.
		$url = $conteneur->getParametre('url_service').'/'.$this->ressources[0];
		$this->serviceUrl = $conteneur->getUrl($url);
	}

	/**
	 * Entry point of the sub-service.
	 *
	 * @return ResultatService result whose body is the page information array.
	 * @throws Exception with code RestServeur::HTTP_CODE_MAUVAISE_REQUETE when a
	 *                   parameter is invalid.
	 */
	public function consulter() {
		$this->idPage = $this->ressources[0];
		$this->definirValeurParDefautDesParametres();
		$this->verifierParametres();
		return $this->obtenirResultat();
	}

	/** Fills in 'retour' and 'txt.format' when the request did not provide them. */
	private function definirValeurParDefautDesParametres() {
		if (isset($this->parametres['retour']) == false) {
			$this->parametres['retour'] = self::MIME_JSON;
		}
		if (isset($this->parametres['txt.format']) == false) {
			$this->parametres['txt.format'] = 'txt';
		}
	}

	/**
	 * Validates 'retour' and 'txt.format'; all problems are reported together.
	 *
	 * @throws Exception when at least one parameter is missing or unsupported.
	 */
	private function verifierParametres() {
		$erreurs = array();

		if (isset($this->parametres['retour']) == false) {
			$erreurs[] = "Le paramètre type de retour 'retour' est obligatoire.";
		} else if ($this->verifierValeurParametreRetour() == false) {
			// The value itself was previously never checked against the supported list.
			$erreurs[] = "Le type de retour '{$this->parametres['retour']}' n'est pas supporté.";
		}

		if ($this->verifierValeurParametreTxtFormat() == false) {
			$erreurs[] = "Le format du texte '{$this->parametres['txt.format']}' n'est pas supporté.";
		}

		if (count($erreurs) > 0) {
			$message = implode('<br />', $erreurs);
			$code = RestServeur::HTTP_CODE_MAUVAISE_REQUETE;
			throw new Exception($message, $code);
		}
	}

	/** @return bool true when 'retour' is one of the supported response MIME types. */
	private function verifierValeurParametreRetour() {
		return in_array($this->parametres['retour'], $this->retourFormatsSupportes, true);
	}

	/** @return bool true when 'txt.format' is one of the supported text formats. */
	private function verifierValeurParametreTxtFormat() {
		return in_array($this->parametres['txt.format'], $this->txtFormatsSupportes, true);
	}

	/** Loads the page and wraps its information in a ResultatService. */
	private function obtenirResultat() {
		$this->chargerPageWp();
		$resultat = new ResultatService();
		$resultat->corps = $this->infosPage;
		$resultat->mime = $this->parametres['retour'];
		return $resultat;
	}

	/**
	 * Loads the page with the bot supplied by the container and builds the
	 * information array (id, titre, texte, mime, href).
	 */
	private function chargerPageWp() {
		// The injected bot is used as is: re-creating one here made the
		// container injection pointless (its default language is already 'fr').
		$this->wpBot->chargerPage($this->idPage);
		$this->infosPage = array();
		$this->infosPage['id'] = $this->idPage;
		$this->infosPage['titre'] = $this->wpBot->getPageTitre();
		$this->infosPage['texte'] = $this->getTxt();
		$this->infosPage['mime'] = $this->getTypeMime();
		$this->infosPage['href'] = $this->getHref();
	}

	/**
	 * Selects the requested part of the page and renders it when 'htm' is asked.
	 * 'txt.section.position' takes precedence over 'txt.section.titre'.
	 */
	private function getTxt() {
		if (isset($this->parametres['txt.section.position'])) {
			$txt = $this->wpBot->getSectionParNumero($this->parametres['txt.section.position']);
		} else if (isset($this->parametres['txt.section.titre'])) {
			$titreSection = $this->parametres['txt.section.titre'];
			if ($titreSection == 'taxobox') {
				$txt = $this->wpBot->extraireTaxobox();
			} else {
				$txt = $this->wpBot->getSectionParTitre($titreSection);
			}
		} else {
			$txt = $this->wpBot->getPageTxt();
		}

		if ($this->parametres['txt.format'] == 'htm') {
			$txt = $this->wpBot->rendre($txt);
		}
		return $txt;
	}

	/** @return string MIME type matching 'txt.format' ('' for an unknown format). */
	private function getTypeMime() {
		$mime = '';
		if ($this->parametres['txt.format'] == 'htm') {
			// 'txt/html' is not a registered media type; the HTML type is 'text/html'.
			$mime = 'text/html';
		} else if ($this->parametres['txt.format'] == 'txt') {
			$mime = 'text/plain';
		}
		return $mime;
	}

	/** @return string URL of the service with the current parameters as query. */
	private function getHref() {
		$this->serviceUrl->setRequete($this->parametres);
		return $this->serviceUrl->getUrl();
	}
}
?> |
/trunk/services/modules/0.1/wikipedia/Textes.php |
---|
New file |
0,0 → 1,79 |
<?php |
// declare(encoding='UTF-8'); |
/**
 * Class implementing the eFlore Textes API for the WIKIPEDIA project.
 *
 * It analyses the URL resources, selects the sub-service to run (currently
 * only 'Page', handled by the PageTextes class) and delegates the request to it.
 *
 * @see http://www.tela-botanica.org/wikini/eflore/wakka.php?wiki=EfloreApi01Textes
 *
 * @package eFlore/services
 * @author Jean-Pascal MILCENT <jpm@tela-botanica.org>
 * @license GPL v3 <http://www.gnu.org/licenses/gpl.txt>
 * @license CECILL v2 <http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt>
 * @version 1.0
 * @copyright 1999-2012 Tela Botanica (accueil@tela-botanica.org)
 */
class Textes {

	private $parametres = array();
	private $ressources = array();
	// Name of the selected sub-service (prefix of the class to load); null until resolved.
	private $sousService = null;

	/**
	 * Entry point of the service.
	 *
	 * @param array $ressources URL resources of the request.
	 * @param array $parametres query parameters of the request.
	 * @return mixed whatever the selected sub-service returns from consulter().
	 * @throws Exception when the resources do not designate a supported sub-service.
	 */
	public function consulter($ressources, $parametres) {
		$this->parametres = $parametres;
		$this->ressources = $ressources;
		// Never reuse the sub-service selected by a previous call on the same instance.
		$this->sousService = null;

		$this->analyserRessources();
		return $this->executerSousService();
	}

	/**
	 * Chooses the sub-service from the number and shape of the resources.
	 *
	 * @throws Exception when there is no resource, more than one, or an invalid one.
	 */
	private function analyserRessources() {
		$nbreRessources = count($this->ressources);

		if ($nbreRessources == 0) {
			$message = "A implémenter : listes des pages trouvées";
			$code = RestServeur::HTTP_CODE_RESSOURCE_INTROUVABLE;
			throw new Exception($message, $code);
		}

		if ($nbreRessources > 1) {
			$message = "Les ressources indiquées ne sont pas valables.";
			$code = RestServeur::HTTP_CODE_MAUVAISE_REQUETE;
			throw new Exception($message, $code);
		}

		if ($this->etreRessourceIdentifiants(0) == false) {
			// The closing quote around the resource value was missing in this message.
			$message = "La ressource n°1 '{$this->ressources[0]}' indiquée n'est pas valable.";
			$code = RestServeur::HTTP_CODE_MAUVAISE_REQUETE;
			throw new Exception($message, $code);
		}

		$this->sousService = 'Page';
	}

	/**
	 * Tells whether the resource at $position looks like a page identifier:
	 * one run of letters, optionally followed by '_' and a second run of
	 * letters (matched case-insensitively).
	 *
	 * @param int $position index of the resource to test.
	 * @return bool false when the resource is absent or does not match.
	 */
	private function etreRessourceIdentifiants($position) {
		// An absent resource cannot be a valid identifier.
		if (isset($this->ressources[$position]) == false) {
			return false;
		}

		$supraSpPattern = "[A-Z][a-z]+";
		$spPattern = "{$supraSpPattern}_[a-z]+";
		$pagePattern = "/^(?:$supraSpPattern|$spPattern)$/i";

		return preg_match($pagePattern, $this->ressources[$position]) === 1;
	}

	/**
	 * Loads the class of the selected sub-service from the 'textes' directory
	 * next to this file and runs it.
	 *
	 * @throws Exception when no sub-service has been selected.
	 */
	private function executerSousService() {
		if (isset($this->sousService) == false) {
			$message = "L'analyse des ressources n'a pu aboutir à déterminer le sous service à exécuter.";
			$code = RestServeur::HTTP_CODE_RESSOURCE_INTROUVABLE;
			throw new Exception($message, $code);
		}

		$classe = $this->sousService.'Textes';
		require_once dirname(__FILE__).DS.'textes'.DS.$classe.'.php';
		$sousService = new $classe($this->ressources, $this->parametres, new Conteneur());
		return $sousService->consulter();
	}
}
?> |
/trunk/services/modules/0.1/Projets.php |
---|
122,10 → 122,12 |
return null; |
} |
$cheminBiblio = Config::get('chemin_bibliotheque'); |
$chemins = array(); |
$chemins[] = $this->cheminCourrant.$this->projetNom.DS; |
$chemins[] = $this->cheminCourrant.'commun'.DS; |
$chemins[] = Config::get('chemin_bibliotheque'); |
$chemins[] = $cheminBiblio; |
$chemins[] = $cheminBiblio.'robots'.DS; |
foreach ($chemins as $chemin) { |
$chemin = $chemin.$classe.'.php'; |
/trunk/services/configurations/config_wikipedia.ini |
---|
13,4 → 13,4 |
url_service="{ref:url_base}service:eflore:0.1/wikipedia" |
; Noms des services disponibles pour ce projet |
servicesDispo = "meta-donnees,aide,nom-commune" |
servicesDispo = "meta-donnees,aide,textes,nom-commune" |
/trunk/services/bibliotheque/Conteneur.php |
---|
153,6 → 153,16 |
return $formateur; |
} |
/**
 * Creates a new Wikipedia bot.
 *
 * @param array $options options passed unchanged to the WikipediaBot constructor.
 * @return WikipediaBot a fresh instance on every call.
 */
public function getWikipediaBot($options = array()) {
	return new WikipediaBot($options);
}
/**
 * Wraps a URL string in a Url object.
 *
 * @param string $url the URL handed to the Url constructor.
 * @return Url a fresh instance on every call.
 */
public function getUrl($url) {
	return new Url($url);
}
public function getServiceGenerique() { |
$ressources = $this->getRessourcesUrl(); |
$classe = $ressources->getServiceClasse(); |
/trunk/services/bibliotheque/robots/WikipediaBot.php |
---|
New file |
0,0 → 1,158 |
<?php |
/**
 * Minimal client for the MediaWiki API of a Wikipedia language edition.
 *
 * It loads the wiki text of a page, extracts parts of it (taxobox, sections)
 * and can ask the API to render wiki text to HTML.
 */
class WikipediaBot {

	const HTTP_URL_REQUETE_SEPARATEUR = '&';

	private $langue = 'fr';
	private $url = '';
	private $parametres = array();
	// Last decoded API response.
	private $resultats = array();
	private $titre = '';
	private $txt = '';
	private $userAgent = 'eFloreBot v0.1';
	private $reponse_entetes = null;

	/**
	 * @param array $options supported key: 'langue', the language code used as
	 *                       sub-domain of wikipedia.org (lower-cased here).
	 */
	public function __construct($options = array()) {
		if (array_key_exists('langue', $options)) {
			$this->langue = strtolower($options['langue']);
		}
	}

	/**
	 * Loads the title and the wiki text of an article (redirects are followed
	 * by the API). Title and text are reset to '' when the response does not
	 * contain them, for instance when the page does not exist.
	 *
	 * @param string $article title of the page to load.
	 * @throws Exception when the HTTP request fails or the API reports an error.
	 */
	public function chargerPage($article) {
		$this->initialiserRequete();
		$this->url = $this->getBaseApiURL();
		$this->parametres = array(
			'action' => 'query',
			'prop' => 'revisions',
			'titles' => $article,
			'rvprop' => 'content',
			'redirects' => 1
		);
		$this->resultats = $this->consulterAPI();

		$pages = array();
		if (isset($this->resultats['query']['pages']) && is_array($this->resultats['query']['pages'])) {
			$pages = $this->resultats['query']['pages'];
		}
		// Only one title is requested, so only the first page entry is used.
		$page = array_shift($pages);

		$this->titre = isset($page['title']) ? $page['title'] : '';
		$this->txt = isset($page['revisions'][0]['*']) ? $page['revisions'][0]['*'] : '';
	}

	/** @return string title of the loaded page. */
	public function getPageTitre() {
		return $this->titre;
	}

	/** @return string wiki text of the loaded page. */
	public function getPageTxt() {
		return $this->txt;
	}

	/**
	 * @return string the span from '{{Taxobox début' to '{{Taxobox fin}}' of the
	 *                loaded text, or '' when it is not found.
	 */
	public function getTaxobox() {
		$taxobox = '';
		if (preg_match('/([{]{2}Taxobox début.+[{]{2}Taxobox fin[}]{2})/s', $this->txt, $match)) {
			$taxobox = $match[1];
		}
		return $taxobox;
	}

	/**
	 * Returns the taxobox and removes it from the loaded text.
	 *
	 * @return string the taxobox, or '' when there is none (text left untouched).
	 */
	public function extraireTaxobox() {
		$taxobox = $this->getTaxobox();
		if ($taxobox !== '') {
			$this->txt = str_replace($taxobox, '', $this->txt);
		}
		return $taxobox;
	}

	/**
	 * Returns the text chunk at index $num after splitting the page on its
	 * '==...==' headings; index 0 is the text located before the first heading.
	 *
	 * @param int|string $num index of the chunk.
	 * @return string the chunk, or '' when the index does not exist.
	 */
	public function getSectionParNumero($num) {
		$sections = preg_split('/[=]{2}[^=]+[=]{2}/U', $this->txt);
		return isset($sections[$num]) ? $sections[$num] : '';
	}

	/**
	 * Returns the text following the heading '== $titre ==' up to the first
	 * blank line.
	 *
	 * @param string $titre exact title of the section.
	 * @return string the section text, or '' when the heading is not found.
	 */
	public function getSectionParTitre($titre) {
		$section = '';
		// The title comes from the request: it must be matched literally, not as a regex.
		$titreProtege = preg_quote($titre, '/');
		if (preg_match('/[=]{2} '.$titreProtege.' [=]{2}(.*)\n\n/sU', $this->txt, $match)) {
			$section = $match[1];
		}
		return $section;
	}

	/**
	 * Asks the API to render wiki text (with '<references />' appended) and
	 * makes the '/wiki/' and '/w/' links of the result absolute.
	 *
	 * @param string $wikitxt wiki text to render.
	 * @return string rendered text, or '' when the response contains none.
	 * @throws Exception when the HTTP request fails or the API reports an error.
	 */
	public function rendre($wikitxt) {
		$wikitxt .= '<references />';
		$this->initialiserRequete();
		$this->url = $this->getBaseApiURL();
		$this->parametres = array(
			'action' => 'parse',
			'prop' => 'text',
			'text' => $wikitxt
		);
		$this->resultats = $this->consulterAPI();

		$txt = '';
		if (isset($this->resultats['parse']['text']['*'])) {
			$txt = $this->resultats['parse']['text']['*'];
		}
		return $this->remplacerUrls($txt);
	}

	/** Clears the URL, parameters and results of the previous request. */
	private function initialiserRequete() {
		$this->url = '';
		$this->parametres = array();
		$this->resultats = array();
	}

	/**
	 * @return string base URL of the Wikipedia edition of the configured language.
	 */
	private function getBaseWpURL() {
		// NOTE(review): plain HTTP is used here; confirm whether https:// is required by the remote site.
		return "http://{$this->langue}.wikipedia.org";
	}

	/** @return string URL of the API entry point. */
	private function getBaseApiURL() {
		return $this->getBaseWpURL().'/w/api.php';
	}

	/**
	 * Sends the current parameters to the API (format 'php') and decodes the answer.
	 *
	 * @return array decoded response.
	 * @throws Exception when the response cannot be decoded or contains an 'error' entry.
	 */
	private function consulterAPI() {
		$this->parametres['format'] = 'php';
		$reponse = $this->consulterEnPost();

		// NOTE(review): unserialize() is applied to data received from the network;
		// consider requesting 'format' => 'json' and json_decode() instead.
		$resultat = unserialize($reponse);
		if (is_array($resultat) == false) {
			throw new Exception("La réponse de l'API '{$this->url}' n'a pas pu être décodée.");
		}

		if (isset($resultat['error'])) {
			$info = isset($resultat['error']['info']) ? $resultat['error']['info'] : 'Erreur inconnue';
			// The exception code must be an integer: the API message is only used as message.
			throw new Exception($info);
		}
		return $resultat;
	}

	/** Sends the current request with the POST method. */
	private function consulterEnPost() {
		return $this->consulter('POST');
	}

	/**
	 * Sends the current parameters to the current URL and returns the raw body.
	 *
	 * @param string $mode HTTP method.
	 * @return string|false content read from the stream.
	 * @throws Exception when the URL cannot be opened.
	 */
	private function consulter($mode) {
		$entetes = array(
			'Content-type' => 'application/x-www-form-urlencoded',
			'User-Agent' => $this->userAgent);
		$contexte = array('http' => array(
			'method' => $mode,
			'header' => $this->getEnteteChaine($entetes),
			'content' => http_build_query($this->parametres, '', self::HTTP_URL_REQUETE_SEPARATEUR)));
		$contexteFlux = stream_context_create($contexte);

		$flux = fopen($this->url, 'r', false, $contexteFlux);
		if (!$flux) {
			// The variable only exists when the HTTP wrapper received a response.
			$this->reponse_entetes = isset($http_response_header) ? $http_response_header : null;
			$e = "L'ouverture de l'url '{$this->url}' par la méthode HTTP '$mode' a échoué!";
			throw new Exception($e);
		}

		// Headers and metadata of the stream.
		$this->reponse_entetes = stream_get_meta_data($flux);
		// Body of the response.
		$contenu = stream_get_contents($flux);
		fclose($flux);
		return $contenu;
	}

	/**
	 * @param array $entetes header name => value.
	 * @return string the headers as 'Name: value' lines joined with CRLF.
	 */
	private function getEnteteChaine(Array $entetes) {
		$entetesCleVal = array();
		foreach ($entetes as $cle => $valeur) {
			$entetesCleVal[] = $cle.': '.$valeur;
		}
		return implode("\r\n", $entetesCleVal);
	}

	/** Prefixes the relative '/wiki/' and '/w/' href values with the Wikipedia base URL. */
	private function remplacerUrls($txt) {
		$base = $this->getBaseWpURL();
		$remplacements = array(
			'href="/wiki/' => 'href="'.$base.'/wiki/',
			'href="/w/' => 'href="'.$base.'/w/');
		return strtr($txt, $remplacements);
	}
}
?> |