| New file |
| 0,0 → 1,337 |
| <?php |
| //declare(encoding='UTF-8'); |
| /** |
| * Exemple de lancement du script : : |
| * /opt/lampp/bin/php cli.php lbf -a chargerTous |
| * |
| * @category php 5.2 |
| * @package eFlore/Scripts |
| * @author Delphine Cauquil <delphine@tela-botanica.org> |
| * @author Aurélien PERONNET <aurelien@tela-botanica.org> |
| * @copyright Copyright (c) 2014, Tela Botanica (accueil@tela-botanica.org) |
| * @license http://www.cecill.info/licences/Licence_CeCILL_V2-fr.txt Licence CECILL |
| * @license http://www.gnu.org/licenses/gpl.html Licence GNU-GPL |
| * @version $Id$ |
| */ |
| class Lbf extends EfloreScript { |
| |
| private $table = null; |
| private $pasInsertion = 1000; |
| private $departInsertion = 0; |
| |
| protected $parametres_autorises = array( |
| '-t' => array(false, false, 'Permet de tester le script sur un jeu réduit de données (indiquer le nombre de lignes).')); |
| |
| public function executer() { |
| try { |
| $this->initialiserProjet('lbf'); |
| |
| // Lancement de l'action demandée |
| $cmd = $this->getParametre('a'); |
| switch ($cmd) { |
| case 'chargerTous' : |
| $this->chargerStructureSql(); |
| $this->chargerlbf(); |
| $this->genererChpNomSciHtml(); |
| $this->genererChpFamille(); |
| //$this->genererChpHierarchie(); |
| break; |
| case 'chargerStructureSql' : |
| $this->chargerStructureSql(); |
| break; |
| case 'chargerlbf' : |
| $this->chargerlbf(); |
| break; |
| case 'genererNomSciHtml' : |
| $this->genererChpNomSciHtml(); |
| break; |
| case 'genererChpFamille' : |
| $this->genererChpFamille(); |
| break; |
| case 'supprimerTous' : |
| $this->supprimerTous(); |
| break; |
| default : |
| throw new Exception("Erreur : la commande '$cmd' n'existe pas!"); |
| } |
| } catch (Exception $e) { |
| $this->traiterErreur($e->getMessage()); |
| } |
| } |
| |
| private function chargerlbf() { |
| $chemin = Config::get('chemins.lbf'); |
| $table = Config::get('tables.lbf'); |
| $requete = "LOAD DATA INFILE '$chemin' ". |
| "REPLACE INTO TABLE $table ". |
| 'CHARACTER SET utf8 '. |
| 'FIELDS '. |
| " TERMINATED BY '\t' ". |
| " ENCLOSED BY '' ". |
| " ESCAPED BY '\\\' ". |
| 'IGNORE 1 LINES'; |
| $this->getBdd()->requeter($requete); |
| } |
| |
| private function genererChpNomSciHtml() { |
| $this->initialiserGenerationChamps(); |
| $this->preparerTablePrChpNomSciHtml(); |
| $generateur = new GenerateurNomSciHtml(); |
| $nbreTotal = $this->recupererNbTotalTuples(); |
| $erreurs = array(); |
| $this->departInsertion = 0; |
| while ($this->departInsertion < $nbreTotal) { |
| $resultat = $this->recupererTuplesPrChpNomSciHtml(); |
| |
| try { |
| $nomsSciEnHtml = $generateur->generer($resultat); |
| } catch (Exception $e) { |
| $erreurs[] = $e->getMessage(); |
| } |
| |
| $this->remplirChpNomSciHtm($nomsSciEnHtml); |
| $this->departInsertion += $this->pasInsertion; |
| $this->afficherAvancement("Insertion des noms scientifique au format HTML dans la base par paquet de {$this->pasInsertion} en cours"); |
| if ($this->stopperLaBoucle($this->getParametre('t'))) break; |
| } |
| echo "\n"; |
| |
| $this->creerFichierLog('Erreurs lors de la génération HTML des noms scientifiques', $erreurs, 'erreurs_noms_sci_html'); |
| } |
| |
| private function initialiserGenerationChamps() { |
| $this->table = Config::get('tables.lbf'); |
| } |
| |
| private function preparerTablePrChpNomSciHtml() { |
| $requete = "SHOW COLUMNS FROM {$this->table} LIKE 'nom_sci_html' "; |
| $resultat = $this->getBdd()->recuperer($requete); |
| if ($resultat === false) { |
| $requete = "ALTER TABLE {$this->table} ". |
| 'ADD nom_sci_html VARCHAR( 500 ) '. |
| 'CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL '; |
| $this->getBdd()->requeter($requete); |
| } |
| } |
| |
| private function recupererNbTotalTuples(){ |
| $requete = "SELECT count(*) AS nb FROM {$this->table} "; |
| $resultat = $this->getBdd()->recuperer($requete); |
| return $resultat['nb']; |
| } |
| |
| private function recupererTuplesPrChpNomSciHtml() { |
| $requete = 'SELECT num_nom, rang, nom_sci, nom_supra_generique, genre, epithete_infra_generique, '. |
| ' epithete_sp, type_epithete, epithete_infra_sp,cultivar_groupe, '. |
| ' nom_commercial, cultivar '. |
| "FROM {$this->table} ". |
| "LIMIT {$this->departInsertion},{$this->pasInsertion} "; |
| $resultat = $this->getBdd()->recupererTous($requete); |
| return $resultat; |
| } |
| |
| private function remplirChpNomSciHtm($nomsSciHtm) { |
| foreach ($nomsSciHtm as $id => $html) { |
| $html = $this->getBdd()->proteger($html); |
| $requete = "UPDATE {$this->table} SET nom_sci_html = $html WHERE num_nom = $id "; |
| $resultat = $this->getBdd()->requeter($requete); |
| if ($resultat === false) { |
| throw new Exception("Erreur d'insertion pour le tuple $id"); |
| } |
| } |
| } |
| |
| private function traiterResultatsFamille(&$resultats, &$noms, &$introuvables, &$introuvablesSyno) { |
| foreach ($resultats as $id => $nom) { |
| $nn = $nom['num_nom']; |
| $nnr = $nom['num_nom_retenu']; |
| $nts = $nom['num_tax_sup']; |
| $rg = $nom['rang']; |
| if ($nnr != '') { |
| if ($rg == '180') { |
| $noms[$nn] = $nom['nom_sci']; |
| } else { |
| if ($nn == $nnr) {// nom retenu |
| if (isset($noms[$nts])) { |
| // signifie que recupererTuplesPrChpFamille() devrait |
| // récupérer ce record *avant* |
| $noms[$nn] = $noms[$nts]; |
| } else { |
| $introuvables[] = $nn; |
| } |
| } else {// nom synonyme |
| if (isset($noms[$nnr])) { |
| // signifie que recupererTuplesPrChpFamille() devrait |
| // récupérer ce record *avant* |
| $noms[$nn] = $noms[$nnr]; |
| } else { |
| $introuvablesSyno[] = $nom; |
| } |
| } |
| } |
| } |
| unset($resultats[$id]); |
| $this->afficherAvancement("Attribution de leur famille aux noms en cours"); |
| if ($this->stopperLaBoucle($this->getParametre('t'))) break; |
| } |
| } |
| |
| private function genererChpFamille() { |
| $this->initialiserGenerationChamps(); |
| $this->preparerTablePrChpFamille(); |
| $resultats = $this->recupererTuplesPrChpFamille(); |
| $noms = array(); |
| $introuvables = array(); |
| $introuvablesSyno = array(); |
| $i = 1; |
| |
| while(true) { |
| printf("passe n°%d:\n", $i); |
| $this->traiterResultatsFamille($resultats, $noms, $introuvables, $introuvablesSyno); |
| echo "\n\n"; |
| // printf("noms: %d, introuvables: %d, introuvablesSyno: %d\n", count($noms), count($introuvables), count($introuvablesSyno)); |
| // XXX, au 22/07/2013, 3 passes sont suffisantes |
| // TODO: MySQL procédure stockée ! |
| if($i++ == 3) break; |
| $resultats = array_merge($resultats, $introuvables, $introuvablesSyno); |
| $introuvables = $introuvablesSyno = array(); |
| } |
| |
| foreach ($introuvablesSyno as $id => $nom) { |
| $nn = $nom['num_nom']; |
| $nnr = $nom['num_nom_retenu']; |
| if (isset($noms[$nnr])) { |
| $noms[$nn] = $noms[$nnr]; |
| } else { |
| $introuvables[] = $nn; |
| } |
| unset($introuvablesSyno[$id]); |
| $this->afficherAvancement("Attribution de leur famille aux synonymes en cours"); |
| } |
| echo "\n"; |
| |
| $msg = 'Plusieurs familles sont introuvables'; |
| $this->creerFichierLog($msg, $introuvables, 'famille_introuvable'); |
| |
| $this->remplirChpFamille($noms); |
| } |
| |
| private function preparerTablePrChpFamille() { |
| $requete = "SHOW COLUMNS FROM {$this->table} LIKE 'famille' "; |
| $resultat = $this->getBdd()->recuperer($requete); |
| if ($resultat === false) { |
| $requete = "ALTER TABLE {$this->table} ". |
| 'ADD famille VARCHAR(255) '. |
| 'CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL '; |
| $this->getBdd()->requeter($requete); |
| } |
| } |
| |
| private function recupererTuplesPrChpFamille() { |
| $requete = 'SELECT num_nom, num_nom_retenu, num_tax_sup, rang, nom_sci '. |
| "FROM {$this->table} ". |
| "WHERE rang >= 180 ". |
| "ORDER BY rang ASC, num_tax_sup ASC, num_nom_retenu DESC "; |
| $resultat = $this->getBdd()->recupererTous($requete); |
| return $resultat; |
| } |
| |
| private function remplirChpFamille($noms) { |
| foreach ($noms as $id => $famille) { |
| $famille = $this->getBdd()->proteger($famille); |
| $requete = "UPDATE {$this->table} SET famille = $famille WHERE num_nom = $id "; |
| $resultat = $this->getBdd()->requeter($requete); |
| if ($resultat === false) { |
| throw new Exception("Erreur d'insertion pour le tuple $id"); |
| } |
| $this->afficherAvancement("Insertion des noms de famille dans la base en cours"); |
| } |
| echo "\n"; |
| } |
| |
| private function genererChpHierarchie() { |
| $this->initialiserGenerationChamps(); |
| $this->preparerTablePrChpHierarchie(); |
| $table = Config::get('tables.lbf'); |
| |
| $requete = "UPDATE $table SET hierarchie = NULL "; |
| $mise_a_jour = $this->getBdd()->requeter($requete); |
| |
| $requete_hierarchie = "SELECT num_nom, num_nom_retenu, num_tax_sup FROM ".$table." ORDER BY rang DESC"; |
| |
| $resultat = $this->getBdd()->recupererTous($requete_hierarchie); |
| $num_nom_a_num_sup = array(); |
| foreach($resultat as &$taxon) { |
| $num_nom_a_num_sup[$taxon['num_nom']] = $taxon['num_tax_sup']; |
| } |
| $chemin_taxo = ""; |
| foreach($resultat as &$taxon) { |
| $chemin_taxo = $this->traiterHierarchieNumTaxSup($taxon['num_nom_retenu'], $num_nom_a_num_sup).'-'; |
| $requete = "UPDATE $table SET hierarchie = ".$this->getBdd()->proteger($chemin_taxo)." WHERE num_nom = ".$taxon['num_nom']." "; |
| $mise_a_jour = $this->getBdd()->requeter($requete); |
| $this->afficherAvancement("Insertion de la hierarchie taxonomique en cours"); |
| } |
| echo "\n"; |
| } |
| |
| private function traiterHierarchieNumTaxSup($num_nom_retenu, &$num_nom_a_num_sup) { |
| $chaine_hierarchie = ""; |
| if(isset($num_nom_a_num_sup[$num_nom_retenu])) { |
| $num_tax_sup = $num_nom_a_num_sup[$num_nom_retenu]; |
| $chaine_hierarchie = '-'.$num_tax_sup; |
| if($num_tax_sup != 0 && $num_tax_sup != '') { |
| $chaine_hierarchie = $this->traiterHierarchieNumTaxSup($num_tax_sup, $num_nom_a_num_sup).$chaine_hierarchie; |
| } |
| } |
| return $chaine_hierarchie; |
| } |
| |
| private function preparerTablePrChpHierarchie() { |
| $requete = "SHOW COLUMNS FROM {$this->table} LIKE 'hierarchie' "; |
| $resultat = $this->getBdd()->recuperer($requete); |
| if ($resultat === false) { |
| $requete = "ALTER TABLE {$this->table} ". |
| 'ADD hierarchie VARCHAR(1000) '. |
| 'CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL '; |
| $this->getBdd()->requeter($requete); |
| } |
| } |
| |
| private function genererDonneesTestMultiVersion() { |
| $contenuSql = $this->recupererContenu(Config::get('chemins.structureSqlTest')); |
| $this->executerScripSql($contenuSql); |
| |
| $table = Config::get('tables.lbf'); |
| $tableTest = Config::get('tables.lbfTest'); |
| $requete = "INSERT INTO $tableTest SELECT * FROM $table"; |
| $this->getBdd()->requeter($requete); |
| } |
| |
| private function supprimerDonneesTestMultiVersion() { |
| $tableMeta = Config::get('tables.lbfMeta'); |
| $requete = "DELETE FROM $tableMeta WHERE guid = 'urn:lsid:tela-botanica.org:lbf:1.02'"; |
| $this->getBdd()->requeter($requete); |
| |
| $tableTest = Config::get('tables.lbfTest'); |
| $requete = "DROP TABLE IF EXISTS $tableTest"; |
| $this->getBdd()->requeter($requete); |
| } |
| |
| private function supprimerTous() { |
| $requete = "DROP TABLE IF EXISTS lbf_meta, lbf_v1_00"; |
| $this->getBdd()->requeter($requete); |
| } |
| |
| private function creerFichierLog($message, $lignes, $nomFichier) { |
| $lignesNbre = count($lignes); |
| if ($lignesNbre != 0) { |
| echo "$message. Voir le log de $lignesNbre lignes :\n"; |
| |
| $logContenu = implode(", \n", $lignes); |
| $logFichier = realpath(dirname(__FILE__))."/log/$nomFichier.log"; |
| echo $logFichier."\n"; |
| file_put_contents($logFichier, $logContenu); |
| } |
| } |
| } |
| ?> |