Rev 415 | Rev 439 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
<?php/** Exemple lancement:* /opt/lampp/bin/php -d memory_limit=3500M ~/web/eflore-projets/scripts/cli.php baseflor -a chargerTous*/class Baseflor extends EfloreScript {private $table = null;private $nb_erreurs;private $erreurs_ligne;private $ligne_num;private $colonne_valeur;private $colonne_num;private $type_bio = array("A","a","B","b","C","c","Cfru","cfru","Csuf","csuf","Ccou","ccou","H","h","Heri","heri","Hsto","hsto","Hces","hces","Hros","hros","Hrub","hrub","Hbis","hbis","G","g","Gbul","gbul","Gtub","gtub","Grhi","grhi","T","t","Tver","tver","Test","test");private $ss_type_bio = array("aqua", "lia", "épi", "hpar", "par", "suc", "semp", "cad", "car");//les deux suivants basés sur valeurs trouvées (--> pas de légende !)private $signes_seuls = array("?", "x", "x~", "x=","xb","xB","-");private $signes_nn_seuls = array("~", "=","b","B");//paramètres pour lancement de fonctions sur colonnesprivate $verifierIntervalle = array(23 => array(1,9), 24 => array(1,9), 25 => array(1,9),26 => array(1,9), 27 => array(1,12), 28 => array(1,9), 29 => array(1,9), 30 => array(0,9),31 => array(1,9), 32 => array(1,9), 33 => array(1,9), 34 => array(1,9), 35 => array(1,9),36 => array(1,12), 37 => array(1,9), 38 => array(1,9), 39 => array(0,9),);private $verifierColonnesBasiques = array(1 =>"/(^[0-9]*\/)|(inconnu)/", 2 => "/(^[AB]?[0-9]+$)|(^nc$)/",3 => "/(^[AB]?[0-9]+$)|(^nc$)/", 4 => "/[^0-9]+/", 5 => "/[^0-9]+/", 6 => "/[^0-9]+/", 7 => "/[^0-9]+/",8 => "/[^0-9]+/", 9 => "/[^0-9]+/", 10 => "/[^0-9]+/", 11 => "/[^0-9]+/", 12 => "/[^0-9]+/",13 => "/[^0-9]+/", 14 => "/^([1-9]|1[0-2])(\-([1-9]|1[0-2]))*$/", 16 => "/[^0-9]+/", 19 => "/[^0-9]+/",20 => "/[^0-9]+/", 21 => "/[^0-9]+/", 22 => "/[^0-9]+/", 40 => "/[^0-9]+/",41 => "/[^0-9]+/", 42 => "/[^0-9]+/", 43 => "/[^0-9]+/", 44 => "/[^0-9]+/", 45 => "/[^0-9]+/",46 => "/[^0-9]+/", 47 => "/[^0-9]+/", 48 => "/[^0-9]+/", 49 => "/[^0-9]+/", 50 => "/[^0-9]+/",51 => "/[^0-9]+/", 52 => "/[^0-9]+/", 53 => "/[^0-9]+/", 54 => "/[^0-9]+/", 55 => "/[^0-9]+/");public function executer() {// Lancement de l'action demandéetry {$this->initialiserProjet('baseflor');$cmd = $this->getParametre('a');switch ($cmd) {case 'chargerStructureSql' :$this->chargerStructureSql();break;case 'chargerDonnees' :$this->verifFichier();if($this->nb_erreurs == 0){$this->chargerDonnees();} else {echo "je ne peux pas charger les données car le fichier comporte des erreurs.Voir le fichier baseflor_verif.txt\n";}break;case 'supprimerTous' :$this->supprimerTous();break;case 'chargerOntologies' :$this->chargerOntologies();break;case 'verifierDonnees' :$this->verifFichier();break;case 'genererChamps' :$this->genererChpsBDNT();$this->genererChpsNumTaxon();$this->genererChpsNumNomen();break;case 'chargerMetadonnees':$this->chargerMetadonnees();break;default :throw new Exception("Erreur : la commande '$cmd' n'existe pas!");}} catch (Exception $e) {$this->traiterErreur($e->getMessage());}}//-- génération de champs --//private function genererChamps(){$this->genererChpsBDNT();$this->genererChpsNumTaxon();$this->genererChpsNumNomen();}private function genererChpsBDNT(){$this->initialiserGenerationChamps();$this->preparerTablePrChpsBDNT();$resultats = $this->recupererTuplesNumsOriginels();$bdnt = array();foreach ($resultats as $chps => $res) {$cle = $res['cle'];$nno = $res['num_nomen_originel'];$nto = $res['num_taxon_originel'];if ( (preg_match("/^([AB])[0-9]+$/", $nno, $retour)) || (preg_match("/^([AB])[0-9]+$/", $nto, $retour)) ){if($retour[1]=='A'){$bdnt[$cle]= "BDAFX";}else{$bdnt[$cle]= "BDBFX";}}elseif(($nno == 'nc') && ($nto == 'nc')){$bdnt[$cle]= "nc";}else{$bdnt[$cle]= "BDTFX";}}$this->remplirChamps($bdnt, "BDNT");}private function genererChpsNumTaxon(){$this->initialiserGenerationChamps();$this->preparerTablePrChpsNumTaxon();$resultats = $this->recupererTuplesNumsOriginels();$num_taxon= array();foreach ($resultats as $chps => $res) {$cle = $res['cle'];$nto = $res['num_taxon_originel'];if (preg_match("/^[AB]([0-9]+)$/", $nto, $retour)){$num_taxon[$cle]= intval($retour[1]);}elseif($nto == 'nc'){$num_taxon[$cle]= 0;}else{$num_taxon[$cle]= intval($nto);}}$this->remplirChamps($num_taxon, "num_taxon");}private function genererChpsNumNomen(){$this->initialiserGenerationChamps();$this->preparerTablePrChpsNumNomen();$resultats = $this->recupererTuplesNumsOriginels();$num_nomen= array();foreach ($resultats as $chps => $res) {$cle = $res['cle'];$nno = $res['num_nomen_originel'];if (preg_match("/^[AB]([0-9]+)$/", $nno, $retour)){$num_nomen[$cle]= intval($retour[1]);}elseif($nno == 'nc'){$num_nomen[$cle]= 0;}else{$num_nomen[$cle]=intval($nno);}}$this->remplirChamps($num_nomen, "num_nomen");}private function remplirChamps($champs, $nom_colonne) {foreach ($champs as $cle => $val) {$val = $this->getBdd()->proteger($val);$requete = "UPDATE {$this->table} SET $nom_colonne = $val WHERE cle = $cle ";$resultat = $this->getBdd()->requeter($requete);if ($resultat === false) {throw new Exception("Erreur d'insertion pour le tuple clé = ".$cle);}$this->afficherAvancement("Insertion des valeurs pour ".$nom_colonne." dans la base en cours");}echo "\n";}private function initialiserGenerationChamps() {$this->table = Config::get('tables.donnees');}private function preparerTablePrChpsBDNT() {$requete = "SHOW COLUMNS FROM {$this->table} LIKE 'BDNT' ";$resultat = $this->getBdd()->recuperer($requete);if ($resultat === false) {$requete = "ALTER TABLE {$this->table} ".'ADD BDNT VARCHAR( 6 ) '.'CHARACTER SET utf8 COLLATE utf8_general_ci'.' NOT NULL AFTER catminat_code ';$this->getBdd()->requeter($requete);}}private function preparerTablePrChpsNumTaxon() {$requete = "SHOW COLUMNS FROM {$this->table} LIKE 'num_taxon' ";$resultat = $this->getBdd()->recuperer($requete);if ($resultat === false) {$requete = "ALTER TABLE {$this->table} ".'ADD num_taxon INT( 10 ) NOT NULL'.' AFTER catminat_code';$this->getBdd()->requeter($requete);}}private function preparerTablePrChpsNumNomen() {$requete = "SHOW COLUMNS FROM {$this->table} LIKE 'num_nomen' ";$resultat = $this->getBdd()->recuperer($requete);if ($resultat === false) {$requete = "ALTER TABLE {$this->table} ".'ADD num_nomen INT( 10 ) NOT NULL'.' AFTER catminat_code';$this->getBdd()->requeter($requete);}}private function recupererTuplesNumsOriginels(){$requete = 'SELECT cle, num_taxon_originel, num_nomen_originel '."FROM {$this->table} ";$resultat = $this->getBdd()->recupererTous($requete);return $resultat;}//-- chargements, suppression, exécution --//protected function chargerMetadonnees() {$contenuSql = $this->recupererContenu(Config::get('chemins.metadonnees'));$this->executerScripSql($contenuSql);}private function chargerOntologies() {$chemin = Config::get('chemins.ontologies');$table = Config::get('tables.ontologies');$requete = "LOAD DATA INFILE '$chemin' "."REPLACE INTO TABLE $table ".'CHARACTER SET utf8 '.'FIELDS '." TERMINATED BY '\t' "." ENCLOSED BY '' "." ESCAPED BY '\\\' ";$this->getBdd()->requeter($requete);}protected function chargerStructureSql() {$contenuSql = $this->recupererContenu(Config::get('chemins.structureSql'));$this->executerScripSql($contenuSql);}protected function executerScripSql($sql) {$requetes = Outils::extraireRequetes($sql);foreach ($requetes as $requete) {$this->getBdd()->requeter($requete);}}private function chargerDonnees() {$table = Config::get('tables.donnees');$requete = "LOAD DATA INFILE '".Config::get('chemins.donnees')."' "."REPLACE INTO TABLE $table ".'CHARACTER SET utf8 '.'FIELDS '." TERMINATED BY '\t' "." ENCLOSED BY '' "." ESCAPED BY '\\\'";$this->getBdd()->requeter($requete);}private function supprimerTous() {$requete = "DROP TABLE IF EXISTS baseflor_meta, baseflor_ontologies, baseflor_v2012_03_19";$this->getBdd()->requeter($requete);}//-- vérifications de données --//private function verifierSiVide(){$bool=false;if ( $this->colonne_valeur == "" ){$bool=true;}return $bool;}private function verifierIntervallesTous($valeur){$intervalle=$this->verifierIntervalle[$this->colonne_num];$this->verifierIntervalleChiffres($intervalle[0], $intervalle[1],$valeur);}private function verifierIntervalleChiffres($mini, $max, $valeur){if (( ($valeur < $mini) || ($valeur > $max) ) && ($valeur != "")){$this->erreurs_ligne[$this->colonne_num] = $this->colonne_valeur;$this->nb_erreurs++;}}private function verifierValeursIndic(){if (( preg_match("/^([^0-9])*([0-9]+)([^0-9])*$/", $this->colonne_valeur,$retour) == 1 )){$this->verifierIntervallesTous($retour[2]);if((isset($retour[3]))&&(!(in_array($retour[3], $this->signes_nn_seuls))) ){$this->erreurs_ligne[$this->colonne_num] =$this->colonne_valeur;$this->nb_erreurs++;}if(($retour[1] != "-")&&($retour[1]!="") ){$this->erreurs_ligne[$this->colonne_num] =$this->colonne_valeur;$this->nb_erreurs++;if($this->ligne_num==3){}}}elseif(!(in_array( $this->colonne_valeur, $this->signes_seuls))&& !($this->verifierSiVide())) {$this->erreurs_ligne[$this->colonne_num] = $this->colonne_valeur;$this->nb_erreurs++;}}private function verifierTypeBio(){if (( preg_match("/(.+)\((.+)\)$/", $this->colonne_valeur,$retour) == 1 )){$this->verifType_SsType($retour[1]);$this->verifType_SsType($retour[2]);}else{$this->verifType_SsType($this->colonne_valeur);}}private function verifType_SsType($chaine_a_verif){//verifie typeif(( preg_match("/^([a-zA-Zé]+)\-(.+)$|^([a-zA-Zé]+[^\-])$/", $chaine_a_verif, $retour) == 1 )){$type=$retour[1];if(isset($retour[3])){$type=$retour[3];}if(!(in_array($type, $this->type_bio))) {$this->erreurs_ligne[$this->colonne_num] = $this->colonne_valeur;$this->nb_erreurs++;}//verifie sous_type(s)if ($retour[2]!=""){$ss_type[0]=$retour[2];if (( preg_match("/\-/", $ss_type[0])) == 1 ){$ss_type = preg_split("/\-/", $retour[2]);}foreach($ss_type as $sst){if(!(in_array($sst, $this->ss_type_bio))) {$this->erreurs_ligne[$this->colonne_num] = $this->colonne_valeur;$this->nb_erreurs++;}}}}}private function verifierColonne($chaine){if ( ( preg_match($chaine, $this->colonne_valeur) == 0 ) && !($this->verifierSiVide()) ){$this->erreurs_ligne[$this->colonne_num]=$this->colonne_valeur;$this->nb_erreurs++;}}//vérifie par colonnes les erreurs d'une ligneprivate function verifErreursLignes($colonne_array){$this->erreurs_ligne=array();for ($this->colonne_num = 0; $this->colonne_num < count($colonne_array); $this->colonne_num++){if ( isset($colonne_array) ){$this->colonne_valeur = $colonne_array[$this->colonne_num];if ( (( ($this->colonne_num > 0) && ($this->colonne_num < 15) ) || ( $this->colonne_num == 16 )|| (($this->colonne_num > 18 ) && ( $this->colonne_num < 23 ) )|| ( $this->colonne_num > 39 )) ){$motif = $this->verifierColonnesBasiques[$this->colonne_num];$this->verifierColonne($motif);}elseif ( $this->colonne_num == 15 ){$this->verifierTypeBio();} elseif (($this->colonne_num > 22 ) && ( $this->colonne_num < 33 )){$this->verifierIntervallesTous($this->colonne_valeur);}elseif (( $this->colonne_num > 32) && ( $this->colonne_num < 41)){$this->verifierValeursIndic();}} else {echo "ligne ".$this->ligne_num.": pas de tabulation \n ";}}}//verifie la cohérence des valeurs des colonnesprivate function verifFichier(){$dernier_nb_erreur = 0;if (file_exists(Config::get('chemins.donnees_verif'))) {unlink(Config::get('chemins.donnees_verif'));}$fichier = fopen(Config::get('chemins.donnees'), "r");$fichier2 = fopen(Config::get('chemins.donnees_verif'), "w");$this->valeurs=array();$this->nb_erreurs=0;if ( ($fichier != false) && ($fichier2 != false) ){$this->ligne_num=0;while ( !feof($fichier) ) {if ($this->ligne_num == 0){fputs($fichier2," !!! REGARDEZ LES COLONNES DANS NUMERO_COLONNES_IMPORTANT.TXT.\n");}$this->ligne_num++;$ligne = fgets($fichier);$col = preg_split("/ /", $ligne);$this->verifErreursLignes($col);//rédaction du fichier de vérifif(count($this->nb_erreurs)!= 0){if($dernier_nb_erreur != $this->nb_erreurs){$dernier_nb_erreur = $this->nb_erreurs;fputs($fichier2," \n erreurs sur la ligne".$this->ligne_num."\n");foreach ($this->erreurs_ligne as $cle => $v){fputs($fichier2,"colonne ".$cle." : ".$v." - ");}}}}}if ($this->nb_erreurs == 0){fputs($fichier2,"/n il n'y a pas d'erreurs./n");}echo $this->nb_erreurs."erreurs";fclose($fichier);fclose($fichier2);}}?>