Subversion Repositories eFlore/Projets.eflore-projets

Rev

Rev 372 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

<?php
/** Exemple lancement:
 * /opt/lampp/bin/php -d memory_limit=3500M ~/web/eflore-projets/scripts/cli.php baseflor -a chargerTous
*/
class Baseflor extends EfloreScript {
        
        private $table = null;
        private $nb_erreurs; 
        private $erreurs_ligne;
        private $ligne_num;
        private $colonne_valeur;
        private $colonne_num;
        private $type_bio = array(
                "A","a","B","b","C","c","Cfru","cfru","Csuf","csuf","Ccou","ccou","H","h","Heri",
                "heri","Hsto","hsto","Hces","hces","Hros","hros","Hrub","hrub","Hbis","hbis","G",
                "g","Gbul","gbul","Gtub","gtub","Grhi","grhi","T","t","Tver","tver","Test","test");
        private $ss_type_bio = array("aqua", "lia", "épi", "hpar", "par", "suc", "semp", "cad", "car");
        //les deux suivants basés sur valeurs trouvées (--> pas de légende !)
        private $signes_seuls = array("?", "x", "x~", "x=","xb","xB","-"); 
        private $signes_nn_seuls = array("~", "=","b","B");
        //paramètres pour lancement de fonctions sur colonnes
        private $verifierIntervalle = array(23 => array(1,9), 24 => array(1,9), 25 => array(1,9),
                26 => array(1,9), 27 => array(1,12), 28 => array(1,9), 29 => array(1,9), 30 => array(0,9),
                31 => array(1,9), 32 => array(1,9), 33 => array(1,9), 34 => array(1,9), 35 => array(1,9),
                36 => array(1,12), 37 => array(1,9), 38 => array(1,9), 39 => array(0,9),);
        private $verifierColonnesBasiques = array(1 =>"/(^[0-9]*\/)|(inconnu)/", 2 => "/(^[AB]?[0-9]+$)|(^nc$)/",
                3 => "/(^[AB]?[0-9]+$)|(^nc$)/", 4 => "/[^0-9]+/", 5 => "/[^0-9]+/", 6 => "/[^0-9]+/", 7 => "/[^0-9]+/",
                8 => "/[^0-9]+/", 9 => "/[^0-9]+/", 10 => "/[^0-9]+/", 11 => "/[^0-9]+/", 12 => "/[^0-9]+/",
                13 => "/[^0-9]+/", 14 => "/^([1-9]|1[0-2])(\-([1-9]|1[0-2]))*$/", 16 => "/[^0-9]+/", 19 => "/[^0-9]+/", 
                20 => "/[^0-9]+/", 21 => "/[^0-9]+/", 22 => "/[^0-9]+/",  40 => "/[^0-9]+/", 
                41 => "/[^0-9]+/", 42 => "/[^0-9]+/", 43 => "/[^0-9]+/", 44 => "/[^0-9]+/", 45 => "/[^0-9]+/", 
                46 => "/[^0-9]+/", 47 => "/[^0-9]+/", 48 => "/[^0-9]+/", 49 => "/[^0-9]+/", 50 => "/[^0-9]+/", 
                51 => "/[^0-9]+/", 52 => "/[^0-9]+/", 53 => "/[^0-9]+/", 54 => "/[^0-9]+/", 55 => "/[^0-9]+/"
                );

        
        public function executer() {
                // Lancement de l'action demandée
                try {
                        $this->initialiserProjet('baseflor');
                        $cmd = $this->getParametre('a');
                    switch ($cmd) {
                        case 'chargerTous' :
                                $this->supprimerTous();
                                $this->chargerStructureSql();
                                $this->chargerOntologies();
                                $this->verifFichier();
                                if ($this->$nb_erreurs == 0){
                                $this->chargerDonnees();
                                } else {
                                        echo "je ne peux pas charger les données car le fichier comporte des erreurs.\n";
                                }
                                $This->genererChamps();
                                break;
                        case 'chargerStructureSql' :
                                $this->chargerStructureSql();
                                break;
                        case 'chargerDonnees' :
                                $this->verifFichier();
                                if($this->$nb_erreurs == 0){
                                        $this->chargerDonnees();
                                } else {
                                        echo "je ne peux pas charger les données car le fichier comporte des erreurs.\n";
                                }
                                break;
                                case 'supprimerTous' :
                                        $this->supprimerTous();
                                        break;
                                case 'chargerOntologies' :
                                        $this->chargerOntologies();
                                        break;
                                case 'verifierDonnees' :
                                         $this->verifFichier();
                                         break;
                                case 'genererChamps' :
                                         $this->genererChpsBDNT();
                                         $this->genererChpsNumTaxon();
                                         $this->genererChpsNumNomen();
                                         break;
                                default :
                                        throw new Exception("Erreur : la commande '$cmd' n'existe pas!");
                        }
                } catch (Exception $e) {
                        $this->traiterErreur($e->getMessage());
                }
    }
    
   //-- génération de champs --// 
    private function genererChamps(){
        $this->genererChpsBDNT();
        $this->genererChpsNumTaxon();
        $this->genererChpsNumNomen();
    }
    
    private function genererChpsBDNT(){
        $this->initialiserGenerationChamps();
        $this->preparerTablePrChpsBDNT();
        $resultats = $this->recupererTuplesNumsOriginels();
        $bdnt = array();
        foreach ($resultats as $chps => $res) {
                $cle = $res['cle'];
                $nno = $res['num_nomen_originel'];
                $nto = $res['num_taxon_originel'];
                if ( (preg_match("/^([AB])[0-9]+$/", $nno, $retour)) || (preg_match("/^([AB])[0-9]+$/", $nto, $retour)) ){
                        if($retour[1]=='A'){
                                $bdnt[$cle]= "BDAFX";
                        }else{
                                $bdnt[$cle]= "BDBFX";
                        }
                        
                }elseif(($nno == 'nc') && ($nto == 'nc')){
                        $bdnt[$cle]= "nc";
                }else{
                        $bdnt[$cle]= "BDTFX";
                }
                
        }
        $this->remplirChamps($bdnt, "BDNT");
    }
    
    private function genererChpsNumTaxon(){
        $this->initialiserGenerationChamps();
        $this->preparerTablePrChpsNumTaxon();
        $resultats = $this->recupererTuplesNumsOriginels();
        $num_taxon= array();
        foreach ($resultats as $chps => $res) {
                $cle = $res['cle'];
                $nto = $res['num_taxon_originel'];
                if  (preg_match("/^[AB]([0-9]+)$/", $nto, $retour)){
                        $num_taxon[$cle]= intval($retour[1]);
                }elseif($nto == 'nc'){
                        $num_taxon[$cle]= 0;
                }else{
                        $num_taxon[$cle]= intval($nto);
                }
    
        }
        $this->remplirChamps($num_taxon, "num_taxon");
    }
    
    private function genererChpsNumNomen(){
        $this->initialiserGenerationChamps();
        $this->preparerTablePrChpsNumNomen();
        $resultats = $this->recupererTuplesNumsOriginels();
        $num_nomen= array();
        foreach ($resultats as $chps => $res) {
                $cle = $res['cle'];
                $nno = $res['num_nomen_originel'];
                if  (preg_match("/^[AB]([0-9]+)$/", $nno, $retour)){
                                $num_nomen[$cle]= intval($retour[1]);
                }elseif($nno == 'nc'){
                        $num_nomen[$cle]= 0;
                }else{
                        $num_nomen[$cle]=intval($nno);
                }
        }
        $this->remplirChamps($num_nomen, "num_nomen");
    }
    
     private function remplirChamps($champs, $nom_colonne) {
        foreach ($champs as $cle => $val) {
                $val = $this->getBdd()->proteger($val);
                $requete = "UPDATE {$this->table} SET $nom_colonne = $val WHERE cle = $cle ";
                $resultat = $this->getBdd()->requeter($requete);
                if ($resultat === false) {
                        throw new Exception("Erreur d'insertion pour le tuple clé = ".$cle);
                }
                $this->afficherAvancement("Insertion des valeurs pour ".$nom_colonne." dans la base en cours");
        }
        echo "\n";
    }
    
    

    
    private function initialiserGenerationChamps() {
        $this->table = Config::get('tables.donnees');
    }
    
    private function preparerTablePrChpsBDNT() {
        $requete = "SHOW COLUMNS FROM {$this->table} LIKE 'BDNT' ";
        $resultat = $this->getBdd()->recuperer($requete);
        if ($resultat === false) {
                $requete =      "ALTER TABLE {$this->table} ".
                                'ADD BDNT VARCHAR( 6 ) '.
                                'CHARACTER SET utf8 COLLATE utf8_general_ci'.
                                ' NOT NULL AFTER catminat_code ';
                $this->getBdd()->requeter($requete);
        }
    }
    
    private function preparerTablePrChpsNumTaxon() {
        $requete = "SHOW COLUMNS FROM {$this->table} LIKE 'num_taxon' ";
        $resultat = $this->getBdd()->recuperer($requete);
        if ($resultat === false) {
                $requete =      "ALTER TABLE {$this->table} ".
                                        'ADD num_taxon INT( 10 ) NOT NULL'.
                                        'BEFORE num_taxon_originel';
                $this->getBdd()->requeter($requete);
        }
    }
    
    private function preparerTablePrChpsNumNomen() {
        $requete = "SHOW COLUMNS FROM {$this->table} LIKE 'num_nomen' ";
        $resultat = $this->getBdd()->recuperer($requete);
        if ($resultat === false) {
                $requete =      "ALTER TABLE {$this->table} ".
                                        'ADD num_nomen INT( 10 ) NOT NULL'.
                                        'BEFORE num_taxon_originel';
                $this->getBdd()->requeter($requete);
        }
    }
    private function recupererTuplesNumsOriginels(){
        $requete = 'SELECT cle, num_taxon_originel, num_nomen_originel '.
                                        "FROM {$this->table} ";
        $resultat = $this->getBdd()->recupererTous($requete);
        return $resultat;
    }
    
    //-- chargements, suppression, exécution  --//
    
    private function chargerOntologies() {
        $chemin = Config::get('chemins.ontologies');
        $table = Config::get('tables.ontologies');
        $requete = "LOAD DATA INFILE '$chemin' ".
                        "REPLACE INTO TABLE $table ".
                        'CHARACTER SET utf8 '.
                        'FIELDS '.
                        "       TERMINATED BY '\t' ".
                        "       ENCLOSED BY '' ".
                        "       ESCAPED BY '\\\' "
                        ;
        $this->getBdd()->requeter($requete);
    }
    
    protected function chargerStructureSql() {
        $contenuSql = $this->recupererContenu(Config::get('chemins.structureSql'));
        $this->executerScripSql($contenuSql);
    }
    
    
    protected function executerScripSql($sql) {
        $requetes = Outils::extraireRequetes($sql);
        foreach ($requetes as $requete) {
                $this->getBdd()->requeter($requete);
        }
    }

    private function chargerDonnees() {
        $table = Config::get('tables.donnees');
        $requete = "LOAD DATA INFILE '".Config::get('chemins.donnees')."' ".
                                        "REPLACE INTO TABLE $table ".
                                        'CHARACTER SET utf8 '.
                                        'FIELDS '.
                                        "       TERMINATED BY '\t' ".
                                        "       ENCLOSED BY '' ".
                                        "       ESCAPED BY '\\\'";
        $this->getBdd()->requeter($requete);
    }
    
    private function supprimerTous() {
        $requete = "DROP TABLE IF EXISTS baseflor_meta, baseflor_v2012_03_19";
        $this->getBdd()->requeter($requete);
    }
        
    
    //-- vérifications de données  --//
    
    private function verifierSiVide(){
        $bool=false;
        if ( $this->colonne_valeur  == "" ){
                $bool=true;
        }
        return $bool;
    }
    
    
    private function verifierIntervallesTous($valeur){
        $intervalle=$this->verifierIntervalle[$this->colonne_num];
        $this->verifierIntervalleChiffres($intervalle[0], $intervalle[1],$valeur);
    }
    

    private function verifierIntervalleChiffres($mini, $max, $valeur){
         if (( ($valeur < $mini) || ($valeur > $max) ) && ($valeur != "")){
                $this->erreurs_ligne[$this->colonne_num] = $this->colonne_valeur;
                        $this->nb_erreurs++;
        }
        
    }
 
  
    private function verifierValeursIndic(){
        if (( preg_match("/^([^0-9])*([0-9]+)([^0-9])*$/", $this->colonne_valeur,$retour) == 1  )){
                $this->verifierIntervallesTous($retour[2]);
                if((isset($retour[3]))&&(!(in_array($retour[3], $this->signes_nn_seuls))) ){
                        $this->erreurs_ligne[$this->colonne_num] =$this->colonne_valeur;
                        $this->nb_erreurs++;
                }       
                if(($retour[1] != "-")&&($retour[1]!="") ){
                        $this->erreurs_ligne[$this->colonne_num] =$this->colonne_valeur;
                        $this->nb_erreurs++;
                        if($this->ligne_num==3){
                        }
                }       
        }elseif(!(in_array( $this->colonne_valeur, $this->signes_seuls))&& !($this->verifierSiVide())) {
                        $this->erreurs_ligne[$this->colonne_num] = $this->colonne_valeur;
                        $this->nb_erreurs++;
                }
    }

 private function verifierTypeBio(){
        if (( preg_match("/(.+)\((.+)\)$/", $this->colonne_valeur,$retour) == 1  )){
                $this->verifType_SsType($retour[1]);
                $this->verifType_SsType($retour[2]);
        }else{
                $this->verifType_SsType($this->colonne_valeur);
        }
 }

    private function verifType_SsType($chaine_a_verif){
        //verifie type
        if(( preg_match("/^([a-zA-Zé]+)\-(.+)$|^([a-zA-Zé]+[^\-])$/", $chaine_a_verif, $retour) == 1  )){
                $type=$retour[1];
                if(isset($retour[3])){
                        $type=$retour[3];
                }
                 if(!(in_array($type, $this->type_bio))) {
                        $this->erreurs_ligne[$this->colonne_num] = $this->colonne_valeur;
                        $this->nb_erreurs++;
                }
                //verifie sous_type(s)
                if ($retour[2]!=""){
                        $ss_type[0]=$retour[2];
                        if (( preg_match("/\-/", $ss_type[0])) == 1 ){
                                $ss_type = preg_split("/\-/", $retour[2]);
                        }
                        foreach($ss_type as $sst){
                                if(!(in_array($sst, $this->ss_type_bio))) {
                                        $this->erreurs_ligne[$this->colonne_num] = $this->colonne_valeur;
                                        $this->nb_erreurs++;
                                }
                        }
                }
        }
    }

    private function verifierColonne($chaine){
        if ( ( preg_match($chaine, $this->colonne_valeur) == 0 ) && !($this->verifierSiVide()) ){
                $this->erreurs_ligne[$this->colonne_num]=$this->colonne_valeur;
                $this->nb_erreurs++;
        }
    }
    
    //vérifie par colonnes les erreurs d'une ligne 
   private function verifErreursLignes($colonne_array){
        $this->erreurs_ligne=array();
     for ($this->colonne_num = 0; $this->colonne_num < count($colonne_array); $this->colonne_num++){
         if ( isset($colonne_array) ){
                $this->colonne_valeur = $colonne_array[$this->colonne_num];
                        if ( (( ($this->colonne_num > 0) && ($this->colonne_num < 15) ) || ( $this->colonne_num == 16 )  
                                || (($this->colonne_num > 18 ) && ( $this->colonne_num < 23 ) ) 
                                ||  ( $this->colonne_num > 39 ))  ){
                                $motif = $this->verifierColonnesBasiques[$this->colonne_num];
                                $this->verifierColonne($motif);
                        }elseif ( $this->colonne_num == 15 ){
                                $this->verifierTypeBio();
                        } elseif (($this->colonne_num > 22 ) && ( $this->colonne_num < 33 )){
                                $this->verifierIntervallesTous($this->colonne_valeur);                                  
                        }elseif (( $this->colonne_num > 32) && ( $this->colonne_num < 41)){
                                  $this->verifierValeursIndic();
                        }
                } else {
                        echo "ligne ".$this->ligne_num.": pas de tabulation  \n ";
                }       
   }
   
}
    
    //verifie la cohérence des valeurs des colonnes 
    private function verifFichier(){
        $dernier_nb_erreur =0;
        unlink(Config::get('chemins.donnees_verif'));
        $fichier = fopen(Config::get('chemins.donnees'), "r");
        $fichier2 = fopen(Config::get('chemins.donnees_verif'), "w");
        $this->valeurs=array();
        $this->nb_erreurs=0;
        if ( ($fichier != false) && ($fichier2 != false) ){
                $this->ligne_num=1;
                while ( !feof($fichier) ) {
                        if ($this->ligne_num == 1){
                                fputs($fichier2," !!! REGARDEZ LES COLONNES DANS NUMERO_COLONNES_IMPORTANT.TXT.\n");
                        }
                        $this->ligne_num++;
                        $ligne = fgets($fichier);
                        $col = preg_split("/    /", $ligne);
                        $this->verifErreursLignes($col);
                        //rédaction du fichier de vérif
                        if(count($this->nb_erreurs)!= 0){
                                if($dernier_nb_erreur != $this->nb_erreurs){
                                        $dernier_nb_erreur = $this->nb_erreurs;
                                        fputs($fichier2," \n erreurs sur la ligne".$this->ligne_num."\n");
                                        foreach ($this->erreurs_ligne as $cle => $v){
                                                fputs($fichier2,"colonne ".$cle." : ".$v." - ");
                                        }
                                }
                        } 
                }  
        }
        if ($this->nb_erreurs == 0){
                fputs($fichier2,"/n il n'y a pas d'erreurs./n");
        }
        echo $this->nb_erreurs."erreurs";
        fclose($fichier);
        fclose($fichier2);
    }
 

    
}

?>