Subversion Repositories eFlore/Projets.eflore-projets

Rev

Rev 1175 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
11 jpm 1
<?php
121 jpm 2
//declare(encoding='UTF-8');
11 jpm 3
/**
4
 * Exemple de lancement du script :
121 jpm 5
 * /opt/lampp/bin/php cli.php bdtfx -a chargerTous
6
 *
11 jpm 7
 * @category	php 5.2
121 jpm 8
 * @package		eFlore/Scripts
11 jpm 9
 * @author		Jennifer DHÉ <jennifer@tela-botanica.org>
10
 * @author		Jean-Pascal MILCENT <jpm@tela-botanica.org>
11
 * @copyright	Copyright (c) 2011, Tela Botanica (accueil@tela-botanica.org)
12
 * @license		http://www.cecill.info/licences/Licence_CeCILL_V2-fr.txt Licence CECILL
13
 * @license		http://www.gnu.org/licenses/gpl.html Licence GNU-GPL
14
 * @version		$Id$
15
 */
68 jpm 16
class Bdtfx extends EfloreScript {
46 jpm 17
 
68 jpm 18
	private $table = null;
19
	private $pasInsertion = 1000;
20
	private $departInsertion = 0;
11 jpm 21
 
22
	protected $parametres_autorises = array(
369 mathilde 23
		'-t' => array(false, false, 'Permet de tester le script sur un jeu réduit de données (indiquer le nombre de lignes).'));
11 jpm 24
 
25
	/**
	 * Script entry point: initialises the 'bdtfx' project, then runs the action
	 * named by the 'a' parameter.
	 *
	 * Every action name maps to one or more methods of this class, called in the
	 * listed order. Any Exception raised along the way (including the one thrown
	 * for an unknown action name) is caught and its message is handed to
	 * traiterErreur().
	 */
	public function executer() {
		// Action name => methods to run, in order. Note that the action
		// 'genererNomSciHtml' runs genererChpNomSciHtml() and 'genererBDTFXR'
		// runs genererBDTFXReduit().
		$actions = array(
			'chargerTous' => array(
				'chargerStructureSql',
				'chargerBdtfx',
				'genererChpNomSciHtml',
				'genererChpFamille',
				'genererDonneesTestMultiVersion',
				'genererChpHierarchie',
				'genererBDTFXReduit',
			),
			'chargerStructureSql' => array('chargerStructureSql'),
			'chargerBdtfx' => array('chargerBdtfx'),
			'genererNomSciHtml' => array('genererChpNomSciHtml'),
			'genererChpFamille' => array('genererChpFamille'),
			'genererChpHierarchie' => array('genererChpHierarchie'),
			'genererDonneesTestMultiVersion' => array('genererDonneesTestMultiVersion'),
			'genererBDTFXR' => array('genererBDTFXReduit'),
			'supprimerDonneesTestMultiVersion' => array('supprimerDonneesTestMultiVersion'),
			'supprimerTous' => array('supprimerTous'),
		);

		try {
			$this->initialiserProjet('bdtfx');

			// Look up the requested action.
			$cmd = $this->getParametre('a');
			$etapes = null;
			foreach ($actions as $nomAction => $methodes) {
				// Loose comparison on purpose: same matching rule as a switch statement.
				if ($cmd == $nomAction) {
					$etapes = $methodes;
					break;
				}
			}

			if ($etapes === null) {
				throw new Exception("Erreur : la commande '$cmd' n'existe pas!");
			}

			foreach ($etapes as $methode) {
				$this->$methode();
			}
		} catch (Exception $e) {
			$this->traiterErreur($e->getMessage());
		}
	}
11 jpm 75
 
46 jpm 76
	/**
	 * Bulk-loads the BDTFX data file into its table with LOAD DATA INFILE.
	 *
	 * The file path and the table name come from the configuration keys
	 * 'chemins.bdtfx' and 'tables.bdtfx'. Rows are loaded in REPLACE mode, fields
	 * are tab-separated and the first line of the file is ignored.
	 */
	private function chargerBdtfx() {
		$fichier = Config::get('chemins.bdtfx');
		$tableCible = Config::get('tables.bdtfx');

		// The fragments are concatenated as-is: each one carries its own trailing space.
		$morceaux = array(
			"LOAD DATA INFILE '$fichier' ",
			"REPLACE INTO TABLE $tableCible ",
			'CHARACTER SET utf8 ',
			'FIELDS ',
			"	TERMINATED BY '\t' ",
			"	ENCLOSED BY '' ",
			"	ESCAPED BY '\\\' ",
			'IGNORE 1 LINES',
		);

		$this->getBdd()->requeter(implode('', $morceaux));
	}
89
 
340 jpm 90
	/**
	 * Fills the nom_sci_html column of the table, batch by batch.
	 *
	 * Rows are read in batches of $this->pasInsertion, converted by
	 * GenerateurNomSciHtml::generer() and written back with
	 * remplirChpNomSciHtm(). Messages of exceptions thrown by the generator are
	 * collected and written to a log file at the end; the batch that failed is
	 * skipped. The loop stops early when stopperLaBoucle() says so for the 't'
	 * parameter.
	 *
	 * @throws Exception propagated from remplirChpNomSciHtm() when an update fails.
	 */
	private function genererChpNomSciHtml() {
		$this->initialiserGenerationChamps();
		$this->preparerTablePrChpNomSciHtml();
		$generateur = new GenerateurNomSciHtml();
		$nbreTotal = $this->recupererNbTotalTuples();
		$erreurs = array();
		$this->departInsertion = 0;
		while ($this->departInsertion < $nbreTotal) {
			$resultat = $this->recupererTuplesPrChpNomSciHtml();

			// Reset for every batch: if generer() throws, nothing must be written
			// (previously the variable was undefined on the first batch, or still
			// held the previous batch, which was then written a second time).
			$nomsSciEnHtml = array();
			try {
				$nomsSciEnHtml = $generateur->generer($resultat);
			} catch (Exception $e) {
				$erreurs[] = $e->getMessage();
			}

			if (!empty($nomsSciEnHtml)) {
				$this->remplirChpNomSciHtm($nomsSciEnHtml);
			}

			$this->departInsertion += $this->pasInsertion;
			$this->afficherAvancement("Insertion des noms scientifique au format HTML dans la base par paquet de {$this->pasInsertion} en cours");
			if ($this->stopperLaBoucle($this->getParametre('t'))) break;
		}
		echo "\n";

		$this->creerFichierLog('Erreurs lors de la génération HTML des noms scientifiques', $erreurs, 'erreurs_noms_sci_html');
	}
115
 
335 jpm 116
	/**
	 * Stores the table name read from the configuration key 'tables.bdtfx' in
	 * $this->table, which the queries built by this class interpolate.
	 */
	private function initialiserGenerationChamps() {
		$nomTable = Config::get('tables.bdtfx');
		$this->table = $nomTable;
	}
119
 
335 jpm 120
	/**
	 * Makes sure the table has a nom_sci_html column.
	 *
	 * The column is added (VARCHAR(500), utf8, nullable) only when the
	 * SHOW COLUMNS lookup returns false.
	 */
	private function preparerTablePrChpNomSciHtml() {
		$colonne = $this->getBdd()->recuperer("SHOW COLUMNS FROM {$this->table} LIKE 'nom_sci_html' ");
		if ($colonne !== false) {
			// Lookup did not return false: nothing to add.
			return;
		}

		$ajout = "ALTER TABLE {$this->table} ADD nom_sci_html VARCHAR( 500 ) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL ";
		$this->getBdd()->requeter($ajout);
	}
130
 
131
	/**
	 * Counts the rows of $this->table.
	 *
	 * @return int number of rows, or 0 when the count query yields no usable
	 *             result (recuperer() returned false or a row without 'nb').
	 */
	private function recupererNbTotalTuples(){
		$requete = "SELECT count(*) AS nb FROM {$this->table} ";
		$resultat = $this->getBdd()->recuperer($requete);
		// Do not index into a failed result: report "nothing to process" instead.
		if ($resultat === false || !isset($resultat['nb'])) {
			return 0;
		}
		return (int) $resultat['nb'];
	}
136
 
340 jpm 137
	/**
	 * Reads one batch of rows with the columns needed to build the HTML form of
	 * the scientific name.
	 *
	 * The batch starts at $this->departInsertion and holds at most
	 * $this->pasInsertion rows.
	 *
	 * @return mixed whatever recupererTous() returns for the query.
	 */
	private function recupererTuplesPrChpNomSciHtml() {
		// ORDER BY is required for offset-based paging: without it the server is
		// free to return rows in a different order from one query to the next, so
		// batches could overlap or miss rows.
		$requete = 'SELECT 	num_nom, rang, nom_sci, nom_supra_generique, genre, epithete_infra_generique, '.
			'	epithete_sp, type_epithete, epithete_infra_sp,cultivar_groupe, '.
			'	nom_commercial, cultivar '.
			"FROM {$this->table} ".
			'ORDER BY num_nom ASC '.
			"LIMIT {$this->departInsertion},{$this->pasInsertion} ";
		$resultat = $this->getBdd()->recupererTous($requete);
		return $resultat;
	}
146
 
340 jpm 147
	/**
	 * Writes the generated HTML names into the nom_sci_html column.
	 *
	 * @param array $nomsSciHtm HTML name indexed by num_nom.
	 * @throws Exception as soon as one UPDATE returns false.
	 */
	private function remplirChpNomSciHtm($nomsSciHtm) {
		foreach ($nomsSciHtm as $numNom => $nomHtml) {
			$valeur = $this->getBdd()->proteger($nomHtml);
			$maj = $this->getBdd()->requeter("UPDATE {$this->table} SET nom_sci_html = $valeur WHERE num_nom = $numNom ");
			if ($maj !== false) {
				continue;
			}
			throw new Exception("Erreur d'insertion pour le tuple $numNom");
		}
	}
339 jpm 157
 
757 raphael 158
	/**
	 * Runs one pass of family attribution over $resultats.
	 *
	 * For every row with a non-empty num_nom_retenu:
	 *  - rang 180 (presumably the family rank): the row's own nom_sci becomes its family;
	 *  - retained name (num_nom equals num_nom_retenu): inherits the family of
	 *    its num_tax_sup, or its num_nom is appended to $introuvables;
	 *  - synonym: inherits the family of its num_nom_retenu, or the whole row is
	 *    appended to $introuvablesSyno.
	 * Each processed row is removed from $resultats. The pass stops early when
	 * stopperLaBoucle() says so for the 't' parameter.
	 *
	 * @param array $resultats        rows to process (emptied as they are handled).
	 * @param array $noms             family name indexed by num_nom (completed here).
	 * @param array $introuvables     num_nom of retained names left without family.
	 * @param array $introuvablesSyno rows of synonyms left without family.
	 */
	private function traiterResultatsFamille(&$resultats, &$noms, &$introuvables, &$introuvablesSyno) {
		foreach ($resultats as $cle => $ligne) {
			$numNom = $ligne['num_nom'];
			$numNomRetenu = $ligne['num_nom_retenu'];
			$numTaxSup = $ligne['num_tax_sup'];
			$rang = $ligne['rang'];

			if ($numNomRetenu != '') {
				if ($rang == '180') {
					$noms[$numNom] = $ligne['nom_sci'];
				} elseif ($numNom == $numNomRetenu) {
					// Retained name: the parent taxon must already have a family,
					// i.e. it must have been processed earlier.
					if (isset($noms[$numTaxSup])) {
						$noms[$numNom] = $noms[$numTaxSup];
					} else {
						$introuvables[] = $numNom;
					}
				} elseif (isset($noms[$numNomRetenu])) {
					// Synonym whose retained name was processed earlier.
					$noms[$numNom] = $noms[$numNomRetenu];
				} else {
					$introuvablesSyno[] = $ligne;
				}
			}

			unset($resultats[$cle]);
			$this->afficherAvancement("Attribution de leur famille aux noms en cours");
			if ($this->stopperLaBoucle($this->getParametre('t'))) {
				break;
			}
		}
	}
339 jpm 192
 
757 raphael 193
	/**
	 * Computes the family of every name and stores it in the famille column.
	 *
	 * Rows are processed by traiterResultatsFamille() in up to three passes:
	 * names whose parent or retained name had no family yet are retried on the
	 * next pass. Synonyms still unresolved after the last pass get one final
	 * lookup; what remains is written to the 'famille_introuvable' log. The
	 * resolved families are then saved with remplirChpFamille().
	 *
	 * @throws Exception propagated from remplirChpFamille() when an update fails.
	 */
	private function genererChpFamille() {
		$this->initialiserGenerationChamps();
		$this->preparerTablePrChpFamille();
		$resultats = $this->recupererTuplesPrChpFamille();
		if (!is_array($resultats)) {
			// Failed or empty query: nothing to process.
			$resultats = array();
		}

		// traiterResultatsFamille() reports unresolved retained names by num_nom
		// only; this index lets us turn them back into full rows before retrying.
		// (Merging the bare ids into $resultats, as was done before, made the next
		// pass index into scalars instead of rows.)
		$lignesParNumNom = array();
		foreach ($resultats as $ligne) {
			$lignesParNumNom[$ligne['num_nom']] = $ligne;
		}

		$noms = array();
		$introuvables = array();
		$introuvablesSyno = array();
		$i = 1;

		while (true) {
			printf("passe n°%d:\n", $i);
			$this->traiterResultatsFamille($resultats, $noms, $introuvables, $introuvablesSyno);
			echo "\n\n";
			// Note: as of 2013-07-22, 3 passes are enough.
			// TODO: move this to a MySQL stored procedure.
			if ($i++ == 3) break;

			$aRetenter = array();
			foreach ($introuvables as $numNom) {
				if (isset($lignesParNumNom[$numNom])) {
					$aRetenter[] = $lignesParNumNom[$numNom];
				}
			}
			$resultats = array_merge($resultats, $aRetenter, $introuvablesSyno);
			$introuvables = $introuvablesSyno = array();
		}
		unset($lignesParNumNom);

		// Last chance for synonyms left over from the final pass.
		foreach ($introuvablesSyno as $id => $nom) {
			$nn = $nom['num_nom'];
			$nnr = $nom['num_nom_retenu'];
			if (isset($noms[$nnr])) {
				$noms[$nn] = $noms[$nnr];
			} else {
				$introuvables[] = $nn;
			}
			unset($introuvablesSyno[$id]);
			$this->afficherAvancement("Attribution de leur famille aux synonymes en cours");
		}
		echo "\n";

		$msg = 'Plusieurs familles sont introuvables';
		$this->creerFichierLog($msg, $introuvables, 'famille_introuvable');

		$this->remplirChpFamille($noms);
	}
699 aurelien 232
 
335 jpm 233
	/**
	 * Makes sure the table has a famille column.
	 *
	 * The column is added (VARCHAR(255), utf8, nullable) only when the
	 * SHOW COLUMNS lookup returns false.
	 */
	private function preparerTablePrChpFamille() {
		$verification = "SHOW COLUMNS FROM {$this->table} LIKE 'famille' ";
		$colonneAbsente = ($this->getBdd()->recuperer($verification) === false);
		if ($colonneAbsente) {
			$ajout = "ALTER TABLE {$this->table} ADD famille VARCHAR(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL ";
			$this->getBdd()->requeter($ajout);
		}
	}
339 jpm 243
 
335 jpm 244
	/**
	 * Reads every row with rang >= 180, with the columns needed for family
	 * attribution, ordered by rang, then num_tax_sup, then num_nom_retenu
	 * (descending).
	 *
	 * @return mixed whatever recupererTous() returns for the query.
	 */
	private function recupererTuplesPrChpFamille() {
		$selection = "SELECT num_nom, num_nom_retenu, num_tax_sup, rang, nom_sci FROM {$this->table} WHERE rang >= 180 ORDER BY rang ASC, num_tax_sup ASC, num_nom_retenu DESC ";
		return $this->getBdd()->recupererTous($selection);
	}
339 jpm 252
 
340 jpm 253
	/**
	 * Writes the computed family names into the famille column.
	 *
	 * @param array $noms family name indexed by num_nom.
	 * @throws Exception as soon as one UPDATE returns false.
	 */
	private function remplirChpFamille($noms) {
		foreach ($noms as $numNom => $nomFamille) {
			$valeur = $this->getBdd()->proteger($nomFamille);
			$maj = $this->getBdd()->requeter("UPDATE {$this->table} SET famille = $valeur WHERE num_nom = $numNom ");
			if ($maj === false) {
				throw new Exception("Erreur d'insertion pour le tuple $numNom");
			}
			$this->afficherAvancement("Insertion des noms de famille dans la base en cours");
		}

		echo "\n";
	}
699 aurelien 265
 
266
	/**
	 * Rebuilds the hierarchie column for every row of the table.
	 *
	 * The column is first reset to NULL. Then, for each row, the chain of
	 * num_tax_sup values above its num_nom_retenu is computed with
	 * traiterHierarchieNumTaxSup() and stored followed by a trailing '-'.
	 *
	 * @throws Exception when the update of a row returns false (same convention
	 *                   as remplirChpFamille() and remplirChpNomSciHtm()).
	 */
	private function genererChpHierarchie() {
		$this->initialiserGenerationChamps();
		$this->preparerTablePrChpHierarchie();
		$table = Config::get('tables.bdtfx');

		$this->getBdd()->requeter("UPDATE $table SET hierarchie = NULL ");

		$requete_hierarchie = "SELECT num_nom, num_nom_retenu, num_tax_sup FROM ".$table." ORDER BY rang DESC";
		$resultat = $this->getBdd()->recupererTous($requete_hierarchie);
		if (!is_array($resultat)) {
			// Failed or empty query: nothing to iterate over.
			$resultat = array();
		}

		// num_nom => num_tax_sup lookup used to walk up the tree.
		$num_nom_a_num_sup = array();
		foreach ($resultat as $taxon) {
			$num_nom_a_num_sup[$taxon['num_nom']] = $taxon['num_tax_sup'];
		}

		// Rows are only read here, so they are iterated by value (no dangling references).
		foreach ($resultat as $taxon) {
			$chemin_taxo = $this->traiterHierarchieNumTaxSup($taxon['num_nom_retenu'], $num_nom_a_num_sup).'-';
			$requete = "UPDATE $table SET hierarchie = ".$this->getBdd()->proteger($chemin_taxo)." WHERE num_nom = ".$taxon['num_nom']." ";
			if ($this->getBdd()->requeter($requete) === false) {
				throw new Exception("Erreur d'insertion pour le tuple {$taxon['num_nom']}");
			}
			$this->afficherAvancement("Insertion de la hierarchie taxonomique en cours");
		}
		echo "\n";
	}
290
 
291
	/**
	 * Builds the chain of parent identifiers above a name.
	 *
	 * Starting from $num_nom_retenu, the lookup is followed upwards; every
	 * num_tax_sup met is written as '-<id>', the most distant ancestor first and
	 * the direct parent last (e.g. '-1-12-345'). The walk stops when an id is
	 * missing from the lookup, when a parent is 0 or empty, or when an id comes
	 * up a second time (cyclic data, which previously caused unbounded
	 * recursion).
	 *
	 * @param mixed $num_nom_retenu    identifier to start from.
	 * @param array $num_nom_a_num_sup num_tax_sup indexed by num_nom (read only).
	 * @return string the chain, or '' when the starting id is not in the lookup.
	 */
	private function traiterHierarchieNumTaxSup($num_nom_retenu, &$num_nom_a_num_sup) {
		$segments = array();
		$visites = array();
		$courant = $num_nom_retenu;

		while (isset($num_nom_a_num_sup[$courant]) && !isset($visites[$courant])) {
			$visites[$courant] = true;
			$num_tax_sup = $num_nom_a_num_sup[$courant];
			$segments[] = '-'.$num_tax_sup;
			if ($num_tax_sup == 0 || $num_tax_sup == '') {
				// Top of the tree reached.
				break;
			}
			$courant = $num_tax_sup;
		}

		// Segments were collected from the direct parent upwards: put the root first.
		return implode('', array_reverse($segments));
	}
302
 
303
	/**
	 * Makes sure the table has a hierarchie column.
	 *
	 * The column is added (VARCHAR(1000), utf8, nullable) only when the
	 * SHOW COLUMNS lookup returns false.
	 */
	private function preparerTablePrChpHierarchie() {
		$existant = $this->getBdd()->recuperer("SHOW COLUMNS FROM {$this->table} LIKE 'hierarchie' ");
		if ($existant !== false) {
			return;
		}

		$definition = 'ADD hierarchie VARCHAR(1000) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL ';
		$this->getBdd()->requeter("ALTER TABLE {$this->table} ".$definition);
	}
11 jpm 313
 
103 jpm 314
	/**
	 * Creates the multi-version test data.
	 *
	 * Runs the SQL script found at the configuration path
	 * 'chemins.structureSqlTest', then copies every row of the table
	 * 'tables.bdtfx' into the table 'tables.bdtfxTest'.
	 */
	private function genererDonneesTestMultiVersion() {
		$cheminScript = Config::get('chemins.structureSqlTest');
		$this->executerScripSql($this->recupererContenu($cheminScript));

		$tableSource = Config::get('tables.bdtfx');
		$tableCible = Config::get('tables.bdtfxTest');
		$this->getBdd()->requeter("INSERT INTO $tableCible SELECT * FROM $tableSource");
	}
323
 
324
	/**
	 * Removes the multi-version test data: deletes the metadata row whose guid
	 * is 'urn:lsid:tela-botanica.org:bdtfx:1.02' from the table
	 * 'tables.bdtfxMeta', then drops the table 'tables.bdtfxTest' if it exists.
	 */
	private function supprimerDonneesTestMultiVersion() {
		$suppressionMeta = sprintf(
			"DELETE FROM %s WHERE guid = 'urn:lsid:tela-botanica.org:bdtfx:1.02'",
			Config::get('tables.bdtfxMeta')
		);
		$this->getBdd()->requeter($suppressionMeta);

		$suppressionTable = sprintf('DROP TABLE IF EXISTS %s', Config::get('tables.bdtfxTest'));
		$this->getBdd()->requeter($suppressionTable);
	}
1259 delphine 333
 
334
	/**
	 * Builds the reduced BDTFX table and its metadata row.
	 *
	 * Creates the table bdtfxr_v<version> from the rows of 'tables.bdtfx' that
	 * are either retained names with presence 'P' or carry at least one flora
	 * number (bonnier, cnrs, coste, fournier, fg). Then copies into bdtfxr_meta
	 * the bdtfx_meta row whose version equals the configured 'version' with '_'
	 * replaced by '.'.
	 */
	private function genererBDTFXReduit() {
		$tableSource = Config::get('tables.bdtfx');
		$version = Config::get('version');

		$creation = "create table bdtfxr_v{$version} as (SELECT *
						FROM {$tableSource}
						WHERE
							(`presence` = 'P' AND num_nom = num_nom_retenu)
						OR
							(`flore_bonnier_num` != '' OR `flore_cnrs_num` != '' OR `flore_coste_num` != ''
								OR `flore_fournier_num` != '' OR `flore_fg_num` != ''))";
		$this->getBdd()->requeter($creation);

		// Metadata versions use dots where the configured version uses underscores.
		$versionMeta = str_replace('_', '.', $version);
		$colonnesMeta = '`guid`, `langue_meta`, `code`, `version`, `titre`, `description`, `mots_cles`, `citation`, `url_tech`, `url_projet`, `source`, `createurs`, `editeur`, `contributeurs`, `droits`, `url_droits`, `langue`, `date_creation`, `date_validite`, `couverture_spatiale`, `couverture_temporelle`, `web_services`';
		$copieMeta = "INSERT INTO `bdtfxr_meta`($colonnesMeta) ".
			" SELECT * FROM `bdtfx_meta` WHERE `version` = '$versionMeta'";
		$this->getBdd()->requeter($copieMeta);
	}
130 jpm 351
 
352
	/**
	 * Drops the tables bdtfx_meta, bdtfx_v1_01 and bdtfx_v1_02 when they exist.
	 */
	private function supprimerTous() {
		$tables = array('bdtfx_meta', 'bdtfx_v1_01', 'bdtfx_v1_02');
		$this->getBdd()->requeter('DROP TABLE IF EXISTS '.implode(', ', $tables));
	}
661 jpm 356
 
357
	/**
	 * Writes a list of lines to log/<nomFichier>.log next to this script and
	 * announces it on standard output. Does nothing when the list is empty.
	 *
	 * The log directory is created when missing; a failure to create it or to
	 * write the file is reported on standard output instead of being ignored.
	 *
	 * @param string $message    sentence printed before the log location.
	 * @param array  $lignes     lines to log (joined with ", \n").
	 * @param string $nomFichier log file name, without directory nor extension.
	 */
	private function creerFichierLog($message, $lignes, $nomFichier) {
		$lignesNbre = count($lignes);
		if ($lignesNbre == 0) {
			return;
		}
		echo "$message. Voir le log de $lignesNbre lignes :\n";

		$logContenu = implode(", \n", $lignes);

		// realpath() returns false on failure: fall back to the unresolved path.
		$dossierScript = realpath(dirname(__FILE__));
		if ($dossierScript === false) {
			$dossierScript = dirname(__FILE__);
		}
		$logDossier = $dossierScript.'/log';
		$logFichier = "$logDossier/$nomFichier.log";
		echo $logFichier."\n";

		if (!is_dir($logDossier) && !mkdir($logDossier, 0777, true) && !is_dir($logDossier)) {
			echo "Erreur : impossible de créer le dossier de log '$logDossier'\n";
			return;
		}
		if (file_put_contents($logFichier, $logContenu) === false) {
			echo "Erreur : impossible d'écrire le fichier de log '$logFichier'\n";
		}
	}
11 jpm 368
}
369
?>