/trunk/scripts/modules/migration_mots_cles/Migrationmotscles.php |
---|
File deleted |
\ No newline at end of file |
/trunk/scripts/modules/migration_mots_cles/MigrationMotsCles.php |
---|
New file |
0,0 → 1,174 |
<?php |
// declare(encoding='UTF-8'); |
/** |
* Traitement des mots clés cel pour les migrer vers des mots clés stockés avec path enumeration |
* |
* Utilisation : |
* - migrer les mots-clés obs : <code>/opt/lamp/bin/php cli.php migration_mots_cles -a obs</code> |
* - migrer les mots-clés images : <code>/opt/lamp/bin/php cli.php migration_mots_cles -a images</code> |
* |
* @category CEL |
* @package Scripts |
* @subpackage Migration : Mots-Clés |
* @author Mathias CHOUET <mathias@tela-botanica.org> |
* @author Jean-Pascal MILCENT <jpm@tela-botanica.org> |
* @author Aurelien PERONNET <aurelien@tela-botanica.org> |
* @license GPL v3 <http://www.gnu.org/licenses/gpl.txt> |
* @license CECILL v2 <http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt> |
* @copyright 1999-2014 Tela Botanica <accueil@tela-botanica.org> |
*/ |
include_once dirname(__FILE__).'/bibliotheque/Dao.php'; |
/**
 * Command-line script migrating the CEL keywords (observations or images) from the original
 * hierarchical tables to keyword tables storing a path enumeration ("/parent/child/").
 *
 * The action is chosen with the "a" parameter ('obs' or 'images'); the "v" parameter enables
 * the verbose progress output.
 */
class MigrationMotsCles extends Script {

	/** Number of rows handed to the DAO for each INSERT statement. */
	const TAILLE_PAQUET = 800;

	/** Migration target selected on the command line: 'obs' or 'images'. */
	private $mode;

	/**
	 * Entry point: reads the "a" (action) and "v" (verbose) parameters and starts the matching migration.
	 * Prints an error message when the action is neither 'obs' nor 'images'.
	 */
	public function executer() {
		$cmd = $this->getParametre('a');
		$this->mode_verbeux = $this->getParametre('v');

		switch ($cmd) {
			case 'obs':
				$this->mode = $cmd;
				$this->migrerMotsClesObservations();
				break;
			case 'images':
				$this->mode = $cmd;
				$this->migrerMotsClesImages();
				break;
			default:
				echo 'Méthode inconnue, les méthodes possibles sont obs et images'."\n";
		}
	}

	/** Runs the migration with a DAO configured for the observation keywords. */
	private function migrerMotsClesObservations() {
		$this->dao = new Dao('obs');
		$this->convertirNestedSetVersPathEnum();
	}

	/** Runs the migration with a DAO configured for the image keywords. */
	private function migrerMotsClesImages() {
		$this->dao = new Dao('images');
		$this->convertirNestedSetVersPathEnum();
	}

	/**
	 * Performs the whole migration for the current mode:
	 * empties the target tables, computes the path of every original keyword, inserts the keywords
	 * by packets, reads back their new ids, then inserts the keyword/element links by packets.
	 */
	private function convertirNestedSetVersPathEnum() {
		// Both timestamps must be floats: microtime() without argument returns a "msec sec" string
		// that cannot be subtracted from the float taken at the end.
		$debut = microtime(true);
		$this->dao->viderTables();

		// The DAO already knows its mode (given to its constructor), its getters take no argument.
		$mots_cles_hierarchiques = $this->dao->obtenirMotsClesOriginaux();
		$mots_cles_hierarchiques = $this->formaterChemin($mots_cles_hierarchiques);

		$liaisons = $this->dao->obtenirLiaisonsMotsClesOriginaux();

		if ($this->mode_verbeux) {
			echo "-------------------------------------------------------------------\n";
			echo "  Début de la migration des mots clés ".$this->mode." \n";
			echo "  ".count($mots_cles_hierarchiques)." mots clés et ".count($liaisons)." liaisons à migrer \n";
			echo "-------------------------------------------------------------------\n";
		}

		$nb_mots_cles = 0;
		$nb_liaisons = 0;

		// Keys are preserved: they are not used by the insertion but keep the packets consistent
		// with the "id_mot_cle-id_utilisateur" indexing of the full array.
		$mots_cles_slice = array_chunk($mots_cles_hierarchiques, self::TAILLE_PAQUET, true);
		echo " Insertion des mots clés par paquet de ".self::TAILLE_PAQUET." \n";
		foreach ($mots_cles_slice as $mots_cles_tranche) {
			$this->dao->insererMotsCles($mots_cles_tranche);
			// Count what was really sent: the last packet is usually smaller than a full one.
			$nb_mots_cles += count($mots_cles_tranche);
			if ($this->mode_verbeux) {
				$this->afficherAvancement(' mots clés insérés ', $nb_mots_cles);
			}
		}
		echo "\n Insertion des mots clés ".$this->mode." terminée \n";

		// The new ids are generated by the database, so they are read back and indexed by
		// "chemin-id_utilisateur" to translate the old links.
		$infos_nouveaux_mots_cles = $this->dao->obtenirInfosNouveauxMotsCles();
		$correspondances = $this->formaterCorrespondanceCheminId($infos_nouveaux_mots_cles);

		$liaisons_slice = array_chunk($liaisons, self::TAILLE_PAQUET, true);
		echo " Insertion des mots liaisons par paquet de ".self::TAILLE_PAQUET." \n";
		foreach ($liaisons_slice as $liaison_tranche) {
			$this->dao->insererLiaisonsMotsCles($mots_cles_hierarchiques, $liaison_tranche, $correspondances);
			$nb_liaisons += count($liaison_tranche);
			if ($this->mode_verbeux) {
				$this->afficherAvancement(' liaisons insérées ', $nb_liaisons);
			}
		}
		echo "\n Insertion des liaisons ".$this->mode." terminée \n";

		$fin = microtime(true);

		if ($this->mode_verbeux) {
			echo "\n";
			echo "-------------------------------------------------------------------\n";
			echo "  Fin de la migration des mots clés ".$this->mode.", \n";
			echo "  ".($fin - $debut)." secondes écoulées \n";
			echo "  ".count($mots_cles_hierarchiques)." mots clés insérés \n";
			echo "  ".count($liaisons)." liaisons insérées \n";
			echo "-------------------------------------------------------------------\n";
			echo "\n";
		}
	}

	/**
	 * Indexes the freshly inserted keywords by "chemin-id_utilisateur".
	 *
	 * @param array $infos_nouveaux_mots_cles rows holding at least the 'chemin' and 'id_utilisateur' keys
	 * @return array the same rows, keyed by "chemin-id_utilisateur" (a later row replaces an earlier one with the same key)
	 */
	private function formaterCorrespondanceCheminId($infos_nouveaux_mots_cles) {
		$mots_cles_correspondances = array();
		foreach ($infos_nouveaux_mots_cles as $mot) {
			$cle = $mot['chemin'].'-'.$mot['id_utilisateur'];
			$mots_cles_correspondances[$cle] = $mot;
		}
		return $mots_cles_correspondances;
	}

	/**
	 * Computes the path ("/ancestor/.../keyword/") of every original keyword.
	 *
	 * A keyword is appended to the path of its parent when that parent has already been kept for the
	 * same user, otherwise it is placed at the root; parents must therefore come before their children
	 * in $mots_cles. Keywords that are empty once trimmed are ignored, as are keywords whose harmonised
	 * path (accents and case removed) was already seen for the same user.
	 *
	 * @param array $mots_cles rows with the 'id_mot_cle', 'parent', 'id_utilisateur' and 'mot_cle' keys
	 * @return array kept keywords keyed by "id_mot_cle-id_utilisateur", each holding 'id_utilisateur', 'chemin' and 'mot_cle'
	 */
	private function formaterChemin($mots_cles) {
		$mots_cles_hierarchiques = array();
		$chemin_traites = array();
		// Rejected keywords, gathered for debugging purposes only (not returned).
		$pbs = array('doublons' => array(), 'vides' => array());

		foreach ($mots_cles as $mot) {
			if (trim($mot['mot_cle']) == "") {
				$pbs['vides'][] = $mot;
				continue;
			}

			$chemin = '/';
			$cle_parent = $mot['parent'].'-'.$mot['id_utilisateur'];
			if (isset($mots_cles_hierarchiques[$cle_parent])) {
				$chemin = $mots_cles_hierarchiques[$cle_parent]['chemin'];
			}
			$chemin .= self::simplifier($mot['mot_cle']).'/';
			// A keyword reduced to nothing by simplifier() would otherwise produce a double slash.
			$chemin = str_replace("//", "/", $chemin);

			$cle_doublon = self::harmoniserPourDoublon($chemin).'-'.$mot['id_utilisateur'];
			if (isset($chemin_traites[$cle_doublon])) {
				$pbs['doublons'][$cle_doublon] = $mot;
				continue;
			}

			$cle = $mot['id_mot_cle'].'-'.$mot['id_utilisateur'];
			$mots_cles_hierarchiques[$cle] = array(
				'id_utilisateur' => $mot['id_utilisateur'],
				'chemin' => $chemin,
				'mot_cle' => $mot['mot_cle']
			);
			$chemin_traites[$cle_doublon] = 1;
		}

		return $mots_cles_hierarchiques;
	}

	/**
	 * Reduces a text to a comparison form used to detect duplicates: accented letters and ligatures
	 * are replaced by their base letters, any other HTML entity is dropped and the result is lowercased.
	 *
	 * @param string $texte UTF-8 text
	 * @return string the harmonised text
	 */
	static public function harmoniserPourDoublon($texte) {
		$texte = htmlentities($texte, ENT_NOQUOTES, 'utf-8');
		// [A-Za-z] and not [A-za-z]: the latter also matches the punctuation between 'Z' and 'a'.
		$texte = preg_replace('/&([A-Za-z])(?:acute|cedil|circ|grave|orn|ring|slash|th|tilde|uml);/', '\1', $texte);
		$texte = preg_replace('/&([A-Za-z]{2})(?:lig);/', '\1', $texte); // ligatures, e.g. 'œ'
		$texte = preg_replace('/&[^;]+;/', '', $texte); // drops every remaining entity
		return strtolower($texte);
	}

	/**
	 * Removes the characters that cannot appear inside a path segment (slash and backslash)
	 * and trims the result.
	 *
	 * @param string $text raw keyword
	 * @return string keyword usable as a path segment
	 */
	static public function simplifier($text) {
		return trim(str_replace(array('\\', '/'), '', $text));
	}
}
/trunk/scripts/modules/migration_mots_cles/bibliotheque/Dao.php |
---|
1,17 → 1,29 |
<?php |
// declare(encoding='UTF-8'); |
/** |
* Contient les requêtes effectuant la migration des mots-clés. |
* |
* @category CEL |
* @package Scripts |
* @subpackage Migration : Mots-Clés |
* @author Mathias CHOUET <mathias@tela-botanica.org> |
* @author Jean-Pascal MILCENT <jpm@tela-botanica.org> |
* @author Aurelien PERONNET <aurelien@tela-botanica.org> |
* @license GPL v3 <http://www.gnu.org/licenses/gpl.txt> |
* @license CECILL v2 <http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt> |
* @copyright 1999-2014 Tela Botanica <accueil@tela-botanica.org> |
*/ |
class Dao extends Bdd { |
private $mode; |
private $table_liaison; |
private $table_mots_cles; |
private $champ_id_element_lie; |
public function __construct($mode) { |
parent::__construct(); |
$this->mode = $mode; |
if($mode == 'obs') { |
if ($mode == 'obs') { |
$this->table_liaison = 'cel_mots_cles_obs_liaison'; |
$this->table_mots_cles = 'cel_arbre_mots_cles_obs'; |
} else { |
19,79 → 31,76 |
$this->table_mots_cles = 'cel_arbre_mots_cles_images'; |
} |
} |
/**
 * Empties the keyword table, then the link table, of the current mode.
 * Each statement ends with an SQL comment naming the file and line that issued it.
 *
 * @return mixed whatever requeter() returns for the second TRUNCATE (the first result is not returned)
 */
public function viderTables() {
	$vider_mots_cles = 'TRUNCATE '.$this->table_mots_cles.' ';
	$this->requeter($vider_mots_cles.' -- '.__FILE__.':'.__LINE__);
	$vider_liaisons = 'TRUNCATE '.$this->table_liaison.' ';
	return $this->requeter($vider_liaisons.' -- '.__FILE__.':'.__LINE__);
}
/**
 * Reads the original keywords of the current mode (observations or images).
 *
 * Root nodes (niveau = 0) and keywords without user are excluded. Rows are ordered by user then
 * by level, so that a parent is always returned before its children.
 *
 * @return mixed whatever recupererTous() returns; rows expose id_mot_cle, parent, id_utilisateur and mot_cle
 */
public function obtenirMotsClesOriginaux() {
	if ($this->mode == 'obs') {
		$requete = "SELECT id_mot_cle_obs as id_mot_cle, ce_mot_cle_obs_parent as parent, id_utilisateur, mot_cle ".
			"FROM cel_mots_cles_obs WHERE niveau != 0 AND id_utilisateur != '' ORDER BY id_utilisateur, niveau";
	} else {
		$requete = "SELECT id_mot_cle_image as id_mot_cle, ce_mot_cle_image_parent as parent, id_utilisateur, mot_cle ".
			"FROM cel_mots_cles_images WHERE niveau != 0 AND id_utilisateur != '' ORDER BY id_utilisateur, niveau";
	}
	// The query is built and executed once only.
	$mots_cles = $this->recupererTous($requete);
	return $mots_cles;
}
/**
 * Reads the original links between keywords and elements (observations or images) of the current
 * mode, joined with the element table to obtain the owning user.
 *
 * @return mixed whatever recupererTous() returns; rows expose id_element_lie, id_mot_cle and id_utilisateur
 */
public function obtenirLiaisonsMotsClesOriginaux() {
	if ($this->mode == 'obs') {
		$requete = "SELECT comc.id_observation as id_element_lie, comc.id_mot_cle_obs as id_mot_cle, id_utilisateur as id_utilisateur ".
			"FROM cel_obs_mots_cles comc ".
			"INNER JOIN cel_obs o ON o.id_observation = comc.id_observation ";
	} else {
		$requete = "SELECT comc.id_image as id_element_lie, comc.id_mot_cle_image as id_mot_cle, o.ce_utilisateur as id_utilisateur ".
			"FROM cel_images_mots_cles comc ".
			"INNER JOIN cel_images o ON o.id_image = comc.id_image ";
	}
	// The query is built and executed once only.
	$liaisons = $this->recupererTous($requete);
	return $liaisons;
}
/**
 * Inserts a packet of keywords into the keyword table of the current mode with a single
 * multi-row INSERT statement.
 *
 * @param array $mots_cles keywords, each holding the 'chemin', 'id_utilisateur' and 'mot_cle' keys
 * @return mixed false when $mots_cles is empty (no query is sent), otherwise whatever requeter() returns
 */
public function insererMotsCles($mots_cles) {
	// "INSERT ... VALUES" without any tuple is not valid SQL: do not send it.
	if (empty($mots_cles)) {
		return false;
	}

	$valeurs = array();
	foreach ($mots_cles as $mot_cle) {
		$valeurs[] = '('.
			$this->proteger($mot_cle['chemin']).', '.
			$this->proteger($mot_cle['id_utilisateur']).', '.
			$this->proteger($mot_cle['mot_cle']).' '.
			')';
	}

	// Joining the tuples avoids having to trim a trailing separator afterwards.
	$requete = "INSERT INTO ".$this->table_mots_cles." (chemin, id_utilisateur, mot_cle) VALUES ".
		implode(', ', $valeurs);
	return $this->requeter($requete);
}
/**
 * Reads the id, user and path of every keyword stored in the keyword table of the current mode.
 *
 * @return mixed whatever recupererTous() returns; rows expose id_mot_cle, id_utilisateur and chemin
 */
public function obtenirInfosNouveauxMotsCles() {
	$colonnes = 'id_mot_cle, id_utilisateur, chemin';
	return $this->recupererTous('SELECT '.$colonnes.' FROM '.$this->table_mots_cles);
}
public function insererLiaisonsMotsCles($mots_cles, $liaisons, $correspondances) { |
$requete_insertion_liaisons = "INSERT INTO ".$this->table_liaison." (id_element_lie, id_mot_cle) ". |
" VALUES "; |
$requete = "INSERT INTO ".$this->table_liaison." (id_element_lie, id_mot_cle) VALUES "; |
$lier = false; |
$mot_cles_inexistants = 0; |
foreach($liaisons as &$liaison_mot_cle) { |
foreach ($liaisons as &$liaison_mot_cle) { |
$cle = $liaison_mot_cle['id_mot_cle'].'-'.$liaison_mot_cle['id_utilisateur']; |
if(isset($mots_cles[$cle])) { |
if (isset($mots_cles[$cle])) { |
$cle_id_mot_cle_migre = $mots_cles[$cle]['chemin'].'-'.$liaison_mot_cle['id_utilisateur']; |
if(isset($correspondances[$cle_id_mot_cle_migre])) { |
$id_mot_cle_migre = $correspondances[$cle_id_mot_cle_migre]['id_mot_cle']; |
$requete_insertion_liaisons .= "(". |
$requete .= "(". |
$this->proteger($liaison_mot_cle['id_element_lie']).", ". |
$this->proteger($id_mot_cle_migre)." ". |
"), "; |
102,11 → 111,11 |
} |
} |
if($lier) { |
$requete_insertion_liaisons = rtrim($requete_insertion_liaisons, ", "); |
$this->requeter($requete_insertion_liaisons); |
if ($lier) { |
$requete = rtrim($requete, ', '); |
$this->requeter($requete); |
} |
if($mot_cles_inexistants != 0) { |
if ($mot_cles_inexistants != 0) { |
echo $mot_cles_inexistants."\n"; |
} |
} |