Rev 609 | Blame | Compare with Previous | Last modification | View Log | RSS feed
<?php

/***************************************************************************\
 *  SPIP, Systeme de publication pour l'internet
 *
 *  Copyright (c) 2001-2005
 *  Arnaud Martin, Antoine Pitrou, Philippe Riviere, Emmanuel Saint-James
 *
 *  This program is free software distributed under the GNU/GPL license.
 *  For more details see the COPYING.txt file or the online help.
\***************************************************************************/


// This file must only be executed once.
if (defined("_ECRIRE_INC_DIFF")) return;
define("_ECRIRE_INC_DIFF", "1");


//
// LCS (Longest Common Subsequence), in two versions
// (ref: http://www2.toki.or.id/book/AlgDesignManual/BOOK/BOOK5/NODE208.HTM)
//

/**
 * Simplified LCS: each sequence is a permutation of the other, and the
 * caller passes one of the two correspondence tables.
 *
 * The entries are visited by increasing value and chained by strictly
 * increasing key, so the result is a longest subset of $s in which keys and
 * values grow together.
 *
 * @param array $s correspondence table (key => value)
 * @return array the retained entries of $s (key => value); empty if $s is empty
 */
function lcs_opt($s) {
	$n = count($s);
	if (!$n) return array();
	$paths = array();       // $paths[$len]: best chain of length $len found so far
	$paths_ymin = array();  // $paths_ymin[$len]: last key of $paths[$len]
	$max_len = 0;

	// Insert the points, by increasing value
	asort($s);
	foreach ($s as $y => $c) {
		// Look for the longest chain that can be extended by key $y
		for ($len = $max_len; $len > 0; $len--) {
			if ($paths_ymin[$len] < $y) {
				$paths_ymin[$len + 1] = $y;
				$paths[$len + 1] = $paths[$len];
				$paths[$len + 1][$y] = $c;
				break;
			}
		}
		// No chain can be extended: the point starts a new chain of length 1
		if ($len == 0) {
			$paths_ymin[1] = $y;
			$paths[1] = array($y => $c);
		}
		if ($len + 1 > $max_len) $max_len = $len + 1;
	}
	return $paths[$max_len];
}

/**
 * Regular LCS: the two sequences have not been pre-processed by the
 * matching function. Elements are compared after trim().
 *
 * @param array $s first sequence of strings
 * @param array $t second sequence of strings
 * @return array array($u, $v) where $u maps keys of $s to keys of $t and
 *               $v is the reverse map; two empty arrays when nothing matches
 */
function lcs($s, $t) {
	$n = count($s);
	$p = count($t);
	if (!$n || !$p) return array(0 => array(), 1 => array());
	$paths = array();
	$paths_ymin = array();
	$max_len = 0;
	$t_pos = array();

	// Index the positions of every distinct element of $t ...
	foreach ($t as $y => $c) $t_pos[trim($c)][] = $y;
	// ... and store them once in reverse insertion order (the original code
	// re-sorted the same list with krsort() for every element of $s), so that
	// a single element of $s can never be chained with itself.
	foreach ($t_pos as $c => $ys) $t_pos[$c] = array_reverse($ys);

	// Insert the points
	foreach ($s as $x => $c) {
		$c = trim($c);
		if (!isset($t_pos[$c])) continue;
		foreach ($t_pos[$c] as $y) {
			for ($len = $max_len; $len > 0; $len--) {
				if ($paths_ymin[$len] < $y) {
					$paths_ymin[$len + 1] = $y;
					// The result is built as a string first, because PHP
					// arrays are expensive in memory
					$paths[$len + 1] = $paths[$len]." $x,$y";
					break;
				}
			}
			if ($len + 1 > $max_len) $max_len = $len + 1;
			if ($len == 0) {
				$paths_ymin[1] = $y;
				$paths[1] = "$x,$y";
			}
		}
	}

	// $max_len stays 0 when the sequences share no element
	if ($max_len > 0 && $paths[$max_len]) {
		$u = $v = array();
		foreach (explode(" ", $paths[$max_len]) as $couple) {
			list($x, $y) = explode(",", $couple);
			$u[$x] = $y;
			$v[$y] = $x;
		}
		return array($u, $v);
	}
	return array(0 => array(), 1 => array());
}

/**
 * Debugging helper: runs lcs() on two space-separated strings and echoes
 * both correspondence tables and the elapsed time as HTML.
 *
 * @param string $a first space-separated sequence
 * @param string $b second space-separated sequence
 */
function test_lcs($a, $b) {
	$s = explode(" ", $a);
	$t = explode(" ", $b);

	$t0 = explode(" ", microtime());
	list($r1, $r2) = lcs($s, $t);
	$t1 = explode(" ", microtime());
	$dt = $t1[0] + $t1[1] - $t0[0] - $t0[1];

	echo join(" ", $r1)."<br />";
	echo join(" ", $r2)."<p>";
	echo "<div style='font-weight: bold; color: red;'>$dt s.</div>";
}

/**
 * Debugging helper: runs lcs_opt() on a whitespace-separated string and
 * echoes the input, the result and the elapsed time as HTML.
 *
 * @param string $s whitespace-separated sequence
 */
function test_lcs_opt($s) {
	$s = preg_split(',\s+,', $s);

	$t0 = explode(" ", microtime());
	$t = lcs_opt($s);
	$t1 = explode(" ", microtime());
	$dt = $t1[0] + $t1[1] - $t0[0] - $t0[1];

	echo join(" ", $s)."<br />";
	echo join(" ", $t)."<p>";
	echo "<div style='font-weight: bold; color: red;'>$dt s.</div>";
}


//
// Multi-level diff generation
//

/**
 * Generic diff driver. The actual segmentation and rendering are delegated
 * to a handler object which must provide segmenter(), fuzzy(), ajouter(),
 * supprimer(), comparer(), resultat() and, when fuzzy() returns true,
 * deplacer().
 */
class Diff {
	var $diff;
	var $fuzzy;

	/**
	 * @param object $diff handler used to segment the texts and render the result
	 */
	function __construct($diff) {
		$this->diff = $diff;
		$this->fuzzy = true;
	}

	// Old-style constructor, kept for PHP 4 (where __construct is not
	// called by "new") and for any explicit caller.
	function Diff($diff) {
		$this->__construct($diff);
	}

	/**
	 * Compares two texts and returns whatever the handler has rendered.
	 *
	 * @param string $new new version of the text
	 * @param string $old old version of the text
	 * @return mixed value of the handler's resultat()
	 */
	function comparer($new, $old) {
		$paras = $this->diff->segmenter($new);
		$paras_old = $this->diff->segmenter($old);
		if ($this->diff->fuzzy()) {
			list($trans_rev, $trans) = apparier_paras($paras_old, $paras);
			$lcs = lcs_opt($trans);
		}
		else {
			list($trans_rev, $trans) = lcs($paras_old, $paras);
			$lcs = $trans;
		}

		// Explicit cursor over the old segments. It replaces each(), which
		// was removed in PHP 8, and the former "falsy segment" end test,
		// which stopped the walk at a segment equal to "0".
		// $keys_old[$pos] is the next old segment whose deletion status has
		// not been reported yet.
		$keys_old = array_keys($paras_old);
		$n_old = count($keys_old);
		$pos = 0;

		foreach ($paras as $i => $p) {
			if (!isset($trans[$i])) {
				// Added segment
				$this->diff->ajouter($p);
				continue;
			}
			$j = $trans[$i];
			if (!isset($lcs[$i])) {
				// Moved segment
				$this->diff->deplacer($p, $paras_old[$j]);
				continue;
			}
			// Segments deleted before the current one
			while ($pos < $n_old && $keys_old[$pos] < $j) {
				$i_old = $keys_old[$pos];
				if (!isset($trans_rev[$i_old])) {
					$this->diff->supprimer($paras_old[$i_old]);
				}
				$pos++;
			}
			// Segment that has not moved
			$this->diff->comparer($p, $paras_old[$j]);
		}

		// Segments deleted at the end of the text
		while ($pos < $n_old) {
			$i_old = $keys_old[$pos];
			if (!isset($trans_rev[$i_old])) {
				$this->diff->supprimer($paras_old[$i_old]);
			}
			$pos++;
		}
		return $this->diff->resultat();
	}
}

/**
 * Handler for a whole text: segments are produced by separer_paras() and
 * each pair of matching segments is diffed with a DiffPara handler.
 */
class DiffTexte {
	var $r;

	function __construct() {
		$this->r = "";
	}

	// Old-style constructor (see Diff::Diff)
	function DiffTexte() {
		$this->__construct();
	}

	// Diff of two matching segments, one level down
	function _diff($p, $p_old) {
		$diff = new Diff(new DiffPara);
		return $diff->comparer($p, $p_old);
	}

	function fuzzy() {
		return true;
	}

	function segmenter($texte) {
		return separer_paras($texte);
	}

	// NB: rem=\"diff-\" is a signal for the "afficher_para_modifies" function
	function ajouter($p) {
		$p = trim($p);
		$this->r .= "\n\n\n<div class=\"diff-para-ajoute\" title=\""._T('diff_para_ajoute')."\">".$p."</div rem=\"diff-\">";
	}

	function supprimer($p_old) {
		$p_old = trim($p_old);
		$this->r .= "\n\n\n<div class=\"diff-para-supprime\" title=\""._T('diff_para_supprime')."\">".$p_old."</div rem=\"diff-\">";
	}

	function deplacer($p, $p_old) {
		$this->r .= "\n\n\n<div class=\"diff-para-deplace\" title=\""._T('diff_para_deplace')."\">";
		$this->r .= trim($this->_diff($p, $p_old));
		$this->r .= "</div rem=\"diff-\">";
	}

	function comparer($p, $p_old) {
		$this->r .= "\n\n\n".$this->_diff($p, $p_old);
	}

	function resultat() {
		return $this->r;
	}
}

/**
 * Handler for one paragraph: the text is cut after each run of '.', '!' or
 * '?' (plus following whitespace) and each pair of matching segments is
 * diffed with a DiffPhrase handler.
 */
class DiffPara {
	var $r;

	function __construct() {
		$this->r = "";
	}

	// Old-style constructor (see Diff::Diff)
	function DiffPara() {
		$this->__construct();
	}

	// Diff of two matching segments, one level down
	function _diff($p, $p_old) {
		$diff = new Diff(new DiffPhrase);
		return $diff->comparer($p, $p_old);
	}

	function fuzzy() {
		return true;
	}

	/**
	 * @param string $texte paragraph to cut
	 * @return array list of segments; their concatenation is trim($texte)
	 */
	function segmenter($texte) {
		$paras = array();
		$texte = trim($texte);
		while (preg_match('/[\.!\?]+\s*/u', $texte, $regs)) {
			$p = strpos($texte, $regs[0]) + strlen($regs[0]);
			$paras[] = substr($texte, 0, $p);
			$texte = substr($texte, $p);
		}
		// strlen() and not a truthiness test: a trailing "0" is real content
		if (strlen($texte)) $paras[] = $texte;
		return $paras;
	}

	function ajouter($p) {
		$this->r .= "<span class=\"diff-ajoute\" title=\""._T('diff_texte_ajoute')."\">".$p."</span rem=\"diff-\">";
	}

	function supprimer($p_old) {
		$this->r .= "<span class=\"diff-supprime\" title=\""._T('diff_texte_supprime')."\">".$p_old."</span rem=\"diff-\">";
	}

	function deplacer($p, $p_old) {
		$this->r .= "<span class=\"diff-deplace\" title=\""._T('diff_texte_deplace')."\">".$this->_diff($p, $p_old)."</span rem=\"diff-\">";
	}

	function comparer($p, $p_old) {
		$this->r .= $this->_diff($p, $p_old);
	}

	function resultat() {
		return $this->r;
	}
}

/**
 * Handler for one sentence: the text is cut on whitespace and punctuation.
 * fuzzy() is false, so Diff uses lcs() and never reports a moved segment,
 * which is why this handler has no deplacer() method.
 */
class DiffPhrase {
	var $r;

	function __construct() {
		$this->r = "";
	}

	// Old-style constructor (see Diff::Diff)
	function DiffPhrase() {
		$this->__construct();
	}

	function fuzzy() {
		return false;
	}

	/**
	 * @param string $texte sentence to cut
	 * @return array list of segments
	 */
	function segmenter($texte) {
		$paras = array();
		if (test_pcre_unicode()) {
			$punct = '([[:punct:]]|'.plage_punct_unicode().')';
			$mode = 'u';
		}
		else {
			// Punctuation ranges for a buggy preg_match (ha ha)
			$punct = '([^\w\s\x80-\xFF]|'.plage_punct_unicode().')';
			$mode = '';
		}
		$preg = '/('.$punct.'+)(\s+|$)|(\s+)('.$punct.'*)/'.$mode;
		while (preg_match($preg, $texte, $regs)) {
			// preg_match() omits trailing groups that did not take part in
			// the match: pad them so the reads below are always defined.
			// NOTE(review): the group indices 4, 5 and 6 used below depend on
			// the capturing groups contained in plage_punct_unicode(), which
			// is defined elsewhere; they are left exactly as they were.
			$regs = $regs + array_fill(0, 7, '');
			$p = strpos($texte, $regs[0]);
			$l = strlen($regs[0]);
			$signe = $regs[1] ? $regs[1] : $regs[6];
			$milieu = "";
			if ($signe) {
				// Attach closing shortcuts to the previous word
				if (preg_match(',^[\]}]+$,', $signe)) {
					$avant = substr($texte, 0, $p) . $regs[5] . $signe;
					$texte = $regs[4] . substr($texte, $p + $l);
				}
				// Attach opening shortcuts to the next word
				else if ($regs[5] && preg_match(',^[\[{]+$,', $signe)) {
					$avant = substr($texte, 0, $p) . $regs[5];
					$texte = $signe . substr($texte, $p + $l);
				}
				// Other punctuation marks are words in their own right
				else {
					$avant = substr($texte, 0, $p);
					$milieu = $regs[0];
					$texte = substr($texte, $p + $l);
				}
			}
			else {
				$avant = substr($texte, 0, $p + $l);
				$texte = substr($texte, $p + $l);
			}
			// strlen() and not a truthiness test: the word "0" must be kept
			if (strlen($avant)) $paras[] = $avant;
			if (strlen($milieu)) $paras[] = $milieu;
		}
		if (strlen($texte)) $paras[] = $texte;
		return $paras;
	}

	function ajouter($p) {
		$this->r .= "<span class=\"diff-ajoute\" title=\""._T('diff_texte_ajoute')."\">".$p."</span rem=\"diff-\"> ";
	}

	function supprimer($p_old) {
		$this->r .= "<span class=\"diff-supprime\" title=\""._T('diff_texte_supprime')."\">".$p_old."</span rem=\"diff-\"> ";
	}

	function comparer($p, $p_old) {
		$this->r .= $p;
	}

	function resultat() {
		return $this->r;
	}
}


/**
 * Prepares a text for diffing by passing it through the charset helpers
 * loaded from "charsets.php" and ending with unicode_to_utf_8().
 * When the site charset is not 'utf-8', charset2unicode() is applied first.
 *
 * @param string $texte text in the site charset (read with lire_meta('charset'))
 * @return string result of unicode_to_utf_8()
 */
function preparer_diff($texte) {
	include_spip("charsets.php");

	$charset = lire_meta('charset');
	if ($charset == 'utf-8')
		return unicode_to_utf_8(html2unicode($texte));
	return unicode_to_utf_8(html2unicode(charset2unicode($texte, $charset, true)));
}

/**
 * Prepares a diff result for display: returned unchanged when the site
 * charset is 'utf-8', otherwise passed through charset2unicode($texte, 'utf-8').
 *
 * @param string $texte diff result
 * @return string text to display
 */
function afficher_diff($texte) {
	$charset = lire_meta('charset');
	if ($charset == 'utf-8') return $texte;
	return charset2unicode($texte, 'utf-8');
}