Subversion Repositories Sites.tela-botanica.org

Rev

Blame | Last modification | View Log | RSS feed

<?php

/***************************************************************************\
 *  SPIP, Systeme de publication pour l'internet                           *
 *                                                                         *
 *  Copyright (c) 2001-2005                                                *
 *  Arnaud Martin, Antoine Pitrou, Philippe Riviere, Emmanuel Saint-James  *
 *                                                                         *
 *  Ce programme est un logiciel libre distribue sous licence GNU/GPL.     *
 *  Pour plus de details voir le fichier COPYING.txt ou l'aide en ligne.   *
\***************************************************************************/


// Include guard: the body of this file is executed only once.
// If the marker constant is already defined, execution of this file stops
// here; otherwise the marker is defined before the declarations below.
if (defined("_ECRIRE_INC_DIFF")) return;
define("_ECRIRE_INC_DIFF", "1");


//
// LCS (Longest Common Subsequence) en deux versions
// (ref: http://www2.toki.or.id/book/AlgDesignManual/BOOK/BOOK5/NODE208.HTM)

// Simplified LCS for the case where each sequence is a permutation of the
// other: only one of the two correspondence tables is passed in.
//
// The entries of $s are visited in ascending order of value (asort keeps the
// keys). A chain may be extended by an entry only when the key of the chain's
// last element is smaller than the key of that entry. The longest chain found
// is returned as an array of key => value pairs taken from $s, or array()
// when $s is empty.
function lcs_opt($s) {
        if (count($s) == 0) return array();

        $chains = array();      // $chains[$k]: a chain made of $k entries
        $chain_tail = array();  // $chain_tail[$k]: key of the last entry of $chains[$k]
        $longest = 0;

        asort($s);
        foreach ($s as $y => $c) {
                // Walk down from the longest chain to the first one that the
                // current entry can extend ($k == 0 means "none")
                $k = $longest;
                while ($k > 0 && !($chain_tail[$k] < $y)) $k--;

                if ($k > 0) {
                        $chains[$k + 1] = $chains[$k];
                        $chains[$k + 1][$y] = $c;
                }
                else {
                        $chains[1] = array($y => $c);
                }
                $chain_tail[$k + 1] = $y;
                if ($k >= $longest) $longest = $k + 1;
        }
        return $chains[$longest];
}

// General LCS: neither sequence has been pre-processed by the matching
// function. Elements are compared after trim().
//
// $s, $t: arrays of strings.
// Returns array($u, $v) where $u maps a key of $s to the key of $t it is
// paired with, and $v is the reverse mapping. Because the pairs are decoded
// from a string, the mapped values are numeric strings. Both arrays are empty
// when either input is empty or when the inputs share no element.
function lcs($s, $t) {
        if (!count($s) || !count($t)) return array(0 => array(), 1 => array());
        $paths = array();
        $paths_ymin = array();
        $max_len = 0;
        $t_pos = array();

        // Index the positions of every (trimmed) element of $t
        foreach ($t as $y => $c) $t_pos[trim($c)][] = $y;
        // For a given element, positions are visited in reverse insertion
        // order; done once here rather than on every occurrence in $s
        foreach (array_keys($t_pos) as $k) krsort($t_pos[$k]);

        foreach ($s as $x => $c) {
                $c = trim($c);
                // empty() instead of a bare read: no undefined-key notice
                // when the element does not occur in $t
                if (empty($t_pos[$c])) continue;
                foreach ($t_pos[$c] as $y) {
                        for ($len = $max_len; $len > 0; $len--) {
                                if ($paths_ymin[$len] < $y) {
                                        $paths_ymin[$len + 1] = $y;
                                        // The result is built as a string first,
                                        // because PHP arrays are costly in memory
                                        $paths[$len + 1] = $paths[$len]." $x,$y";
                                        break;
                                }
                        }
                        if ($len + 1 > $max_len) $max_len = $len + 1;
                        if ($len == 0) {
                                $paths_ymin[1] = $y;
                                $paths[1] = "$x,$y";
                        }
                }
        }

        // No pair was ever recorded: nothing in common
        if (empty($paths[$max_len])) return array(0 => array(), 1 => array());

        // Decode the "x,y x,y ..." string into the two mappings
        $u = $v = array();
        foreach (explode(" ", $paths[$max_len]) as $pair) {
                list($x, $y) = explode(",", $pair);
                $u[$x] = $y;
                $v[$y] = $x;
        }
        return array($u, $v);
}


// Manual test helper: splits $a and $b on single spaces, runs lcs() on the
// two word lists, then prints both returned mappings followed by the elapsed
// time as HTML.
function test_lcs($a, $b) {
        $words_a = explode(" ", $a);
        $words_b = explode(" ", $b);

        // microtime() returns "usec sec"; both parts are summed below
        $start = explode(" ", microtime());
        $result = lcs($words_a, $words_b);
        $stop = explode(" ", microtime());

        $dt = $stop[0] + $stop[1] - $start[0] - $start[1];
        $forward = $result[0];
        $backward = $result[1];
        echo join(" ", $forward)."<br />";
        echo join(" ", $backward)."<p>";
        echo "<div style='font-weight: bold; color: red;'>$dt s.</div>";
}

// Manual test helper: splits $s on runs of whitespace, runs lcs_opt() on the
// resulting list, then prints the input list, the result and the elapsed
// time as HTML.
function test_lcs_opt($s) {
        $items = preg_split(',\s+,', $s);

        // microtime() returns "usec sec"; both parts are summed below
        $start = explode(" ", microtime());
        $result = lcs_opt($items);
        $stop = explode(" ", microtime());

        $dt = $stop[0] + $stop[1] - $start[0] - $start[1];
        echo join(" ", $items)."<br />";
        echo join(" ", $result)."<p>";
        echo "<div style='font-weight: bold; color: red;'>$dt s.</div>";
}


//
// Generation de diff a plusieurs etages
//

// Generic diff driver. All level-specific work is delegated to the object
// passed to the constructor, which must provide segmenter(), fuzzy(),
// ajouter(), supprimer(), deplacer(), comparer() and resultat().
class Diff {
        // Object implementing the operations listed above
        var $diff;
        // Set to true by the constructor; not read inside this class
        var $fuzzy;

        // Old-style (PHP 4) constructor, kept so that existing callers and
        // old interpreters keep working; it delegates to __construct()
        function Diff($diff) {
                $this->__construct($diff);
        }

        // Constructor recognised by PHP 5 and later (same-name methods are no
        // longer treated as constructors from PHP 8.0 on)
        function __construct($diff) {
                $this->diff = $diff;
                $this->fuzzy = true;
        }

        // Compares $new against $old and returns $this->diff->resultat().
        //
        // Both texts are cut with segmenter(). Each new segment is reported
        // as added (no counterpart), moved (counterpart outside the common
        // subsequence) or compared in place. Old segments without a
        // counterpart are reported as removed, in their original order,
        // just before the next in-place segment or at the very end.
        function comparer($new, $old) {
                $paras = $this->diff->segmenter($new);
                $paras_old = $this->diff->segmenter($old);
                if ($this->diff->fuzzy()) {
                        // $trans: new index => old index, $trans_rev: the reverse
                        list($trans_rev, $trans) = apparier_paras($paras_old, $paras);
                        $lcs = lcs_opt($trans);
                }
                else {
                        list($trans_rev, $trans) = lcs($paras_old, $paras);
                        $lcs = $trans;
                }

                // Explicit cursor over the old segments. The former each()-based
                // traversal no longer exists in PHP 8 and used the falsiness of
                // the segment to detect the end, so a segment such as "0"
                // stopped the reporting of removed segments.
                $old_keys = array_keys($paras_old);
                $nb_old = count($old_keys);
                $k = 0;

                foreach ($paras as $i => $p) {
                        if (!isset($trans[$i])) {
                                // Added segment
                                $this->diff->ajouter($p);
                                continue;
                        }
                        $j = $trans[$i];
                        if (!isset($lcs[$i])) {
                                // Moved segment
                                $this->diff->deplacer($p, $paras_old[$j]);
                                continue;
                        }
                        // Segments removed before the current one. The old segment
                        // at index $j itself is not consumed here; it is examined
                        // (and skipped when it has a counterpart) on a later pass.
                        while ($k < $nb_old && $old_keys[$k] < $j) {
                                $i_old = $old_keys[$k];
                                if (!isset($trans_rev[$i_old])) {
                                        $this->diff->supprimer($paras_old[$i_old]);
                                }
                                $k++;
                        }
                        // Segment that stayed in place
                        $this->diff->comparer($p, $paras_old[$j]);
                }

                // Segments removed at the end of the text
                while ($k < $nb_old) {
                        $i_old = $old_keys[$k];
                        if (!isset($trans_rev[$i_old])) {
                                $this->diff->supprimer($paras_old[$i_old]);
                        }
                        $k++;
                }
                return $this->diff->resultat();
        }
}

// Diff operations at whole-text level: segments come from separer_paras()
// and each pair of segments is compared with a Diff driven by DiffPara.
// The HTML produced is accumulated in $r.
class DiffTexte {
        // Accumulated output
        var $r;

        // Old-style (PHP 4) constructor, kept for backward compatibility;
        // delegates to __construct()
        function DiffTexte() {
                $this->__construct();
        }

        // Constructor recognised by PHP 5 and later; without it $r would stay
        // null on PHP 8 and resultat() would not return a string
        function __construct() {
                $this->r = "";
        }

        // Runs the next-level diff on a pair of segments and returns its result
        function _diff($p, $p_old) {
                $diff = new Diff(new DiffPara);
                return $diff->comparer($p, $p_old);
        }

        // This level asks the driver for fuzzy matching
        function fuzzy() {
                return true;
        }
        // Segments are whatever separer_paras() returns for $texte
        function segmenter($texte) {
                return separer_paras($texte);
        }

        // NB: rem=\"diff-\" is a signal for the function "afficher_para_modifies"
        // Appends an added segment
        function ajouter($p) {
                $p = trim($p);
                $this->r .= "\n\n\n<div class=\"diff-para-ajoute\" title=\""._T('diff_para_ajoute')."\">".$p."</div rem=\"diff-\">";
        }
        // Appends a removed segment
        function supprimer($p_old) {
                $p_old = trim($p_old);
                $this->r .= "\n\n\n<div class=\"diff-para-supprime\" title=\""._T('diff_para_supprime')."\">".$p_old."</div rem=\"diff-\">";
        }
        // Appends a moved segment, with the trimmed inner diff as its content
        function deplacer($p, $p_old) {
                $this->r .= "\n\n\n<div class=\"diff-para-deplace\" title=\""._T('diff_para_deplace')."\">";
                $this->r .= trim($this->_diff($p, $p_old));
                $this->r .= "</div rem=\"diff-\">";
        }
        // Appends the inner diff of a segment that stayed in place
        function comparer($p, $p_old) {
                $this->r .= "\n\n\n".$this->_diff($p, $p_old);
        }

        // Returns everything accumulated so far
        function resultat() {
                return $this->r;
        }
}

// Diff operations at paragraph level: the text is cut after each run of
// '.', '!' or '?' (plus following whitespace) and each pair of segments is
// compared with a Diff driven by DiffPhrase. The HTML produced is
// accumulated in $r.
class DiffPara {
        // Accumulated output
        var $r;

        // Old-style (PHP 4) constructor, kept for backward compatibility;
        // delegates to __construct()
        function DiffPara() {
                $this->__construct();
        }

        // Constructor recognised by PHP 5 and later; without it $r would stay
        // null on PHP 8 and resultat() would not return a string
        function __construct() {
                $this->r = "";
        }

        // Runs the next-level diff on a pair of segments and returns its result
        function _diff($p, $p_old) {
                $diff = new Diff(new DiffPhrase);
                return $diff->comparer($p, $p_old);
        }

        // This level asks the driver for fuzzy matching
        function fuzzy() {
                return true;
        }
        // Cuts the trimmed text into segments, each ending with its
        // terminating punctuation and the whitespace that follows it.
        // Whatever remains after the last terminator is the final segment.
        function segmenter($texte) {
                $paras = array();
                $texte = trim($texte);
                while (preg_match('/[\.!\?]+\s*/u', $texte, $regs)) {
                        $p = strpos($texte, $regs[0]) + strlen($regs[0]);
                        $paras[] = substr($texte, 0, $p);
                        $texte = substr($texte, $p);
                }
                // Length test rather than truthiness: a remainder of "0" is
                // real content and must not be dropped
                if (strlen((string) $texte)) $paras[] = $texte;
                return $paras;
        }

        // Appends an added segment
        function ajouter($p) {
                $this->r .= "<span class=\"diff-ajoute\" title=\""._T('diff_texte_ajoute')."\">".$p."</span rem=\"diff-\">";
        }
        // Appends a removed segment
        function supprimer($p_old) {
                $this->r .= "<span class=\"diff-supprime\" title=\""._T('diff_texte_supprime')."\">".$p_old."</span rem=\"diff-\">";
        }
        // Appends a moved segment, with the inner diff as its content
        function deplacer($p, $p_old) {
                $this->r .= "<span class=\"diff-deplace\" title=\""._T('diff_texte_deplace')."\">".$this->_diff($p, $p_old)."</span rem=\"diff-\">";
        }
        // Appends the inner diff of a segment that stayed in place
        function comparer($p, $p_old) {
                $this->r .= $this->_diff($p, $p_old);
        }

        // Returns everything accumulated so far
        function resultat() {
                return $this->r;
        }
}

// Diff operations at sentence level: the text is cut into words and
// punctuation marks, and segments are compared as plain strings (no further
// nesting). The HTML produced is accumulated in $r.
class DiffPhrase {
        // Accumulated output
        var $r;

        // Old-style (PHP 4) constructor, kept for backward compatibility;
        // delegates to __construct()
        function DiffPhrase() {
                $this->__construct();
        }

        // Constructor recognised by PHP 5 and later; without it $r would stay
        // null on PHP 8 and resultat() would not return a string
        function __construct() {
                $this->r = "";
        }

        // This level asks the driver for exact matching
        function fuzzy() {
                return false;
        }
        // Cuts $texte into words (with their trailing whitespace) and
        // punctuation segments.
        // NOTE(review): the group numbers 4, 5 and 6 used below only line up
        // with the whitespace / punctuation groups if plage_punct_unicode()
        // contributes exactly one capturing group to the pattern - confirm
        // against its definition before touching the indices.
        function segmenter($texte) {
                $paras = array();
                if (test_pcre_unicode()) {
                        $punct = '([[:punct:]]|'.plage_punct_unicode().')';
                        $mode = 'u';
                }
                else {
                        // Punctuation ranges for a buggy preg_match
                        $punct = '([^\w\s\x80-\xFF]|'.plage_punct_unicode().')';
                        $mode = '';
                }
                $preg = '/('.$punct.'+)(\s+|$)|(\s+)('.$punct.'*)/'.$mode;
                while (preg_match($preg, $texte, $regs)) {
                        // preg_match() leaves trailing unmatched groups out of
                        // $regs; pad them with '' so the reads below never hit
                        // an undefined key
                        $regs = $regs + array('', '', '', '', '', '', '');
                        // Position of the first occurrence of the matched text
                        $p = strpos($texte, $regs[0]);
                        $l = strlen($regs[0]);
                        $signes = $regs[1] ? $regs[1] : $regs[6];
                        $milieu = "";
                        if ($signes) {
                                // Attach closing shortcuts to the preceding word
                                if (preg_match(',^[\]}]+$,', $signes)) {
                                        $avant = substr($texte, 0, $p) . $regs[5] . $signes;
                                        $texte = $regs[4] . substr($texte, $p + $l);
                                }
                                // Attach opening shortcuts to the following word
                                else if ($regs[5] && preg_match(',^[\[{]+$,', $signes)) {
                                        $avant = substr($texte, 0, $p) . $regs[5];
                                        $texte = $signes . substr($texte, $p + $l);
                                }
                                // Other punctuation marks are words in their own right
                                else {
                                        $avant = substr($texte, 0, $p);
                                        $milieu = $regs[0];
                                        $texte = substr($texte, $p + $l);
                                }
                        }
                        else {
                                $avant = substr($texte, 0, $p + $l);
                                $texte = substr($texte, $p + $l);
                        }
                        // Length tests rather than truthiness: the word "0" is
                        // real content and must not vanish from the output
                        if (strlen((string) $avant)) $paras[] = $avant;
                        if (strlen((string) $milieu)) $paras[] = $milieu;
                }
                if (strlen((string) $texte)) $paras[] = $texte;
                return $paras;
        }

        // Appends an added segment
        function ajouter($p) {
                $this->r .= "<span class=\"diff-ajoute\" title=\""._T('diff_texte_ajoute')."\">".$p."</span rem=\"diff-\"> ";
        }
        // Appends a removed segment
        function supprimer($p_old) {
                $this->r .= "<span class=\"diff-supprime\" title=\""._T('diff_texte_supprime')."\">".$p_old."</span rem=\"diff-\"> ";
        }
        // A segment that stayed in place is copied as is ($p_old is not used)
        function comparer($p, $p_old) {
                $this->r .= $p;
        }

        // Returns everything accumulated so far
        function resultat() {
                return $this->r;
        }
}


// Prepares $texte for diffing: loads "charsets.php" through include_spip(),
// then returns unicode_to_utf_8(html2unicode(...)) of the text. When the
// 'charset' meta is not 'utf-8', the text first goes through
// charset2unicode() with that charset.
function preparer_diff($texte) {
        include_spip("charsets.php");

        $charset = lire_meta('charset');
        if ($charset != 'utf-8') {
                $texte = charset2unicode($texte, $charset, true);
        }
        return unicode_to_utf_8(html2unicode($texte));
}

// Converts a diff result for display: the text is returned unchanged when
// the 'charset' meta is 'utf-8', otherwise it is passed through
// charset2unicode() with 'utf-8' as the source charset.
function afficher_diff($texte) {
        $site_en_utf8 = (lire_meta('charset') == 'utf-8');
        return $site_en_utf8 ? $texte : charset2unicode($texte, 'utf-8');
}


?>