texte)
//
function ajouter_fragments($id_article, $id_version, $fragments) {
	// Store the text of every fragment of an article for version $id_version.
	//
	// $id_article : id of the article
	// $id_version : id of the version being saved
	// $fragments  : associative array (id_fragment => text)
	//
	// Each row of spip_versions_fragments holds a serialized array
	// (version => text), gz-compressed when that makes it smaller, for the
	// versions from version_min to version_max. Nothing is returned; all rows
	// are written with a single REPLACE query.
	global $flag_gz;

	// These values are interpolated into the SQL below: force them to integers.
	$id_article = intval($id_article);
	$id_version = intval($id_version);

	$replaces = array();

	foreach ($fragments as $id_fragment => $texte) {
		$id_fragment = intval($id_fragment);
		$nouveau = true;

		// Fetch the most recent stored row that starts at or before this version
		$query = "SELECT compress, fragment, version_min, version_max FROM spip_versions_fragments ".
			"WHERE id_article=$id_article AND id_fragment=$id_fragment AND version_min<=$id_version ".
			"ORDER BY version_min DESC LIMIT 0,1";
		$result = spip_query($query);
		if ($row = spip_fetch_array($result)) {
			$fragment = $row['fragment'];
			$version_min = $row['version_min'];
			if ($row['compress'] > 0) $fragment = gzuncompress($fragment);
			$fragment = unserialize($fragment);
			if (is_array($fragment)) {
				// Drop any text already stored for this version: it is re-decided below
				unset($fragment[$id_version]);
				// With fewer than five distinct revisions in the row, extend it
				// instead of starting a new one
				if (count($fragment) < 5) $nouveau = false;
			}
		}

		if ($nouveau) {
			// Start a fresh row beginning at this version
			$fragment = array($id_version => $texte);
			$version_min = $id_version;
		}
		else {
			// Do not duplicate unmodified fragments: compare with the closest
			// earlier revision stored in this row
			$modif = true;
			for ($i = $id_version - 1; $i >= $version_min; $i--) {
				if (isset($fragment[$i])) {
					$modif = ($fragment[$i] != $texte);
					break;
				}
			}
			if ($modif) $fragment[$id_version] = $texte;
		}

		// Prepare the row to be written; it now covers versions up to $id_version
		$fragment = serialize($fragment);
		$compress = 0;
		$version_max = $id_version;
		if ($flag_gz) {
			// Keep the compressed form only when it is actually smaller
			$s = gzcompress($fragment);
			if (strlen($s) < strlen($fragment)) {
				$compress = 1;
				$fragment = $s;
			}
		}

		// $fragment may be binary at this point, so it must be escaped.
		// NOTE(review): mysql_escape_string() no longer exists in PHP >= 7.0;
		// switch to the project's own SQL quoting helper when porting.
		$replaces[] = "($id_article, $version_min, $version_max, $id_fragment, $compress, '"
			.mysql_escape_string($fragment)."')";
	}

	if (count($replaces)) {
		$query = "REPLACE spip_versions_fragments (id_article, version_min, version_max, id_fragment, compress, fragment) ".
			"VALUES ".join(", ", $replaces);
		spip_query($query);
	}
}
//
// Recuperer les fragments d'une version donnee
// renvoie un tableau associatif (id_fragment => texte)
//
function recuperer_fragments($id_article, $id_version) {
	// Fetch the fragments of an article as they were at version $id_version.
	// Returns an associative array (id_fragment => text); a fragment with no
	// stored text at or before that version within its row is left out.
	$fragments = array();

	// Only rows whose version_min..version_max range covers the requested version
	$result = spip_query(
		"SELECT id_fragment, version_min, compress, fragment FROM spip_versions_fragments ".
		"WHERE id_article=$id_article AND version_min<=$id_version AND version_max>=$id_version"
	);

	while ($row = spip_fetch_array($result)) {
		// Decode the stored (version => text) array, decompressing it if needed
		$data = $row['fragment'];
		if ($row['compress'] > 0) {
			$data = gzuncompress($data);
		}
		$revisions = unserialize($data);

		// Walk back from the requested version to the first version of the row
		// and keep the most recent text found
		$v = $id_version;
		while ($v >= $row['version_min']) {
			if (isset($revisions[$v])) {
				$fragments[$row['id_fragment']] = $revisions[$v];
				break;
			}
			$v--;
		}
	}

	return $fragments;
}
//
// Apparier des paragraphes deux a deux entre une version originale
// et une version modifiee
//
function apparier_paras($src, $dest) {
$src_dest = array();
$dest_src = array();
$t1 = $t2 = array();
$md1 = $md2 = array();
$gz_min1 = $gz_min2 = array();
$gz_trans1 = $gz_trans2 = array();
$l1 = $l2 = array();
// Nettoyage de la ponctuation pour faciliter l'appariement
foreach($src as $key => $val) {
$t1[$key] = preg_replace("/[[:punct:][:space:]]+/", " ", $val);
}
foreach($dest as $key => $val) {
$t2[$key] = preg_replace("/[[:punct:][:space:]]+/", " ", $val);
}
// Hash pour premiere passe
foreach($t1 as $key => $val) $md1[md5($val)] = $key;
foreach($t2 as $key => $val) $md2[md5($val)] = $key;
// Premiere passe : chercher les correspondance exactes
foreach($md1 as $h => $key1) {
if (isset($md2[$h])) {
$key2 = $md2[$h];
if ($t1[$key1] == $t2[$key2]) {
$src_dest[$key1] = $key2;
$dest_src[$key2] = $key1;
unset($t1[$key1]);
unset($t2[$key2]);
}
}
}
// Deuxieme passe : recherche de correlation par test de compressibilite
foreach($t1 as $key => $val) {
$l1[$key] = strlen(gzcompress($val));
}
foreach($t2 as $key => $val) {
$l2[$key] = strlen(gzcompress($val));
}
foreach($t1 as $key1 => $s1) {
//echo "
";
foreach($t2 as $key2 => $s2) {
$r = strlen(gzcompress($s1.$s2));
//$k += strlen($s1) + strlen($s2);
$taux = 1.0 * $r / ($l1[$key1] + $l2[$key2]);
//echo "
";
// Depouiller les resultats de la deuxieme passe :
// ne retenir que les correlations reciproques
foreach($gz_trans1 as $key1 => $key2) {
if ($gz_trans2[$key2] == $key1 && $gz_min1[$key1] < 0.9) {
$src_dest[$key1] = $key2;
$dest_src[$key2] = $key1;
}
}
/*echo "
";
foreach ($gz_trans1 as $a => $b) {
echo "$a => $b
";
echo "
"; } echo "".$t1[$a].""; echo "".$t2[$b]."