Subversion Repositories eFlore/Projets.eflore-projets

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
866 raphael 1
<?php
2
/*
3
 * @author		Raphaël Droz <raphael@tela-botanica.org>
4
 * @copyright	Copyright (c) 2011, 2013 Tela Botanica (accueil@tela-botanica.org)
5
 * @license		http://www.cecill.info/licences/Licence_CeCILL_V2-fr.txt Licence CECILL
6
 * @license		http://www.gnu.org/licenses/gpl.html Licence GNU-GPL
7
 *
8
 * Corrige les erreurs de saisie de nom à l'aide d'une recherche via un index sphinx
9
 * pour les observations ayant un nom saisi et dont l'un au moins de nom_ret[nn],
10
 * nt ou famille est NULL ou vide.
11
 *
12
 */
13
 
14
// time php -d memory_limit=1024M sphinx-maj-nom-ret.php 0 > sphinx-maj.log
15
// 23 secondes
16
 
17
error_reporting(E_ALL);
18
$db = mysql_connect('localhost', 'root', '');
19
mysql_select_db('tb_cel', $db);
20
mysql_query("SET NAMES utf8", $db);
21
$dbs = mysql_connect('127.0.0.1:9306', NULL, NULL, TRUE);
22
 
23
$req = <<<EOF
24
	SELECT id_observation, nom_sel, nom_sel_nn,nom_ret,nom_ret_nn,nt,famille
25
        FROM `cel_obs`
26
        WHERE nom_sel IS NOT NULL AND nom_sel != '' AND
27
		id_observation BETWEEN %d AND %d AND
28
		( nom_ret IS NULL or nom_ret = ''
29
		  OR nom_ret_nn IS NULL or nom_ret_nn = 0 or nom_ret_nn = ''
30
		  OR nt IS NULL or nt = 0 or nt = ''
31
		  OR famille IS NULL or famille = '' )
32
	LIMIT %d, %d
33
EOF;
34
 
35
array_shift($argv);
36
$start = array_shift($argv);
37
$max = array_shift($argv);
38
$chunk_size = array_shift($argv);
39
 
40
if(!$start) $start = 0;
41
// 1036314
42
if(!$max) $max = intval(mysql_fetch_assoc(mysql_query("SELECT MAX(id_observation) AS max FROM cel_obs", $db))['max']) + 1;
43
if(!$chunk_size) $chunk_size = 50000;
44
 
45
 
46
// escape sphinx
47
$from = array ( '\\', '(',')','|','-','!','@','~','"','&', '/', '^', '$', '=', "'", "\x00", "\n", "\r", "\x1a" );
48
$to   = array ( '\\\\', '\\\(','\\\)','\\\|','\\\-','\\\!','\\\@','\\\~','\\\"', '\\\&', '\\\/', '\\\^', '\\\$', '\\\=', "\\'", "\\x00", "\\n", "\\r", "\\x1a" );
49
 
50
 
51
$stats = ['no_nom_sel' => ['count' => 0, 'data' => [] ],
52
		  'not found' => ['count' => 0, 'data' => [] ],
53
		  'too many' => ['count' => 0, 'data' => [] ],
54
		  'fixable' => ['count' => 0, 'data' => [] ],
55
		  'sauvages' => ['count' => 0, 'data' => [] ],
56
		  'sphinx errors' => ['count' => 0, 'data' => [] ], ];
57
 
58
 
59
for($current = 0; $current < intval($max/$chunk_size) + 1; $current++) {
60
	// printf("current = %d, chunk_size = %d, max = %d (rmax = %d) [real limit: %d]\n", $current, $chunk_size, $max, intval($max/$chunk_size) + 1, $current*$chunk_size);
61
	// printf(strtr($req, "\n", " ") . "\n", $start, $max, $current*$chunk_size, $chunk_size);
62
	$data = mysql_query(sprintf($req, $start, $max, $current*$chunk_size, $chunk_size), $db);
63
	if(!$data) { var_dump(mysql_error()); die('end'); }
64
	while($d = mysql_fetch_assoc($data)) {
65
		$n  = $d['nom_sel'];
66
		if(!$n) {
67
			$stats['no_nom_sel']['count']++;
68
			// $stats['no_nom_sel']['data'][] = [$d['id_observation'], $n];*/
69
			continue;
70
		}
71
 
72
		if($n == 'Autre(s) espèce(s) (écrire le/les nom(s) dans les notes)' ||
73
		   $n == '-') {
74
			$stats['sauvages']['count']++;
75
			// $stats['sauvages']['data'][] = [$d['id_observation'], $n];
76
			continue;
77
		}
78
 
867 raphael 79
	recherche:
866 raphael 80
		//$s = mysql_query("SELECT * FROM i_bdtfx WHERE MATCH('" . str_replace($from,$to,$n) . "') LIMIT 5", $dbs);
867 raphael 81
		$s = mysql_query("SELECT * FROM i_bdtfx, i_nvjfl WHERE MATCH('" . $n . "') LIMIT 5", $dbs);
866 raphael 82
		if(!$s) {
83
			$stats['sphinx errors']['count']++;
84
			// $stats['sphinx errors']['data'][] = [$d['id_observation'], $n];
85
			continue;
86
		}
87
 
88
		$c = mysql_num_rows($s);
89
		if($c == 0) {
90
			$stats['not found']['count']++;
91
			// $stats['not found']['data'][] = [$d['id_observation'], $n];
92
			continue;
93
		}
94
 
95
		if($c > 1) {
96
			$stats['too many']['count']++;
97
			// $stats['too many']['data'][] = [$d['id_observation'], $n];
98
			continue;
99
		}
100
 
101
		$stats['fixable']['count']++;
102
		// $stats['fixable']['data'][] = [$d['id_observation'], $n];
103
 
104
	}
105
}
106
 
107
array_walk($stats, function(&$v) { unset($v['data']); });
108
print_r($stats);
109
printf("total traité: %d\n", array_sum(array_map(function($v) { return $v['count']; }, $stats)));