Subversion Repositories eFlore/Projets.eflore-projets

Rev

Rev 867 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 867 Rev 876
Line 12... Line 12...
12
 */
12
 */
Line 13... Line 13...
13
 
13
 
14
// time php -d memory_limit=1024M sphinx-maj-nom-ret.php 0 > sphinx-maj.log
14
// time php -d memory_limit=1024M sphinx-maj-nom-ret.php 0 > sphinx-maj.log
Line -... Line 15...
-
 
15
// runtime: 23 seconds
-
 
16
 
-
 
17
// settings
-
 
18
define('USE_NVJFL', FALSE);
-
 
19
define('ESCAPE_ON_SPHINX_SYNERROR', TRUE);
-
 
20
 
-
 
21
define('TRY_FORCE_START_LINE', TRUE);
-
 
22
define('TRY_SPLIT',			   TRUE);
-
 
23
define('TRY_EXACT',			   TRUE);
-
 
24
define('TRY_REF',			   TRUE);
-
 
25
define('TRY_SPLIT_AND_AUTEUR', FALSE);
-
 
26
define('TRY_REMOVE_L',		   TRUE);
-
 
27
 
-
 
28
define('M_TRY_SPLIT',	0x01);
-
 
29
define('M_TRY_EXACT',	0x02);
-
 
30
define('M_TRY_REF',		0x04);
15
// runtime: 23 seconds
31
define('M_TRY_SPLIT_AND_AUTEUR',	0x08);
16
 
32
 
17
error_reporting(E_ALL);
33
error_reporting(E_ALL);
18
$db = mysql_connect('localhost', 'root', '');
34
$db = mysql_connect('localhost', 'root', '') or die('no mysql');
19
mysql_select_db('tb_cel', $db);
35
mysql_select_db('tb_cel', $db);
Line 20... Line 36...
20
mysql_query("SET NAMES utf8", $db);
36
mysql_query("SET NAMES utf8", $db) or die('no sphinx');
21
$dbs = mysql_connect('127.0.0.1:9306', NULL, NULL, TRUE);
37
$dbs = mysql_connect('127.0.0.1:9306', NULL, NULL, TRUE);
22
 
38
 
23
$req = <<<EOF
39
$req = <<<EOF
24
	SELECT id_observation, nom_sel, nom_sel_nn,nom_ret,nom_ret_nn,nt,famille
40
	SELECT id_observation, nom_sel, nom_sel_nn, nom_ret, nom_ret_nn, nt, famille, nom_referentiel
25
        FROM `cel_obs`
41
        FROM `cel_obs`
26
        WHERE nom_sel IS NOT NULL AND nom_sel != '' AND
42
        WHERE nom_sel IS NOT NULL AND nom_sel != '' AND
Line 51... Line 67...
51
$stats = ['no_nom_sel' => ['count' => 0, 'data' => [] ],
67
$stats = ['no_nom_sel' => ['count' => 0, 'data' => [] ],
52
		  'not found' => ['count' => 0, 'data' => [] ],
68
		  'not found' => ['count' => 0, 'data' => [] ],
53
		  'too many' => ['count' => 0, 'data' => [] ],
69
		  'too many' => ['count' => 0, 'data' => [] ],
54
		  'fixable' => ['count' => 0, 'data' => [] ],
70
		  'fixable' => ['count' => 0, 'data' => [] ],
55
		  'sauvages' => ['count' => 0, 'data' => [] ],
71
		  'sauvages' => ['count' => 0, 'data' => [] ],
56
		  'sphinx errors' => ['count' => 0, 'data' => [] ], ];
72
		  'sphinx errors' => ['count' => 0, 'data' => [] ],
-
 
73
		  'ref pb' => ['count' => 0, 'data' => [] ], ];
Line -... Line 74...
-
 
74
 
Line 57... Line 75...
57
 
75
$sphinx_req = sprintf("SELECT * FROM i_bdtfx %s WHERE MATCH('%%s') LIMIT 5", USE_NVJFL ? ", i_nvjfl" : "");
58
 
76
 
59
for($current = 0; $current < intval($max/$chunk_size) + 1; $current++) {
77
for($current = 0; $current < intval($max/$chunk_size) + 1; $current++) {
60
	// printf("current = %d, chunk_size = %d, max = %d (rmax = %d) [real limit: %d]\n", $current, $chunk_size, $max, intval($max/$chunk_size) + 1, $current*$chunk_size);
78
	// printf("current = %d, chunk_size = %d, max = %d (rmax = %d) [real limit: %d]\n", $current, $chunk_size, $max, intval($max/$chunk_size) + 1, $current*$chunk_size);
61
	// printf(strtr($req, "\n", " ") . "\n", $start, $max, $current*$chunk_size, $chunk_size);
79
	// printf(strtr($req, "\n", " ") . "\n", $start, $max, $current*$chunk_size, $chunk_size);
62
	$data = mysql_query(sprintf($req, $start, $max, $current*$chunk_size, $chunk_size), $db);
80
	$data = mysql_query(sprintf($req, $start, $max, $current*$chunk_size, $chunk_size), $db);
63
	if(!$data) { var_dump(mysql_error()); die('end'); }
81
	if(!$data) { var_dump(mysql_error()); die('end'); }
-
 
82
	while($d = mysql_fetch_assoc($data)) {
-
 
83
		$n  = trim($d['nom_sel']);
64
	while($d = mysql_fetch_assoc($data)) {
84
		//d: fprintf(STDERR, "$n\n");
65
		$n  = $d['nom_sel'];
85
 
66
		if(!$n) {
86
		if(!$n) {
67
			$stats['no_nom_sel']['count']++;
87
			$stats['no_nom_sel']['count']++;
68
			// $stats['no_nom_sel']['data'][] = [$d['id_observation'], $n];*/
88
			// $stats['no_nom_sel']['data'][] = [$d['id_observation'], $n];*/
Line 74... Line 94...
74
			$stats['sauvages']['count']++;
94
			$stats['sauvages']['count']++;
75
			// $stats['sauvages']['data'][] = [$d['id_observation'], $n];
95
			// $stats['sauvages']['data'][] = [$d['id_observation'], $n];
76
			continue;
96
			continue;
77
		}
97
		}
Line -... Line 98...
-
 
98
 
-
 
99
		$MASQUE = 0;
-
 
100
 
-
 
101
		if(TRY_REMOVE_L) {
-
 
102
			$n = str_replace(' L.','', $n);
-
 
103
		}
-
 
104
 
-
 
105
		$orig_n = $n;
78
 
106
 
-
 
107
	recherche:
-
 
108
		if(TRY_FORCE_START_LINE && !_has($MASQUE, M_TRY_EXACT)) {
-
 
109
			$n = '^' . $n;
-
 
110
		}
79
	recherche:
111
 
-
 
112
		$s = mysql_query(sprintf($sphinx_req, $n), $dbs);
-
 
113
 
-
 
114
 
80
		//$s = mysql_query("SELECT * FROM i_bdtfx WHERE MATCH('" . str_replace($from,$to,$n) . "') LIMIT 5", $dbs);
115
		if(!$s && ESCAPE_ON_SPHINX_SYNERROR) {
-
 
116
			$s = mysql_query(sprintf($sphinx_req, str_replace($from,$to,$n)), $dbs);
81
		$s = mysql_query("SELECT * FROM i_bdtfx, i_nvjfl WHERE MATCH('" . $n . "') LIMIT 5", $dbs);
117
		}
82
		if(!$s) {
118
		if(!$s) {
83
			$stats['sphinx errors']['count']++;
119
			$stats['sphinx errors']['count']++;
84
			// $stats['sphinx errors']['data'][] = [$d['id_observation'], $n];
120
			// $stats['sphinx errors']['data'][] = [$d['id_observation'], $orig_n];
85
			continue;
121
			continue;
Line 86... Line 122...
86
		}
122
		}
-
 
123
 
-
 
124
		$c = mysql_num_rows($s);
87
 
125
		//d: fprintf(STDERR, "\t search [nb:%d] \"%s\" (msk:%d)\n", $c, $n, $MASQUE);
-
 
126
 
-
 
127
		if($c == 0) {
-
 
128
			if(TRY_SPLIT && !_has($MASQUE, M_TRY_SPLIT)) {
-
 
129
				require_once('lib-split-auteur.php');
-
 
130
				$MASQUE |= M_TRY_SPLIT;
-
 
131
				// $n = RechercheInfosTaxonBeta::supprimerAuteur($orig_n);
-
 
132
				// list($ret, $m) = RechercheInfosTaxonBeta::contientAuteur($orig_n);
-
 
133
				$ret = RechercheInfosTaxonBeta::supprimerAuteurBis($orig_n, $m);
-
 
134
				if($ret) {
-
 
135
					// printf("===================== SPLIT: contientAuteur \"%s\" [@%s @%s)\n", $orig_n, $ret, $m);
-
 
136
					$n = sprintf('%s @auteur %s', $ret, $m);
-
 
137
					goto recherche;
-
 
138
				}
-
 
139
			}
-
 
140
			if(TRY_SPLIT_AND_AUTEUR && !_has($MASQUE, M_TRY_SPLIT_AND_AUTEUR) && strpos($orig_n, ' ') !== FALSE) {
-
 
141
				require_once('lib-split-auteur.php');
-
 
142
				$MASQUE |= M_TRY_SPLIT_AND_AUTEUR;
-
 
143
				$ns = RechercheInfosTaxonBeta::supprimerAuteur($orig_n);
-
 
144
				if($ns) {
-
 
145
					$a = trim(substr($orig_n, strlen($n)));
-
 
146
					$n = sprintf("%s @auteur %s", $ns, $a);
-
 
147
					// echo "===================== SPLIT N/A: $n\n";
-
 
148
					goto recherche;
-
 
149
				}
88
		$c = mysql_num_rows($s);
150
			}
89
		if($c == 0) {
151
 
90
			$stats['not found']['count']++;
152
			$stats['not found']['count']++;
91
			// $stats['not found']['data'][] = [$d['id_observation'], $n];
153
			// $stats['not found']['data'][] = [$d['id_observation'], $orig_n];
Line 92... Line 154...
92
			continue;
154
			continue;
-
 
155
		}
-
 
156
 
-
 
157
		if($c > 1) {
-
 
158
 
-
 
159
			if($c == 2) {
-
 
160
				if(mysql_fetch_array($s)['group_id'] !=
-
 
161
				   mysql_fetch_array($s)['group_id']) {
-
 
162
					// the search yields only 2 results, in 2 different referentials (their group_id differs)
-
 
163
					// potentially fixable if we can rely on $d['nom_referentiel']
-
 
164
					$stats['ref pb']['count']++;
-
 
165
					// $stats['ref pb']['data'][] = [$d['id_observation'], $orig_n];
-
 
166
					continue;
-
 
167
				}
-
 
168
			}
-
 
169
 
-
 
170
			if(TRY_EXACT && !_has($MASQUE, M_TRY_EXACT)) {
-
 
171
				$MASQUE |= M_TRY_EXACT;
-
 
172
				$n = '"^' . trim($orig_n) . '$"';
-
 
173
				goto recherche;
-
 
174
			}
-
 
175
			if(TRY_REF && isset($d['nom_referentiel']) && !_has($MASQUE, M_TRY_REF)) {
-
 
176
				$MASQUE |= M_TRY_REF;
-
 
177
				$n = $orig_n . ' @group_id ' . $d['nom_referentiel'];
93
		}
178
				goto recherche;
94
 
179
			}
95
		if($c > 1) {
180
 
96
			$stats['too many']['count']++;
181
			$stats['too many']['count']++;
Line -... Line 182...
-
 
182
			// $stats['too many']['data'][] = [$d['id_observation'], $orig_n];
-
 
183
			continue;
97
			// $stats['too many']['data'][] = [$d['id_observation'], $n];
184
		}
98
			continue;
185
 
Line 99... Line 186...
99
		}
186
 
100
 
187
	ok:
Line -... Line 188...
-
 
188
		$stats['fixable']['count']++;
-
 
189
		// $stats['fixable']['data'][] = [$d['id_observation'], $orig_n];
-
 
190
 
-
 
191
	}
-
 
192
}
101
		$stats['fixable']['count']++;
193
 
102
		// $stats['fixable']['data'][] = [$d['id_observation'], $n];
194
function _has($v, $r) {
103
 
195
	return ($v & $r) == $r;