Subversion Repositories Applications.papyrus

Rev

Rev 2126 | Rev 2131 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2103 drzraf 1
<?php
2
/*
3
 * Moteur de recherche SPHINX
4
 * @author        Raphaël Droz <raphael@tela-botanica.org>
5
 * @copyright     Tela-Botanica 2013
6
 */
7
 
8
/*
9
  sudo urpmi lib64sphinxclient-devel
10
  sudo pecl install sphinx
11
 
12
  see also: http://www.ibm.com/developerworks/library/os-sphinx/
13
  see also: http://sphinxsearch.com/docs/manual-2.0.7.html#extended-syntax
14
  TODO: http://sphinxsearch.com/blog/2010/08/17/how-sphinx-relevance-ranking-works/
2113 drzraf 15
  TODO: attention, projet,bazaar,spip et papyrus sont latin9, ainsi en est-il de l'input attendu du <form>
16
  		mais pour coste... c'est full utf-8
2103 drzraf 17
*/
18
 
19
/* returned struct:
20
'poids' => 0,
21
'url' => '',
22
'titre' => '',
23
'hreflang' => '',
24
'accesskey' => '',
2105 drzraf 25
'title' => '', // balise 'title'
2103 drzraf 26
'date_creation' => '',
27
'description' => ''
28
*/
29
 
2113 drzraf 30
// host:port of the searchd SphinxQL (MySQL protocol) listener
define('SPHINX_DSN', '127.0.0.1:9306');
// Host name used to build the absolute "bdtfx-nn-*" result URLs.
// HTTP_HOST does not exist outside of a web request (CLI, cron): fall back
// to SERVER_NAME, then to "localhost", instead of defining the constant as NULL.
// NOTE(review): HTTP_HOST is sent by the client; confirm the vhost setup makes it trustworthy.
define('_MRS_SPHINX_BASEHOST',
	   isset($_SERVER['HTTP_HOST'])
	   ? $_SERVER['HTTP_HOST']
	   : (isset($_SERVER['SERVER_NAME']) ? $_SERVER['SERVER_NAME'] : 'localhost'));

// timezone and locale used by strftime() when formatting result dates
date_default_timezone_set('Europe/Paris');
setlocale(LC_TIME, 'fr_FR');
36
 
2103 drzraf 37
/**
 * Loads the SPIP articles behind a list of search hits.
 */
class MoteurRecherche_SPIP {
	/**
	 * Fetches the published articles whose id_article is in $ids.
	 *
	 * @param array $ids article ids; each value is cast to an integer and
	 *                   values casting to 0 are dropped before reaching the SQL
	 * @param string|null $q search pattern, appended to each result URL as "var_recherche"
	 * @return array rows keyed by article id, each holding: id, titre,
	 *               date_creation, hreflang, url_simple, url, description
	 */
	public function get($ids, $q = NULL) {
		if(!$ids) return array();
		// the ids are concatenated into the IN (...) clause: only integers may get there
		$ids = array_filter(array_map('intval', (array)$ids));
		// nothing usable left: avoid sending an invalid "IN ()" to the server
		if(!$ids) return array();

		$db = DB::connect($GLOBALS['_MOTEUR_RECHERCHE_']['spip'][0]['bdd_dsn']);
		// same policy as for the query below: a DB failure aborts the page
		(DB::isError($db)) ? die($db->getMessage()) : '';
		$req = $db->query(sprintf(<<<EOF
SELECT id_article AS id, titre, texte, date AS date_creation, lang as hreflang
FROM spip_articles
WHERE statut = "%s"
AND id_article IN (%s)
EOF
								  ,
								  "publie",
								  implode(',', $ids)));

		(DB::isError($req)) ? die($req->getMessage()) : '';
		$base_url = trim($GLOBALS['_MOTEUR_RECHERCHE_']['spip'][0]['url'], '/');
		$content = array();
		while($rec = $req->fetchRow(DB_FETCHMODE_ASSOC)) {
			// plain article URL, then the same URL carrying the search pattern
			$rec['url_simple'] = sprintf("%s/article%d.html", $base_url, $rec['id']);
			$rec['url'] = sprintf("%s?var_recherche=%s",
								  $rec['url_simple'],
								  More_Recherche::traiterMotif($q, 'url'));
			// the full text is only used to build the excerpt
			$rec['description'] = More_Recherche::couperTexte($rec['texte'], MORE_RESULTAT_TAILLE_DESCRIPTION);
			unset($rec['texte']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
68
 
69
/**
 * Loads the Bazar records ("fiches") behind a list of search hits.
 */
class MoteurRecherche_BAZAR {
	/**
	 * Fetches the bazar_fiche rows whose bf_id_fiche is in $ids.
	 *
	 * @param array $ids record ids; each value is cast to an integer and
	 *                   values casting to 0 are dropped before reaching the SQL
	 * @param string|null $q search pattern (not used by this source)
	 * @return array rows keyed by record id, each holding: id, titre,
	 *               date_creation, url_simple, url, description
	 */
	public function get($ids, $q = NULL) {
		if(!$ids) return array();
		// the ids are concatenated into the IN (...) clause: only integers may get there
		$ids = array_filter(array_map('intval', (array)$ids));
		// nothing usable left: avoid sending an invalid "IN ()" to the server
		if(!$ids) return array();

		$db = DB::connect($GLOBALS['_MOTEUR_RECHERCHE_']['bazar'][0]['bdd_dsn']);
		// same policy as for the query below: a DB failure aborts the page
		(DB::isError($db)) ? die($db->getMessage()) : '';
		$req = $db->query(sprintf(<<<EOF
SELECT bf_id_fiche AS id,
	   bf_description AS texte,
	   bf_titre AS titre,
	   bf_date_debut_evenement AS date_creation
FROM bazar_fiche
WHERE bf_id_fiche IN (%s)
EOF
								  ,
								  implode(',', $ids)));

		(DB::isError($req)) ? die($req->getMessage()) : '';
		// the configured url is used as the sprintf() format, with the record id as its only argument
		$format_url = trim($GLOBALS['_MOTEUR_RECHERCHE_']['bazar'][0]['url'], '/');
		$content = array();
		while($rec = $req->fetchRow(DB_FETCHMODE_ASSOC)) {
			$rec['url_simple'] = $rec['url'] = sprintf($format_url, $rec['id']);
			// the full text is only used to build the excerpt
			$rec['description'] = More_Recherche::couperTexte($rec['texte'], MORE_RESULTAT_TAILLE_DESCRIPTION);
			unset($rec['texte']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
95
 
96
/**
 * Loads the "projet" rows behind a list of search hits.
 */
class MoteurRecherche_PROJET {
	/**
	 * Fetches the projects whose p_id is in $ids, through the shared papyrus connection.
	 *
	 * @param array $ids project ids; each value is cast to an integer and
	 *                   values casting to 0 are dropped before reaching the SQL
	 * @param string|null $q search pattern (not used by this source)
	 * @return array rows keyed by project id, each holding: id, titre,
	 *               date_creation, url_simple, url, description
	 */
	public function get($ids, $q = NULL) {
		if(!$ids) return array();
		// the ids are concatenated into the IN (...) clause: only integers may get there
		$ids = array_filter(array_map('intval', (array)$ids));
		// nothing usable left: avoid sending an invalid "IN ()" to the server
		if(!$ids) return array();

		$db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'];
		$req = $db->query(sprintf(<<<EOF
SELECT p_id AS id, p_titre AS titre, p_description, p_date_creation AS date_creation
FROM projet WHERE p_id IN (%s)
EOF
								  ,
								  implode(',', $ids)));

		(DB::isError($req)) ? die($req->getMessage()) : '';
		$base_url = trim($GLOBALS['_MOTEUR_RECHERCHE_']['projet'][0]['url'], '/');
		$content = array();
		while($rec = $req->fetchRow(DB_FETCHMODE_ASSOC)) {
			$rec['url_simple'] = $rec['url'] = sprintf("%s?id_projet=%d", $base_url, $rec['id']);
			// excerpt: markup stripped, then cut byte-wise to a fixed budget
			$rec['description'] = substr(strip_tags($rec['p_description']), 0, 400 + 2 * MORE_RESULTAT_TAILLE_DESCRIPTION);
			unset($rec['p_description']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
120
 
121
/**
 * Loads the Papyrus menus (pages) behind a list of search hits.
 */
class MoteurRecherche_PAPYRUS {
	/**
	 * Fetches the menus whose latest content row (gmc_bool_dernier = 1) has
	 * its gmc_ce_menu in $ids.
	 *
	 * @param array $ids menu ids; each value is cast to an integer and
	 *                   values casting to 0 are dropped before reaching the SQL
	 * @param string|null $q search pattern, added to each result URL as "var_recherche"
	 * @return array rows keyed by menu id; besides the selected gm_* columns
	 *               each row holds: id, titre, texte, date_creation,
	 *               url_simple, url, description
	 */
	public function get($ids, $q = NULL) {
		if(!$ids) return array();
		// the ids are concatenated into the IN (...) clause: only integers may get there
		$ids = array_filter(array_map('intval', (array)$ids));
		// nothing usable left: avoid sending an invalid "IN ()" to the server
		if(!$ids) return array();

		$db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'];
		$req = $db->query(sprintf(<<<EOF
SELECT mc.gmc_ce_menu AS id,
	   IF(gm_nom != '', gm_nom, IF(gm_titre != '', gm_titre, gm_titre_alternatif)) AS titre,
	   gmc_contenu AS texte,
	   gm_description_libre, gm_description_resume,
	   gm_mots_cles,gm_source, gm_auteur, gm_contributeur, gm_editeur, gm_categorie, gm_date_creation AS date_creation
FROM gen_menu m
LEFT JOIN gen_menu_contenu mc ON mc.gmc_ce_menu = m.gm_id_menu AND mc.gmc_bool_dernier = 1
WHERE mc.gmc_ce_menu IN (%s)
EOF
								  ,
								  implode(',', $ids)));

		(DB::isError($req)) ? die($req->getMessage()) : '';
		$content = array();
		while($rec = $req->fetchRow(DB_FETCHMODE_ASSOC)) {
			// URL building
			// TODO: as for spip, use a dedicated configuration file to get rid of PAP_URL here
			$une_url = new Pap_URL(PAP_URL);
			$une_url->setId($rec['id']);
			$rec['url_simple'] = $une_url->getURL();
			// same URL, carrying the search pattern
			$une_url->addQueryString('var_recherche', More_Recherche::traiterMotif($q, 'url'), true);
			$rec['url'] = $une_url->getURL();

			// NOTE(review): htmlentities() runs with the PHP default charset;
			// confirm it matches the encoding returned by this connection
			$rec['description'] =  htmlentities($rec['gm_description_resume']);
			unset($rec['gm_description_resume']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
156
 
2117 drzraf 157
/**
 * Loads the Coste flora entries behind a list of search hits.
 */
class MoteurRecherche_COSTE {
	/**
	 * Fetches the coste_v2_00 rows whose flore_bdtfx_nn is in $ids, joined
	 * with the latest version of their wikini description page.
	 *
	 * @param array $ids bdtfx name numbers; each value is cast to an integer and
	 *                   values casting to 0 are dropped before reaching the SQL
	 * @param string|null $q search pattern (not used by this source)
	 * @return array rows keyed by name number, each holding: id, titre,
	 *               description, url_simple, url
	 */
	public function get($ids, $q = NULL) {
		if(!$ids) return array();
		// the ids are concatenated into the IN (...) clause: only integers may get there
		$ids = array_filter(array_map('intval', (array)$ids));
		// nothing usable left: avoid sending an invalid "IN ()" to the server
		if(!$ids) return array();

		// DB access is dumb, let's use this one and pray
		$db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'];
		$req = $db->query(sprintf(<<<EOF
SELECT c.flore_bdtfx_nn AS id, c.nom_sci AS titre, dsc.body AS description
FROM tb_eflore.coste_v2_00 c
LEFT JOIN tela_prod_wikini.florecoste_pages dsc ON c.page_wiki_dsc = dsc.tag AND dsc.latest = 'Y'
WHERE c.flore_bdtfx_nn IN (%s)
EOF
								  ,
								  implode(',', $ids)));

		(DB::isError($req)) ? die($req->getMessage()) : '';
		$content = array();
		while($rec = $req->fetchRow(DB_FETCHMODE_ASSOC)) {
			$rec['url_simple'] = $rec['url'] = sprintf("http://%s/bdtfx-nn-%d", _MRS_SPHINX_BASEHOST, $rec['id']);
			// TODO: interpret wikini
			// byte-wise cut of the raw wiki source
			$rec['description'] = substr($rec['description'], 0, 400 + 2 * MORE_RESULTAT_TAILLE_DESCRIPTION);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
182
 
2123 drzraf 183
/**
 * Loads the scientific names and their vernacular names behind a list of search hits.
 */
class MoteurRecherche_NVJFL {
	/**
	 * Fetches the bdtfx names whose num_nom is in $ids, each with the
	 * comma-separated list of vernacular names attached to its taxon.
	 *
	 * @param array $ids bdtfx name numbers; each value is cast to an integer and
	 *                   values casting to 0 are dropped before reaching the SQL
	 * @param string|null $q search pattern (not used by this source)
	 * @return array rows keyed by name number, each holding: id, titre,
	 *               description, url_simple, url
	 */
	public function get($ids, $q = NULL) {
		if(!$ids) return array();
		// the ids are concatenated into the IN (...) clause: only integers may get there
		$ids = array_filter(array_map('intval', (array)$ids));
		// nothing usable left: avoid sending an invalid "IN ()" to the server
		if(!$ids) return array();

		// DB access is dumb, let's use this one and pray
		$db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'];
		// One row per requested name: grouping on the LEFT JOINed n.num_taxon
		// would merge every name lacking a vernacular name (NULL group) and
		// every pair of names sharing a taxon, so some ids would get no row.
		$req = $db->query(sprintf(<<<EOF
SELECT b.num_nom AS id, CONCAT(nom_sci, ' (nn: ', b.num_nom, ', nt: ', num_taxonomique, ')') AS titre, GROUP_CONCAT(n.nom_vernaculaire) AS description
FROM tb_eflore.bdtfx_v1_01 b
LEFT JOIN tb_eflore.nvjfl_v2007 n ON n.num_taxon = b.num_taxonomique
WHERE b.num_nom IN (%s)
GROUP BY b.num_nom
EOF
								  ,
								  implode(',', $ids)));

		(DB::isError($req)) ? die($req->getMessage()) : '';
		$content = array();
		while($rec = $req->fetchRow(DB_FETCHMODE_ASSOC)) {
			$rec['url_simple'] = $rec['url'] = sprintf("http://%s/bdtfx-nn-%d", _MRS_SPHINX_BASEHOST, $rec['id']);
			// byte-wise cut of the vernacular names list
			$rec['description'] = substr($rec['description'], 0, 400 + 2 * MORE_RESULTAT_TAILLE_DESCRIPTION);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
208
 
209
 
2103 drzraf 210
/**
 * Reorders an associative array so that its keys follow a given key order.
 *
 * Entries whose key is listed in $orderArray come first, in the order of
 * that list; every remaining entry follows, in its original relative order.
 * Listed keys that do not exist in $array are ignored.
 *
 * @see http://stackoverflow.com/questions/348410/sort-an-array-based-on-another-array
 * @param array $array the array to reorder
 * @param array $orderArray the wanted keys, in the wanted order
 * @return array the reordered array, keys preserved
 */
function _sortArrayByArray($array, $orderArray) {
	$en_tete = array();
	foreach($orderArray as $clef_voulue) {
		if(!array_key_exists($clef_voulue, $array)) continue;
		$en_tete[$clef_voulue] = $array[$clef_voulue];
	}
	// array union keeps the left-hand entries and appends only the keys not picked above
	return $en_tete + $array;
}
2103 drzraf 221
 
2105 drzraf 222
/**
 * array_walk() callback adapting one result to the existing templates:
 * [weight] becomes [score], a percentage of $max, and [date_creation] is
 * turned into a "day month year" string using the current LC_TIME locale.
 *
 * @param array $item the result row, modified in place
 * @param mixed $key array_walk() key (unused)
 * @param int|float $max the weight standing for 100%
 * @return void
 */
function _weight2score(&$item, $key, $max) {
	$poids = isset($item['weight']) ? $item['weight'] : 0;
	// no usable reference weight: a score of 0 rather than a division by zero
	$item['score'] = $max ? intval($poids / $max * 100) : 0;

	$horodatage = FALSE;
	// empty values and MySQL "zero" dates (0000-00-00...) carry no date at all
	if(!empty($item['date_creation']) && strncmp($item['date_creation'], '0000', 4) != 0) {
		$horodatage = strtotime($item['date_creation']);
	}
	// unparsable dates yield an empty string instead of "01 January 1970"
	$item['date_creation'] = ($horodatage !== FALSE) ? strftime("%d %B %Y", $horodatage) : '';
	unset($item['weight']);
}
228
 
2127 drzraf 229
/**
 * usort() callback: most recent [date_creation] first.
 *
 * Dates are compared as strings, which is only meaningful for sortable
 * formats such as "YYYY-MM-DD hh:mm:ss". A missing or NULL date is handled
 * as an empty string, so undated items consistently end up last; returning 0
 * for them would make the comparison non-transitive and the sort order undefined.
 *
 * @param array $a first result row
 * @param array $b second result row
 * @return int negative if $a goes first, positive if $b goes first, 0 if equal
 */
function _actuNewerFirst($a,$b) {
	$date_a = isset($a['date_creation']) ? (string)$a['date_creation'] : '';
	$date_b = isset($b['date_creation']) ? (string)$b['date_creation'] : '';
	return strcmp($date_b, $date_a);
}
232
 
2105 drzraf 233
/**
 * Runs a full-text query against the Sphinx indexes and returns the matching
 * documents, loaded from their respective databases.
 *
 * At most 50 hits are requested over the i_projet, i_spip, i_papyrus,
 * i_bazar, i_coste and i_nvjfl indexes. Each hit is completed by the
 * MoteurRecherche_* class of its group, then the documents are ordered like
 * the Sphinx result set (or by date when $_GET['tri'] is "date") and adapted
 * to the templates by _weight2score().
 *
 * NOTE(review): the code reads the "id" and "weight" columns of every hit
 * although the query only selects group_id and main_id; this presumably relies
 * on searchd adding them implicitly. Confirm against the deployed Sphinx version.
 *
 * @param string|null $q the search pattern; it is escaped as a SQL string, its
 *                       content is still interpreted by MATCH()
 * @param int $page unused
 * @return array the documents; keys are "<group>-<id>", or plain integers
 *               after a sort by date. Empty when $q is empty or searchd fails.
 */
function sphinx_search($q = NULL, $page = 1) {
	if(!$q) return array();

	$db = mysql_connect(SPHINX_DSN, NULL, NULL, TRUE);
	if(!$db) {
		trigger_error('sphinx_search: unable to connect to searchd', E_USER_WARNING);
		return array();
	}

	// $q comes from the search form: never let it close the quoted literal
	$req = mysql_query(sprintf("SELECT group_id, main_id FROM i_projet, i_spip, i_papyrus, i_bazar, i_coste, i_nvjfl WHERE MATCH('%s') LIMIT 50",
							   mysql_real_escape_string($q, $db)),
					   $db);
	if(!$req) {
		trigger_error('sphinx_search: ' . mysql_error($db), E_USER_WARNING);
		mysql_close($db);
		return array();
	}

	// group_id => class loading the documents of that group; the order of
	// this list is the order in which documents are appended to $docs
	$moteurs = array('spip' => 'MoteurRecherche_SPIP',
					 'bazar' => 'MoteurRecherche_BAZAR',
					 'projet' => 'MoteurRecherche_PROJET',
					 'papyrus' => 'MoteurRecherche_PAPYRUS',
					 'coste' => 'MoteurRecherche_COSTE',
					 'nvjfl' => 'MoteurRecherche_NVJFL');

	$res = array_fill_keys(array_keys($moteurs), array());
	$ids_par_poids = array();
	while($rec = mysql_fetch_array($req, MYSQL_ASSOC)) {
		$res[$rec['group_id']][$rec['id']] = $rec;
		// presumably "<group>-<id>", i.e. the keys of $docs below; verify against the index definitions
		$ids_par_poids[] = $rec['main_id'];
	}
	mysql_free_result($req);
	// dedicated link (new_link = TRUE above): closing it cannot affect another connection
	mysql_close($db);

	$docs = array();
	foreach($moteurs as $groupe => $classe) {
		$moteur = new $classe();
		foreach($moteur->get(array_filter(array_keys($res[$groupe]), 'intval'), $q) as $v) {
			$hit = isset($res[$groupe][$v['id']]) ? $res[$groupe][$v['id']] : array();
			unset($hit['group_id'], $hit['main_id'], $hit['id']);
			// left: weight
			$docs[$groupe . '-' . $v['id']] = array_merge($v, $hit);
		}
	}

	// sort: same order as the Sphinx result set
	$sorted = _sortArrayByArray($docs, $ids_par_poids);

	// the first document gives the weight standing for 100%
	$premier = reset($sorted);
	$max = ($premier !== FALSE && isset($premier['weight'])) ? $premier['weight'] : 0;
	// a null reference weight would be a division by zero in _weight2score()
	if(!$max) $max = 1;

	if(isset($_GET['tri']) && $_GET['tri'] == 'date')
		usort($sorted, '_actuNewerFirst');

	// rename the keys to fit the existing templates
	array_walk($sorted, '_weight2score', $max);

	return $sorted;
}
322
 
323
 
2103 drzraf 324
/*
325
// http://www.php.net/manual/fr/sphinx.examples.php
326
$s = new SphinxClient;
327
$s->setServer("localhost", 9306);
328
$s->setMatchMode(SPH_MATCH_ANY);
329
$s->setMaxQueryTime(3);
330
var_dump($s->query("test"));
331
*/
2108 drzraf 332
 
333
 
334
/*
335
  Note: conversion côté client SQL:
336
  mysql -h0 -P 9306 < <(iconv -f utf8 -t latin1 <<<"SELECT * FROM i_projet, i_spip, i_papyrus, i_bazar WHERE MATCH('journée');")
337
*/
2127 drzraf 338
 
339
/*
340
  // test sorting
341
  // uncomment this:
342
  print_r(array_keys($sorted)); die;
343
 
344
  // then:
345
  ddiff
346
	<(mysql -h0 -P 9306 <<<"SELECT main_id FROM i_projet, i_spip, i_papyrus, i_bazar, i_coste, i_nvjfl WHERE MATCH('test') LIMIT 50;"|awk '{print $3}'|sed 1d) \
347
	<(POST http://localhost/site:reseau<<<"more_motif=test&"|awk -F'=>' '{print $2}'|sed -e 's/ //g' -e '/^$/d')
348
 
349
  // both should be equal.
350
  // [ SELECT main_id, group_id FROM i_projet, i_spip, i_papyrus, i_bazar WHERE MATCH('test') LIMIT 50; ]
351
*/