Subversion Repositories Applications.papyrus

Rev

Rev 2143 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2103 drzraf 1
<?php
2140 jp_milcent 2
/*vim: set expandtab tabstop=4 shiftwidth=4: */
3
// +------------------------------------------------------------------------------------------------------+
4
// | PHP version 4.1                                                                                      |
5
// +------------------------------------------------------------------------------------------------------+
6
// | Copyright (C) 2004 Tela Botanica (accueil@tela-botanica.org)                                         |
7
// +------------------------------------------------------------------------------------------------------+
8
// | This file is part of Papyrus.                                                                        |
9
// |                                                                                                      |
10
// | Foobar is free software; you can redistribute it and/or modify                                       |
11
// | it under the terms of the GNU General Public License as published by                                 |
12
// | the Free Software Foundation; either version 2 of the License, or                                    |
13
// | (at your option) any later version.                                                                  |
14
// |                                                                                                      |
15
// | Foobar is distributed in the hope that it will be useful,                                            |
16
// | but WITHOUT ANY WARRANTY; without even the implied warranty of                                       |
17
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                                        |
18
// | GNU General Public License for more details.                                                         |
19
// |                                                                                                      |
20
// | You should have received a copy of the GNU General Public License                                    |
21
// | along with Foobar; if not, write to the Free Software                                                |
22
// | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA                            |
23
// +------------------------------------------------------------------------------------------------------+
24
/**
2103 drzraf 25
 * Moteur de recherche SPHINX
2140 jp_milcent 26
 *
27
 * Installation de sphinx :
28
 * sudo urpmi lib64sphinxclient-devel
29
 * sudo pecl install sphinx
30
 *
31
 * See also:
32
 *  - http://www.ibm.com/developerworks/library/os-sphinx/
33
 *  - http://sphinxsearch.com/docs/manual-2.0.7.html#extended-syntax
34
 *
35
 * TODO: http://sphinxsearch.com/blog/2010/08/17/how-sphinx-relevance-ranking-works/
36
 * TODO: attention, projet,bazaar,spip et papyrus sont latin9, ainsi en est-il de l'input attendu du <form>
37
 * mais pour coste... c'est full utf-8
38
 * Structure retournée :
39
 * 	'poids' => 0,
40
 * 	'url' => '',
41
 * 	'titre' => '',
42
 * 	'hreflang' => '',
43
 * 	'accesskey' => '',
44
 * 	'title' => '', // balise 'title'
45
 * 	'date_creation' => '',
46
 * 	'description' => ''
47
 *
48
 *
49
 * // http://www.php.net/manual/fr/sphinx.examples.php
50
 * $s = new SphinxClient;
51
 * $s->setServer("localhost", 9306);
52
 * $s->setMatchMode(SPH_MATCH_ANY);
53
 * $s->setMaxQueryTime(3);
54
 * var_dump($s->query("test"));
55
 *
56
 * Note: conversion côté client SQL:
57
 * mysql -h0 -P 9306 < <(iconv -f utf8 -t latin1 <<<"SELECT * FROM i_projet, i_spip, i_papyrus, i_bazar WHERE MATCH('journée');")
58
 *
59
 *
60
 * Test :
61
 * ddiff
62
 * <(mysql -h0 -P 9306 <<<"SELECT main_id FROM i_projet, i_spip, i_papyrus, i_bazar, i_coste, i_nvjfl WHERE MATCH('test') LIMIT 50;"|awk '{print $3}'|sed 1d) \
63
 * <(POST http://localhost/site:reseau<<<"more_motif=test&"|awk -F'=>' '{print $2}'|sed -e 's/ //g' -e '/^$/d')
64
 *
65
 * both should be equal.
66
 * [ SELECT main_id, group_id FROM i_projet, i_spip, i_papyrus, i_bazar WHERE MATCH('test') LIMIT 50; ]
67
 *
68
 *
69
 *@package Applette
70
 *@subpackage Moteur_recherche
71
 //Auteur original :
72
 * @author		Raphaël Droz <raphael@tela-botanica.org>
73
 //Autres auteurs :
74
 *@author		Jean-Pascal MILCENT <jpm@tela-botanica.org>
75
 *@copyright	Tela-Botanica 2000-2013
76
 *@version		$Revision$
77
 // +------------------------------------------------------------------------------------------------------+
2103 drzraf 78
 */
79
 
2140 jp_milcent 80
// Address (host:port) handed to mysql_connect() to reach the Sphinx daemon.
// Guarded so that a configuration file loaded earlier may override it.
if (!defined('SPHINX_DSN')) {
	define('SPHINX_DSN', '193.54.123.216:9306');
}
// Host name used to build absolute result URLs. HTTP_HOST is not set when the
// script runs outside a web request (CLI, cron), hence the empty-string fallback.
if (!defined('_MRS_SPHINX_BASEHOST')) {
	define('_MRS_SPHINX_BASEHOST', isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '');
}
// Timezone and locale used by strftime() when formatting result dates.
date_default_timezone_set('Europe/Paris');
setlocale(LC_TIME, 'fr_FR');
85
 
2140 jp_milcent 86
/**
 * Runs a full-text search against the Sphinx indexes and returns the matching
 * documents, enriched with data fetched from each source database.
 *
 * Steps:
 *  1. query Sphinx (SphinxQL over the MySQL protocol) for at most 50 matches;
 *  2. for each source group, load the matching records through the dedicated
 *     MoteurRecherche_* class;
 *  3. order the documents following the Sphinx result order, or by date when
 *     $_GET['tri'] equals 'date';
 *  4. turn each weight into a 'score' relative to the best weight of this result set.
 *
 * @param string|null $q    search pattern; Sphinx extended syntax is passed through as-is.
 * @param int         $page currently unused.
 * @return array documents (see _weight2score() for the final 'score' and
 *               'date_creation' fields); empty array when $q is empty, when Sphinx
 *               is unreachable or when the query fails.
 */
function sphinx_search($q = NULL, $page = 1) {
	if (!$q) {
		return array();
	}

	// Sphinx group_id => class able to load the matching records of that source.
	$moteurs = array(
		'spip' => 'MoteurRecherche_SPIP',
		'bazar' => 'MoteurRecherche_BAZAR',
		'projet' => 'MoteurRecherche_PROJET',
		'papyrus' => 'MoteurRecherche_PAPYRUS',
		'coste' => 'MoteurRecherche_COSTE',
		'nvjfl' => 'MoteurRecherche_NVJFL',
	);

	// NOTE(review): ext/mysql was removed in PHP 7; kept here because the file targets PHP 5.
	$db = mysql_connect(SPHINX_DSN, NULL, NULL, TRUE);
	if (!$db) {
		// Sphinx unreachable: behave like an empty result instead of querying a dead link.
		return array();
	}

	$requeteTpl = 'SELECT group_id, main_id, id, WEIGHT() AS poids '.
		'FROM i_projet, i_spip, i_papyrus, i_bazar, i_coste, i_nvjfl '.
		"WHERE MATCH('%s') ".
		'LIMIT 50 ';
	// Only the SQL string literal is escaped (quotes, backslashes): the Sphinx query
	// operators typed by the user still reach MATCH() untouched, while an apostrophe
	// in the pattern no longer breaks the statement.
	$requete = mysql_query(sprintf($requeteTpl, mysql_real_escape_string($q, $db)), $db);
	if (!$requete) {
		mysql_close($db);
		return array();
	}

	// Matches grouped by source, plus the Sphinx ordering (best weight first).
	$res = array();
	foreach ($moteurs as $groupe => $classe) {
		$res[$groupe] = array();
	}
	$ids_par_poids = array();
	$poidsMax = 0;
	while ($rec = mysql_fetch_array($requete, MYSQL_ASSOC)) {
		if ($rec['poids'] > $poidsMax) {
			$poidsMax = $rec['poids'];
		}
		$res[$rec['group_id']][$rec['id']] = $rec;
		$ids_par_poids[] = $rec['main_id'];
	}
	// The link was opened with new_link = TRUE: release it explicitly.
	mysql_free_result($requete);
	mysql_close($db);

	// Load the records of every source and attach the Sphinx weight to each of them.
	$docs = array();
	foreach ($moteurs as $groupe => $classe) {
		$moteur = new $classe();
		$ids = array_filter(array_keys($res[$groupe]), 'intval');
		foreach ($moteur->get($ids, $q) as $v) {
			if (!isset($res[$groupe][$v['id']])) {
				// Record not part of the Sphinx result: nothing to merge it with.
				continue;
			}
			$infos = $res[$groupe][$v['id']];
			unset($infos['group_id'], $infos['main_id'], $infos['id']);
			$docs[$groupe . '-' . $v['id']] = array_merge($v, $infos);
		}
	}

	// Restore the Sphinx ordering.
	// NOTE(review): presumably main_id has the '<group>-<id>' form used as $docs keys — confirm against the index configuration.
	$sorted = _sortArrayByArray($docs, $ids_par_poids);

	if (isset($_GET['tri']) && $_GET['tri'] == 'date') {
		usort($sorted, '_actuNewerFirst');
	}
	// Turn each weight into a percentage relative to the best match of this search.
	array_walk($sorted, '_weight2score', $poidsMax);

	return $sorted;
}
184
 
2103 drzraf 185
/**
 * Loads SPIP articles referenced by a Sphinx search result.
 */
class MoteurRecherche_SPIP {
	/**
	 * Fetches the published articles whose id is listed in $ids.
	 *
	 * Each returned row holds: id, titre, date_creation, hreflang, url_simple,
	 * url (url_simple followed by a var_recherche parameter) and description.
	 *
	 * @param array       $ids article ids; every value is cast to an integer before
	 *                         being placed in the SQL IN() list, zero values are dropped.
	 * @param string|null $q   search pattern, passed to More_Recherche::traiterMotif()
	 *                         to build the 'url' field.
	 * @return array rows indexed by article id; empty when no usable id is given.
	 */
	public function get($ids, $q = NULL) {
		$content = array();
		// The ids end up in a string-built query: only plain integers may go through.
		$ids = array_filter(array_map('intval', (array) $ids));
		if (count($ids) == 0) {
			return $content;
		}

		$config = $GLOBALS['_MOTEUR_RECHERCHE_']['spip'][0];
		$db = DB::connect($config['bdd_dsn']);
		if (DB::isError($db)) {
			// Same policy as for a failed query: stop with the PEAR DB message.
			die($db->getMessage());
		}

		$requeteTpl = 'SELECT id_article AS id, titre, texte, date AS date_creation, lang as hreflang '.
				'FROM spip_articles '.
				'WHERE statut = "%s" '.
				'AND id_article IN (%s) ';
		$requete = $db->query(sprintf($requeteTpl, 'publie', implode(',', $ids)));
		if (DB::isError($requete)) {
			die($requete->getMessage());
		}

		$base_url = trim($config['url'], '/');
		while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
			$rec['url_simple'] = sprintf("%s/article%d.html", $base_url, $rec['id']);
			$rec['url'] = sprintf("%s?var_recherche=%s",
					$rec['url_simple'],
					More_Recherche::traiterMotif($q, 'url'));
			// The full text is replaced by a shortened description.
			$rec['description'] = More_Recherche::couperTexte($rec['texte'], MORE_RESULTAT_TAILLE_DESCRIPTION);
			unset($rec['texte']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
212
 
213
/**
 * Loads Bazar records ("fiches") referenced by a Sphinx search result.
 */
class MoteurRecherche_BAZAR {
	/**
	 * Fetches the bazar_fiche rows whose id is listed in $ids.
	 *
	 * Each returned row holds: id, titre, date_creation, url_simple, url
	 * (both identical) and description.
	 *
	 * @param array       $ids record ids; every value is cast to an integer before
	 *                         being placed in the SQL IN() list, zero values are dropped.
	 * @param string|null $q   search pattern (not used by this source).
	 * @return array rows indexed by record id; empty when no usable id is given.
	 */
	public function get($ids, $q = NULL) {
		$content = array();
		// The ids end up in a string-built query: only plain integers may go through.
		$ids = array_filter(array_map('intval', (array) $ids));
		if (count($ids) == 0) {
			return $content;
		}

		$config = $GLOBALS['_MOTEUR_RECHERCHE_']['bazar'][0];
		$db = DB::connect($config['bdd_dsn']);
		if (DB::isError($db)) {
			// Same policy as for a failed query: stop with the PEAR DB message.
			die($db->getMessage());
		}

		$requeteTpl = 'SELECT bf_id_fiche AS id, '.
				'bf_description AS texte, '.
				'bf_titre AS titre, '.
				'bf_date_debut_evenement AS date_creation '.
				'FROM bazar_fiche '.
				'WHERE bf_id_fiche IN (%s) ';
		$requete = $db->query(sprintf($requeteTpl, implode(',', $ids)));
		if (DB::isError($requete)) {
			die($requete->getMessage());
		}

		// The configured url is used as a sprintf() template receiving the record id.
		$url_tpl = trim($config['url'], '/');
		while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
			$rec['url_simple'] = $rec['url'] = sprintf($url_tpl, $rec['id']);
			// The full text is replaced by a shortened description.
			$rec['description'] = More_Recherche::couperTexte($rec['texte'], MORE_RESULTAT_TAILLE_DESCRIPTION);
			unset($rec['texte']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
237
 
238
/**
 * Loads projects referenced by a Sphinx search result.
 */
class MoteurRecherche_PROJET {
	/**
	 * Fetches the projet rows whose id is listed in $ids.
	 *
	 * Each returned row holds: id, titre, date_creation, url_simple, url
	 * (both identical) and description (tags stripped, truncated).
	 *
	 * @param array       $ids project ids; every value is cast to an integer before
	 *                         being placed in the SQL IN() list, zero values are dropped.
	 * @param string|null $q   search pattern (not used by this source).
	 * @return array rows indexed by project id; empty when no usable id is given.
	 */
	public function get($ids, $q = NULL) {
		$content = array();
		// The ids end up in a string-built query: only plain integers may go through.
		$ids = array_filter(array_map('intval', (array) $ids));
		if (count($ids) == 0) {
			return $content;
		}

		// Reuses the already opened Papyrus connection.
		$db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'];
		$requeteTpl = 'SELECT p_id AS id, p_titre AS titre, p_description, p_date_creation AS date_creation '.
				'FROM projet '.
				'WHERE p_id IN (%s)';
		$requete = $db->query(sprintf($requeteTpl, implode(',', $ids)));
		if (DB::isError($requete)) {
			die($requete->getMessage());
		}

		$base_url = trim($GLOBALS['_MOTEUR_RECHERCHE_']['projet']['url'], '/');
		$taille_max = 400 + 2 * MORE_RESULTAT_TAILLE_DESCRIPTION;
		while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
			$rec['url_simple'] = $rec['url'] = sprintf("%s?id_projet=%d", $base_url, $rec['id']);
			$rec['description'] = substr(strip_tags($rec['p_description']), 0, $taille_max);
			unset($rec['p_description']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
261
 
262
/**
 * Loads Papyrus menu pages referenced by a Sphinx search result.
 */
class MoteurRecherche_PAPYRUS {
	/**
	 * Fetches the menus (with their latest content) whose id is listed in $ids.
	 *
	 * Each returned row holds the selected columns (id, titre, texte, gm_* metadata,
	 * date_creation) plus url_simple, url (with a var_recherche parameter) and
	 * description (HTML-escaped summary).
	 *
	 * @param array       $ids menu ids; every value is cast to an integer before
	 *                         being placed in the SQL IN() list, zero values are dropped.
	 * @param string|null $q   search pattern, passed to More_Recherche::traiterMotif()
	 *                         to build the 'url' field.
	 * @return array rows indexed by menu id; empty when no usable id is given.
	 */
	public function get($ids, $q = NULL) {
		$content = array();
		// The ids end up in a string-built query: only plain integers may go through.
		$ids = array_filter(array_map('intval', (array) $ids));
		if (count($ids) == 0) {
			return $content;
		}

		$db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'];
		$requeteTpl = 'SELECT mc.gmc_ce_menu AS id, '.
			"	IF(gm_nom != '', gm_nom, IF(gm_titre != '', gm_titre, gm_titre_alternatif)) AS titre, ".
			'	gmc_contenu AS texte, '.
			'	gm_description_libre, gm_description_resume, '.
			'	gm_mots_cles,gm_source, gm_auteur, gm_contributeur, gm_editeur, gm_categorie, '.
			'	gm_date_creation AS date_creation '.
			'FROM gen_menu AS m '.
			'	LEFT JOIN gen_menu_contenu AS mc ON mc.gmc_ce_menu = m.gm_id_menu AND mc.gmc_bool_dernier = 1 '.
			'WHERE mc.gmc_ce_menu IN (%s) ';
		$requete = $db->query(sprintf($requeteTpl, implode(',', $ids)));
		if (DB::isError($requete)) {
			die($requete->getMessage());
		}

		while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
			// Build the page url, then the same url carrying the search pattern.
			// TODO: use a dedicated configuration entry (as done for spip) to get rid of PAP_URL here.
			$une_url = new Pap_URL(PAP_URL);
			$une_url->setId($rec['id']);
			$rec['url_simple'] = $une_url->getURL();
			$une_url->addQueryString('var_recherche', More_Recherche::traiterMotif($q, 'url'), true);
			$rec['url'] = $une_url->getURL();

			// Explicit flags/charset: since PHP 5.4 htmlentities() defaults to UTF-8 and
			// returns an empty string for non UTF-8 input.
			// NOTE(review): the file header states Papyrus data is latin9 — confirm the connection charset.
			$rec['description'] = htmlentities($rec['gm_description_resume'], ENT_COMPAT, 'ISO-8859-15');
			unset($rec['gm_description_resume']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
296
 
2117 drzraf 297
/**
 * Loads "Flore de Coste" entries referenced by a Sphinx search result.
 */
class MoteurRecherche_COSTE {
	/**
	 * Fetches the Coste entries whose bdtfx name number is listed in $ids.
	 *
	 * Each returned row holds: id, titre, description (truncated), url_simple and
	 * url (both identical).
	 *
	 * @param array       $ids name numbers; every value is cast to an integer before
	 *                         being placed in the SQL IN() list, zero values are dropped.
	 * @param string|null $q   search pattern (not used by this source).
	 * @return array rows indexed by name number; empty when no usable id is given.
	 */
	public function get($ids, $q = NULL) {
		$content = array();
		// The ids end up in a string-built query: only plain integers may go through.
		$ids = array_filter(array_map('intval', (array) $ids));
		if (count($ids) == 0) {
			return $content;
		}

		// No dedicated connection for this source: the 'bota' one is reused and the
		// tables are addressed with their database name.
		$db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['bota'];
		$requeteTpl = 'SELECT c.flore_bdtfx_nn AS id, c.nom_sci AS titre, dsc.body AS description '.
			'FROM tb_eflore.coste_v2_00 AS c '.
			"	LEFT JOIN tela_prod_wikini.florecoste_pages dsc ON c.page_wiki_dsc = dsc.tag AND dsc.latest = 'Y' ".
			'WHERE c.flore_bdtfx_nn IN (%s) ';
		$requete = $db->query(sprintf($requeteTpl, implode(',', $ids)));
		if (DB::isError($requete)) {
			die($requete->getMessage());
		}

		$taille_max = 400 + 2 * MORE_RESULTAT_TAILLE_DESCRIPTION;
		while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
			$rec['url_simple'] = $rec['url'] = sprintf("http://%s/bdtfx-nn-%d", _MRS_SPHINX_BASEHOST, $rec['id']);
			// TODO: interpret the wikini markup.
			// NOTE(review): substr() cuts on bytes; if this text is UTF-8 (as the file
			// header suggests for coste) the last character may be truncated — confirm.
			$rec['description'] = substr($rec['description'], 0, $taille_max);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
320
 
2123 drzraf 321
/**
 * Loads scientific names, with their vernacular names, referenced by a Sphinx
 * search result.
 */
class MoteurRecherche_NVJFL {
	/**
	 * Fetches the bdtfx names whose num_nom is listed in $ids, each with the
	 * concatenated vernacular names of its taxon.
	 *
	 * Each returned row holds: id, titre, description (truncated list of
	 * vernacular names), url_simple and url (both identical).
	 *
	 * @param array       $ids name numbers; every value is cast to an integer before
	 *                         being placed in the SQL IN() list, zero values are dropped.
	 * @param string|null $q   search pattern (not used by this source).
	 * @return array rows indexed by name number; empty when no usable id is given.
	 */
	public function get($ids, $q = NULL) {
		$content = array();
		// The ids end up in a string-built query: only plain integers may go through.
		$ids = array_filter(array_map('intval', (array) $ids));
		if (count($ids) == 0) {
			return $content;
		}

		// No dedicated connection for this source: the 'bota' one is reused and the
		// tables are addressed with their database name.
		$db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['bota'];
		// One row per requested name: grouping on the joined (nullable) n.num_taxon
		// would merge every name lacking a vernacular entry, and all the names of a
		// same taxon, into a single row.
		// NOTE(review): assumes b.num_nom is unique in bdtfx_v1_01 — confirm.
		$requeteTpl = 'SELECT b.num_nom AS id, '.
		"	CONCAT(nom_sci, ' (nn: ', b.num_nom, ', nt: ', num_taxonomique, ')') AS titre, ".
		'	GROUP_CONCAT(n.nom_vernaculaire) AS description '.
		'FROM tb_eflore.bdtfx_v1_01 AS b '.
		'	LEFT JOIN tb_eflore.nvjfl_v2007 n ON n.num_taxon = b.num_taxonomique '.
		'WHERE b.num_nom IN (%s) '.
		'GROUP BY b.num_nom ';
		$requete = $db->query(sprintf($requeteTpl, implode(',', $ids)));
		if (DB::isError($requete)) {
			die($requete->getMessage());
		}

		$taille_max = 400 + 2 * MORE_RESULTAT_TAILLE_DESCRIPTION;
		while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
			$rec['url_simple'] = $rec['url'] = sprintf("http://%s/bdtfx-nn-%d", _MRS_SPHINX_BASEHOST, $rec['id']);
			$rec['description'] = substr($rec['description'], 0, $taille_max);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
347
 
2103 drzraf 348
// http://stackoverflow.com/questions/348410/sort-an-array-based-on-another-array
349
/**
 * Reorders an associative array following a list of keys.
 *
 * Entries whose key appears in $orderArray come first, in the order of that
 * list; the remaining entries follow in their original order. Keys of
 * $orderArray that are absent from $array are ignored.
 *
 * @param array $array      associative array to reorder.
 * @param array $orderArray keys, in the wanted order.
 * @return array the reordered array (keys preserved).
 */
function _sortArrayByArray($array, $orderArray) {
	$placed = array();
	$remaining = $array;
	foreach ($orderArray as $wanted) {
		if (!array_key_exists($wanted, $remaining)) {
			continue;
		}
		// Move the entry from the remaining pool to the ordered head.
		$placed[$wanted] = $remaining[$wanted];
		unset($remaining[$wanted]);
	}
	return $placed + $remaining;
}
2103 drzraf 359
 
2140 jp_milcent 360
/**
 * usort() callback ordering documents from the most recent to the oldest.
 *
 * Dates are compared as strings. Documents without a 'date_creation' are placed
 * after the dated ones (and are equal to each other), which keeps the comparison
 * consistent when dated and undated documents are mixed.
 *
 * @param array $a first document.
 * @param array $b second document.
 * @return int negative when $a goes first, positive when $b goes first, 0 when equal.
 */
function _actuNewerFirst($a,$b) {
	$a_date = isset($a['date_creation']);
	$b_date = isset($b['date_creation']);
	if ($a_date && $b_date) {
		// Reversed operands: the greater (newer) date sorts first.
		return strcmp($b['date_creation'], $a['date_creation']);
	}
	if ($a_date == $b_date) {
		return 0;
	}
	return $a_date ? -1 : 1;
}
363
 
2143 jp_milcent 364
// Transforme un score en pourcentage
2105 drzraf 365
/**
 * array_walk() callback finalising a result document.
 *
 * Replaces 'poids' by 'score' (integer percentage of $max, truncated) and
 * replaces 'date_creation' by its "day month year" rendering.
 *
 * @param array $item document, modified in place.
 * @param mixed $key  key of the document (unused, imposed by array_walk()).
 * @param mixed $max  highest weight of the result set; a value <= 0 gives a score of 0
 *                    instead of dividing by zero.
 * @return void
 */
function _weight2score(&$item, $key, $max) {
	$poids = isset($item['poids']) ? $item['poids'] : 0;
	$item['score'] = ($max > 0) ? intval($poids / $max * 100) : 0;

	$date = '';
	if (isset($item['date_creation'])) {
		$timestamp = strtotime($item['date_creation']);
		// An unparseable date gives an empty string rather than 1 January 1970.
		// NOTE(review): strftime() is deprecated as of PHP 8.1.
		if ($timestamp !== false) {
			$date = strftime("%d %B %Y", $timestamp);
		}
	}
	$item['date_creation'] = $date;
	unset($item['poids']);
}
370
 
2140 jp_milcent 371
?>