Subversion Repositories Applications.papyrus

Rev

Rev 2103 | Rev 2106 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2103 drzraf 1
<?php
2
/*
3
 * Moteur de recherche SPHINX
4
 * @author        Raphaël Droz <raphael@tela-botanica.org>
5
 * @copyright     Tela-Botanica 2013
6
 */
7
 
8
/*
9
  sudo urpmi lib64sphinxclient-devel
10
  sudo pecl install sphinx
11
 
12
  see also: http://www.ibm.com/developerworks/library/os-sphinx/
13
  see also: http://sphinxsearch.com/docs/manual-2.0.7.html#extended-syntax
14
  TODO: http://sphinxsearch.com/blog/2010/08/17/how-sphinx-relevance-ranking-works/
15
*/
16
 
17
/* returned struct:
18
'poids' => 0,
19
'url' => '',
20
'titre' => '',
21
'hreflang' => '',
22
'accesskey' => '',
2105 drzraf 23
'title' => '', // balise 'title'
2103 drzraf 24
'date_creation' => '',
25
'description' => ''
26
*/
27
 
2105 drzraf 28
// Settings used by strftime() (called from _weight2score()): dates are rendered
// in the Europe/Paris timezone, with the fr_FR LC_TIME locale when the system provides it.
29
date_default_timezone_set('Europe/Paris');
30
setlocale(LC_TIME, 'fr_FR');
31
 
2103 drzraf 32
/**
 * Loads SPIP articles by id and shapes them as search-result rows.
 */
class MoteurRecherche_SPIP {
	/**
	 * Fetches the published ("publie") SPIP articles whose id is listed in $ids.
	 *
	 * @param array       $ids article ids; every value is cast with intval() and zero values are dropped
	 * @param string|null $q   search pattern, passed to More_Recherche::s_traiterMotif() to build
	 *                         the "var_recherche" parameter of each result URL
	 * @return array rows indexed by article id; each row carries id, titre, date_creation,
	 *               hreflang, url_simple, url and description (the raw 'texte' column is removed)
	 */
	public function get($ids, $q = NULL) {
		if(!$ids) return array();

		// The ids are interpolated into the IN (...) clause below: force them to
		// integers so that no arbitrary text can reach the SQL statement.
		$ids = array_filter(array_map('intval', (array) $ids));
		if(!$ids) return array();

		$db = DB::connect($GLOBALS['_MOTEUR_RECHERCHE_']['spip'][0]['bdd_dsn']);
		// PEAR DB reports a failed connection through an error object, on which
		// query() cannot be called: stop here the same way query errors are handled.
		if(DB::isError($db)) die($db->getMessage());

		$sql = <<<EOF
SELECT id_article AS id, titre, texte, date AS date_creation, lang as hreflang
FROM spip_articles
WHERE statut = "%s"
AND id_article IN (%s)
EOF;
		$req = $db->query(sprintf($sql, "publie", implode(',', $ids)));
		if(DB::isError($req)) die($req->getMessage());

		// The site base URL does not depend on the row: compute it once.
		$base_url = trim($GLOBALS['_MOTEUR_RECHERCHE_']['spip'][0]['url'], '/');

		$content = array();
		while($rec = $req->fetchRow(DB_FETCHMODE_ASSOC)) {
			$rec['url_simple'] = sprintf("%s/article%d.html", $base_url, $rec['id']);
			// Same URL with the search pattern appended as "var_recherche".
			$rec['url'] = sprintf("%s?var_recherche=%s",
								  $rec['url_simple'],
								  More_Recherche::s_traiterMotif($q, 'url'));
			$rec['description'] = More_Recherche::couperTexte($rec['texte'], MORE_RESULTAT_TAILLE_DESCRIPTION);
			unset($rec['texte']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
63
 
64
/**
 * Loads Bazar records ("fiches") by id and shapes them as search-result rows.
 */
class MoteurRecherche_BAZAR {
	/**
	 * Fetches the bazar_fiche rows whose bf_id_fiche is listed in $ids.
	 *
	 * @param array       $ids record ids; every value is cast with intval() and zero values are dropped
	 * @param string|null $q   search pattern (not used by this source)
	 * @return array rows indexed by record id; each row carries id, titre, date_creation
	 *               (taken from bf_date_debut_evenement), url_simple, url and description
	 *               (the raw 'texte' column is removed)
	 */
	public function get($ids, $q = NULL) {
		if(!$ids) return array();

		// The ids are interpolated into the IN (...) clause below: force them to
		// integers so that no arbitrary text can reach the SQL statement.
		$ids = array_filter(array_map('intval', (array) $ids));
		if(!$ids) return array();

		$db = DB::connect($GLOBALS['_MOTEUR_RECHERCHE_']['bazar'][0]['bdd_dsn']);
		// PEAR DB reports a failed connection through an error object, on which
		// query() cannot be called: stop here the same way query errors are handled.
		if(DB::isError($db)) die($db->getMessage());

		$sql = <<<EOF
SELECT bf_id_fiche AS id,
	   bf_description AS texte,
	   bf_titre AS titre,
	   bf_date_debut_evenement AS date_creation
FROM bazar_fiche
WHERE bf_id_fiche IN (%s)
EOF;
		$req = $db->query(sprintf($sql, implode(',', $ids)));
		if(DB::isError($req)) die($req->getMessage());

		$content = array();
		while($rec = $req->fetchRow(DB_FETCHMODE_ASSOC)) {
			// The configured URL is itself the sprintf() format string; presumably it
			// contains a placeholder for the record id (to be confirmed against the configuration).
			$rec['url_simple'] = $rec['url'] = sprintf(trim($GLOBALS['_MOTEUR_RECHERCHE_']['bazar'][0]['url'], '/'), $rec['id']);
			$rec['description'] = More_Recherche::couperTexte($rec['texte'], MORE_RESULTAT_TAILLE_DESCRIPTION);
			unset($rec['texte']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
90
 
91
/**
 * Loads projects by id and shapes them as search-result rows.
 */
class MoteurRecherche_PROJET {
	/**
	 * Fetches the "projet" rows whose p_id is listed in $ids, through the shared
	 * connection stored in $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'].
	 *
	 * @param array       $ids project ids; every value is cast with intval() and zero values are dropped
	 * @param string|null $q   search pattern (not used by this source)
	 * @return array rows indexed by project id; each row carries id, p_titre, titre,
	 *               date_creation, url_simple, url and description
	 */
	public function get($ids, $q = NULL) {
		if(!$ids) return array();

		// The ids are interpolated into the IN (...) clause below: force them to
		// integers so that no arbitrary text can reach the SQL statement.
		$ids = array_filter(array_map('intval', (array) $ids));
		if(!$ids) return array();

		$db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'];
		$sql = <<<EOF
SELECT p_id AS id, p_titre, p_description, p_date_creation AS date_creation
FROM projet WHERE p_id IN (%s)
EOF;
		$req = $db->query(sprintf($sql, implode(',', $ids)));
		if(DB::isError($req)) die($req->getMessage());

		$base_url = trim($GLOBALS['_MOTEUR_RECHERCHE_']['projet'][0]['url'], '/');

		$content = array();
		while($rec = $req->fetchRow(DB_FETCHMODE_ASSOC)) {
			$rec['url_simple'] = $rec['url'] = sprintf("%s?id_projet=%d", $base_url, $rec['id']);
			// The other sources expose the title under 'titre'; do the same here,
			// keeping 'p_titre' for any caller that already relies on it.
			$rec['titre'] = $rec['p_titre'];
			// Tag-free description, cut to 400 + 2 * MORE_RESULTAT_TAILLE_DESCRIPTION bytes.
			$rec['description'] = substr(strip_tags($rec['p_description']), 0, 400 + 2 * MORE_RESULTAT_TAILLE_DESCRIPTION);
			unset($rec['p_description']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
115
 
116
/**
 * Loads Papyrus menu pages by id and shapes them as search-result rows.
 */
class MoteurRecherche_PAPYRUS {
	/**
	 * Fetches the menus (gen_menu joined with their latest gen_menu_contenu row)
	 * whose id is listed in $ids, through the shared connection stored in
	 * $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'].
	 *
	 * @param array       $ids menu ids; every value is cast with intval() and zero values are dropped
	 * @param string|null $q   search pattern, passed to More_Recherche::s_traiterMotif() to build
	 *                         the "var_recherche" parameter of each result URL
	 * @return array rows indexed by menu id; besides the selected columns (including 'texte'),
	 *               each row carries url_simple, url and description
	 */
	public function get($ids, $q = NULL) {
		if(!$ids) return array();

		// The ids are interpolated into the IN (...) clause below: force them to
		// integers so that no arbitrary text can reach the SQL statement.
		$ids = array_filter(array_map('intval', (array) $ids));
		if(!$ids) return array();

		$db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'];
		$sql = <<<EOF
SELECT mc.gmc_ce_menu AS id,
	   IF(gm_nom != '', gm_nom, IF(gm_titre != '', gm_titre, gm_titre_alternatif)) AS titre,
	   gmc_contenu AS texte,
	   gm_description_libre, gm_description_resume,
	   gm_mots_cles,gm_source, gm_auteur, gm_contributeur, gm_editeur, gm_categorie, gm_date_creation AS date_creation
FROM gen_menu m
LEFT JOIN gen_menu_contenu mc ON mc.gmc_ce_menu = m.gm_id_menu AND mc.gmc_bool_dernier = 1
WHERE mc.gmc_ce_menu IN (%s)
EOF;
		$req = $db->query(sprintf($sql, implode(',', $ids)));
		if(DB::isError($req)) die($req->getMessage());

		$content = array();
		while($rec = $req->fetchRow(DB_FETCHMODE_ASSOC)) {
			// URL building.
			// TODO: use a dedicated configuration file, as done for SPIP, to get rid of PAP_URL here.
			$une_url = new Pap_URL(PAP_URL);
			$une_url->setId($rec['id']);
			$rec['url_simple'] = $une_url->getURL();
			// Same URL with the search pattern appended as "var_recherche".
			$une_url->addQueryString('var_recherche', More_Recherche::s_traiterMotif($q, 'url'), true);
			$rec['url'] = $une_url->getURL();

			// The description is the menu summary, HTML-escaped.
			$rec['description'] =  htmlentities($rec['gm_description_resume']);
			unset($rec['gm_description_resume']);
			$content[$rec['id']] = $rec;
		}
		return $content;
	}
}
151
 
2105 drzraf 152
 
2103 drzraf 153
// http://stackoverflow.com/questions/348410/sort-an-array-based-on-another-array
154
/**
 * Reorders an associative array so that the keys listed in $orderArray come
 * first, in that order; entries whose key is not listed follow in their
 * original order. Listed keys absent from $array are ignored.
 *
 * @param array $array      associative array to reorder
 * @param array $orderArray keys, in the wanted order
 * @return array the reordered array (keys are preserved)
 */
function _sortArrayByArray($array, $orderArray) {
	$picked = array();
	foreach($orderArray as $wanted) {
		if(!array_key_exists($wanted, $array)) {
			continue;
		}
		// Move the entry from the input to the ordered head.
		$picked[$wanted] = $array[$wanted];
		unset($array[$wanted]);
	}
	// What is left in $array was not requested: append it untouched.
	return $picked + $array;
}
2103 drzraf 164
 
2105 drzraf 165
// adaptation aux templates existants: [weight] => [score]
166
/**
 * array_walk() callback that adapts a result row to the existing templates:
 * the 'weight' entry is replaced by a 'score' percentage relative to $max,
 * and 'date_creation' is reformatted with strftime("%d %B %Y").
 *
 * @param array     $item row to adapt, modified in place; 'weight' and 'date_creation' are read
 * @param mixed     $key  key supplied by array_walk() (unused)
 * @param int|float $max  weight that corresponds to a score of 100
 * @return void
 */
function _weight2score(&$item, $key, $max) {
	// An empty or zero maximum would divide by zero: give such rows a score of 0.
	$item['score'] = $max ? intval($item['weight'] / $max * 100) : 0;

	// strtotime() returns FALSE for a date it cannot parse; strftime() would then
	// format timestamp 0 (the Unix epoch). Leave the date empty instead.
	$timestamp = strtotime($item['date_creation']);
	$item['date_creation'] = ($timestamp === FALSE) ? '' : strftime("%d %B %Y", $timestamp);

	unset($item['weight']);
}
171
 
172
 
2105 drzraf 173
// ce fichier/cette fonction peut être réclamé(e) plusieurs fois
174
// car le motif du template '{{MoteurRecherche}}' est inclus récursivement,
175
// (la première substitution fait réapparaître '{{MoteurRecherche}}')
176
/**
 * Runs a full-text query on the Sphinx indexes (i_projet, i_spip, i_papyrus,
 * i_bazar) through SphinxQL, loads the matching documents from their own
 * database and returns them ready for the templates.
 *
 * @param string|null $q    search pattern; an empty value yields an empty result
 * @param int         $page currently unused: the query is always limited to the first 50 matches
 * @return array documents keyed by "<group>-<id>" (group being spip, bazar, projet or papyrus),
 *               reordered by _sortArrayByArray() using the main_id values returned by Sphinx,
 *               each one post-processed by _weight2score(); an empty array when there is no
 *               query, no match, or when the Sphinx daemon cannot be reached or queried
 */
function sphinx_search($q = NULL, $page = 1) {
	if(!$q) return array();

	// Last argument (new_link) is TRUE: this link is private to this call, so it
	// can be closed below without affecting any other MySQL connection of the page.
	$db = mysql_connect('127.0.0.1:9306', NULL, NULL, TRUE);
	if(!$db) return array();

	// $q comes from the search form: escape it before placing it inside the
	// quoted MATCH('...') argument.
	$req = mysql_query(sprintf("SELECT group_id, main_id FROM i_projet, i_spip, i_papyrus, i_bazar WHERE MATCH('%s') LIMIT 50",
							   mysql_real_escape_string($q, $db)),
					   $db);
	if(!$req) {
		mysql_close($db);
		return array();
	}

	// Matches grouped by source, then indexed by document id.
	$res = array('spip' => array(), 'bazar' => array(), 'projet' => array(), 'papyrus' => array());
	// main_id values in the order Sphinx returned them.
	$ids_par_poids = array();
	while($rec = mysql_fetch_array($req, MYSQL_ASSOC)) {
		$res[$rec['group_id']][$rec['id']] = $rec;
		$ids_par_poids[] = $rec['main_id'];
	}
	mysql_free_result($req);
	// This function may run several times for one page: do not leave one
	// dedicated link open per call.
	mysql_close($db);

	// One loader class per source, processed in this order.
	$moteurs = array(
		'spip' => 'MoteurRecherche_SPIP',
		'bazar' => 'MoteurRecherche_BAZAR',
		'projet' => 'MoteurRecherche_PROJET',
		'papyrus' => 'MoteurRecherche_PAPYRUS',
	);

	$docs = array();
	foreach($moteurs as $groupe => $classe) {
		$moteur = new $classe();
		$ids = array_filter(array_keys($res[$groupe]), 'intval');
		foreach($moteur->get($ids, $q) as $v) {
			// Drop the Sphinx bookkeeping columns; what is left (the weight) is
			// merged into the document row.
			unset($res[$groupe][$v['id']]['group_id'],
				  $res[$groupe][$v['id']]['main_id'],
				  $res[$groupe][$v['id']]['id']);
			$docs[$groupe . '-' . $v['id']] = array_merge($v, $res[$groupe][$v['id']]);
		}
	}

	$sorted = _sortArrayByArray($docs, $ids_par_poids);
	// Nothing could be loaded: there is no first element to read the weight from.
	if(!$sorted) return array();

	/*
	To check the ordering, temporarily add here:
	print_r(array_keys($sorted)); die;

	then compare both outputs:
	ddiff
	<(mysql -h0 -P 9306 <<<"SELECT main_id FROM i_projet, i_spip, i_papyrus, i_bazar WHERE MATCH('test') LIMIT 50;"|awk '{print $3}'|sed 1d) \
	<(POST http://localhost/site:reseau<<<"more_motif=test&"|awk -F'=>' '{print $2}'|sed -e 's/ //g' -e '/^$/d')

	Both should be equal.
	[ SELECT main_id, group_id FROM i_projet, i_spip, i_papyrus, i_bazar WHERE MATCH('test') LIMIT 50; ]
	*/

	// The weight of the first document is the reference for a score of 100.
	$max = current($sorted);
	$max = $max['weight'];
	array_walk($sorted, '_weight2score', $max);

	return $sorted;
}
254
 
255
 
2103 drzraf 256
/*
257
// http://www.php.net/manual/fr/sphinx.examples.php
258
$s = new SphinxClient;
259
$s->setServer("localhost", 9306);
260
$s->setMatchMode(SPH_MATCH_ANY);
261
$s->setMaxQueryTime(3);
262
var_dump($s->query("test"));
263
*/