Rev 2141 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
<?php
/*vim: set expandtab tabstop=4 shiftwidth=4: */
// +------------------------------------------------------------------------------------------------------+
// | PHP version 4.1                                                                                      |
// +------------------------------------------------------------------------------------------------------+
// | Copyright (C) 2004 Tela Botanica (accueil@tela-botanica.org)                                         |
// +------------------------------------------------------------------------------------------------------+
// | This file is part of Papyrus.                                                                        |
// |                                                                                                      |
// | Foobar is free software; you can redistribute it and/or modify                                       |
// | it under the terms of the GNU General Public License as published by                                 |
// | the Free Software Foundation; either version 2 of the License, or                                    |
// | (at your option) any later version.                                                                  |
// |                                                                                                      |
// | Foobar is distributed in the hope that it will be useful,                                            |
// | but WITHOUT ANY WARRANTY; without even the implied warranty of                                       |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                                         |
// | GNU General Public License for more details.                                                         |
// |                                                                                                      |
// | You should have received a copy of the GNU General Public License                                    |
// | along with Foobar; if not, write to the Free Software                                                |
// | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA                              |
// +------------------------------------------------------------------------------------------------------+
/**
 * SPHINX search engine
 *
 * Installing sphinx:
 *   sudo urpmi lib64sphinxclient-devel
 *   sudo pecl install sphinx
 *
 * See also:
 * - http://www.ibm.com/developerworks/library/os-sphinx/
 * - http://sphinxsearch.com/docs/manual-2.0.7.html#extended-syntax
 *
 * TODO: http://sphinxsearch.com/blog/2010/08/17/how-sphinx-relevance-ranking-works/
 * TODO: beware, projet, bazaar, spip and papyrus are latin9, and so is the input expected
 *       from the <form>, but coste... is full utf-8
 *
 * Returned structure:
 *   'poids' => 0,
 *   'url' => '',
 *   'titre' => '',
 *   'hreflang' => '',
 *   'accesskey' => '',
 *   'title' => '', // 'title' tag
 *   'date_creation' => '',
 *   'description' => ''
 * (_weight2score() replaces 'poids' with a percentage stored under 'score'.)
 *
 * // http://www.php.net/manual/fr/sphinx.examples.php
 * $s = new SphinxClient;
 * $s->setServer("localhost", 9306);
 * $s->setMatchMode(SPH_MATCH_ANY);
 * $s->setMaxQueryTime(3);
 * var_dump($s->query("test"));
 *
 * Note: charset conversion on the SQL client side:
 *   mysql -h0 -P 9306 < <(iconv -f utf8 -t latin1 <<<"SELECT * FROM i_projet, i_spip, i_papyrus, i_bazar WHERE MATCH('journée');")
 *
 * Test:
 *   ddiff
 *   <(mysql -h0 -P 9306 <<<"SELECT main_id FROM i_projet, i_spip, i_papyrus, i_bazar, i_coste, i_nvjfl WHERE MATCH('test') LIMIT 50;"|awk '{print $3}'|sed 1d) \
 *   <(POST http://localhost/site:reseau<<<"more_motif=test&"|awk -F'=>' '{print $2}'|sed -e 's/ //g' -e '/^$/d')
 *
 * both should be equal.
 * [ SELECT main_id, group_id FROM i_projet, i_spip, i_papyrus, i_bazar WHERE MATCH('test') LIMIT 50; ]
 *
 * @package Applette
 * @subpackage Moteur_recherche
 * // Original author:
 * @author Raphaël Droz <raphael@tela-botanica.org>
 * // Other authors:
 * @author Jean-Pascal MILCENT <jpm@tela-botanica.org>
 * @copyright Tela-Botanica 2000-2013
 * @version $Revision$
 * // +------------------------------------------------------------------------------------------------------+
 */

// Address (host:port) of the searchd SphinxQL listener.
define('SPHINX_DSN', '193.54.123.216:9306');
// Host name used to build eFlore URLs. HTTP_HOST is absent under CLI, hence the fallback.
define('_MRS_SPHINX_BASEHOST', isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : 'localhost');

// needed by strftime()
date_default_timezone_set('Europe/Paris');
setlocale(LC_TIME, 'fr_FR');

/**
 * Runs a full-text query against the Sphinx indexes and returns the matching
 * documents, enriched from their source databases.
 *
 * Sphinx only returns (group_id, main_id, id, weight) tuples; the displayable
 * fields (title, url, description, ...) are fetched by the MoteurRecherche_*
 * class matching each group_id.
 *
 * NOTE: this relies on ext/mysql (mysql_connect() & co), which was removed in PHP 7.
 *
 * @param string|null $q    the search pattern (Sphinx extended syntax is allowed).
 * @param int         $page currently unused, kept for interface compatibility.
 * @return array the documents ordered by Sphinx rank (or by date, newest first, when
 *               $_GET['tri'] == 'date'); an empty array when $q is empty, the
 *               connection fails or the query fails.
 */
function sphinx_search($q = NULL, $page = 1) {
    if (!$q) {
        return array();
    }

    // a few aliases to ease usage without resorting to full sphinxQL
    // $q = preg_replace('/\<actu\>(.*)/', '\1 @group_id i_spip', $q);
    // $q = preg_replace('/\<eflore\>(.*)/', '\1 @group_id i_bazar|i_coste', $q);

    $db = mysql_connect(SPHINX_DSN, NULL, NULL, TRUE);
    if (!$db) {
        return array();
    }

    $requeteTpl = 'SELECT group_id, main_id, id, WEIGHT() AS poids '.
        'FROM i_projet, i_spip, i_papyrus, i_bazar, i_coste, i_nvjfl '.
        "WHERE MATCH('%s') ".
        'LIMIT 50 ';
    // Escaping happens at the SQL string-literal level only: once searchd has parsed the
    // literal, the full-text parser receives $q verbatim, so the extended syntax is kept
    // while an apostrophe in the pattern no longer breaks the statement.
    $requete = mysql_query(sprintf($requeteTpl, mysql_real_escape_string($q, $db)), $db);
    if (!$requete) {
        mysql_close($db);
        return array();
    }

    // group_id as found in the Sphinx indexes => class fetching the matching documents
    $sources = array(
        'spip'    => 'MoteurRecherche_SPIP',
        'bazar'   => 'MoteurRecherche_BAZAR',
        'projet'  => 'MoteurRecherche_PROJET',
        'papyrus' => 'MoteurRecherche_PAPYRUS',
        'coste'   => 'MoteurRecherche_COSTE',
        'nvjfl'   => 'MoteurRecherche_NVJFL',
    );

    $res = array();
    foreach ($sources as $groupe => $classe) {
        $res[$groupe] = array();
    }

    $ids_par_poids = array();
    $poidsMax = 0;
    while ($rec = mysql_fetch_array($requete, MYSQL_ASSOC)) {
        if ($rec['poids'] > $poidsMax) {
            $poidsMax = $rec['poids'];
        }
        $res[$rec['group_id']][$rec['id']] = $rec;
        // rows arrive in Sphinx rank order: remember it for the final sort
        $ids_par_poids[] = $rec['main_id'];
    }
    mysql_free_result($requete);
    mysql_close($db);

    $docs = array();
    foreach ($sources as $groupe => $classe) {
        // real integers only, zero ids dropped
        $ids = array_filter(array_map('intval', array_keys($res[$groupe])));
        $moteur = new $classe();
        foreach ($moteur->get($ids, $q) as $v) {
            $hit = isset($res[$groupe][$v['id']]) ? $res[$groupe][$v['id']] : array();
            // only the weight is carried over from the Sphinx row
            unset($hit['group_id'], $hit['main_id'], $hit['id']);
            $docs[$groupe . '-' . $v['id']] = array_merge($v, $hit);
        }
    }

    $sorted = _sortArrayByArray($docs, $ids_par_poids);
    if (isset($_GET['tri']) && $_GET['tri'] == 'date') {
        usort($sorted, '_actuNewerFirst');
    }

    // rewrite the keys to fit the existing templates
    array_walk($sorted, '_weight2score', $poidsMax);

    return $sorted;
}

/**
 * Fetches published SPIP articles.
 */
class MoteurRecherche_SPIP {

    /**
     * @param array       $ids article ids (id_article).
     * @param string|null $q   search pattern, appended to the url as var_recherche.
     * @return array rows indexed by id: id, titre, date_creation, hreflang, url_simple, url, description.
     */
    public function get($ids, $q = NULL) {
        $content = array();
        if (count($ids) > 0) {
            $db = DB::connect($GLOBALS['_MOTEUR_RECHERCHE_']['spip'][0]['bdd_dsn']);
            if (DB::isError($db)) {
                die($db->getMessage());
            }

            $requeteTpl = 'SELECT id_article AS id, titre, texte, date AS date_creation, lang as hreflang '.
                'FROM spip_articles '.
                'WHERE statut = "%s" '.
                'AND id_article IN (%s) ';
            $requete = $db->query(sprintf($requeteTpl, 'publie', implode(',', array_map('intval', $ids))));
            if (DB::isError($requete)) {
                die($requete->getMessage());
            }

            while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
                $rec['url_simple'] = sprintf("%s/article%d.html",
                    trim($GLOBALS['_MOTEUR_RECHERCHE_']['spip'][0]['url'], '/'),
                    $rec['id']);
                $rec['url'] = sprintf("%s?var_recherche=%s",
                    $rec['url_simple'],
                    More_Recherche::traiterMotif($q, 'url'));
                $rec['description'] = More_Recherche::couperTexte($rec['texte'], MORE_RESULTAT_TAILLE_DESCRIPTION);
                unset($rec['texte']);
                $content[$rec['id']] = $rec;
            }
        }
        return $content;
    }
}

/**
 * Fetches Bazar records (bazar_fiche).
 */
class MoteurRecherche_BAZAR {

    /**
     * @param array       $ids record ids (bf_id_fiche).
     * @param string|null $q   search pattern (unused here).
     * @return array rows indexed by id: id, titre, date_creation, url_simple, url, description.
     */
    public function get($ids, $q = NULL) {
        $content = array();
        if (count($ids) > 0) {
            $db = DB::connect($GLOBALS['_MOTEUR_RECHERCHE_']['bazar'][0]['bdd_dsn']);
            if (DB::isError($db)) {
                die($db->getMessage());
            }

            $requeteTpl = 'SELECT bf_id_fiche AS id, '.
                'bf_description AS texte, '.
                'bf_titre AS titre, '.
                'bf_date_debut_evenement AS date_creation '.
                'FROM bazar_fiche '.
                'WHERE bf_id_fiche IN (%s) ';
            $requete = $db->query(sprintf($requeteTpl, implode(',', array_map('intval', $ids))));
            if (DB::isError($requete)) {
                die($requete->getMessage());
            }

            while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
                // the configured url is used as the sprintf() format receiving the record id
                $rec['url_simple'] = $rec['url'] = sprintf(trim($GLOBALS['_MOTEUR_RECHERCHE_']['bazar'][0]['url'], '/'), $rec['id']);
                $rec['description'] = More_Recherche::couperTexte($rec['texte'], MORE_RESULTAT_TAILLE_DESCRIPTION);
                unset($rec['texte']);
                $content[$rec['id']] = $rec;
            }
        }
        return $content;
    }
}

/**
 * Fetches projects (table "projet").
 */
class MoteurRecherche_PROJET {

    /**
     * @param array       $ids project ids (p_id).
     * @param string|null $q   search pattern (unused here).
     * @return array rows indexed by id: id, titre, date_creation, url_simple, url, description.
     */
    public function get($ids, $q = NULL) {
        $content = array();
        if (count($ids) > 0) {
            $db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'];

            $requeteTpl = 'SELECT p_id AS id, p_titre AS titre, p_description, p_date_creation AS date_creation '.
                'FROM projet '.
                'WHERE p_id IN (%s)';
            $requete = $db->query(sprintf($requeteTpl, implode(',', array_map('intval', $ids))));
            if (DB::isError($requete)) {
                die($requete->getMessage());
            }

            while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
                $rec['url_simple'] = $rec['url'] = sprintf("%s?id_projet=%d",
                    trim($GLOBALS['_MOTEUR_RECHERCHE_']['projet']['url'], '/'),
                    $rec['id']);
                $rec['description'] = substr(strip_tags($rec['p_description']), 0, 400 + 2 * MORE_RESULTAT_TAILLE_DESCRIPTION);
                unset($rec['p_description']);
                $content[$rec['id']] = $rec;
            }
        }
        return $content;
    }
}

/**
 * Fetches Papyrus menus together with their latest content.
 */
class MoteurRecherche_PAPYRUS {

    /**
     * @param array       $ids menu ids (gmc_ce_menu).
     * @param string|null $q   search pattern, appended to the url as var_recherche.
     * @return array rows indexed by id: the selected gen_menu columns plus url_simple, url and description.
     */
    public function get($ids, $q = NULL) {
        $content = array();
        if (count($ids) > 0) {
            $db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['papyrus'];

            $requeteTpl = 'SELECT mc.gmc_ce_menu AS id, '.
                " IF(gm_nom != '', gm_nom, IF(gm_titre != '', gm_titre, gm_titre_alternatif)) AS titre, ".
                ' gmc_contenu AS texte, '.
                ' gm_description_libre, gm_description_resume, '.
                ' gm_mots_cles,gm_source, gm_auteur, gm_contributeur, gm_editeur, gm_categorie, '.
                ' gm_date_creation AS date_creation '.
                'FROM gen_menu AS m '.
                ' LEFT JOIN gen_menu_contenu AS mc ON mc.gmc_ce_menu = m.gm_id_menu AND mc.gmc_bool_dernier = 1 '.
                'WHERE mc.gmc_ce_menu IN (%s) ';
            $requete = $db->query(sprintf($requeteTpl, implode(',', array_map('intval', $ids))));
            if (DB::isError($requete)) {
                die($requete->getMessage());
            }

            while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
                // Build the url
                // TODO: as for spip, use a dedicated config file to get PAP_URL out of here
                $une_url = new Pap_URL(PAP_URL);
                $une_url->setId($rec['id']);
                $rec['url_simple'] = $une_url->getURL();
                $une_url->addQueryString('var_recherche', More_Recherche::traiterMotif($q, 'url'), true);
                $rec['url'] = $une_url->getURL();
                $rec['description'] = htmlentities($rec['gm_description_resume']);
                unset($rec['gm_description_resume']);
                $content[$rec['id']] = $rec;
            }
        }
        return $content;
    }
}

/**
 * Fetches Coste flora entries along with their wiki description.
 */
class MoteurRecherche_COSTE {

    /**
     * @param array       $ids name numbers (flore_bdtfx_nn).
     * @param string|null $q   search pattern (unused here).
     * @return array rows indexed by id: id, titre, description, url_simple, url.
     */
    public function get($ids, $q = NULL) {
        $content = array();
        if (count($ids) > 0) {
            // DB access is dumb, let's use this one and pray
            $db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['bota'];

            $requeteTpl = 'SELECT c.flore_bdtfx_nn AS id, c.nom_sci AS titre, dsc.body AS description '.
                'FROM tb_eflore.coste_v2_00 AS c '.
                " LEFT JOIN tela_prod_wikini.florecoste_pages dsc ON c.page_wiki_dsc = dsc.tag AND dsc.latest = 'Y' ".
                'WHERE c.flore_bdtfx_nn IN (%s) ';
            $requete = $db->query(sprintf($requeteTpl, implode(',', array_map('intval', $ids))));
            if (DB::isError($requete)) {
                die($requete->getMessage());
            }

            while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
                $rec['url_simple'] = $rec['url'] = sprintf("http://%s/bdtfx-nn-%d", _MRS_SPHINX_BASEHOST, $rec['id']);
                // TODO: interpret wikini
                $rec['description'] = substr($rec['description'], 0, 400 + 2 * MORE_RESULTAT_TAILLE_DESCRIPTION);
                $content[$rec['id']] = $rec;
            }
        }
        return $content;
    }
}

/**
 * Fetches BDTFX names along with their vernacular names (NVJFL).
 */
class MoteurRecherche_NVJFL {

    /**
     * @param array       $ids name numbers (num_nom).
     * @param string|null $q   search pattern (unused here).
     * @return array rows indexed by id: id, titre, description, url_simple, url.
     */
    public function get($ids, $q = NULL) {
        $content = array();
        if (count($ids) > 0) {
            // DB access is dumb, let's use this one and pray
            $db = $GLOBALS['_MOTEUR_RECHERCHE_']['bd']['bota'];

            // Grouping on b.num_nom yields one row per requested name. Grouping on
            // n.num_taxon (right side of the LEFT JOIN) merged every name lacking a
            // vernacular entry into a single NULL group, and merged synonyms as well.
            $requeteTpl = 'SELECT b.num_nom AS id, '.
                " CONCAT(nom_sci, ' (nn: ', b.num_nom, ', nt: ', num_taxonomique, ')') AS titre, ".
                ' GROUP_CONCAT(n.nom_vernaculaire) AS description '.
                'FROM tb_eflore.bdtfx_v1_01 AS b '.
                ' LEFT JOIN tb_eflore.nvjfl_v2007 n ON n.num_taxon = b.num_taxonomique '.
                'WHERE b.num_nom IN (%s) '.
                'GROUP BY b.num_nom ';
            $requete = $db->query(sprintf($requeteTpl, implode(',', array_map('intval', $ids))));
            if (DB::isError($requete)) {
                die($requete->getMessage());
            }

            while ($rec = $requete->fetchRow(DB_FETCHMODE_ASSOC)) {
                $rec['url_simple'] = $rec['url'] = sprintf("http://%s/bdtfx-nn-%d", _MRS_SPHINX_BASEHOST, $rec['id']);
                $rec['description'] = substr($rec['description'], 0, 400 + 2 * MORE_RESULTAT_TAILLE_DESCRIPTION);
                $content[$rec['id']] = $rec;
            }
        }
        return $content;
    }
}

/**
 * Reorders $array so that the keys listed in $orderArray come first, in that order;
 * the remaining entries follow in their original order.
 * http://stackoverflow.com/questions/348410/sort-an-array-based-on-another-array
 *
 * @param array $array      the associative array to reorder.
 * @param array $orderArray the keys, in the wanted order; keys absent from $array are skipped.
 * @return array the reordered array (keys preserved).
 */
function _sortArrayByArray($array, $orderArray) {
    $ordered = array();
    foreach ($orderArray as $key) {
        if (array_key_exists($key, $array)) {
            $ordered[$key] = $array[$key];
            unset($array[$key]);
        }
    }
    return $ordered + $array;
}

/**
 * usort() callback ordering documents by 'date_creation', newest first.
 * Two documents compare as equal as soon as one of them has no date.
 *
 * @param array $a first document.
 * @param array $b second document.
 * @return int negative, zero or positive, as returned by strcmp().
 */
function _actuNewerFirst($a, $b) {
    if (!isset($a['date_creation']) || !isset($b['date_creation'])) {
        return 0;
    }
    return strcmp($b['date_creation'], $a['date_creation']);
}

/**
 * array_walk() callback: turns the raw weight into a percentage of the best weight
 * ('score'), formats 'date_creation' for display and drops 'poids'.
 *
 * @param array $item the document, modified in place.
 * @param mixed $key  the document key (unused).
 * @param mixed $max  the highest weight of the result set.
 */
function _weight2score(&$item, $key, $max) {
    $poids = isset($item['poids']) ? $item['poids'] : 0;
    // a null maximum (every weight is 0) must not trigger a division by zero
    $item['score'] = ($max > 0) ? intval($poids / $max * 100) : 0;
    $item['date_creation'] = isset($item['date_creation']) ? strftime("%d %B %Y", strtotime($item['date_creation'])) : '';
    unset($item['poids']);
}