2103 |
drzraf |
1 |
#
|
|
|
2 |
# Minimal Sphinx configuration sample (clean, simple, functional)
|
|
|
3 |
#
|
|
|
4 |
# http://www.tela-botanica.org/page:recherche
|
|
|
5 |
# mysql -h0 -P 9306    (connects to the searchd listener declared as "listen = 9306:mysql41")
|
|
|
6 |
# SELECT * FROM i_spip WHERE MATCH('@texte mousse');
|
|
|
7 |
|
|
|
8 |
# sudo sphinx-indexer i_spip -c /etc/sphinx/sphinx-min.conf
|
|
|
9 |
# sudo sphinx-searchd -c /etc/sphinx/sphinx-min.conf
|
|
|
10 |
# Base source: MySQL connection settings shared by every source declared
# further down as "source <name> : common".
source common
|
|
|
11 |
{
|
|
|
12 |
type = mysql
|
|
|
13 |
|
|
|
14 |
sql_host = localhost
|
|
|
15 |
# NOTE(review): connects as root with an empty password - confirm this is a
# development-only setup before deploying.
sql_user = root
|
|
|
16 |
sql_pass =
|
|
|
17 |
sql_db = raphdb
|
|
|
18 |
sql_sock = /home/raphael/web/mysql/mysql.sock
|
2108 |
drzraf |
19 |
#sql_query_pre = SET NAMES utf8
|
|
|
20 |
# Rows are fetched as latin1, which matches charset_type = sbcs in i_common.
sql_query_pre = SET NAMES latin1
|
2103 |
drzraf |
21 |
}
|
|
|
22 |
|
|
|
23 |
|
|
|
24 |
# Base index: tokenizer, stemming and wildcard settings inherited by the
# indexes declared as "index <name> : i_common". It sets no source and no path
# itself (both are commented out); each child index provides its own.
index i_common
|
|
|
25 |
{
|
|
|
26 |
# source =
|
|
|
27 |
# path = /var/lib/sphinx/XXX
|
|
|
28 |
docinfo = extern
|
|
|
29 |
# Single-byte charset, consistent with "SET NAMES latin1" in source common.
charset_type = sbcs
|
|
|
30 |
stopwords = /etc/sphinx/stopwords.txt
|
|
|
31 |
|
|
|
32 |
# http://sphinxsearch.com/docs/2.1.1/conf-morphology.html
|
|
|
33 |
morphology = libstemmer_french
|
|
|
34 |
# Star wildcards, as used by the test queries below (e.g. "homoge*").
enable_star = 1
|
|
|
35 |
min_word_len = 3
|
|
|
36 |
# min_prefix_len = 3
|
|
|
37 |
min_infix_len = 3
|
|
|
38 |
|
|
|
39 |
# prefix_fields =
|
|
|
40 |
# infix_fields =
|
|
|
41 |
|
|
|
42 |
phrase_boundary = ., ?, !, U+2026
|
|
|
43 |
|
|
|
44 |
# http://sphinxsearch.com/wiki/doku.php?id=charset_tables
|
|
|
45 |
# mysql raphdb<<<"SELECT p_description FROM projet WHERE p_id = 121"|xxd
|
|
|
46 |
# sphinx-search -q -c sphinx-min.conf -i i_projet "homoge*|homogé"|grep -w hits
|
|
|
47 |
# SELECT * FROM i_projet WHERE MATCH('homoge*|homogé'); SHOW META;
|
|
|
48 |
# Character folding table: the first two continuation lines map accented
# variants of "e" and "i" to the plain letters; the last two lines add digits,
# ASCII letters (upper-case folded to lower-case), "_" and the U+A8..U+FF ranges.
# NOTE(review): with charset_type = sbcs, entries above U+FF presumably cannot
# match any input byte - confirm whether the indexer warns about or clamps them.
# NOTE(review): U+C0..U+DF->U+E0..U+FF and U+E0..U+FF overlap the explicit
# U+00C8..U+00CF / U+00E8..U+00EF folds listed first - verify with the test
# queries above which mapping actually wins (e.g. that "homogé" matches "homoge").
charset_table = U+00C8->e, U+00C9->e, U+00CA->e, U+00CB->e, U+00E8->e, U+00E9->e, U+00EA->e, U+00EB->e, U+0112->e, U+0113->e, U+0114->e, U+0115->e, U+0116->e, U+0117->e, U+0118->e, U+0119->e, U+011A->e, U+011B->e, U+018E->e, U+0190->e, U+01DD->e, U+0204->e, U+0205->e, U+0206->e, U+0207->e, U+0228->e, U+0229->e, U+0246->e, U+0247->e, U+0258->e, U+025B->e, U+025C->e, U+025D->e, U+025E->e, U+029A->e, U+1D07->e, U+1D08->e, U+1D31->e, U+1D32->e, U+1D49->e, U+1D4B->e, U+1D4C->e, U+1D92->e, U+1D93->e, U+1D94->e, U+1D9F->e, U+1E14->e, U+1E15->e, U+1E16->e, U+1E17->e, U+1E18->e, U+1E19->e, U+1E1A->e, U+1E1B->e, U+1E1C->e, U+1E1D->e, U+1EB8->e, U+1EB9->e, U+1EBA->e, U+1EBB->e, U+1EBC->e, U+1EBD->e, U+1EBE->e, U+1EBF->e, U+1EC0->e, U+1EC1->e, U+1EC2->e, U+1EC3->e, U+1EC4->e, U+1EC5->e, U+1EC6->e, U+1EC7->e, U+2091->e, \
|
|
|
49 |
U+00CC->i, U+00CD->i, U+00CE->i, U+00CF->i, U+00EC->i, U+00ED->i, U+00EE->i, U+00EF->i, U+010309->i, U+0128->i, U+0129->i, U+012A->i, U+012B->i, U+012C->i, U+012D->i, U+012E->i, U+012F->i, U+0130->i, U+0131->i, U+0197->i, U+01CF->i, U+01D0->i, U+0208->i, U+0209->i, U+020A->i, U+020B->i, U+0268->i, U+026A->i, U+040D->i, U+0418->i, U+0419->i, U+0438->i, U+0439->i, U+0456->i, U+1D09->i, U+1D35->i, U+1D4E->i, U+1D62->i, U+1D7B->i, U+1D96->i, U+1DA4->i, U+1DA6->i, U+1DA7->i, U+1E2C->i, U+1E2D->i, U+1E2E->i, U+1E2F->i, U+1EC8->i, U+1EC9->i, U+1ECA->i, U+1ECB->i, U+2071->i, U+2111->i, \
|
|
|
50 |
0..9, A..Z->a..z, _, a..z, \
|
|
|
51 |
U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
|
|
|
52 |
|
|
|
53 |
# later (2.1.1)
|
|
|
54 |
# lemmatizer_base = /usr/share/sphinx/dicts/
|
|
|
55 |
}
|
|
|
56 |
|
|
|
57 |
|
|
|
58 |
# SPIP
|
|
|
59 |
# SPIP articles source: selects published rows of spip_articles using the
# connection settings inherited from "common".
# The `date` column is converted with UNIX_TIMESTAMP() because Sphinx timestamp
# attributes require an integer UNIX timestamp; a MySQL DATE/DATETIME column
# cannot be used directly (assumes spip_articles.date is DATETIME, as in the
# stock SPIP schema - confirm).
source spip : common
|
|
|
60 |
{
|
|
|
61 |
# bibliotheque/more_recherche_spip_article.class.php
|
|
|
62 |
# first selected column = document id (mandatory)
|
|
|
63 |
|
|
|
64 |
# surtitre = '' everywhere
|
|
|
65 |
# descriptif = '' everywhere
|
|
|
66 |
# chapo = '' everywhere
|
|
|
67 |
# soustitre = '' everywhere except id = 19
|
|
|
68 |
# ps = '' everywhere except id = 19
|
|
|
69 |
sql_query = \
|
|
|
70 |
SELECT id_article, titre, texte, UNIX_TIMESTAMP(date) AS date, lang, "spip" AS group_id, CONCAT("spip-", id_article) as main_id \
|
|
|
71 |
FROM spip_articles WHERE statut = "publie"
|
|
|
72 |
|
|
|
73 |
# sql_attr_uint = id_article
|
|
|
74 |
sql_attr_timestamp = date
|
|
|
75 |
sql_attr_string = lang
|
2105 |
drzraf |
76 |
sql_field_string = group_id # declared as a full-text field so queries can filter on it, e.g. @group_id != spip
|
2103 |
drzraf |
77 |
sql_attr_string = main_id
|
|
|
78 |
|
|
|
79 |
sql_field_string = titre
|
|
|
80 |
|
|
|
81 |
# NOTE(review): this looks up table sphinx_doc by id, whereas sql_query reads
# spip_articles keyed by id_article - confirm sphinx_doc exists or point this
# at spip_articles.
sql_query_info = SELECT * FROM sphinx_doc WHERE id=$id
|
|
|
82 |
}
|
|
|
83 |
|
|
|
84 |
# Index over the "spip" source; every other setting is inherited from i_common.
index i_spip : i_common {
|
|
|
85 |
source = spip
|
|
|
86 |
path = /var/lib/sphinx/spip
|
|
|
87 |
|
|
|
88 |
# prefix_fields = titre, texte
|
|
|
89 |
# Infix indexing is limited to the titre and texte columns of the spip source.
infix_fields = titre, texte
|
|
|
90 |
}
|
|
|
91 |
|
|
|
92 |
|
|
|
93 |
|
|
|
94 |
# PAPYRUS
|
|
|
95 |
# Papyrus menus source: one document per gen_menu row, joined to its latest
# content row (gmc_bool_dernier = 1) from gen_menu_contenu.
# gm_date_creation is converted with UNIX_TIMESTAMP() because Sphinx timestamp
# attributes require an integer UNIX timestamp (assumes the column is DATETIME
# like gm_date_fin_validite, which the WHERE clause compares to
# '0000-00-00 00:00:00' - confirm).
# NOTE(review): the WHERE clause keeps rows whose gm_date_fin_validite is unset
# or already earlier than now() - confirm "<" is intended rather than ">".
source papyrus : common
|
|
|
96 |
{
|
|
|
97 |
#gm_nom
|
|
|
98 |
#gm_titre
|
|
|
99 |
#gm_titre_alternatif
|
|
|
100 |
#gm_mots_cles # TODO
|
|
|
101 |
#gm_description_libre # TODO
|
|
|
102 |
#gm_description_resume # TODO
|
|
|
103 |
#gm_description_table_matieres '' everywhere except gm_id_menu = 1
|
|
|
104 |
#gm_source '' everywhere except gm_id_menu = 587
|
|
|
105 |
#gm_auteur # TODO
|
|
|
106 |
#gm_contributeur # TODO
|
|
|
107 |
#gm_editeur # TODO
|
|
|
108 |
#gm_categorie # TODO
|
|
|
109 |
#gm_public '' everywhere
|
|
|
110 |
#gmc_contenu
|
|
|
111 |
sql_query = \
|
|
|
112 |
SELECT m.gm_id_menu, \
|
|
|
113 |
IF(gm_nom != '', gm_nom, IF(gm_titre != '', gm_titre, gm_titre_alternatif)) as titre, \
|
|
|
114 |
gmc_contenu AS texte, \
|
|
|
115 |
gm_mots_cles,gm_source, gm_auteur, gm_contributeur, gm_editeur, gm_categorie, \
|
|
|
116 |
\
|
|
|
117 |
UNIX_TIMESTAMP(gm_date_creation) AS gm_date_creation, \
|
|
|
118 |
"papyrus" AS group_id, \
|
|
|
119 |
CONCAT("papyrus-", m.gm_id_menu) as main_id \
|
|
|
120 |
\
|
|
|
121 |
FROM gen_menu m \
|
|
|
122 |
LEFT JOIN gen_menu_contenu mc ON mc.gmc_ce_menu = m.gm_id_menu \
|
|
|
123 |
AND mc.gmc_bool_dernier = 1 \
|
|
|
124 |
WHERE m.gm_date_fin_validite = '0000-00-00 00:00:00' OR LOWER(m.gm_date_fin_validite) < now()
|
|
|
125 |
|
|
|
126 |
sql_attr_timestamp = gm_date_creation
|
2105 |
drzraf |
127 |
sql_field_string = group_id
|
2103 |
drzraf |
128 |
sql_attr_string = main_id
|
|
|
129 |
}
|
|
|
130 |
|
|
|
131 |
# Index over the "papyrus" source; every other setting is inherited from i_common.
index i_papyrus : i_common {
|
|
|
132 |
source = papyrus
|
|
|
133 |
path = /var/lib/sphinx/papyrus
|
|
|
134 |
|
|
|
135 |
# prefix_fields = titre, texte
|
|
|
136 |
# Infix indexing is limited to the titre and texte columns of the papyrus source.
infix_fields = titre, texte
|
|
|
137 |
}
|
|
|
138 |
|
|
|
139 |
|
|
|
140 |
|
|
|
141 |
# PROJET
|
|
|
142 |
# Projects source: one document per row of table projet, exposing p_titre as
# "titre" and p_description as "texte".
# NOTE(review): p_date_creation is declared as sql_attr_timestamp but selected
# raw; if it is a DATE/DATETIME column it presumably needs
# UNIX_TIMESTAMP(p_date_creation) - confirm the column type.
source projet : common {
|
|
|
143 |
# p_id
|
|
|
144 |
# p_titre
|
|
|
145 |
# p_resume
|
|
|
146 |
# p_description
|
|
|
147 |
# pd_nom # error in more_recherche_projet.class.php
|
|
|
148 |
# pd_description # error in more_recherche_projet.class.php
|
|
|
149 |
sql_query = \
|
|
|
150 |
SELECT p_id, p_titre AS titre, p_description AS texte, \
|
|
|
151 |
p_resume, \
|
|
|
152 |
p_date_creation, \
|
|
|
153 |
"projet" AS group_id, \
|
|
|
154 |
CONCAT("projet-", p_id) as main_id \
|
|
|
155 |
FROM projet
|
|
|
156 |
|
|
|
157 |
sql_attr_timestamp = p_date_creation
|
2105 |
drzraf |
158 |
sql_field_string = group_id
|
2103 |
drzraf |
159 |
sql_attr_string = main_id
|
|
|
160 |
}
|
|
|
161 |
|
|
|
162 |
# Index over the "projet" source; every other setting is inherited from i_common.
index i_projet : i_common {
|
|
|
163 |
source = projet
|
|
|
164 |
path = /var/lib/sphinx/projet
|
|
|
165 |
|
|
|
166 |
# prefix_fields = titre, texte
|
|
|
167 |
# Infix indexing is limited to the titre and texte columns of the projet source.
infix_fields = titre, texte
|
|
|
168 |
}
|
|
|
169 |
|
|
|
170 |
|
|
|
171 |
|
|
|
172 |
# BAZAR
|
|
|
173 |
# Bazar source: one document per row of bazar_fiche, exposing bf_titre as
# "titre" and bf_description as "texte".
# NOTE(review): bf_date_debut_evenement is declared as sql_attr_timestamp but
# selected raw; if it is a DATE/DATETIME column it presumably needs
# UNIX_TIMESTAMP(bf_date_debut_evenement) - confirm the column type.
source bazar : common {
|
|
|
174 |
sql_query = \
|
|
|
175 |
SELECT bf_id_fiche, bf_description AS texte, bf_titre AS titre, bf_date_debut_evenement, "bazar" AS group_id, \
|
|
|
176 |
CONCAT("bazar-", bf_id_fiche) as main_id \
|
|
|
177 |
FROM bazar_fiche
|
|
|
178 |
|
|
|
179 |
sql_attr_timestamp = bf_date_debut_evenement
|
2105 |
drzraf |
180 |
sql_field_string = group_id
|
2103 |
drzraf |
181 |
sql_attr_string = main_id
|
|
|
182 |
}
|
|
|
183 |
|
|
|
184 |
|
|
|
185 |
|
|
|
186 |
|
|
|
187 |
# BAZAR index, then miscellaneous sections (RT test index, indexer, searchd)
|
|
|
188 |
# Index over the "bazar" source; every other setting is inherited from i_common.
index i_bazar : i_common {
|
|
|
189 |
source = bazar
|
|
|
190 |
path = /var/lib/sphinx/bazar
|
|
|
191 |
|
|
|
192 |
# prefix_fields = titre, texte
|
|
|
193 |
# Infix indexing is limited to the titre and texte columns of the bazar source.
infix_fields = titre, texte
|
|
|
194 |
}
|
|
|
195 |
|
|
|
196 |
# Real-time test index (type = rt). It does not inherit from i_common and uses
# utf-8, unlike the sbcs indexes above. Schema: two full-text fields
# (title, content) and one unsigned integer attribute (gid).
index testrt
|
|
|
197 |
{
|
|
|
198 |
type = rt
|
|
|
199 |
rt_mem_limit = 32M
|
|
|
200 |
|
|
|
201 |
path = /var/lib/sphinx/testrt
|
|
|
202 |
charset_type = utf-8
|
|
|
203 |
|
|
|
204 |
rt_field = title
|
|
|
205 |
rt_field = content
|
|
|
206 |
rt_attr_uint = gid
|
|
|
207 |
}
|
|
|
208 |
|
|
|
209 |
|
|
|
210 |
# Settings for the indexer program (run as "sphinx-indexer" in the usage
# comments of this file): memory limit only.
indexer
|
|
|
211 |
{
|
|
|
212 |
mem_limit = 32M
|
|
|
213 |
}
|
|
|
214 |
|
|
|
215 |
|
|
|
216 |
# Settings for the search daemon (run as "sphinx-searchd" in the usage comments
# of this file).
searchd
|
|
|
217 |
{
|
|
|
218 |
# Two listeners: 127.0.0.1:9312 with no protocol suffix, and port 9306 using
# the mysql41 protocol (the one targeted by "mysql -h0 -P 9306").
listen = 127.0.0.1:9312
|
|
|
219 |
listen = 9306:mysql41
|
2108 |
drzraf |
220 |
log = /var/log/sphinx/sphinx-searchd.log
|
2103 |
drzraf |
221 |
query_log = /var/log/sphinx/sphinx-query.log
|
2108 |
drzraf |
222 |
read_timeout = 5
|
|
|
223 |
max_children = 30
|
2103 |
drzraf |
224 |
pid_file = /run/sphinx/sphinx-searchd.pid
|
|
|
225 |
max_matches = 1000
|
2108 |
drzraf |
226 |
seamless_rotate = 1
|
|
|
227 |
preopen_indexes = 1
|
2103 |
drzraf |
228 |
unlink_old = 1
|
|
|
229 |
workers = threads # for RT to work
|
|
|
230 |
binlog_path = /var/lib/sphinx
|
|
|
231 |
|
2108 |
drzraf |
232 |
# Collation experiments, all disabled; the same fr_FR@euro line appears twice.
#collation_server = utf8_general_ci # unsure
|
|
|
233 |
#collation_libc_locale = fr_FR.UTF-8
|
|
|
234 |
|
2103 |
drzraf |
235 |
# collation_libc_locale = fr_FR@euro
|
|
|
236 |
# collation_libc_locale = fr_FR@euro
|
|
|
237 |
}
|
|
|
238 |
|
|
|
239 |
|
|
|
240 |
|
|
|
241 |
|
|
|
242 |
# sudo sphinx-searchd --stop -c sphinx-min.conf; sleep 2; sudo sphinx-indexer -c sphinx-min.conf i_projet; sphinx-search -q -c sphinx-min.conf -i i_projet "@texte homogé*"
|
|
|
243 |
# sudo sphinx-searchd -c sphinx-min.conf
|