Subversion Repositories Applications.papyrus

Rev

Rev 2108 | Rev 2113 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2103 drzraf 1
#
2
# Minimal Sphinx configuration sample (clean, simple, functional)
3
#
4
# http://www.tela-botanica.org/page:recherche
5
# mysql -h0 -P 9306
6
# SELECT * FROM i_spip WHERE MATCH('@texte mousse');
7
 
8
# sudo sphinx-indexer i_spip -c /etc/sphinx/sphinx-min.conf
9
# sudo sphinx-searchd -c /etc/sphinx/sphinx-min.conf
10
# Base MySQL source: the spip, papyrus, projet, bazar and coste sources
# all derive from it.
source common
{
	type          = mysql

	sql_host      = localhost
	# NOTE(review): connects as root with an empty password; consider a
	# dedicated read-only account.
	sql_user      = root
	sql_pass      =
	sql_db        = raphdb
	sql_sock      = /var/lib/mysql/mysql.sock

	# Connection charset applied before the main query
	# (the utf8 alternative is kept for reference).
	# sql_query_pre = SET NAMES utf8
	sql_query_pre = SET NAMES latin1
}
22
 
23
# Base index settings inherited by i_spip, i_papyrus, i_projet, i_bazar and i_coste.
# Derived indexes must supply their own `source` and `path`.
index i_common
{
	# source  =
	# path    = /var/lib/sphinx/XXX
	docinfo      = extern
	charset_type = sbcs
	stopwords    = /etc/sphinx/stopwords.txt

	# Stemming, see http://sphinxsearch.com/docs/2.1.1/conf-morphology.html
	morphology   = libstemmer_french

	# Wildcard / substring matching.
	enable_star   = 1
	min_word_len  = 3
	# min_prefix_len = 3
	min_infix_len = 3

	# prefix_fields =
	# infix_fields  =

	phrase_boundary = ., ?, !, U+2026

	# Accent folding, see http://sphinxsearch.com/wiki/doku.php?id=charset_tables
	# Handy commands to check the result:
	#   mysql raphdb<<<"SELECT p_description FROM projet WHERE p_id = 121"|xxd
	#   sphinx-search -q -c sphinx-min.conf -i i_projet "homoge*|homogé"|grep -w hits
	#   SELECT * FROM i_projet WHERE MATCH('homoge*|homogé'); SHOW META;
	charset_table = U+00C8->e, U+00C9->e, U+00CA->e, U+00CB->e, U+00E8->e, U+00E9->e, U+00EA->e, U+00EB->e, U+0112->e, U+0113->e, U+0114->e, U+0115->e, U+0116->e, U+0117->e, U+0118->e, U+0119->e, U+011A->e, U+011B->e, U+018E->e, U+0190->e, U+01DD->e, U+0204->e, U+0205->e, U+0206->e, U+0207->e, U+0228->e, U+0229->e, U+0246->e, U+0247->e, U+0258->e, U+025B->e, U+025C->e, U+025D->e, U+025E->e, U+029A->e, U+1D07->e, U+1D08->e, U+1D31->e, U+1D32->e, U+1D49->e, U+1D4B->e, U+1D4C->e, U+1D92->e, U+1D93->e, U+1D94->e, U+1D9F->e, U+1E14->e, U+1E15->e, U+1E16->e, U+1E17->e, U+1E18->e, U+1E19->e, U+1E1A->e, U+1E1B->e, U+1E1C->e, U+1E1D->e, U+1EB8->e, U+1EB9->e, U+1EBA->e, U+1EBB->e, U+1EBC->e, U+1EBD->e, U+1EBE->e, U+1EBF->e, U+1EC0->e, U+1EC1->e, U+1EC2->e, U+1EC3->e, U+1EC4->e, U+1EC5->e, U+1EC6->e, U+1EC7->e, U+2091->e, \
		U+00CC->i, U+00CD->i, U+00CE->i, U+00CF->i, U+00EC->i, U+00ED->i, U+00EE->i, U+00EF->i, U+010309->i, U+0128->i, U+0129->i, U+012A->i, U+012B->i, U+012C->i, U+012D->i, U+012E->i, U+012F->i, U+0130->i, U+0131->i, U+0197->i, U+01CF->i, U+01D0->i, U+0208->i, U+0209->i, U+020A->i, U+020B->i, U+0268->i, U+026A->i, U+040D->i, U+0418->i, U+0419->i, U+0438->i, U+0439->i, U+0456->i, U+1D09->i, U+1D35->i, U+1D4E->i, U+1D62->i, U+1D7B->i, U+1D96->i, U+1DA4->i, U+1DA6->i, U+1DA7->i, U+1E2C->i, U+1E2D->i, U+1E2E->i, U+1E2F->i, U+1EC8->i, U+1EC9->i, U+1ECA->i, U+1ECB->i, U+2071->i, U+2111->i, \
		0..9, A..Z->a..z, _, a..z, \
		U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF

	# For later (2.1.1):
	# lemmatizer_base = /usr/share/sphinx/dicts/
}
55
 
56
 
57
# SPIP
58
source spip : common
{
	# See bibliotheque/more_recherche_spip_article.class.php
	# The first selected column is the document ID (mandatory).
	#
	# Columns deliberately left out:
	#   surtitre   = '' everywhere
	#   descriptif = '' everywhere
	#   chapo      = '' everywhere
	#   soustitre  = '' everywhere except id = 19
	#   ps         = '' everywhere except id = 19
	#
	# sql_attr_timestamp needs an integer UNIX timestamp, so `date` is
	# converted explicitly (assumes `date` is a DATE/DATETIME column;
	# drop the conversion if it already holds an integer timestamp).
	sql_query		= \
		SELECT id_article, titre, texte, UNIX_TIMESTAMP(date) AS date, lang, \
		       "spip" AS group_id, CONCAT("spip-", id_article) AS main_id \
		FROM spip_articles WHERE statut = "publie"

	# sql_attr_uint		= id_article
	sql_attr_timestamp	= date
	sql_attr_string		= lang
	# Declared as a field so that queries can filter on it, e.g. @group_id != spip
	sql_field_string	= group_id
	sql_attr_string		= main_id

	sql_field_string	= titre

	# Document lookup by ID; points at the table this source actually indexes.
	sql_query_info		= SELECT * FROM spip_articles WHERE id_article=$id
}
82
 
83
# Index built from the `spip` source; everything else comes from i_common.
index i_spip : i_common
{
	source       = spip
	path         = /var/lib/sphinx/spip

	# prefix_fields = titre, texte
	infix_fields = titre, texte
}
90
 
91
 
92
# PAPYRUS
93
source papyrus : common
{
	# Candidate columns:
	#   gm_nom
	#   gm_titre
	#   gm_titre_alternatif
	#   gm_mots_cles                    TODO
	#   gm_description_libre            TODO
	#   gm_description_resume           TODO
	#   gm_description_table_matieres   '' everywhere except gm_id_menu = 1
	#   gm_source                       '' everywhere except gm_id_menu = 587
	#   gm_auteur                       TODO
	#   gm_contributeur                 TODO
	#   gm_editeur                      TODO
	#   gm_categorie                    TODO
	#   gm_public                       '' everywhere
	#   gmc_contenu
	#
	# sql_attr_timestamp needs an integer UNIX timestamp, so gm_date_creation
	# is converted explicitly (assumes a DATE/DATETIME column; drop the
	# conversion if it already holds an integer timestamp).
	#
	# NOTE(review): the WHERE clause keeps menus without an end-of-validity
	# date OR whose end-of-validity date is already in the past; confirm
	# that "< now()" (rather than "> now()") is intended.
	sql_query = \
		SELECT m.gm_id_menu, \
		       IF(gm_nom != '', gm_nom, IF(gm_titre != '', gm_titre, gm_titre_alternatif)) as titre, \
		       gmc_contenu AS texte, \
		       gm_mots_cles,gm_source, gm_auteur, gm_contributeur, gm_editeur, gm_categorie, \
		       \
		       UNIX_TIMESTAMP(gm_date_creation) AS gm_date_creation, \
		       "papyrus" AS group_id, \
		       CONCAT("papyrus-", m.gm_id_menu) as main_id \
		       \
		FROM gen_menu m \
		LEFT JOIN gen_menu_contenu mc  ON mc.gmc_ce_menu      = m.gm_id_menu \
		                               AND mc.gmc_bool_dernier = 1 \
		WHERE m.gm_date_fin_validite = '0000-00-00 00:00:00' OR LOWER(m.gm_date_fin_validite) < now()

	sql_attr_timestamp	= gm_date_creation
	sql_field_string	= group_id
	sql_attr_string		= main_id
}
128
 
129
# Index built from the `papyrus` source; everything else comes from i_common.
index i_papyrus : i_common
{
	source       = papyrus
	path         = /var/lib/sphinx/papyrus

	# prefix_fields = titre, texte
	infix_fields = titre, texte
}
136
 
137
 
138
 
139
# PROJET
140
source projet : common
{
	# Candidate columns:
	#   p_id
	#   p_titre
	#   p_resume
	#   p_description
	#   pd_nom           (error in more_recherche_projet.class.php)
	#   pd_description   (error in more_recherche_projet.class.php)
	#
	# sql_attr_timestamp needs an integer UNIX timestamp, so p_date_creation
	# is converted explicitly (assumes a DATE/DATETIME column; drop the
	# conversion if it already holds an integer timestamp).
	sql_query		= \
		SELECT p_id, p_titre AS titre, p_description AS texte, \
		       p_resume, \
		       UNIX_TIMESTAMP(p_date_creation) AS p_date_creation, \
		       "projet" AS group_id, \
		       CONCAT("projet-", p_id) as main_id \
		FROM projet

	sql_attr_timestamp	= p_date_creation
	sql_field_string	= group_id
	sql_attr_string		= main_id
}
159
 
160
# Index built from the `projet` source; everything else comes from i_common.
index i_projet : i_common
{
	source       = projet
	path         = /var/lib/sphinx/projet

	# prefix_fields = titre, texte
	infix_fields = titre, texte
}
167
 
168
 
169
 
170
# BAZAR
171
source bazar : common
{
	# The first selected column (bf_id_fiche) is the document ID.
	#
	# sql_attr_timestamp needs an integer UNIX timestamp, so
	# bf_date_debut_evenement is converted explicitly (assumes a DATE/DATETIME
	# column; drop the conversion if it already holds an integer timestamp).
	sql_query		= \
		SELECT bf_id_fiche, bf_description AS texte, bf_titre AS titre, \
		       UNIX_TIMESTAMP(bf_date_debut_evenement) AS bf_date_debut_evenement, \
		       "bazar" AS group_id, \
		       CONCAT("bazar-", bf_id_fiche) as main_id \
		FROM bazar_fiche

	sql_attr_timestamp	= bf_date_debut_evenement
	sql_field_string	= group_id
	sql_attr_string		= main_id
}
181
 
182
# Index built from the `bazar` source; everything else comes from i_common.
index i_bazar : i_common
{
	source       = bazar
	path         = /var/lib/sphinx/bazar

	# prefix_fields = titre, texte
	infix_fields = titre, texte
}
189
 
2112 drzraf 190
 
191
 
192
# COSTE
193
source coste : common
{
	# This data is fetched as UTF-8. sql_query_pre is a source-level option,
	# so it is declared here (not in the index) to replace the latin1 setting
	# inherited from `common`.
	sql_query_pre		= SET NAMES utf8
	sql_query_pre		= SET CHARACTER_SET_RESULTS=utf8

	# One document per coste_v2_00 row, with two optional wiki pages attached:
	#   dsc = page named by page_wiki_dsc, feeds `texte`
	#   cle = page named by page_wiki_cle, feeds `determination`
	# Each join is restricted to its own alias and to the latest page revision.
	# CONCAT_WS skips a NULL body (no matching page), so nom_sci is still
	# indexed, and it separates the name from the body with a space.
	sql_query		= \
		SELECT c.num_nom AS id, \
		       CONCAT("bdtfx", ":", c.num_nom) AS group_id, \
		       CONCAT_WS(' ', c.nom_sci, dsc.body) AS texte, \
		       CONCAT_WS(' ', c.nom_sci, cle.body) AS determination, \
		       c.nom_sci AS nom_sci \
		FROM tb_eflore.coste_v2_00 c \
		LEFT JOIN tela_prod_wikini.florecoste_pages dsc ON c.page_wiki_dsc = dsc.tag AND dsc.latest = 'Y' \
		LEFT JOIN tela_prod_wikini.florecoste_pages cle ON c.page_wiki_cle = cle.tag AND cle.latest = 'Y'

	sql_field_string	= group_id
}

index i_coste : i_common
{
	source			= coste
	path			= /var/lib/sphinx/coste

	infix_fields		= texte, nom_sci

	# Override the latin1 / SBCS settings that the four indexes above need.
	charset_type		= utf-8
	# Default table, see http://sphinxsearch.com/docs/archives/1.10/conf-charset-table.html
	# Declared explicitly to replace the table inherited from i_common.
	charset_table		= 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
}
217
 
218
 
219
# misc...
2103 drzraf 220
# Real-time test index: two full-text fields and one unsigned integer attribute.
index testrt
{
	type         = rt
	rt_mem_limit = 32M

	path         = /var/lib/sphinx/testrt
	charset_type = utf-8

	rt_field     = title
	rt_field     = content
	rt_attr_uint = gid
}
232
 
233
 
234
# Settings for the indexer program.
indexer
{
	mem_limit = 32M
}
238
 
239
 
240
# Settings for the search daemon.
searchd
{
	# Native API on 9312 (loopback only), MySQL protocol on 9306.
	listen          = 127.0.0.1:9312
	listen          = 9306:mysql41

	log             = /var/log/sphinx/sphinx-searchd.log
	query_log       = /var/log/sphinx/sphinx-query.log
	pid_file        = /run/sphinx/sphinx-searchd.pid
	binlog_path     = /var/lib/sphinx

	read_timeout    = 5
	max_children    = 30
	max_matches     = 1000

	seamless_rotate = 1
	preopen_indexes = 1
	unlink_old      = 1

	# for RT to work
	workers         = threads

	# collation_server = utf8_general_ci  # unsure
	# collation_libc_locale = fr_FR.UTF-8

	# collation_libc_locale = fr_FR@euro
	# collation_libc_locale = fr_FR@euro
}
262
 
263
 
264
 
265
 
2112 drzraf 266
# sudo sphinx-searchd --stopwait -c sphinx-min.conf; sudo sphinx-indexer -c sphinx-min.conf i_projet; sphinx-search -q -c sphinx-min.conf -i i_projet "@texte homogé*"
2103 drzraf 267
# sudo sphinx-searchd -c sphinx-min.conf
2112 drzraf 268
 
269
 
270
# problème de charsets:
271
# + https://raw.github.com/melo/scripts/master/bin/x-sphinx-charset-generator
272
# $ wget https://raw.github.com/tom--/Collation-to-Charset-Table/master/collation_2_charset_table-{1,2}.php
273
# $ php collation_2_charset_table-1.php|sed -n -e '/^[A-Z]/p'|php collation_2_charset_table-2.php