WebSVN – eFlore/Projets.eflore-projets – Blame – /trunk/scripts/modules/cel/redempteur.sql

Rev	Author	Line No.	Line
752	raphael	1	`/*`
		2	`À l'origine les observations nécessiteuses sont celles n'ayant pas de données génénées,`
		3	`Soit: nom_ret, nom_ret_nn, nt ou famille à NULL\|0\|''`
		4
		5	`Eg:`
		6	`SELECT id_observation, nom_sel`
818	raphael	7	FROM `tb_cel_test`.`cel_obs`
752	raphael	8	`WHERE (`
		9	`nom_ret IS NULL or nom_ret = ''`
		10	`OR nom_ret_nn IS NULL or nom_ret_nn = 0`
		11	`OR nt IS NULL or nt = 0`
		12	`OR famille IS NULL or famille = ''`
		13	`)`
		14
		15	`Sauf que:`
		16	`1) on exclue celles sans nom_sel (elles sont sans espoir):`
		17	`nom_sel IS NOT NULL AND nom_sel != ''`
		18	`2) on exclue celles qui on un nom_ret_nn à 0, car cela peut-être légal, cf maj-201307.sql à ce propos`
		19	# donc pas de `nom_ret_nn = 0` dans la requête
		20	`3) on exclue, dans un premier temps, celles dont le référentiel n'est pas défini`
780	raphael	21	`AND (nom_referentiel IS NULL)`
752	raphael	22
		23	`D'où, les 3621 observations suivantes (2206 nom_sel distincts)`
		24	`SELECT id_observation, nom_sel`
818	raphael	25	FROM `tb_cel_test`.`cel_obs`
752	raphael	26	`WHERE (`
		27	`nom_sel IS NOT NULL AND nom_sel != ''`
		28	`AND (`
		29	`nom_ret IS NULL OR nom_ret = ''`
		30	`OR nom_ret_nn IS NULL`
		31	`OR nt IS NULL or nt = 0`
		32	`OR famille IS NULL or famille = ''`
		33	`)`
780	raphael	34	`AND (nom_referentiel IS NOT NULL)`
752	raphael	35	`)`
		36
		37	`Dans un premier temps nous travaillons avec le bdtfx, c'est à dire que`
780	raphael	38	`AND (nom_referentiel IS NOT NULL)`
752	raphael	39	`devient`
		40	`AND (nom_referentiel like 'bdtfx%')`
		41	`soit 3597/3621 observations:`
		42
		43	`Et effectuons une jointure sur bdtfx:`
		44	`SELECT id_observation, nom_sel, b.num_nom, b.famille`
818	raphael	45	FROM `tb_cel_test`.`cel_obs` c INNER JOIN `tb_eflore`.`bdtfx_v1_01` b ON (b.nom_sci = c.nom_sel)
752	raphael	46	`WHERE (`
		47	`nom_sel IS NOT NULL AND nom_sel != ''`
		48	`AND (`
		49	`nom_ret IS NULL OR nom_ret = ''`
		50	`OR nom_ret_nn IS NULL`
		51	`OR nt IS NULL OR nt = 0`
		52	`OR c.famille IS NULL OR c.famille = ''`
		53	`)`
		54	`AND (nom_referentiel like 'bdtfx%')`
		55	`)`
		56
		57	`* Or nous observons que la famille est parfois légitimement NULL ! Ce n'est pas pertinent de l'utiliser`
		58	`comme critère de caractérisation d'une observation buggée, contentons-nous donc de empty ('')`
		59
		60	`* Or nous observons que le numéro taxonomique est parfois légitimement 0 ! Ce n'est pas pertinent de l'utiliser`
		61	`comme critère de caractérisation d'une observation buggée, contentons-nous donc de NULL`
		62
		63
		64
		65	`Soit 84 lignes, cependant, un nom_sel peut correspondre à plusieurs num_nom_retenu dans bdtfx ! (et oui, les suffixes latins et d'auteur).`
		66	`Il s'agit donc de ne pas traiter ceux qui risquerait d'être mal-corrigé (sans les 100% de certitude).`
		67	Ainsi un ` GROUP BY id_observation HAVING count(id_observation) = 1 ` sera du meilleur effet.
		68
		69	`Nous obtenons donc ainsi les 69 observations à mettre à jour:`
		70	`SELECT id_observation, nom_sel, nom_ret, nom_ret_nn, nt, c.famille, b.num_nom, b.nom_sci, b.num_taxonomique, b.famille`
818	raphael	71	FROM `tb_cel_test`.`cel_obs` c INNER JOIN `tb_eflore`.`bdtfx_v1_01` b ON (b.nom_sci = c.nom_sel)
752	raphael	72	`WHERE (`
		73	`nom_sel IS NOT NULL AND nom_sel != ''`
		74	`AND (`
		75	`nom_ret IS NULL OR nom_ret = ''`
		76	`OR nom_ret_nn IS NULL`
		77	`OR nt IS NULL`
		78	`OR c.famille = ''`
		79	`)`
		80	`AND (nom_referentiel like 'bdtfx%')`
		81	`)`
		82	`GROUP BY id_observation HAVING count(id_observation) = 1`
		83
		84
		85	`=== la mise à jour ===`
		86	`Comme nous voulons utiliser UPDATE, nous devons remplacer le JOIN par des conditions du WHERE, mais le GROUP BY bloque de`
		87	`toute manière, un SUB-SELECT (table temporaire) est donc nécessaire:`
		88
		89	`=== finale ===`
		90	`*/`
		91
		92	CREATE TEMPORARY TABLE T_bis ( INDEX(`id_observation`)) AS
		93	`SELECT id_observation, b.num_nom, CONCAT(b.nom_sci, ' ', b.auteur), b.num_taxonomique, b.famille`
818	raphael	94	FROM `tb_cel_test`.`cel_obs` c, `tb_eflore`.`bdtfx_v1_01` b
752	raphael	95	`WHERE (`
		96	`b.nom_sci = c.nom_sel`
		97	`AND nom_sel IS NOT NULL AND nom_sel != ''`
		98	`AND (`
		99	`nom_ret IS NULL OR nom_ret = ''`
		100	`OR nom_ret_nn IS NULL`
		101	`OR nt IS NULL OR nt = 0`
		102	`OR c.famille = ''`
		103	`)`
		104	`AND (nom_referentiel like 'bdtfx%')`
		105	`)`
		106	`GROUP BY id_observation HAVING count(id_observation) = 1`
		107
818	raphael	108	UPDATE `tb_cel_test`.`cel_obs` c, T_bis t SET
752	raphael	109	`c.nom_ret = t.nom_sci,`
		110	`c.nom_ret_nn = t.num_nom,`
		111	`c.nt = t.num_taxonomique,`
		112	`c.famille = t.famille`
		113	`WHERE (c.id_observation = t.id_observation);`
		114	`DROP TEMPORARY TABLE T_bis;`

Subversion Repositories eFlore/Projets.eflore-projets

(root)/trunk/scripts/modules/cel/redempteur.sql – Rev 818