1 |
<?php
|
1 |
<?php
|
2 |
|
- |
|
- |
|
2 |
// declare(encoding='UTF-8');
|
3 |
/**
|
3 |
/**
|
- |
|
4 |
* Classe permettant de convertir une chaine d'un nom scientifique en un format standard.
|
- |
|
5 |
*
|
- |
|
6 |
* Source originale :
|
4 |
* Taxamatch-Webservice PHP v1.0.0
|
7 |
* Taxamatch-Webservice PHP v1.0.0
|
5 |
* @author Michael Giddens
|
8 |
* @author Michael Giddens
|
6 |
* @link http://www.silverbiology.com
|
9 |
* @link http://www.silverbiology.com
|
7 |
*/
|
10 |
*
|
8 |
|
11 |
*
|
9 |
/* Adaptation par David Delon Decembre 2010 : gestion sous espece
|
12 |
* @internal Minimum PHP version : 5.2
|
10 |
*/
|
13 |
* @category CEL
|
11 |
|
- |
|
12 |
|
- |
|
13 |
/**
|
14 |
* @package Services
|
- |
|
15 |
* @subpackage Bibliothèques
|
14 |
* Class NameParser
|
16 |
* @version 0.1
|
15 |
* Used to convert a string to a standarized format.
|
17 |
* @author Mathias CHOUET <mathias@tela-botanica.org>
|
- |
|
18 |
* @author David DELON <david@clapas.net>
|
- |
|
19 |
* @author Jean-Pascal MILCENT <jpm@tela-botanica.org>
|
- |
|
20 |
* @author Aurelien PERONNET <aurelien@tela-botanica.org>
|
- |
|
21 |
* @license GPL v3 <http://www.gnu.org/licenses/gpl.txt>
|
- |
|
22 |
* @license CECILL v2 <http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt>
|
- |
|
23 |
* @copyright 1999-2014 Tela Botanica <accueil@tela-botanica.org>
|
16 |
*/
|
24 |
*/
|
17 |
class NameParser {

	/**
	 * Debug switch. It is not consulted anywhere in this class; it is kept
	 * because callers may set or read it.
	 * @var bool|integer
	 */
	public $debug_flag;

	/**
	 * Trace messages collected while parsing, keyed by method name
	 * ('parse_auth', 'parse'). Declared explicitly so that appending to it
	 * does not create a dynamic property (deprecated since PHP 8.2).
	 * @var array
	 */
	public $debug = array();

	/**
	 * Sets an arbitrary property on this instance.
	 *
	 * @param mixed $name  property name
	 * @param mixed $value property value
	 */
	public function set($name, $value) {
		$this->$name = $value;
	}

	/**
	 * Collapses every run of two or more spaces into a single space and
	 * trims the result.
	 *
	 * @param string $str string to clean up
	 * @return string string with at most one space between characters
	 */
	private function reduce_spaces($str) {
		return trim(preg_replace('/ {2,}/', ' ', $str));
	}

	/**
	 * Produces a normalised version of the authority part of a taxon name.
	 *
	 * Original algorithm by Tony Rees (Tony.Rees@csiro.au), March 2008.
	 * Normalisation steps, in order:
	 *  (1) a space is inserted after every full stop, so that
	 *      "F.J.R. Taylor" and "F. J. R. Taylor" compare equal;
	 *  (2) " et " and " and " become " & " ("et al" is left untouched);
	 *  (3) the comma before a year starting with 17, 18, 19 or 20 is removed
	 *      ("Smith, 1980" => "Smith 1980");
	 *  (4) runs of spaces are collapsed, and a space before "-" or ")" is
	 *      removed.
	 * Case and diacritical marks are left unchanged.
	 *
	 * Each call appends trace entries to $this->debug['parse_auth'].
	 *
	 * @param string  $str    authority string
	 * @param integer $upcase kept for backward compatibility; currently ignored
	 * @return string normalised authority string ('' for empty input)
	 */
	public function parse_auth($str, $upcase = 1) {
		$this->debug['parse_auth'][] = "1";
		$temp = trim((string) $str);

		if ($temp === '') {
			$this->debug['parse_auth'][] = "1a";
			return '';
		}
		$this->debug['parse_auth'][] = "2b";

		// add a space after full stops, except at the end
		// (this also puts a space before some closing brackets; undone below)
		$temp = rtrim(str_replace('.', '. ', $temp));
		$this->debug['parse_auth'][] = "4 (temp:$temp)";

		// normalise "et" and "and" to an ampersand; "et al" is shielded behind
		// a placeholder while the replacement runs
		if (strpos($temp, ' et al') !== false) {
			$temp = str_replace(' et al', 'zzzzz', $temp);
			$this->debug['parse_auth'][] = "4a (temp:$temp)";
		}
		$temp = str_replace(array(' et ', ' and '), ' & ', $temp);
		$temp = str_replace('zzzzz', ' et al', $temp);
		$this->debug['parse_auth'][] = "5 (temp:$temp)";

		// remove the comma in front of dates only (years 17xx to 20xx)
		$centuries = array('17' => '5a', '18' => '5b', '19' => '5c', '20' => '5d');
		foreach ($centuries as $century => $step) {
			$needle = ', ' . $century;
			if (strpos($temp, $needle) !== false) {
				$temp = str_replace($needle, ' ' . $century, $temp);
				$this->debug['parse_auth'][] = "$step (temp:$temp)";
			}
		}

		// reduce multiple internal spaces to a single space, then close up " -"
		$temp = $this->reduce_spaces($temp);
		$temp = str_replace(' -', '-', $temp);
		$this->debug['parse_auth'][] = "6 (temp:$temp)";

		// The previous word-by-word loop reset its accumulator on every
		// iteration and therefore returned only the last word. The loop did
		// nothing except strip and re-add a leading "(", so the complete
		// string is returned here instead.
		return $this->reduce_spaces(str_replace(' )', ')', $temp));
	}

	/**
	 * Splits a scientific name into its components.
	 *
	 * Original algorithm by Tony Rees (Tony.Rees@csiro.au), June 2007 to
	 * November 2008, extended for Tela Botanica to handle infraspecific ranks.
	 *
	 * Processing:
	 *  (1) "&amp;" is decoded and anything in angle brackets is removed;
	 *  (2) a second term in round or square brackets (subgenus or comment) is
	 *      removed: "Barbatia (Mesocibota) bistrigata" => "Barbatia bistrigata";
	 *  (3) the markers " cf ", " cf. ", " near ", " aff. ", " sp. ", " spp. "
	 *      and " spp " are dropped;
	 *  (4) the first word is taken as genus, the second as species epithet and
	 *      the remainder as authority;
	 *  (5) if the remainder contains a rank marker ("subsp.", "ssp.", "var.",
	 *      with or without the dot), what precedes it is the species
	 *      authority, the next word is the infraspecific epithet and the rest
	 *      is the infraspecific authority; "subsp." takes precedence over
	 *      "var.";
	 *  (6) both authorities are normalised with parse_auth().
	 * Names are NOT upper-cased and, apart from the "Æ" => "AE" ligature,
	 * characters are not transliterated.
	 *
	 * @param string $str genus, genus+species, or genus+species+authority
	 *                    (optionally with an infraspecific rank)
	 * @return array|string '' for empty input, otherwise an array with the
	 *                      string keys 'genus', 'species', 'authority',
	 *                      'infra', 'infra_authority' and 'infra_type'
	 *                      (absent parts are '')
	 */
	public function parse( $str = NULL ) {
		unset($this->debug['parse']);

		// trim any leading or trailing spaces and line feeds
		$temp = ($str === null) ? '' : trim((string) $str);
		if ($temp === '') {
			return '';
		}

		// replace any HTML ampersands
		$temp = str_replace(array('&amp;', '&AMP;'), '&', $temp);

		// remove any content in angle brackets (e.g. html tags - <i>, </i>, etc.)
		$temp = preg_replace('/<[^>]+>/', '', $temp);

		// if the second term (only) is in round brackets, presume it is a
		// subgenus or a comment and remove it, e.g.
		//   Barbatia (Mesocibota) bistrigata (Dunker, 1866) => Barbatia bistrigata (Dunker, 1866)
		// The match is anchored after the first word so that a bracketed
		// author further along, such as "(L.)", is kept.
		$temp = preg_replace('/^(\S+) \(\w*\W*\)/', '$1', $temp, 1);

		// same for a second term in square brackets, e.g.
		//   Aphis [?] ficus Theobald, [1918] => Aphis ficus Theobald, [1918]
		$temp = preg_replace('/^(\S+) \[\w*\W*\]/', '$1', $temp, 1);

		// drop indicators of questionable identifications (lowercase only)
		$markers = array(' cf ', ' cf. ', ' near ', ' aff. ', ' sp. ', ' spp. ', ' spp ');
		$temp = str_replace($markers, ' ', $temp);

		// eliminate or close up any stray spaces introduced by the above
		$temp = $this->reduce_spaces($temp);

		// first element is the genus, second (if present) the species epithet,
		// remainder (if present) the authority
		$parts = explode(' ', $temp, 2);
		$temp_genus = $parts[0];
		$remainder = isset($parts[1]) ? $parts[1] : '';

		$parts = explode(' ', $remainder, 2);
		$temp_species = $parts[0];
		$temp_authority = isset($parts[1]) ? $parts[1] : '';

		// normalise the rank markers to "subsp." / "var."; (?<!\S) makes sure
		// the marker starts a token, so author names merely ending in "var"
		// or containing "ssp" are left alone
		$temp_authority = preg_replace('/(?<!\S)(?:ssp|subsp)(?:\.| )/', 'subsp.', $temp_authority);
		$temp_authority = preg_replace('/(?<!\S)var /', 'var.', $temp_authority);

		// look for an infraspecific rank; stop at the first marker type found
		$temp_infra = '';
		$temp_infra_authority = '';
		$temp_infra_type = '';
		foreach (array('subsp.', 'var.') as $infra) {
			$pattern = '/(?<!\S)' . preg_quote($infra, '/') . '/';
			if (!preg_match($pattern, $temp_authority, $found, PREG_OFFSET_CAPTURE)) {
				continue;
			}
			$pos = $found[0][1];
			$after = trim((string) substr($temp_authority, $pos + strlen($infra)));
			$temp_authority = (string) substr($temp_authority, 0, $pos);
			$temp_infra_type = $infra;

			// first word is the infra epithet, the rest its authority
			$parts = explode(' ', $after, 2);
			$temp_infra = $parts[0];
			$temp_infra_authority = isset($parts[1]) ? $parts[1] : '';
			break;
		}

		// replace selected ligatures (genus names can contain Æ)
		$temp_genus = str_replace('Æ', 'AE', $temp_genus);
		$temp_species = str_replace('Æ', 'AE', $temp_species);
		$temp_infra = str_replace('Æ', 'AE', $temp_infra);

		// trim and reduce any new multiple internal spaces to a single space
		$temp_genus = $this->reduce_spaces($temp_genus);
		$temp_species = $this->reduce_spaces($temp_species);
		$temp_infra = $this->reduce_spaces($temp_infra);

		if ($temp_authority != '') {
			$temp_authority = $this->parse_auth($temp_authority);
		}
		if ($temp_infra_authority != '') {
			$temp_infra_authority = $this->parse_auth($temp_infra_authority);
		}

		return array(
			"genus" => $temp_genus,
			"species" => $temp_species,
			"authority" => $temp_authority,
			"infra" => $temp_infra,
			"infra_authority" => $temp_infra_authority,
			"infra_type" => $temp_infra_type
		);
	}
}
|
359 |
|
349 |
|