Subversion Repositories eFlore/Applications.cel

Rev

Rev 995 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 995 Rev 1318
Line 8... Line 8...
8
 
8
 
9
 /* Adapation par David Delon Decembre 2010 : gestion sous espece
9
 /* Adapation par David Delon Decembre 2010 : gestion sous espece
Line -... Line 10...
-
 
10
 */
-
 
11
 
-
 
12
 
-
 
13
/**
-
 
14
 * Class NameParser
-
 
15
 * Used to convert a string to a standarized format.
10
 */
16
 */
11
 
17
class NameParser {
12
 
18
 
13
	/**
19
	/**
14
	 * Class NameParser
-
 
15
	 * Used to convert a string to a standarized format.
-
 
16
	 */
-
 
17
	class NameParser {
-
 
18
 
-
 
19
		/**
-
 
20
		 * Whether to debug or nor
20
	 * Whether to debug or nor
Line 21... Line 21...
21
		 * @var bool|integer
21
	 * @var bool|integer
22
		 */
22
	 */
23
		public $debug_flag;
23
	public $debug_flag;
24
 
24
 
25
 
-
 
26
 
-
 
27
		/**
-
 
28
		 * Constructor 
-
 
29
		 */
-
 
30
		public function __construct( ) {
-
 
31
		}
-
 
32
 
-
 
33
		/**
-
 
34
		 * Sets value to the method property
25
 
Line -... Line 26...
-
 
26
 
-
 
27
	/**
-
 
28
	 * Constructor 
-
 
29
	 */
-
 
30
	public function __construct( ) {
-
 
31
	}
-
 
32
 
-
 
33
	/**
Line 35... Line 34...
35
		 * @param mixed $name class property name
34
	 * Sets value to the method property
36
		 * @param mixed $value class property value
35
	 * @param mixed $name class property name
37
		 */
36
	 * @param mixed $value class property value
38
		public function set($name,$value) {
37
	 */
39
			$this->$name = $value;
38
	public function set($name,$value) {
40
		}
39
		$this->$name = $value;
41
 
40
	}
-
 
41
 
-
 
42
	/**
-
 
43
	 * Reduce Spaces
Line 42... Line -...
42
 
-
 
43
		/**
-
 
44
		 * Reduce Spaces
-
 
45
		 * This will reduce the string to only allow once space between characters
44
	 * This will reduce the string to only allow once space between characters
46
		 * @param string $str : string to reduce space
45
	 * @param string $str : string to reduce space
Line -... Line 46...
-
 
46
	 * @return string : string with only once space between characters
-
 
47
	 */
-
 
48
	private function reduce_spaces( $str ) {
-
 
49
	
-
 
50
		$str = preg_replace("/ {2,}/", ' ', $str );
-
 
51
		$str = trim( $str );
-
 
52
		
-
 
53
		return( $str );
-
 
54
	}
-
 
55
 
-
 
56
	/**
-
 
57
	 * Function: parse_auth
-
 
58
	 * Purpose: Produce a parsed version of authority of a taxon name
-
 
59
	 * @author Tony Rees (Tony.Rees@csiro.au)
-
 
60
	 * Date created: March 2008
-
 
61
	 * Inputs: authority string as str
-
 
62
	 * Remarks:
-
 
63
	 *  (1) Performs authority expension of known abbreviated authornames using
-
 
64
	 *   table "auth_abbrev_test1" (must be available and populated with relevant content)
-
 
65
	 *  (2) Recognises "and", "et", "&" as equivalents (special case for "et al.") - all parsed to ampersand
Line 47... Line -...
47
		 * @return string : string with only once space between characters
-
 
48
		 */
-
 
49
		private function reduce_spaces( $str ) {
-
 
50
		
-
 
51
			$str = preg_replace("/ {2,}/", ' ', $str );
-
 
52
			$str = trim( $str );
-
 
53
			
-
 
54
			return( $str );
-
 
55
		}
-
 
56
 
-
 
57
 
-
 
58
		/**
-
 
59
		 * Function: parse_auth
-
 
60
		 * Purpose: Produce a parsed version of authority of a taxon name
-
 
61
		 * @author Tony Rees (Tony.Rees@csiro.au)
-
 
62
		 * Date created: March 2008
-
 
63
		 * Inputs: authority string as str
-
 
64
		 * Remarks:
-
 
65
		 *  (1) Performs authority expension of known abbreviated authornames using
-
 
66
		 *   table "auth_abbrev_test1" (must be available and populated with relevant content)
-
 
67
		 *  (2) Recognises "and", "et", "&" as equivalents (special case for "et al.") - all parsed to ampersand
-
 
68
		 *  (3) Recognises (e.g.) "Smith 1980" and "Smith, 1980" as equivalents - comma is removed in these cases
66
	 *  (3) Recognises (e.g.) "Smith 1980" and "Smith, 1980" as equivalents - comma is removed in these cases
69
		 *  (4) Recognises (e.g.) "F. J. R. Taylor, 1980" and "F.J.R. Taylor, 1980" as equivalents -
67
	 *  (4) Recognises (e.g.) "F. J. R. Taylor, 1980" and "F.J.R. Taylor, 1980" as equivalents -
Line 70... Line 68...
70
		 *      extra space after full stops is ignored in these cases
68
	 *      extra space after full stops is ignored in these cases
71
		 *  (5) Returns uppercase string, diacritical marks intact
69
	 *  (5) Returns uppercase string, diacritical marks intact
72
		 *
70
	 *
73
		 * @param string $str : authority string
71
	 * @param string $str : authority string
Line 74... Line 72...
74
		 * @param integer $upcase : convert to uppercase if $upcase = 1
72
	 * @param integer $upcase : convert to uppercase if $upcase = 1
75
		 * @return string : parsed author string
73
	 * @return string : parsed author string
76
		 */
74
	 */
77
		public function parse_auth( $str, $upcase=1 ) {
75
	public function parse_auth( $str, $upcase=1 ) {
Line 78... Line 76...
78
 
76
 
79
			$this->debug['parse_auth'][] = "1";
77
		$this->debug['parse_auth'][] = "1";
80
			$temp = $str = trim($str);
78
		$temp = $str = trim($str);
81
			
79
			
82
  		if ( ($str == NULL) || ($str == '') ) {
80
  		if ( ($str == NULL) || ($str == '') ) {
83
				$this->debug['parse_auth'][] = "1a";
81
				$this->debug['parse_auth'][] = "1a";
84
		    return '';
82
		    return '';
85
			}
83
		}
86
 
84
 
87
			if ( ( $temp == null ) || ( $temp == '') ) {
85
		if ( ( $temp == null ) || ( $temp == '') ) {
88
				$this->debug['parse_auth'][] = "2a";
86
			$this->debug['parse_auth'][] = "2a";
89
				return('');
87
			return('');
Line 90... Line 88...
90
			} else {
88
		} else {
91
 
89
 
Line 92... Line 90...
92
				$this->debug['parse_auth'][] = "2b";
90
			$this->debug['parse_auth'][] = "2b";
Line 93... Line 91...
93
			
91
		
94
				// add space after full stops, except at end (NB, will also add spece before some close brackets)
-
 
95
				$temp = rtrim( str_replace('.', '. ', $temp) );
-
 
96
				$this->debug['parse_auth'][] = "4 (temp:$temp)";
-
 
97
				
-
 
98
				//normalise "et", "and" to ampersand (et al. is a special case)
-
 
99
//				if ( $temp like '% et al%' ) {
-
 
100
				if ( preg_match('/ et al/', $temp) ) {
-
 
Line 101... Line -...
101
					$temp = str_replace(' et al','zzzzz', $temp);
-
 
102
					$this->debug['parse_auth'][] = "4a (temp:$temp)";
92
			// add space after full stops, except at end (NB, will also add spece before some close brackets)
103
				}
-
 
104
				
-
 
105
				$temp = str_replace(' et ',' & ', $temp );
-
 
106
				$temp = str_replace(' and ',' & ', $temp );
-
 
107
 
93
			$temp = rtrim( str_replace('.', '. ', $temp) );
108
				$temp = str_replace('zzzzz',' et al', $temp);
94
			$this->debug['parse_auth'][] = "4 (temp:$temp)";
109
 
95
				
110
				$this->debug['parse_auth'][] = "5 (temp:$temp)";
96
			//normalise "et", "and" to ampersand (et al. is a special case)
111
				
97
			// if ( $temp like '% et al%' ) {
112
				//remove commas before dates (only)
98
			if ( preg_match('/ et al/', $temp) ) {
113
				//	like '%, 17%'
99
				$temp = str_replace(' et al','zzzzz', $temp);
114
				if ( preg_match('/, 17/', $temp) ) {
100
				$this->debug['parse_auth'][] = "4a (temp:$temp)";
115
					$temp = str_replace(', 17',' 17', $temp);
101
			}
116
					$this->debug['parse_auth'][] = "5a (temp:$temp)";
102
				
117
				}
103
			$temp = str_replace(' et ',' & ', $temp );
Line -... Line 104...
-
 
104
			$temp = str_replace(' and ',' & ', $temp );
-
 
105
 
-
 
106
			$temp = str_replace('zzzzz',' et al', $temp);
118
				
107
 
-
 
108
			$this->debug['parse_auth'][] = "5 (temp:$temp)";
-
 
109
				
-
 
110
			//remove commas before dates (only)
-
 
111
			//	like '%, 17%'
119
				//	like '%, 18%'
112
			if ( preg_match('/, 17/', $temp) ) {
-
 
113
				$temp = str_replace(', 17',' 17', $temp);
-
 
114
				$this->debug['parse_auth'][] = "5a (temp:$temp)";
Line -... Line 115...
-
 
115
			}
-
 
116
			
-
 
117
			//	like '%, 18%'
120
				if ( preg_match('/, 18/', $temp) ) {
118
			if ( preg_match('/, 18/', $temp) ) {
121
					$temp = str_replace(', 18',' 18', $temp);
119
				$temp = str_replace(', 18',' 18', $temp);
Line 122... Line 120...
122
					$this->debug['parse_auth'][] = "5b (temp:$temp)";
120
				$this->debug['parse_auth'][] = "5b (temp:$temp)";
123
				}
121
			}
124
				
122
				
125
				//	like '%, 19%'
123
			//	like '%, 19%'
126
				if ( preg_match('/, 19/', $temp) ) {
124
			if ( preg_match('/, 19/', $temp) ) {
127
					$temp = str_replace(', 19',' 19', $temp);
-
 
-
 
125
				$temp = str_replace(', 19',' 19', $temp);
128
					$this->debug['parse_auth'][] = "5c (temp:$temp)";
126
				$this->debug['parse_auth'][] = "5c (temp:$temp)";
129
				}
127
			}
130
				
128
			
131
				//	like '%, 20%'
129
			//	like '%, 20%'
132
				if ( preg_match('/, 20/', $temp) ) {
130
			if ( preg_match('/, 20/', $temp) ) {
133
					$temp = str_replace(', 20',' 20', $temp);
-
 
134
					$this->debug['parse_auth'][] = "5d (temp:$temp)";
-
 
135
				}
-
 
136
				
-
 
137
				// reduce multiple internal spaces to single space
-
 
138
				$temp = $this->reduce_spaces( $temp );
131
				$temp = str_replace(', 20',' 20', $temp);
139
				
-
 
140
				//	like '% -%'
-
 
141
				$temp = str_replace(' -', '-', $temp);
-
 
142
 
-
 
143
				$this->debug['parse_auth'][] = "6 (temp:$temp)";
-
 
Line 144... Line -...
144
				
-
 
145
				foreach( explode(' ', $temp) as $this_word ) {
-
 
146
				
-
 
147
					$this->debug['parse_auth'][] = "7 (this_word:$this_word)";
-
 
148
					
-
 
149
					//	like '(%'
132
				$this->debug['parse_auth'][] = "5d (temp:$temp)";
150
					if ( preg_match('/^\(/', $this_word) ) {
133
			}
151
						$elapsed_chars .= '(';
-
 
152
						$this_word = substr( $this_word, 1 );
-
 
153
						$this->debug['parse_auth'][] = "7a (this_word:$this_word) (elapsed_chars:$elapsed_chars)";
-
 
154
					}
-
 
155
 
-
 
156
					// Add back the word to the final translation
-
 
157
					$elapsed_chars .= $this_word . ' ';
-
 
158
					$this->debug['parse_auth'][] = "7c (this_word:$this_word) (elapsed_chars:$elapsed_chars)";
-
 
159
				}
-
 
160
				
-
 
161
				$elapsed_chars = $this->reduce_spaces( str_replace(' )', ')', $elapsed_chars) );
-
 
162
				
-
 
163
				return trim( $elapsed_chars ) ;
-
 
164
			}
-
 
165
 
-
 
166
		}
-
 
167
		
-
 
168
		/**
-
 
169
		 * Function: parse
-
 
170
		 * Purpose: Produces parsed version of an input string (scientific name components)
-
 
171
		 * @author Tony Rees (Tony.Rees@csiro.au)
-
 
172
		 * Date created: June 2007-November 2008
134
				
173
		 * Inputs: input string as str (this version presumes genus, genus+species, or
-
 
174
		 * genus+species+authority)
135
			// reduce multiple internal spaces to single space
175
		 * Outputs: parsed version of input string, for match purposes
-
 
176
		 * Remarks:
-
 
177
		 *    (1) Removes known text elements e.g.
136
			$temp = $this->reduce_spaces( $temp );
178
		 *      'aff.', 'cf.', 'subsp.', subgenera if enclosed in brackets, etc. as desired
137
			
Line 179... Line 138...
179
		 *    (2) Removes accented and non A-Z characters other than full stops 
138
			//	like '% -%'
-
 
139
			$temp = str_replace(' -', '-', $temp);
Line -... Line 140...
-
 
140
 
-
 
141
			$this->debug['parse_auth'][] = "6 (temp:$temp)";
-
 
142
			
-
 
143
			foreach( explode(' ', $temp) as $this_word ) {
-
 
144
			
-
 
145
				//$this->debug['parse_auth'][] = "7 (this_word:$this_word)";
-
 
146
				$elapsed_chars = '';
-
 
147
				//	like '(%'
-
 
148
				if ( preg_match('/^\(/', $this_word) ) {
-
 
149
					$elapsed_chars .= '(';
-
 
150
					$this_word = substr( $this_word, 1 );
-
 
151
					//$this->debug['parse_auth'][] = "7a (this_word:$this_word) (elapsed_chars:$elapsed_chars)";
-
 
152
				}
-
 
153
 
-
 
154
				// Add back the word to the final translation
-
 
155
				$elapsed_chars .= $this_word . ' ';
-
 
156
				//$this->debug['parse_auth'][] = "7c (this_word:$this_word) (elapsed_chars:$elapsed_chars)";
-
 
157
			}
-
 
158
			
-
 
159
			$elapsed_chars = $this->reduce_spaces( str_replace(' )', ')', $elapsed_chars) );
-
 
160
			
-
 
161
			return trim( $elapsed_chars ) ;
-
 
162
		}
-
 
163
 
-
 
164
	}
-
 
165
		
-
 
166
	/**
-
 
167
	 * Function: parse
-
 
168
	 * Purpose: Produces parsed version of an input string (scientific name components)
-
 
169
	 * @author Tony Rees (Tony.Rees@csiro.au)
-
 
170
	 * Date created: June 2007-November 2008
-
 
171
	 * Inputs: input string as str (this version presumes genus, genus+species, or
-
 
172
	 * genus+species+authority)
-
 
173
	 * Outputs: parsed version of input string, for match purposes
-
 
174
	 * Remarks:
-
 
175
	 *    (1) Removes known text elements e.g.
-
 
176
	 *      'aff.', 'cf.', 'subsp.', subgenera if enclosed in brackets, etc. as desired
Line 180... Line 177...
180
		 *       (in scientific name portions)
177
	 *    (2) Removes accented and non A-Z characters other than full stops 
181
		 *    (3) Returns uppercase scientific name (genus + species only) 
178
	 *       (in scientific name portions)
182
		 *       plus unaltered (presumed) authority
179
	 *    (3) Returns uppercase scientific name (genus + species only) 
183
		 *     examples;
180
	 *       plus unaltered (presumed) authority
184
		 *       Anabaena cf. flos-aquae Ralfs ex Born. et Flah. => ANABAENA FLOSAQUAE Ralfs 
181
	 *     examples;
185
		 *       ex Born. et Flah.
182
	 *       Anabaena cf. flos-aquae Ralfs ex Born. et Flah. => ANABAENA FLOSAQUAE Ralfs 
186
		 *       Abisara lemÈe-pauli => ABISARA LEMEEPAULI
183
	 *       ex Born. et Flah.
187
		 *       Fuc/us Vesiculos2us => FUCUS VESICULOSUS
184
	 *       Abisara lemÈe-pauli => ABISARA LEMEEPAULI
Line 188... Line 185...
188
		 *       Buffo ignicolor LacÈpËde, 1788 => BUFFO IGNICOLOR LacÈpËde, 1788
185
	 *       Fuc/us Vesiculos2us => FUCUS VESICULOSUS
Line 189... Line 186...
189
		 *       Barbatia (Mesocibota) bistrigata (Dunker, 1866) => BARBATIA BISTRIGATA (Dunker, 1866)
186
	 *       Buffo ignicolor LacÈpËde, 1788 => BUFFO IGNICOLOR LacÈpËde, 1788
190
		 *    (4) Thus version does not handle genus+author, or genus+species+infraspecies
187
	 *       Barbatia (Mesocibota) bistrigata (Dunker, 1866) => BARBATIA BISTRIGATA (Dunker, 1866)
191
		 *       (second" good" term is presumed to be species epithet, anything after is 
188
	 *    (4) Thus version does not handle genus+author, or genus+species+infraspecies
192
		 *       considered to be start of the authority), however could be adapted further as required
189
	 *       (second" good" term is presumed to be species epithet, anything after is 
193
         *         and actually it was done in this version for Tela Botanica
190
	 *       considered to be start of the authority), however could be adapted further as required
194
		 *    (5) There is a separate function "parse_auth" for normalizing authorities when required
191
     *         and actually it was done in this version for Tela Botanica
195
		 *      (e.g. for authority comparisons)
192
	 *    (5) There is a separate function "parse_auth" for normalizing authorities when required
196
		 *
193
	 *      (e.g. for authority comparisons)
Line 197... Line 194...
197
		 * @param string $str : input string ( genus, genus+species, or genus+species+authority )
194
	 *
198
		 * @return string : parsed string
195
	 * @param string $str : input string ( genus, genus+species, or genus+species+authority )
199
		 */
196
	 * @return string : parsed string
-
 
197
	 */
-
 
198
	public function parse( $str = NULL ) {
-
 
199
		
-
 
200
		unset($this->debug['parse']);
-
 
201
 
-
 
202
		$temp = '';
-
 
203
		$first_str_part = NULL;
-
 
204
		$second_str_part = NULL;
-
 
205
		$temp_genus = '';
-
 
206
		$temp_species = '';
-
 
207
		$temp_genus_species = '';
-
 
208
		$temp_authority = '';
-
 
209
		$temp_infra = '';
-
 
210
			
-
 
211
		//$this->debug['parse'][] = "1";
-
 
212
 
-
 
213
		if ( ($str == NULL) || ( trim($str) == '') ) {
-
 
214
			//$this->debug[] = "N1a<br>";
-
 
215
			return '';
-
 
216
		} else {
-
 
217
			//	trim any leading, trailing spaces or line feeds
-
 
218
			$temp = trim( $str );
-
 
219
			//$this->debug['parse'][] = "1b";
-
 
220
		}
-
 
221
 
-
 
222
		if ( $temp == NULL || $temp == '') {
-
 
223
			//$this->debug['parse'][] = "2a";
-
 
224
			return '';
-
 
225
		} else {
-
 
226
			//$this->debug['parse'][] = "2b";
-
 
227
 
-
 
228
			// replace any HTML ampersands
-
 
229
			$set = array('%', '&', 'amp;%', 'AMP;%');
-
 
230
			$temp = str_replace( $set, '&', $temp );
-
 
231
 
-
 
232
			//$this->debug['parse'][] = "2b1 (temp:$temp)";
-
 
233
 
-
 
234
			// remove any content in angle brackets (e.g. html tags - <i>, </i>, etc.)
-
 
235
			$html_pattern = "(\<(/?[^\>]+)\>)";
-
 
236
			//? This should not just handle html tags but all <*>				
-
 
237
			$temp = preg_replace( $html_pattern, '', $temp);
-
 
238
			//$this->debug['parse'][] = "2b2 (temp:$temp)";
-
 
239
 
-
 
240
			// if second term (only) is in round brackets, presume it is a subgenus or a comment and remove it
-
 
241
			// examples: Barbatia (Mesocibota) bistrigata (Dunker, 1866) => Barbatia bistrigata (Dunker, 1866)
-
 
242
			// Barbatia (?) bistrigata (Dunker, 1866) => Barbatia bistrigata (Dunker, 1866)
-
 
243
			// (obviously this will not suit genus + author alone, where first part of authorname is in brackets,
-
 
244
			// however this is very rare?? and in any case we are not supporting genus+authority in this version)
-
 
245
			//if ( $temp like '% (%)%'
-
 
246
			$temp = preg_replace( "/ \(\w*\W*\)/", '', $temp, 1 );
-
 
247
			//? Not sure if this will catch if				
-
 
248
			//$this->debug['parse'][] = "2b3 (temp:$temp)";
-
 
249
 
-
 
250
			// if second term (only) is in square brackets, presume it is a comment and remove it
-
 
251
			// example: Aphis [?] ficus Theobald, [1918] => Aphis ficus Theobald, [1918]		
200
		public function parse( $str = NULL ) {
252
			//if ( $temp like '% [%]%'
201
			
-
 
202
			unset($this->debug['parse']);
-
 
203
 
-
 
204
 
-
 
205
			$temp = '';
-
 
206
			$first_str_part = NULL;
-
 
207
			$second_str_part = NULL;
-
 
208
			$temp_genus = '';
-
 
209
			$temp_species = '';
-
 
210
			$temp_genus_species = '';
-
 
211
			$temp_authority = '';
-
 
212
			$temp_infra = '';
-
 
213
			
-
 
214
			$this->debug['parse'][] = "1";
-
 
215
 
-
 
216
			if ( ($str == NULL) || ( trim($str) == '') ) {
-
 
217
				$this->debug[] = "N1a<br>";
-
 
218
				return '';
-
 
219
			} else {
-
 
220
				//	trim any leading, trailing spaces or line feeds
-
 
221
				$temp = trim( $str );
-
 
222
				$this->debug['parse'][] = "1b";
-
 
223
			}
-
 
224
 
-
 
225
			if ( $temp == NULL || $temp == '') {
-
 
226
				$this->debug['parse'][] = "2a";
-
 
227
				return '';
-
 
228
			} else {
-
 
229
				$this->debug['parse'][] = "2b";
-
 
230
 
-
 
231
				// replace any HTML ampersands
-
 
232
				$set = array('%', '&', 'amp;%', 'AMP;%');
-
 
233
				$temp = str_replace( $set, '&', $temp );
-
 
234
 
-
 
235
				$this->debug['parse'][] = "2b1 (temp:$temp)";
-
 
236
 
-
 
237
				// remove any content in angle brackets (e.g. html tags - <i>, </i>, etc.)
-
 
238
				$html_pattern = "(\<(/?[^\>]+)\>)";
-
 
239
//? This should not just handle html tags but all <*>				
-
 
240
				$temp = preg_replace( $html_pattern, '', $temp);
-
 
241
				$this->debug['parse'][] = "2b2 (temp:$temp)";
-
 
242
 
-
 
243
				// if second term (only) is in round brackets, presume it is a subgenus or a comment and remove it
-
 
244
				// examples: Barbatia (Mesocibota) bistrigata (Dunker, 1866) => Barbatia bistrigata (Dunker, 1866)
-
 
245
				// Barbatia (?) bistrigata (Dunker, 1866) => Barbatia bistrigata (Dunker, 1866)
-
 
246
				// (obviously this will not suit genus + author alone, where first part of authorname is in brackets,
-
 
247
				// however this is very rare?? and in any case we are not supporting genus+authority in this version)
-
 
248
//if ( $temp like '% (%)%'
-
 
249
				$temp = preg_replace( "/ \(\w*\W*\)/", '', $temp, 1 );
-
 
250
//? Not sure if this will catch if				
-
 
251
				$this->debug['parse'][] = "2b3 (temp:$temp)";
-
 
252
 
-
 
253
				// if second term (only) is in square brackets, presume it is a comment and remove it
-
 
254
				// example: Aphis [?] ficus Theobald, [1918] => Aphis ficus Theobald, [1918]		
-
 
255
//if ( $temp like '% [%]%'
-
 
256
				$temp = preg_replace( "/ \[\w*\W*\]/", '', $temp, 1 );
253
			$temp = preg_replace( "/ \[\w*\W*\]/", '', $temp, 1 );
257
//? Not sure if this will catch if				
254
			//? Not sure if this will catch if				
258
				$this->debug['parse'][] = "2b4 (temp:$temp)";
255
			//$this->debug['parse'][] = "2b4 (temp:$temp)";
Line 259... Line 256...
259
 
256
 
Line 260... Line 257...
260
				// drop indicators of questionable id's - presume all are lowercase for now (could extend as needed)
257
			// drop indicators of questionable id's - presume all are lowercase for now (could extend as needed)
261
				$temp = preg_replace( "/ cf /", " ", $temp );
258
			$temp = preg_replace( "/ cf /", " ", $temp );
262
				$temp = preg_replace( "/ cf\. /", " ", $temp );
259
			$temp = preg_replace( "/ cf\. /", " ", $temp );
263
				$temp = preg_replace( "/ near /", " ", $temp );
260
			$temp = preg_replace( "/ near /", " ", $temp );
264
				$temp = preg_replace( "/ aff\. /", " ", $temp );
261
			$temp = preg_replace( "/ aff\. /", " ", $temp );
265
				$temp = preg_replace( "/ sp\. /", " ", $temp );
262
			$temp = preg_replace( "/ sp\. /", " ", $temp );
266
				$temp = preg_replace( "/ spp\. /", " ", $temp );
263
			$temp = preg_replace( "/ spp\. /", " ", $temp );
267
				$temp = preg_replace( "/ spp /", " ", $temp );
264
			$temp = preg_replace( "/ spp /", " ", $temp );
268
 
265
 
269
				$this->debug['parse'][] = "2b5 (temp:$temp)";
266
			//$this->debug['parse'][] = "2b5 (temp:$temp)";
Line 270... Line 267...
270
 
267
 
Line 271... Line 268...
271
				// eliminate or close up any stray spaces introduced by the above
268
			// eliminate or close up any stray spaces introduced by the above
272
				$temp = $this->reduce_spaces( $temp );
269
			$temp = $this->reduce_spaces( $temp );
273
 
270
 
274
				$this->debug['parse'][] = "2b6 (temp:$temp)";
271
			//$this->debug['parse'][] = "2b6 (temp:$temp)";
275
 
272
 
-
 
273
			// now presume first element is genus, second (if present) is species, remainder
-
 
274
			//   (if present) is authority
276
				// now presume first element is genus, second (if present) is species, remainder
275
			// look for genus name
277
				//   (if present) is authority
276
			$ar = explode( " ", $temp, 2);
278
				// look for genus name
277
			if ( count( $ar ) ) {
279
				$ar = explode( " ", $temp, 2);
278
				$temp_genus = $ar[0];
280
				if ( count( $ar ) ) {
-
 
281
					$temp_genus = $ar[0];
279
				$temp = @$ar[1];
282
					$temp = @$ar[1];
280
			} else {
283
				} else {
281
				$temp_genus = $temp;
284
					$temp_genus = $temp;
282
				$temp = '';
285
					$temp = '';
283
			}
286
				}
284
				
287
				
285
			//$this->debug['parse'][] = "2b7 (temp_genus:$temp_genus) (temp:$temp)";
288
				$this->debug['parse'][] = "2b7 (temp_genus:$temp_genus) (temp:$temp)";
286
 
289
 
287
			// look for species epithet and authority
290
				// look for species epithet and authority
288
			$ar = explode( " ", $temp, 2);
291
				$ar = explode( " ", $temp, 2);
-
 
292
				if ( count( $ar ) ) {
-
 
293
					$temp_species = $ar[0];
289
			if ( count( $ar ) ) {
-
 
290
				$temp_species = $ar[0];
294
					$temp_authority = @$ar[1];
291
				$temp_authority = @$ar[1];
295
				} else {
-
 
296
					$temp_species = $temp;
-
 
297
					$temp_authority = '';
-
 
298
				}
-
 
299
               	// look for subspecies
-
 
300
 
-
 
301
                $infras =array('subsp.','var.');
-
 
302
 
-
 
303
                $temp_authority = preg_replace( "/ssp./", "subsp.", $temp_authority);
-
 
304
                $temp_authority = preg_replace( "/ssp /", "subsp.", $temp_authority);
-
 
305
                $temp_authority = preg_replace( "/subsp /", "subsp.", $temp_authority);
-
 
306
                $temp_authority = preg_replace( "/var /", "var.", $temp_authority);
-
 
307
 
-
 
308
                foreach ($infras as $infra) {
-
 
309
                    $pos = strpos($temp_authority, $infra);
-
 
310
                    if ($pos === false) {
-
 
311
                        continue;
-
 
312
                    }
-
 
313
                    else {
-
 
314
                        $temp_infra=substr($temp_authority,$pos+strlen($infra));
-
 
315
                        $temp_authority=substr($temp_authority,0,$pos);
-
 
316
                        $temp_infra=trim($temp_infra);
-
 
317
                        $temp_infra_type=$infra;
-
 
318
                        // look for infra epithet and authority
-
 
319
                        $ar = explode(" ", $temp_infra, 2);
-
 
320
                        if ( count( $ar ) ) {
-
 
321
                            $temp_infra = $ar[0];
-
 
322
                            $temp_infra_authority = @$ar[1];
-
 
323
                        }
-
 
324
                        break; // on s'arrete au premier trouve
-
 
325
                    }
-
 
326
                }
-
 
327
 
-
 
328
				$this->debug['parse'][] = "2b8 (temp_genus:$temp_genus) (temp_species:$temp_species) (temp_authority:$temp_authority) (temp_infra:$temp_infra) (temp_infra_authority:$temp_infra_authority) (temp:$temp)";
-
 
329
 
-
 
330
 
-
 
331
				// replace selected ligatures here (Genus names can contain Æ, OE ligature)
292
			} else {
332
				$temp_genus = str_replace( 'Æ', 'AE', $temp_genus);
-
 
333
				$temp_species = str_replace( 'Æ', 'AE', $temp_species);
-
 
Line 334... Line -...
334
				$temp_infra = str_replace( 'Æ', 'AE', $temp_infra );
-
 
335
				
-
 
336
 
-
 
-
 
293
				$temp_species = $temp;
Line -... Line 294...
-
 
294
				$temp_authority = '';
-
 
295
			}
-
 
296
            // look for subspecies
-
 
297
 
-
 
298
            $infras =array('subsp.','var.');
-
 
299
 
-
 
300
            $temp_authority = preg_replace( "/ssp./", "subsp.", $temp_authority);
-
 
301
            $temp_authority = preg_replace( "/ssp /", "subsp.", $temp_authority);
-
 
302
            $temp_authority = preg_replace( "/subsp /", "subsp.", $temp_authority);
-
 
303
            $temp_authority = preg_replace( "/var /", "var.", $temp_authority);
-
 
304
 
-
 
305
            $temp_infra_authority = '';
-
 
306
            $temp_infra_type = '';
-
 
307
            foreach ($infras as $infra) {
-
 
308
            	$pos = strpos($temp_authority, $infra);
-
 
309
                if ($pos === false) {
-
 
310
                	continue;
-
 
311
                } else {
-
 
312
                	$temp_infra=substr($temp_authority,$pos+strlen($infra));
-
 
313
                    $temp_authority=substr($temp_authority,0,$pos);
-
 
314
                    $temp_infra=trim($temp_infra);
-
 
315
                    $temp_infra_type=$infra;
-
 
316
                    // look for infra epithet and authority
-
 
317
                    $ar = explode(" ", $temp_infra, 2);
-
 
318
                    if ( count( $ar ) ) {
-
 
319
                    	$temp_infra = $ar[0];
-
 
320
                        $temp_infra_authority = @$ar[1];
-
 
321
                    }
-
 
322
                    break; // on s'arrete au premier trouve
-
 
323
                }
337
				$this->debug['parse'][] = "2b9 (temp_genus:$temp_genus) (temp_species:$temp_species) (temp_authority:$temp_authority) (temp_infra:$temp_infra) (temp_infra_authority:$temp_infra_authority) (temp:$temp)";
324
			}
338
 
325