Subversion Repositories eFlore/Applications.cel

Rev

Rev 995 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 995 Rev 1318
Line 37... Line 37...
37
		 */
37
	 */
38
		public function set($name,$value) {
38
	public function set($name,$value) {
39
			$this->$name = $value;
39
		$this->$name = $value;
40
		}
40
	}
Line 41... Line -...
41
 
-
 
42
 
41
 
43
		/**
42
	/**
44
		 * Reduce Spaces
43
	 * Reduce Spaces
45
		 * This will reduce the string to only allow once space between characters
44
	 * This will reduce the string to only allow once space between characters
46
		 * @param string $str : string to reduce space
45
	 * @param string $str : string to reduce space
Line 52... Line 51...
52
			$str = trim( $str );
51
		$str = trim( $str );
Line 53... Line 52...
53
			
52
		
54
			return( $str );
53
		return( $str );
Line 55... Line -...
55
		}
-
 
56
 
54
	}
57
 
55
 
58
		/**
56
	/**
59
		 * Function: parse_auth
57
	 * Function: parse_auth
60
		 * Purpose: Produce a parsed version of authority of a taxon name
58
	 * Purpose: Produce a parsed version of authority of a taxon name
Line 142... Line 140...
142
 
140
 
Line 143... Line 141...
143
				$this->debug['parse_auth'][] = "6 (temp:$temp)";
141
			$this->debug['parse_auth'][] = "6 (temp:$temp)";
Line 144... Line 142...
144
				
142
			
145
				foreach( explode(' ', $temp) as $this_word ) {
-
 
-
 
143
			foreach( explode(' ', $temp) as $this_word ) {
146
				
144
			
147
					$this->debug['parse_auth'][] = "7 (this_word:$this_word)";
145
				//$this->debug['parse_auth'][] = "7 (this_word:$this_word)";
148
					
146
				$elapsed_chars = '';
149
					//	like '(%'
147
				//	like '(%'
150
					if ( preg_match('/^\(/', $this_word) ) {
148
				if ( preg_match('/^\(/', $this_word) ) {
151
						$elapsed_chars .= '(';
149
					$elapsed_chars .= '(';
Line 152... Line 150...
152
						$this_word = substr( $this_word, 1 );
150
					$this_word = substr( $this_word, 1 );
153
						$this->debug['parse_auth'][] = "7a (this_word:$this_word) (elapsed_chars:$elapsed_chars)";
151
					//$this->debug['parse_auth'][] = "7a (this_word:$this_word) (elapsed_chars:$elapsed_chars)";
154
					}
152
				}
155
 
153
 
Line 156... Line 154...
156
					// Add back the word to the final translation
154
				// Add back the word to the final translation
Line 157... Line 155...
157
					$elapsed_chars .= $this_word . ' ';
155
				$elapsed_chars .= $this_word . ' ';
Line 199... Line 197...
199
		 */
197
	 */
200
		public function parse( $str = NULL ) {
198
	public function parse( $str = NULL ) {
Line 201... Line 199...
201
			
199
		
Line 202... Line -...
202
			unset($this->debug['parse']);
-
 
203
 
200
		unset($this->debug['parse']);
204
 
201
 
205
			$temp = '';
202
		$temp = '';
206
			$first_str_part = NULL;
203
		$first_str_part = NULL;
207
			$second_str_part = NULL;
204
		$second_str_part = NULL;
208
			$temp_genus = '';
205
		$temp_genus = '';
209
			$temp_species = '';
206
		$temp_species = '';
210
			$temp_genus_species = '';
207
		$temp_genus_species = '';
Line 211... Line 208...
211
			$temp_authority = '';
208
		$temp_authority = '';
Line 212... Line 209...
212
			$temp_infra = '';
209
		$temp_infra = '';
213
			
210
			
214
			$this->debug['parse'][] = "1";
211
		//$this->debug['parse'][] = "1";
215
 
212
 
216
			if ( ($str == NULL) || ( trim($str) == '') ) {
213
		if ( ($str == NULL) || ( trim($str) == '') ) {
217
				$this->debug[] = "N1a<br>";
214
			//$this->debug[] = "N1a<br>";
218
				return '';
215
			return '';
219
			} else {
216
		} else {
Line 220... Line 217...
220
				//	trim any leading, trailing spaces or line feeds
217
			//	trim any leading, trailing spaces or line feeds
221
				$temp = trim( $str );
218
			$temp = trim( $str );
222
				$this->debug['parse'][] = "1b";
219
			//$this->debug['parse'][] = "1b";
223
			}
220
		}
224
 
221
 
Line 225... Line 222...
225
			if ( $temp == NULL || $temp == '') {
222
		if ( $temp == NULL || $temp == '') {
226
				$this->debug['parse'][] = "2a";
223
			//$this->debug['parse'][] = "2a";
227
				return '';
224
			return '';
Line 228... Line 225...
228
			} else {
225
		} else {
Line 229... Line 226...
229
				$this->debug['parse'][] = "2b";
226
			//$this->debug['parse'][] = "2b";
230
 
227
 
231
				// replace any HTML ampersands
228
			// replace any HTML ampersands
232
				$set = array('%', '&', 'amp;%', 'AMP;%');
229
			$set = array('%', '&', 'amp;%', 'AMP;%');
233
				$temp = str_replace( $set, '&', $temp );
230
			$temp = str_replace( $set, '&', $temp );
Line 234... Line 231...
234
 
231
 
235
				$this->debug['parse'][] = "2b1 (temp:$temp)";
232
			//$this->debug['parse'][] = "2b1 (temp:$temp)";
236
 
233
 
237
				// remove any content in angle brackets (e.g. html tags - <i>, </i>, etc.)
234
			// remove any content in angle brackets (e.g. html tags - <i>, </i>, etc.)
238
				$html_pattern = "(\<(/?[^\>]+)\>)";
235
			$html_pattern = "(\<(/?[^\>]+)\>)";
239
//? This should not just handle html tags but all <*>				
236
			//? This should not just handle html tags but all <*>				
240
				$temp = preg_replace( $html_pattern, '', $temp);
237
			$temp = preg_replace( $html_pattern, '', $temp);
241
				$this->debug['parse'][] = "2b2 (temp:$temp)";
238
			//$this->debug['parse'][] = "2b2 (temp:$temp)";
242
 
239
 
Line 243... Line 240...
243
				// if second term (only) is in round brackets, presume it is a subgenus or a comment and remove it
240
			// if second term (only) is in round brackets, presume it is a subgenus or a comment and remove it
244
				// examples: Barbatia (Mesocibota) bistrigata (Dunker, 1866) => Barbatia bistrigata (Dunker, 1866)
241
			// examples: Barbatia (Mesocibota) bistrigata (Dunker, 1866) => Barbatia bistrigata (Dunker, 1866)
245
				// Barbatia (?) bistrigata (Dunker, 1866) => Barbatia bistrigata (Dunker, 1866)
242
			// Barbatia (?) bistrigata (Dunker, 1866) => Barbatia bistrigata (Dunker, 1866)
246
				// (obviously this will not suit genus + author alone, where first part of authorname is in brackets,
243
			// (obviously this will not suit genus + author alone, where first part of authorname is in brackets,
247
				// however this is very rare?? and in any case we are not supporting genus+authority in this version)
244
			// however this is very rare?? and in any case we are not supporting genus+authority in this version)
248
//if ( $temp like '% (%)%'
245
			//if ( $temp like '% (%)%'
Line 249... Line 246...
249
				$temp = preg_replace( "/ \(\w*\W*\)/", '', $temp, 1 );
246
			$temp = preg_replace( "/ \(\w*\W*\)/", '', $temp, 1 );
250
//? Not sure if this will catch if				
247
			//? Not sure if this will catch if				
251
				$this->debug['parse'][] = "2b3 (temp:$temp)";
248
			//$this->debug['parse'][] = "2b3 (temp:$temp)";
252
 
249
 
253
				// if second term (only) is in square brackets, presume it is a comment and remove it
250
			// if second term (only) is in square brackets, presume it is a comment and remove it
254
				// example: Aphis [?] ficus Theobald, [1918] => Aphis ficus Theobald, [1918]		
251
			// example: Aphis [?] ficus Theobald, [1918] => Aphis ficus Theobald, [1918]		
255
//if ( $temp like '% [%]%'
252
			//if ( $temp like '% [%]%'
256
				$temp = preg_replace( "/ \[\w*\W*\]/", '', $temp, 1 );
253
			$temp = preg_replace( "/ \[\w*\W*\]/", '', $temp, 1 );
Line 257... Line 254...
257
//? Not sure if this will catch if				
254
			//? Not sure if this will catch if				
Line 258... Line 255...
258
				$this->debug['parse'][] = "2b4 (temp:$temp)";
255
			//$this->debug['parse'][] = "2b4 (temp:$temp)";
259
 
256
 
Line 260... Line 257...
260
				// drop indicators of questionable id's - presume all are lowercase for now (could extend as needed)
257
			// drop indicators of questionable id's - presume all are lowercase for now (could extend as needed)
Line 261... Line 258...
261
				$temp = preg_replace( "/ cf /", " ", $temp );
258
			$temp = preg_replace( "/ cf /", " ", $temp );
262
				$temp = preg_replace( "/ cf\. /", " ", $temp );
259
			$temp = preg_replace( "/ cf\. /", " ", $temp );
263
				$temp = preg_replace( "/ near /", " ", $temp );
260
			$temp = preg_replace( "/ near /", " ", $temp );
264
				$temp = preg_replace( "/ aff\. /", " ", $temp );
261
			$temp = preg_replace( "/ aff\. /", " ", $temp );
Line 283... Line 280...
283
				} else {
280
			} else {
284
					$temp_genus = $temp;
281
				$temp_genus = $temp;
285
					$temp = '';
282
				$temp = '';
286
				}
283
			}
Line 287... Line 284...
287
				
284
				
Line 288... Line 285...
288
				$this->debug['parse'][] = "2b7 (temp_genus:$temp_genus) (temp:$temp)";
285
			//$this->debug['parse'][] = "2b7 (temp_genus:$temp_genus) (temp:$temp)";
289
 
286
 
290
				// look for species epithet and authority
287
			// look for species epithet and authority
291
				$ar = explode( " ", $temp, 2);
288
			$ar = explode( " ", $temp, 2);
Line 303... Line 300...
303
                $temp_authority = preg_replace( "/ssp./", "subsp.", $temp_authority);
300
            $temp_authority = preg_replace( "/ssp./", "subsp.", $temp_authority);
304
                $temp_authority = preg_replace( "/ssp /", "subsp.", $temp_authority);
301
            $temp_authority = preg_replace( "/ssp /", "subsp.", $temp_authority);
305
                $temp_authority = preg_replace( "/subsp /", "subsp.", $temp_authority);
302
            $temp_authority = preg_replace( "/subsp /", "subsp.", $temp_authority);
306
                $temp_authority = preg_replace( "/var /", "var.", $temp_authority);
303
            $temp_authority = preg_replace( "/var /", "var.", $temp_authority);
Line -... Line 304...
-
 
304
 
-
 
305
            $temp_infra_authority = '';
307
 
306
            $temp_infra_type = '';
308
                foreach ($infras as $infra) {
307
            foreach ($infras as $infra) {
309
                    $pos = strpos($temp_authority, $infra);
308
            	$pos = strpos($temp_authority, $infra);
310
                    if ($pos === false) {
309
                if ($pos === false) {
311
                        continue;
-
 
312
                    }
310
                	continue;
313
                    else {
311
                } else {
314
                        $temp_infra=substr($temp_authority,$pos+strlen($infra));
312
                	$temp_infra=substr($temp_authority,$pos+strlen($infra));
315
                        $temp_authority=substr($temp_authority,0,$pos);
313
                    $temp_authority=substr($temp_authority,0,$pos);
316
                        $temp_infra=trim($temp_infra);
314
                    $temp_infra=trim($temp_infra);
317
                        $temp_infra_type=$infra;
315
                    $temp_infra_type=$infra;
Line 323... Line 321...
323
                        }
321
                    }
324
                        break; // on s'arrete au premier trouve
322
                    break; // on s'arrete au premier trouve
325
                    }
323
                }
326
                }
324
			}
Line 327... Line 325...
327
 
325
 
328
				$this->debug['parse'][] = "2b8 (temp_genus:$temp_genus) (temp_species:$temp_species) (temp_authority:$temp_authority) (temp_infra:$temp_infra) (temp_infra_authority:$temp_infra_authority) (temp:$temp)";
-
 
Line 329... Line 326...
329
 
326
			//$this->debug['parse'][] = "2b8 (temp_genus:$temp_genus) (temp_species:$temp_species) (temp_authority:$temp_authority) (temp_infra:$temp_infra) (temp_infra_authority:$temp_infra_authority) (temp:$temp)";
330
 
327
 
331
				// replace selected ligatures here (Genus names can contain Æ, OE ligature)
328
			// replace selected ligatures here (Genus names can contain Æ, OE ligature)
332
				$temp_genus = str_replace( 'Æ', 'AE', $temp_genus);
329
			$temp_genus = str_replace( 'Æ', 'AE', $temp_genus);
Line 333... Line -...
333
				$temp_species = str_replace( 'Æ', 'AE', $temp_species);
-
 
334
				$temp_infra = str_replace( 'Æ', 'AE', $temp_infra );
330
			$temp_species = str_replace( 'Æ', 'AE', $temp_species);
Line 335... Line 331...
335
				
331
			$temp_infra = str_replace( 'Æ', 'AE', $temp_infra );
336
 
332
 
337
				$this->debug['parse'][] = "2b9 (temp_genus:$temp_genus) (temp_species:$temp_species) (temp_authority:$temp_authority) (temp_infra:$temp_infra) (temp_infra_authority:$temp_infra_authority) (temp:$temp)";
333
			//$this->debug['parse'][] = "2b9 (temp_genus:$temp_genus) (temp_species:$temp_species) (temp_authority:$temp_authority) (temp_infra:$temp_infra) (temp_infra_authority:$temp_infra_authority) (temp:$temp)";
Line 338... Line 334...
338
 
334
 
339
                $temp_genus= trim($temp_genus);
335
            $temp_genus= trim($temp_genus);
340
				$temp_species= trim($temp_species);
336
			$temp_species= trim($temp_species);
341
				$temp_infra= trim($temp_infra );
337
			$temp_infra= trim($temp_infra );
Line 342... Line 338...
342
 
338
 
Line 343... Line 339...
343
				// reduce any new multiple internal spaces to single space, if present
339
			// reduce any new multiple internal spaces to single space, if present
344
                $temp_genus= $this->reduce_spaces( $temp_genus );
340
            $temp_genus= $this->reduce_spaces( $temp_genus );
345
				$temp_species= $this->reduce_spaces( $temp_species );
341
			$temp_species= $this->reduce_spaces( $temp_species );
Line 346... Line 342...
346
				$temp_infra= $this->reduce_spaces( $temp_infra );
342
			$temp_infra= $this->reduce_spaces( $temp_infra );
347
 
343
 
348
				$this->debug['parse'][] = "2b10 (temp_genus:$temp_genus) (temp_species:$temp_species) (temp_authority:$temp_authority) (temp_infra:$temp_infra) (temp_infra_authority:$temp_infra_authority) (temp:$temp)";
344
			//$this->debug['parse'][] = "2b10 (temp_genus:$temp_genus) (temp_species:$temp_species) (temp_authority:$temp_authority) (temp_infra:$temp_infra) (temp_infra_authority:$temp_infra_authority) (temp:$temp)";
349
 
-
 
350
                if (isset($temp_authority) && ($temp_authority!='') ) {
-
 
351
                    $temp_authority=$this->parse_auth($temp_authority);
345
 
352
                }
-
 
353
 
346
            if (isset($temp_authority) && ($temp_authority!='') ) {
354
                if (isset($temp_infra_authority) && ($temp_infra_authority!='') ) {
-
 
355
                    $temp_infra_authority=$this->parse_auth($temp_infra_authority);
347
            	$temp_authority=$this->parse_auth($temp_authority);
356
                }
-
 
357
 
348
            }
358
 
-
 
359
				$this->debug['parse'][] = "2b11 (temp_genus:$temp_genus) (temp_species:$temp_species) (temp_authority:$temp_authority) (temp_infra:$temp_infra) (temp_infra_authority:$temp_infra_authority) (temp:$temp)";
-
 
360
 
-
 
361
				return array("genus"=>$temp_genus, "species"=>$temp_species, "authority"=>$temp_authority, "infra"=>$temp_infra, "infra_authority"=>$temp_infra_authority, "infra_type"=>$temp_infra_type);
349
 
362
				
-
 
363
			}
350
            if (isset($temp_infra_authority) && ($temp_infra_authority!='') ) {