1,19 → 1,27 |
<?php |
|
// declare(encoding='UTF-8'); |
/** |
* Classe permettant de convertir une chaine d'un nom scientifique en un format standard. |
* |
* Source originale : |
* Taxamatch-Webservice PHP v1.0.0 |
* @author Michael Giddens |
* @link http://www.silverbiology.com |
* |
* |
* @internal Minimum PHP version : 5.2 |
* @category CEL |
* @package Services |
* @subpackage Bibliothèques |
* @version 0.1 |
* @author Mathias CHOUET <mathias@tela-botanica.org> |
* @author David DELON <david@clapas.net> |
* @author Jean-Pascal MILCENT <jpm@tela-botanica.org> |
* @author Aurelien PERONNET <aurelien@tela-botanica.org> |
* @license GPL v3 <http://www.gnu.org/licenses/gpl.txt> |
* @license CECILL v2 <http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt> |
* @copyright 1999-2014 Tela Botanica <accueil@tela-botanica.org> |
*/ |
|
/* Adaptation par David Delon, décembre 2010 : gestion des sous-espèces |
*/ |
|
|
/** |
* Class NameParser |
* Used to convert a string to a standardized format. |
*/ |
class NameParser { |
|
/** |
22,15 → 30,7 |
*/ |
public $debug_flag; |
|
|
|
/**
* Constructor.
*
* Takes no arguments and has an empty body: no property is assigned here.
*/
public function __construct( ) {
}
|
/** |
* Sets value to the method property |
* @param mixed $name class property name |
* @param mixed $value class property value |
46,10 → 46,8 |
* @return string : string with only one space between characters
*/ |
private function reduce_spaces( $str ) {
	// Collapse every run of two or more consecutive spaces into one space.
	// A single pass is enough: once it has run, no run of two or more spaces
	// can remain, so repeating the same replacement would change nothing.
	$str = preg_replace('/ {2,}/', ' ', $str );

	// Strip leading and trailing whitespace and hand back the result.
	return trim( $str );
}
|
73,7 → 71,6 |
* @return string : parsed author string |
*/ |
public function parse_auth( $str, $upcase=1 ) { |
|
$this->debug['parse_auth'][] = "1"; |
$temp = $str = trim($str); |
|
141,7 → 138,6 |
$this->debug['parse_auth'][] = "6 (temp:$temp)"; |
|
foreach( explode(' ', $temp) as $this_word ) { |
|
//$this->debug['parse_auth'][] = "7 (this_word:$this_word)"; |
$elapsed_chars = ''; |
// like '(%' |
155,12 → 151,9 |
$elapsed_chars .= $this_word . ' '; |
//$this->debug['parse_auth'][] = "7c (this_word:$this_word) (elapsed_chars:$elapsed_chars)"; |
} |
|
$elapsed_chars = $this->reduce_spaces( str_replace(' )', ')', $elapsed_chars) ); |
|
return trim( $elapsed_chars ) ; |
} |
|
} |
|
/** |
196,9 → 189,7 |
* @return string : parsed string |
*/ |
public function parse( $str = NULL ) { |
|
unset($this->debug['parse']); |
|
$temp = ''; |
$first_str_part = NULL; |
$second_str_part = NULL; |
232,7 → 223,7 |
//$this->debug['parse'][] = "2b1 (temp:$temp)"; |
|
// remove any content in angle brackets (e.g. html tags - <i>, </i>, etc.) |
$html_pattern = "(\<(/?[^\>]+)\>)"; |
$html_pattern = '(\<(/?[^\>]+)\>)'; |
//? This should not just handle html tags but all <*> |
$temp = preg_replace( $html_pattern, '', $temp); |
//$this->debug['parse'][] = "2b2 (temp:$temp)"; |
243,7 → 234,7 |
// (obviously this will not suit genus + author alone, where first part of authorname is in brackets, |
// however this is very rare?? and in any case we are not supporting genus+authority in this version) |
//if ( $temp like '% (%)%' |
$temp = preg_replace( "/ \(\w*\W*\)/", '', $temp, 1 ); |
$temp = preg_replace( '/ \(\w*\W*\)/', '', $temp, 1 ); |
//? Not sure if this will catch if |
//$this->debug['parse'][] = "2b3 (temp:$temp)"; |
|
250,18 → 241,18 |
// if second term (only) is in square brackets, presume it is a comment and remove it |
// example: Aphis [?] ficus Theobald, [1918] => Aphis ficus Theobald, [1918] |
//if ( $temp like '% [%]%' |
$temp = preg_replace( "/ \[\w*\W*\]/", '', $temp, 1 ); |
$temp = preg_replace( '/ \[\w*\W*\]/', '', $temp, 1 ); |
//? Not sure if this will catch if |
//$this->debug['parse'][] = "2b4 (temp:$temp)"; |
|
// drop indicators of questionable id's - presume all are lowercase for now (could extend as needed) |
$temp = preg_replace( "/ cf /", " ", $temp ); |
$temp = preg_replace( "/ cf\. /", " ", $temp ); |
$temp = preg_replace( "/ near /", " ", $temp ); |
$temp = preg_replace( "/ aff\. /", " ", $temp ); |
$temp = preg_replace( "/ sp\. /", " ", $temp ); |
$temp = preg_replace( "/ spp\. /", " ", $temp ); |
$temp = preg_replace( "/ spp /", " ", $temp ); |
$temp = preg_replace('/ cf /', ' ', $temp ); |
$temp = preg_replace('/ cf\. /', ' ', $temp ); |
$temp = preg_replace('/ near /', ' ', $temp ); |
$temp = preg_replace('/ aff\. /', ' ', $temp ); |
$temp = preg_replace('/ sp\. /', ' ', $temp ); |
$temp = preg_replace('/ spp\. /', ' ', $temp ); |
$temp = preg_replace('/ spp /', ' ', $temp ); |
|
//$this->debug['parse'][] = "2b5 (temp:$temp)"; |
|
273,7 → 264,7 |
// now presume first element is genus, second (if present) is species, remainder |
// (if present) is authority |
// look for genus name |
$ar = explode( " ", $temp, 2); |
$ar = explode(' ', $temp, 2); |
if ( count( $ar ) ) { |
$temp_genus = $ar[0]; |
$temp = @$ar[1]; |
285,7 → 276,7 |
//$this->debug['parse'][] = "2b7 (temp_genus:$temp_genus) (temp:$temp)"; |
|
// look for species epithet and authority |
$ar = explode( " ", $temp, 2); |
$ar = explode(' ', $temp, 2); |
if ( count( $ar ) ) { |
$temp_species = $ar[0]; |
$temp_authority = @$ar[1]; |
314,7 → 305,7 |
$temp_infra=trim($temp_infra); |
$temp_infra_type=$infra; |
// look for infra epithet and authority |
$ar = explode(" ", $temp_infra, 2); |
$ar = explode(' ', $temp_infra, 2); |
if ( count( $ar ) ) { |
$temp_infra = $ar[0]; |
$temp_infra_authority = @$ar[1]; |
353,6 → 344,5 |
//$this->debug['parse'][] = "2b11 (temp_genus:$temp_genus) (temp_species:$temp_species) (temp_authority:$temp_authority) (temp_infra:$temp_infra) (temp_infra_authority:$temp_infra_authority) (temp:$temp)"; |
return array("genus"=>$temp_genus, "species"=>$temp_species, "authority"=>$temp_authority, "infra"=>$temp_infra, "infra_authority"=>$temp_infra_authority, "infra_type"=>$temp_infra_type); |
} |
} // End NameParser |
} // End Class |
?> |
} |
} |