Subversion Repositories eFlore/Applications.cel

Compare Revisions

Regard whitespace Rev 2459 → Rev 2462

/trunk/jrest/bibliotheque/NameParser.php
1,19 → 1,27
<?php
 
// declare(encoding='UTF-8');
/**
* Classe permettant de convertir une chaine d'un nom scientifique en un format standard.
*
* Source originale :
* Taxamatch-Webservice PHP v1.0.0
* @author Michael Giddens
* @link http://www.silverbiology.com
*
*
* @internal Minimum PHP version : 5.2
* @category CEL
* @package Services
* @subpackage Bibliothèques
* @version 0.1
* @author Mathias CHOUET <mathias@tela-botanica.org>
* @author David DELON <david@clapas.net>
* @author Jean-Pascal MILCENT <jpm@tela-botanica.org>
* @author Aurelien PERONNET <aurelien@tela-botanica.org>
* @license GPL v3 <http://www.gnu.org/licenses/gpl.txt>
* @license CECILL v2 <http://www.cecill.info/licences/Licence_CeCILL_V2-en.txt>
* @copyright 1999-2014 Tela Botanica <accueil@tela-botanica.org>
*/
 
/* Adaptation par David Delon, décembre 2010 : gestion des sous-espèces
*/
 
 
/**
* Class NameParser
* Used to convert a string to a standardized format.
*/
class NameParser {
 
/**
22,15 → 30,7
*/
public $debug_flag;
 
 
 
/**
* Constructor
*/
public function __construct( ) {
// Intentionally empty: the constructor takes no arguments and performs no initialisation.
}
 
/**
* Sets value to the method property
* @param mixed $name class property name
* @param mixed $value class property value
46,10 → 46,8
* @return string : string with only one space between characters
*/
private function reduce_spaces( $str ) {
// Collapse every run of two or more consecutive space characters into a single space.
$str = preg_replace("/ {2,}/", ' ', $str );
// Same pattern and replacement as the call above (only the quoting style differs),
// so after a successful first pass this call has nothing left to collapse.
// NOTE(review): presumably the old and new revisions of one line, shown together by the diff view - confirm.
$str = preg_replace('/ {2,}/', ' ', $str );
// Strip leading and trailing whitespace before returning.
$str = trim( $str );
return( $str );
}
 
73,7 → 71,6
* @return string : parsed author string
*/
public function parse_auth( $str, $upcase=1 ) {
 
$this->debug['parse_auth'][] = "1";
$temp = $str = trim($str);
141,7 → 138,6
$this->debug['parse_auth'][] = "6 (temp:$temp)";
foreach( explode(' ', $temp) as $this_word ) {
//$this->debug['parse_auth'][] = "7 (this_word:$this_word)";
$elapsed_chars = '';
// like '(%'
155,12 → 151,9
$elapsed_chars .= $this_word . ' ';
//$this->debug['parse_auth'][] = "7c (this_word:$this_word) (elapsed_chars:$elapsed_chars)";
}
$elapsed_chars = $this->reduce_spaces( str_replace(' )', ')', $elapsed_chars) );
return trim( $elapsed_chars ) ;
}
 
}
/**
196,9 → 189,7
* @return string : parsed string
*/
public function parse( $str = NULL ) {
unset($this->debug['parse']);
 
$temp = '';
$first_str_part = NULL;
$second_str_part = NULL;
232,7 → 223,7
//$this->debug['parse'][] = "2b1 (temp:$temp)";
 
// remove any content in angle brackets (e.g. html tags - <i>, </i>, etc.)
$html_pattern = "(\<(/?[^\>]+)\>)";
$html_pattern = '(\<(/?[^\>]+)\>)';
//? This should not just handle html tags but all <*>
$temp = preg_replace( $html_pattern, '', $temp);
//$this->debug['parse'][] = "2b2 (temp:$temp)";
243,7 → 234,7
// (obviously this will not suit genus + author alone, where first part of authorname is in brackets,
// however this is very rare?? and in any case we are not supporting genus+authority in this version)
//if ( $temp like '% (%)%'
$temp = preg_replace( "/ \(\w*\W*\)/", '', $temp, 1 );
$temp = preg_replace( '/ \(\w*\W*\)/', '', $temp, 1 );
//? Not sure if this will catch if
//$this->debug['parse'][] = "2b3 (temp:$temp)";
 
250,18 → 241,18
// if second term (only) is in square brackets, presume it is a comment and remove it
// example: Aphis [?] ficus Theobald, [1918] => Aphis ficus Theobald, [1918]
//if ( $temp like '% [%]%'
$temp = preg_replace( "/ \[\w*\W*\]/", '', $temp, 1 );
$temp = preg_replace( '/ \[\w*\W*\]/', '', $temp, 1 );
//? Not sure if this will catch if
//$this->debug['parse'][] = "2b4 (temp:$temp)";
 
// drop indicators of questionable id's - presume all are lowercase for now (could extend as needed)
$temp = preg_replace( "/ cf /", " ", $temp );
$temp = preg_replace( "/ cf\. /", " ", $temp );
$temp = preg_replace( "/ near /", " ", $temp );
$temp = preg_replace( "/ aff\. /", " ", $temp );
$temp = preg_replace( "/ sp\. /", " ", $temp );
$temp = preg_replace( "/ spp\. /", " ", $temp );
$temp = preg_replace( "/ spp /", " ", $temp );
$temp = preg_replace('/ cf /', ' ', $temp );
$temp = preg_replace('/ cf\. /', ' ', $temp );
$temp = preg_replace('/ near /', ' ', $temp );
$temp = preg_replace('/ aff\. /', ' ', $temp );
$temp = preg_replace('/ sp\. /', ' ', $temp );
$temp = preg_replace('/ spp\. /', ' ', $temp );
$temp = preg_replace('/ spp /', ' ', $temp );
 
//$this->debug['parse'][] = "2b5 (temp:$temp)";
 
273,7 → 264,7
// now presume first element is genus, second (if present) is species, remainder
// (if present) is authority
// look for genus name
$ar = explode( " ", $temp, 2);
$ar = explode(' ', $temp, 2);
if ( count( $ar ) ) {
$temp_genus = $ar[0];
$temp = @$ar[1];
285,7 → 276,7
//$this->debug['parse'][] = "2b7 (temp_genus:$temp_genus) (temp:$temp)";
 
// look for species epithet and authority
$ar = explode( " ", $temp, 2);
$ar = explode(' ', $temp, 2);
if ( count( $ar ) ) {
$temp_species = $ar[0];
$temp_authority = @$ar[1];
314,7 → 305,7
$temp_infra=trim($temp_infra);
$temp_infra_type=$infra;
// look for infra epithet and authority
$ar = explode(" ", $temp_infra, 2);
$ar = explode(' ', $temp_infra, 2);
if ( count( $ar ) ) {
$temp_infra = $ar[0];
$temp_infra_authority = @$ar[1];
353,6 → 344,5
//$this->debug['parse'][] = "2b11 (temp_genus:$temp_genus) (temp_species:$temp_species) (temp_authority:$temp_authority) (temp_infra:$temp_infra) (temp_infra_authority:$temp_infra_authority) (temp:$temp)";
return array("genus"=>$temp_genus, "species"=>$temp_species, "authority"=>$temp_authority, "infra"=>$temp_infra, "infra_authority"=>$temp_infra_authority, "infra_type"=>$temp_infra_type);
}
} // End NameParser
} // End Class
?>
}
}