882,28 → 882,28 |
} |
} |
|
// Module-level setup shared with fix_latin(), which pulls in $nibble_good_chars
// and $byte_map via `global`.
//
// $byte_map starts empty; fix_latin() indexes it by a single byte and appends
// the looked-up value to its output. init_byte_map() is defined elsewhere --
// presumably it fills $byte_map; confirm against its definition.
$byte_map=array(); |
init_byte_map(); |
// Regex fragments. The strings are single-quoted, so PHP leaves the \xNN
// sequences untouched and PCRE interprets them as byte values.
// One byte in 0x00-0x7F (ASCII).
$ascii_char='[\x00-\x7F]'; |
// One byte in 0x80-0xBF (the UTF-8 continuation-byte range).
$cont_byte='[\x80-\xBF]'; |
// Lead byte 0xC0-0xDF followed by exactly one continuation byte.
$utf8_2='[\xC0-\xDF]'.$cont_byte; |
// Lead byte 0xE0-0xEF followed by two continuation bytes.
$utf8_3='[\xE0-\xEF]'.$cont_byte.'{2}'; |
// Lead byte 0xF0-0xF7 followed by three continuation bytes.
$utf8_4='[\xF0-\xF7]'.$cont_byte.'{3}'; |
// Lead byte 0xF8-0xFB followed by four continuation bytes.
// NOTE(review): 5-byte sequences (and lead bytes 0xF5-0xF7 above) are outside
// RFC 3629 UTF-8, and these ranges do not reject overlong forms such as a
// 0xC0/0xC1 lead byte -- the match is structural only.
$utf8_5='[\xF8-\xFB]'.$cont_byte.'{4}'; |
// Anchored pattern ('@' delimiters, 's' flag so '.' also matches newlines):
//   group 1 = a run of one or more ASCII bytes, or a single multi-byte
//             sequence of one of the shapes above;
//   group 2 = everything after group 1.
$nibble_good_chars = "@^($ascii_char+|$utf8_2|$utf8_3|$utf8_4|$utf8_5)(.*)$@s"; |
function fix_latin($instr){ |
|
$byte_map=array(); |
init_byte_map(); |
$ascii_char='[\x00-\x7F]'; |
$cont_byte='[\x80-\xBF]'; |
$utf8_2='[\xC0-\xDF]'.$cont_byte; |
$utf8_3='[\xE0-\xEF]'.$cont_byte.'{2}'; |
$utf8_4='[\xF0-\xF7]'.$cont_byte.'{3}'; |
$utf8_5='[\xF8-\xFB]'.$cont_byte.'{4}'; |
$nibble_good_chars = "@^($ascii_char+|$utf8_2|$utf8_3|$utf8_4|$utf8_5)(.*)$@s"; |
|
function fix_latin($instr){ |
if(mb_check_encoding($instr,'UTF-8'))return $instr; // no need for the rest if it's all valid UTF-8 already |
global $nibble_good_chars,$byte_map; |
$outstr=''; |
$char=''; |
$rest=''; |
while((strlen($instr))>0){ |
if(1==@preg_match($nibble_good_chars,$input,$match)){ |
if(1==@preg_match($nibble_good_chars,$instr,$match)){ |
$char=$match[1]; |
$rest=$match[2]; |
$outstr.=$char; |
}elseif(1==@preg_match('@^(.)(.*)$@s',$input,$match)){ |
}elseif(1==@preg_match('@^(.)(.*)$@s',$instr,$match)){ |
$char=$match[1]; |
$rest=$match[2]; |
$outstr.=$byte_map[$char]; |