<?php
/***************************************************************************\
* SPIP, Systeme de publication pour l'internet *
* *
* Copyright (c) 2001-2005 *
* Arnaud Martin, Antoine Pitrou, Philippe Riviere, Emmanuel Saint-James *
* *
* Ce programme est un logiciel libre distribue sous licence GNU/GPL. *
* Pour plus de details voir le fichier COPYING.txt ou l'aide en ligne. *
\***************************************************************************/
//
// Include guard: the body of this file must run only once.
// The first inclusion defines the marker constant below; any later
// inclusion finds it already defined and returns immediately.
if (defined("_ECRIRE_INC_CHARSETS")) {
	return;
}
define("_ECRIRE_INC_CHARSETS", "1");
/* Charsets handled natively by the tables below (the others go through mb_string):
utf-8 ;
iso-8859-1 ; iso-8859-6 ; iso-8859-9 ; iso-8859-15 ;
windows-1251 = CP1251 ; windows-1256 = CP1256 ;
*/
function load_charset ($charset = 'AUTO', $langue_site = 'AUTO') {
if ($charset == 'AUTO')
$charset = lire_meta('charset');
if (is_array($GLOBALS['CHARSET'][$charset]))
return $charset;
if ($langue_site == 'AUTO')
$langue_site = lire_meta('langue_site');
switch (strtolower($charset)) {
case 'utf-8':
$GLOBALS['CHARSET'][$charset] = array();
return $charset;
// iso latin 1
case 'iso-8859-1':
case '':
$GLOBALS['CHARSET'][$charset] = array (
128=>128, 129=>129, 130=>130, 131=>131, 132=>132, 133=>133, 134=>134, 135=>135,
136=>136, 137=>137, 138=>138, 139=>139, 140=>140, 141=>141, 142=>142, 143=>143,
144=>144, 145=>145, 146=>146, 147=>147, 148=>148, 149=>149, 150=>150, 151=>151,
152=>152, 153=>153, 154=>154, 155=>155, 156=>156, 157=>157, 158=>158, 159=>159,
160=>160, 161=>161, 162=>162, 163=>163, 164=>164, 165=>165, 166=>166, 167=>167,
168=>168, 169=>169, 170=>170, 171=>171, 172=>172, 173=>173, 174=>174, 175=>175,
176=>176, 177=>177, 178=>178, 179=>179, 180=>180, 181=>181, 182=>182, 183=>183,
184=>184, 185=>185, 186=>186, 187=>187, 188=>188, 189=>189, 190=>190, 191=>191,
192=>192, 193=>193, 194=>194, 195=>195, 196=>196, 197=>197, 198=>198, 199=>199,
200=>200, 201=>201, 202=>202, 203=>203, 204=>204, 205=>205, 206=>206, 207=>207,
208=>208, 209=>209, 210=>210, 211=>211, 212=>212, 213=>213, 214=>214, 215=>215,
216=>216, 217=>217, 218=>218, 219=>219, 220=>220, 221=>221, 222=>222, 223=>223,
224=>224, 225=>225, 226=>226, 227=>227, 228=>228, 229=>229, 230=>230, 231=>231,
232=>232, 233=>233, 234=>234, 235=>235, 236=>236, 237=>237, 238=>238, 239=>239,
240=>240, 241=>241, 242=>242, 243=>243, 244=>244, 245=>245, 246=>246, 247=>247,
248=>248, 249=>249, 250=>250, 251=>251, 252=>252, 253=>253, 254=>254, 255=>255
);
return $charset;
// iso latin 9 - Turc <alexis@nds.k12.tr>
case 'iso-8859-9':
load_charset('iso-8859-1');
$trans = $GLOBALS['CHARSET']['iso-8859-1'];
$trans[240]=287; //gbreve
$trans[208]=286; //Gbreve
$trans[221]=304; //Idot
$trans[253]=305; //inodot
$trans[254]=351; //scedil
$trans[222]=350; //Scedil
$GLOBALS['CHARSET'][$charset] = $trans;
return $charset;
// iso latin 15 - Gaetan Ryckeboer <gryckeboer@virtual-net.fr>
case 'iso-8859-15':
load_charset('iso-8859-1');
$trans = $GLOBALS['CHARSET']['iso-8859-1'];
$trans[164]=8364;
$trans[166]=352;
$trans[168]=353;
$trans[180]=381;
$trans[184]=382;
$trans[188]=338;
$trans[189]=339;
$trans[190]=376;
$GLOBALS['CHARSET'][$charset] = $trans;
return $charset;
// cyrillic - ref. http://czyborra.com/charsets/cyrillic.html
case 'windows-1251':
case 'cp1251':
$GLOBALS['CHARSET'][$charset] = array (
0x80=>0x0402, 0x81=>0x0403, 0x82=>0x201A, 0x83=>0x0453, 0x84=>0x201E,
0x85=>0x2026, 0x86=>0x2020, 0x87=>0x2021, 0x88=>0x20AC, 0x89=>0x2030,
0x8A=>0x0409, 0x8B=>0x2039, 0x8C=>0x040A, 0x8D=>0x040C, 0x8E=>0x040B,
0x8F=>0x040F, 0x90=>0x0452, 0x91=>0x2018, 0x92=>0x2019, 0x93=>0x201C,
0x94=>0x201D, 0x95=>0x2022, 0x96=>0x2013, 0x97=>0x2014, 0x99=>0x2122,
0x9A=>0x0459, 0x9B=>0x203A, 0x9C=>0x045A, 0x9D=>0x045C, 0x9E=>0x045B,
0x9F=>0x045F, 0xA0=>0x00A0, 0xA1=>0x040E, 0xA2=>0x045E, 0xA3=>0x0408,
0xA4=>0x00A4, 0xA5=>0x0490, 0xA6=>0x00A6, 0xA7=>0x00A7, 0xA8=>0x0401,
0xA9=>0x00A9, 0xAA=>0x0404, 0xAB=>0x00AB, 0xAC=>0x00AC, 0xAD=>0x00AD,
0xAE=>0x00AE, 0xAF=>0x0407, 0xB0=>0x00B0, 0xB1=>0x00B1, 0xB2=>0x0406,
0xB3=>0x0456, 0xB4=>0x0491, 0xB5=>0x00B5, 0xB6=>0x00B6, 0xB7=>0x00B7,
0xB8=>0x0451, 0xB9=>0x2116, 0xBA=>0x0454, 0xBB=>0x00BB, 0xBC=>0x0458,
0xBD=>0x0405, 0xBE=>0x0455, 0xBF=>0x0457, 0xC0=>0x0410, 0xC1=>0x0411,
0xC2=>0x0412, 0xC3=>0x0413, 0xC4=>0x0414, 0xC5=>0x0415, 0xC6=>0x0416,
0xC7=>0x0417, 0xC8=>0x0418, 0xC9=>0x0419, 0xCA=>0x041A, 0xCB=>0x041B,
0xCC=>0x041C, 0xCD=>0x041D, 0xCE=>0x041E, 0xCF=>0x041F, 0xD0=>0x0420,
0xD1=>0x0421, 0xD2=>0x0422, 0xD3=>0x0423, 0xD4=>0x0424, 0xD5=>0x0425,
0xD6=>0x0426, 0xD7=>0x0427, 0xD8=>0x0428, 0xD9=>0x0429, 0xDA=>0x042A,
0xDB=>0x042B, 0xDC=>0x042C, 0xDD=>0x042D, 0xDE=>0x042E, 0xDF=>0x042F,
0xE0=>0x0430, 0xE1=>0x0431, 0xE2=>0x0432, 0xE3=>0x0433, 0xE4=>0x0434,
0xE5=>0x0435, 0xE6=>0x0436, 0xE7=>0x0437, 0xE8=>0x0438, 0xE9=>0x0439,
0xEA=>0x043A, 0xEB=>0x043B, 0xEC=>0x043C, 0xED=>0x043D, 0xEE=>0x043E,
0xEF=>0x043F, 0xF0=>0x0440, 0xF1=>0x0441, 0xF2=>0x0442, 0xF3=>0x0443,
0xF4=>0x0444, 0xF5=>0x0445, 0xF6=>0x0446, 0xF7=>0x0447, 0xF8=>0x0448,
0xF9=>0x0449, 0xFA=>0x044A, 0xFB=>0x044B, 0xFC=>0x044C, 0xFD=>0x044D,
0xFE=>0x044E, 0xFF=>0x044F); // fin windows-1251
return $charset;
// arabic - george kandalaft - http://www.microsoft.com/typography/unicode/1256.htm
case 'windows-1256':
case 'cp1256':
$GLOBALS['CHARSET'][$charset] = array (
0x80=>0x20AC, 0x81=>0x067E, 0x82=>0x201A, 0x83=>0x0192, 0x84=>0x201E,
0x85=>0x2026, 0x86=>0x2020, 0x87=>0x2021, 0x88=>0x02C6, 0x89=>0x2030,
0x8A=>0x0679, 0x8B=>0x2039, 0x8C=>0x0152, 0x8D=>0x0686, 0x8E=>0x0698,
0x8F=>0x0688, 0x90=>0x06AF, 0x91=>0x2018, 0x92=>0x2019, 0x93=>0x201C,
0x94=>0x201D, 0x95=>0x2022, 0x96=>0x2013, 0x97=>0x2014, 0x98=>0x06A9,
0x99=>0x2122, 0x9A=>0x0691, 0x9B=>0x203A, 0x9C=>0x0153, 0x9D=>0x200C,
0x9E=>0x200D, 0x9F=>0x06BA, 0xA0=>0x00A0, 0xA1=>0x060C, 0xA2=>0x00A2,
0xA3=>0x00A3, 0xA4=>0x00A4, 0xA5=>0x00A5, 0xA6=>0x00A6, 0xA7=>0x00A7,
0xA8=>0x00A8, 0xA9=>0x00A9, 0xAA=>0x06BE, 0xAB=>0x00AB, 0xAC=>0x00AC,
0xAD=>0x00AD, 0xAE=>0x00AE, 0xAF=>0x00AF, 0xB0=>0x00B0, 0xB1=>0x00B1,
0xB2=>0x00B2, 0xB3=>0x00B3, 0xB4=>0x00B4, 0xB5=>0x00B5, 0xB6=>0x00B6,
0xB7=>0x00B7, 0xB8=>0x00B8, 0xB9=>0x00B9, 0xBA=>0x061B, 0xBB=>0x00BB,
0xBC=>0x00BC, 0xBD=>0x00BD, 0xBE=>0x00BE, 0xBF=>0x061F, 0xC0=>0x06C1,
0xC1=>0x0621, 0xC2=>0x0622, 0xC3=>0x0623, 0xC4=>0x0624, 0xC5=>0x0625,
0xC6=>0x0626, 0xC7=>0x0627, 0xC8=>0x0628, 0xC9=>0x0629, 0xCA=>0x062A,
0xCB=>0x062B, 0xCC=>0x062C, 0xCD=>0x062D, 0xCE=>0x062E, 0xCF=>0x062F,
0xD0=>0x0630, 0xD1=>0x0631, 0xD2=>0x0632, 0xD3=>0x0633, 0xD4=>0x0634,
0xD5=>0x0635, 0xD6=>0x0636, 0xD7=>0x00D7, 0xD8=>0x0637, 0xD9=>0x0638,
0xDA=>0x0639, 0xDB=>0x063A, 0xDC=>0x0640, 0xDD=>0x0641, 0xDE=>0x0642,
0xDF=>0x0643, 0xE0=>0x00E0, 0xE1=>0x0644, 0xE2=>0x00E2, 0xE3=>0x0645,
0xE4=>0x0646, 0xE5=>0x0647, 0xE6=>0x0648, 0xE7=>0x00E7, 0xE8=>0x00E8,
0xE9=>0x00E9, 0xEA=>0x00EA, 0xEB=>0x00EB, 0xEC=>0x0649, 0xED=>0x064A,
0xEE=>0x00EE, 0xEF=>0x00EF, 0xF0=>0x064B, 0xF1=>0x064C, 0xF2=>0x064D,
0xF3=>0x064E, 0xF4=>0x00F4, 0xF5=>0x064F, 0xF6=>0x0650, 0xF7=>0x00F7,
0xF8=>0x0651, 0xF9=>0x00F9, 0xFA=>0x0652, 0xFB=>0x00FB, 0xFC=>0x00FC,
0xFD=>0x200E, 0xFE=>0x200F, 0xFF=>0x06D2); // fin windows-1256
return $charset;
// arabic iso-8859-6 - http://czyborra.com/charsets/iso8859.html#ISO-8859-6
case 'iso-8859-6':
load_charset('iso-8859-1');
$trans = $GLOBALS['CHARSET']['iso-8859-1'];
$mod = Array(
0xA0=>0x00A0, 0xA4=>0x00A4, 0xAC=>0x060C, 0xAD=>0x00AD, 0xBB=>0x061B,
0xBF=>0x061F, 0xC1=>0x0621, 0xC2=>0x0622, 0xC3=>0x0623, 0xC4=>0x0624,
0xC5=>0x0625, 0xC6=>0x0626, 0xC7=>0x0627, 0xC8=>0x0628, 0xC9=>0x0629,
0xCA=>0x062A, 0xCB=>0x062B, 0xCC=>0x062C, 0xCD=>0x062D, 0xCE=>0x062E,
0xCF=>0x062F, 0xD0=>0x0630, 0xD1=>0x0631, 0xD2=>0x0632, 0xD3=>0x0633,
0xD4=>0x0634, 0xD5=>0x0635, 0xD6=>0x0636, 0xD7=>0x0637, 0xD8=>0x0638,
0xD9=>0x0639, 0xDA=>0x063A, 0xE0=>0x0640, 0xE1=>0x0641, 0xE2=>0x0642,
0xE3=>0x0643, 0xE4=>0x0644, 0xE5=>0x0645, 0xE6=>0x0646, 0xE7=>0x0647,
0xE8=>0x0648, 0xE9=>0x0649, 0xEA=>0x064A, 0xEB=>0x064B, 0xEC=>0x064C,
0xED=>0x064D, 0xEE=>0x064E, 0xEF=>0x064F, 0xF0=>0x0650, 0xF1=>0x0651,
0xF2=>0x0652
);
foreach ($mod as $num=>$val)
$trans[$num]=$val;
$GLOBALS['CHARSET'][$charset] = $trans;
return $charset;
// ------------------------------------------------------------------
// cas particulier pour les entites html (a completer eventuellement)
case 'html':
$GLOBALS['CHARSET'][$charset] = array (
'ldquo'=>'“', 'rdquo'=>'”',
'cent'=>'¢', 'pound'=>'£', 'curren'=>'¤', 'yen'=>'¥', 'brvbar'=>'¦',
'sect'=>'§', 'uml'=>'¨', 'ordf'=>'ª', 'laquo'=>'«', 'not'=>'¬',
'shy'=>'­', 'macr'=>'¯', 'deg'=>'°', 'plusmn'=>'±', 'sup2'=>'²',
'sup3'=>'³', 'acute'=>'´', 'micro'=>'µ', 'para'=>'¶', 'middot'=>'·',
'cedil'=>'¸', 'sup1'=>'¹', 'ordm'=>'º', 'raquo'=>'»', 'iquest'=>'¿',
'Agrave'=>'À', 'Aacute'=>'Á', 'Acirc'=>'Â', 'Atilde'=>'Ã', 'Auml'=>'Ä',
'Aring'=>'Å', 'AElig'=>'Æ', 'Ccedil'=>'Ç', 'Egrave'=>'È', 'Eacute'=>'É',
'Ecirc'=>'Ê', 'Euml'=>'Ë', 'Igrave'=>'Ì', 'Iacute'=>'Í', 'Icirc'=>'Î',
'Iuml'=>'Ï', 'ETH'=>'Ð', 'Ntilde'=>'Ñ', 'Ograve'=>'Ò', 'Oacute'=>'Ó',
'Ocirc'=>'Ô', 'Otilde'=>'Õ', 'Ouml'=>'Ö', 'times'=>'×', 'Oslash'=>'Ø',
'Ugrave'=>'Ù', 'Uacute'=>'Ú', 'Ucirc'=>'Û', 'Uuml'=>'Ü', 'Yacute'=>'Ý',
'THORN'=>'Þ', 'szlig'=>'ß', 'agrave'=>'à', 'aacute'=>'á', 'acirc'=>'â',
'atilde'=>'ã', 'auml'=>'ä', 'aring'=>'å', 'aelig'=>'æ', 'ccedil'=>'ç',
'egrave'=>'è', 'eacute'=>'é', 'ecirc'=>'ê', 'euml'=>'ë', 'igrave'=>'ì',
'iacute'=>'í', 'icirc'=>'î', 'iuml'=>'ï', 'eth'=>'ð', 'ntilde'=>'ñ',
'ograve'=>'ò', 'oacute'=>'ó', 'ocirc'=>'ô', 'otilde'=>'õ', 'ouml'=>'ö',
'divide'=>'÷', 'oslash'=>'ø', 'ugrave'=>'ù', 'uacute'=>'ú',
'ucirc'=>'û', 'uuml'=>'ü', 'yacute'=>'ý', 'thorn'=>'þ',
'nbsp' => " ", 'copy' => "(c)", 'reg' => "(r)", 'frac14' => "1/4",
'frac12' => "1/2", 'frac34' => "3/4", 'amp' => '&', 'quot' => '"',
'apos' => "'", 'lt' => '<', 'gt' => '>',
'mdash' => '—', 'ndash' => '–'
);
return $charset;
case 'mathml':
$GLOBALS['CHARSET'][$charset] = array (
'ac' => '',
'acd' => '',
'acE' => '&E#290;',
'acute' => '́',
'Afr' => '',
'afr' => '',
'aleph' => 'ℵ',
'alpha' => 'α',
'amalg' => '',
'amp' => '&',
'And' => '∧',
'and' => '∧',
'andand' => '',
'andd' => '',
'andslope' => '',
'andv' => '',
'ang' => '∠',
'ange' => '',
'angle' => '∠',
'angmsd' => '∡',
'angmsdaa' => '',
'angmsdab' => '',
'angmsdac' => '',
'angmsdad' => '',
'angmsdae' => '',
'angmsdaf' => '',
'angmsdag' => '',
'angmsdah' => '',
'angrt' => '∟',
'angrtvb' => '',
'angrtvbd' => '',
'angsph' => '∢',
'angst' => 'Å',
'angzarr' => '',
'Aopf' => '',
'ap' => '≈',
'apacir' => '',
'apE' => '',
'ape' => '≊',
'apid' => '≋',
'apos' => ''',
'approx' => '≈',
'approxeq' => '≊',
'Ascr' => '',
'ascr' => '',
'ast' => '∗',
'asymp' => '≍',
'awconint' => '∳',
'awint' => '',
'backcong' => '≌',
'backepsilon' => '',
'backprime' => '‵',
'backsim' => '∽',
'backsimeq' => '⋍',
'Backslash' => '∖',
'Barv' => '',
'barvee' => '⊽',
'Barwed' => '⌆',
'barwed' => '⊼',
'barwedge' => '⊼',
'bbrk' => '',
'bbrktbrk' => '',
'bcong' => '≌',
'becaus' => '∵',
'Because' => '∵',
'because' => '∵',
'bemptyv' => '',
'benzen' => '',
'benzena' => '',
'benzenb' => '',
'benzenc' => '',
'benzend' => '',
'benzene' => '',
'benzenf' => '',
'benzeng' => '',
'benzenh' => '',
'benzeni' => '',
'benzenj' => '',
'benzenk' => '',
'benzenl' => '',
'benzenm' => '',
'benzenn' => '',
'benzeno' => '',
'benzenp' => '',
'benzenq' => '',
'benzenr' => '',
'bepsi' => '',
'bernou' => 'ℬ',
'beta' => 'β',
'beth' => 'ℶ',
'between' => '≬',
'Bfr' => '',
'bfr' => '',
'bigcap' => '⋂',
'bigcirc' => '○',
'bigcup' => '⋃',
'bigodot' => '⊙',
'bigoplus' => '⊕',
'bigotimes' => '⊗',
'bigsqcup' => '⊔',
'bigstar' => '★',
'bigtriangledown' => '▽',
'bigtriangleup' => '△',
'biguplus' => '⊎',
'bigvee' => '⋁',
'bigwedge' => '⋀',
'bkarow' => '',
'blacklozenge' => '',
'blacksquare' => '■',
'blacktriangle' => '▴',
'blacktriangledown' => '▾',
'blacktriangleleft' => '◂',
'blacktriangleright' => '▸',
'blank' => '',
'blk12' => '▒',
'blk14' => '░',
'blk34' => '▓',
'block' => '█',
'bne' => '',
'bnequiv' => '',
'bNot' => '',
'bnot' => '⌐',
'Bopf' => '',
'bot' => '⊥',
'bottom' => '⊥',
'bowtie' => '⋈',
'boxbox' => '',
'boxminus' => '⊟',
'boxplus' => '⊞',
'boxtimes' => '⊠',
'bprime' => '‵',
'Breve' => '̆',
'breve' => '̆',
'brvbar' => '¦',
'Bscr' => '',
'bscr' => '',
'bsemi' => '',
'bsim' => '∽',
'bsime' => '⋍',
'bsol' => '\',
'bsolb' => '',
'bsolhsub' => '',
'bull' => '•',
'bullet' => '•',
'bump' => '≎',
'bumpe' => '≏',
'Bumpeq' => '≎',
'bumpeq' => '≏',
'Cap' => '⋒',
'cap' => '∩',
'capand' => '',
'capbrcup' => '',
'capcap' => '',
'capcup' => '',
'capdot' => '',
'caps' => '',
'caret' => '‸',
'caron' => '̌',
'ccaps' => '',
'Cconint' => '∰',
'ccups' => '',
'ccupssm' => '',
'cdot' => '⋅',
'cedil' => '̧',
'Cedilla' => '̧',
'cemptyv' => '',
'cent' => '¢',
'CenterDot' => '·',
'centerdot' => '·',
'Cfr' => '',
'cfr' => '',
'check' => '✓',
'checkmark' => '✓',
'chi' => 'χ',
'cir' => '∘',
'circ' => '∘',
'circeq' => '≗',
'circle' => '',
'circlearrowleft' => '↺',
'circlearrowright' => '↻',
'circledast' => '⊛',
'circledcirc' => '⊚',
'circleddash' => '⊝',
'CircleDot' => '⊙',
'circledR' => '¯',
'circledS' => '',
'circlef' => '●',
'circlefb' => '◒',
'circlefl' => '◐',
'circlefr' => '◑',
'circleft' => '◓',
'CircleMinus' => '⊖',
'CirclePlus' => '⊕',
'CircleTimes' => '⊗',
'cirE' => '',
'cire' => '≗',
'cirfnint' => '',
'cirmid' => '',
'cirscir' => '',
'ClockwiseContourIntegral' => '∲',
'CloseCurlyDoubleQuote' => '”',
'CloseCurlyQuote' => '’',
'clubs' => '♣',
'clubsuit' => '♣',
'Colon' => '∷',
'colon' => ':',
'Colone' => '',
'colone' => '≔',
'coloneq' => '≔',
'comma' => ',',
'commat' => '@',
'comp' => '∁',
'compfn' => '∘',
'complement' => '∁',
'cong' => '≅',
'congdot' => '',
'Congruent' => '≡',
'Conint' => '∯',
'conint' => '∮',
'ContourIntegral' => '∮',
'Copf' => 'ℂ',
'coprod' => '∐',
'Coproduct' => '∐',
'copy' => '©',
'copysr' => '℗',
'CounterClockwiseContourIntegral' => '∳',
'cross' => '☒',
'Cscr' => '',
'cscr' => '',
'csub' => '',
'csube' => '',
'csup' => '',
'csupe' => '',
'ctdot' => '⋯',
'cudarrl' => '',
'cudarrr' => '',
'cuepr' => '⋞',
'cuesc' => '⋟',
'cularr' => '↶',
'cularrp' => '',
'Cup' => '⌣',
'cup' => '∪',
'cupbrcap' => '',
'CupCap' => '≍',
'cupcap' => '',
'cupcup' => '',
'cupdot' => '⊍',
'cupor' => '',
'cups' => '',
'curarr' => '↷',
'curarrm' => '',
'curlyeqprec' => '⋞',
'curlyeqsucc' => '⋟',
'curlyvee' => '⋎',
'curlywedge' => '⋏',
'curren' => '¤',
'curvearrowleft' => '↶',
'curvearrowright' => '↷',
'cuvee' => '⋎',
'cuwed' => '⋏',
'cwconint' => '∲',
'cwint' => '∱',
'cylcty' => '⌭',
'Dagger' => '‡',
'dagger' => '†',
'daleth' => 'ℸ',
'Darr' => '↡',
'dArr' => '⇓',
'darr' => '↓',
'dash' => '‐',
'Dashv' => '',
'dashv' => '⊣',
'dbkarow' => '',
'dblac' => '̋',
'ddagger' => '‡',
'ddarr' => '⇊',
'DDotrahd' => '',
'ddotseq' => '',
'deg' => '°',
'Del' => '∇',
'Delta' => 'Δ',
'delta' => 'δ',
'demptyv' => '',
'dfisht' => '',
'Dfr' => '',
'dfr' => '',
'dHar' => '',
'dharl' => '⇃',
'dharr' => '⇂',
'DiacriticalAcute' => '́',
'DiacriticalDot' => '̇',
'DiacriticalDoubleAcute' => '̋',
'DiacriticalGrave' => '̀',
'DiacriticalLeftArrow' => '⃖',
'DiacriticalLeftRightArrow' => '⃡',
'DiacriticalLeftRightVector' => '',
'DiacriticalLeftVector' => '⃐',
'DiacriticalRightArrow' => '⃗',
'DiacriticalRightVector' => '⃑',
'DiacriticalTilde' => '̃',
'diam' => '⋄',
'diamond' => '⋄',
'diamondf' => '',
'diamondsuit' => '♢',
'diamonfb' => '',
'diamonfl' => '',
'diamonfr' => '',
'diamonft' => '',
'diams' => '♢',
'die' => '̈',
'digamma' => 'Ϝ',
'disin' => '',
'div' => '÷',
'divide' => '÷',
'divideontimes' => '⋇',
'divonx' => '⋇',
'dlcorn' => '⌞',
'dlcrop' => '⌍',
'dollar' => '$',
'Dopf' => '',
'Dot' => '̈',
'dot' => '̇',
'DotDot' => '⃜',
'doteq' => '≐',
'doteqdot' => '≑',
'DotEqual' => '≐',
'dotminus' => '∸',
'dotplus' => '∔',
'dotsquare' => '⊡',
'doublebarwedge' => '⌆',
'DoubleContourIntegral' => '∯',
'DoubleDot' => '̈',
'DoubleDownArrow' => '⇓',
'DoubleLeftArrow' => '⇐',
'DoubleLeftRightArrow' => '⇔',
'DoubleLongLeftArrow' => '',
'DoubleLongLeftRightArrow' => '',
'DoubleLongRightArrow' => '',
'DoubleRightArrow' => '⇒',
'DoubleRightTee' => '⊨',
'DoubleUpArrow' => '⇑',
'DoubleUpDownArrow' => '⇕',
'DoubleVerticalBar' => '∥',
'DownArrow' => '↓',
'Downarrow' => '⇓',
'downarrow' => '↓',
'DownArrowUpArrow' => '',
'downdownarrows' => '⇊',
'downharpoonleft' => '⇃',
'downharpoonright' => '⇂',
'DownLeftVector' => '↽',
'DownRightVector' => '⇁',
'DownTee' => '⊤',
'drbkarow' => '',
'drcorn' => '⌟',
'drcrop' => '⌌',
'Dscr' => '',
'dscr' => '',
'dsol' => '',
'dtdot' => '⋱',
'dtri' => '▿',
'dtrif' => '▾',
'duarr' => '',
'duhar' => '',
'dwangle' => '',
'dzigrarr' => '⇝',
'easter' => '≛',
'ecir' => '≖',
'ecolon' => '≕',
'eDDot' => '',
'eDot' => '≑',
'efDot' => '≒',
'Efr' => '',
'efr' => '',
'eg' => '',
'egs' => '⋝',
'egsdot' => '',
'el' => '',
'Element' => '∈',
'elinters' => '',
'ell' => 'ℓ',
'els' => '⋜',
'elsdot' => '',
'empty' => '',
'emptyset' => '',
'emptyv' => '∅',
'emsp' => ' ',
'emsp13' => ' ',
'emsp14' => ' ',
'ensp' => ' ',
'Eopf' => '',
'epar' => '⋕',
'eparsl' => '',
'eplus' => '',
'epsi' => '∊',
'epsiv' => 'ε',
'eqcirc' => '≖',
'eqcolon' => '≕',
'eqsim' => '≂',
'eqslantgtr' => '⋝',
'eqslantless' => '⋜',
'equals' => '=',
'EqualTilde' => '≂',
'equest' => '≟',
'Equilibrium' => '⇌',
'equiv' => '≡',
'equivDD' => '',
'eqvparsl' => '',
'erarr' => '',
'erDot' => '≓',
'Escr' => '',
'escr' => '',
'esdot' => '≐',
'Esim' => '',
'esim' => '≂',
'eta' => 'η',
'excl' => '!',
'exist' => '∃',
'Exists' => '∃',
'fallingdotseq' => '≒',
'female' => '♀',
'ffilig' => 'ffi',
'fflig' => 'ff',
'ffllig' => 'ffl',
'Ffr' => '',
'ffr' => '',
'filig' => 'fi',
'fjlig' => '',
'flat' => '♭',
'fllig' => 'fl',
'fltns' => '',
'Fopf' => '',
'ForAll' => '∀',
'forall' => '∀',
'fork' => '⋔',
'forkv' => '',
'fpartint' => '',
'frac12' => '½',
'frac13' => '⅓',
'frac14' => '¼',
'frac15' => '⅕',
'frac16' => '⅙',
'frac18' => '⅛',
'frac23' => '≔',
'frac25' => '⅖',
'frac34' => '¾',
'frac35' => '⅗',
'frac38' => '⅜',
'frac45' => '⅘',
'frac56' => '⅚',
'frac58' => '⅝',
'frac78' => '⅞',
'frown' => '⌢',
'Fscr' => '',
'fscr' => '',
'Gamma' => 'Γ',
'gamma' => 'γ',
'Gammad' => 'Ϝ',
'gammad' => 'Ϝ',
'gap' => '≳',
'gE' => '≧',
'ge' => '≥',
'gEl' => '⋛',
'gel' => '⋛',
'geq' => '≥',
'geqq' => '≧',
'geqslant' => '',
'ges' => '',
'gescc' => '',
'gesdot' => '',
'gesdoto' => '',
'gesdotol' => '',
'gesl' => '',
'gesles' => '',
'Gfr' => '',
'gfr' => '',
'Gg' => '⋙',
'gg' => '≫',
'ggg' => '⋙',
'gimel' => 'ℷ',
'gl' => '≷',
'gla' => '',
'glE' => '',
'glj' => '',
'gnap' => '',
'gnapprox' => '',
'gnE' => '≩',
'gne' => '≩',
'gneq' => '≩',
'gneqq' => '≩',
'gnsim' => '⋧',
'Gopf' => '',
'grave' => '̀',
'GreaterEqual' => '≥',
'GreaterEqualLess' => '⋛',
'GreaterFullEqual' => '≧',
'GreaterLess' => '≷',
'GreaterSlantEqual' => '',
'GreaterTilde' => '≳',
'Gscr' => '',
'gscr' => '',
'gsim' => '≳',
'gsime' => '',
'gsiml' => '',
'Gt' => '≫',
'gt' => '>',
'gtcc' => '',
'gtcir' => '',
'gtdot' => '⋗',
'gtlPar' => '',
'gtquest' => '',
'gtrapprox' => '≳',
'gtrarr' => '',
'gtrdot' => '⋗',
'gtreqless' => '⋛',
'gtreqqless' => '⋛',
'gtrless' => '≷',
'gtrsim' => '≳',
'gvertneqq' => '',
'gvnE' => '',
'Hacek' => '̌',
'hairsp' => ' ',
'half' => '½',
'hamilt' => 'ℋ',
'hArr' => '⇔',
'harr' => '↔',
'harrcir' => '',
'harrw' => '↭',
'Hat' => '̂',
'hbar' => '',
'hbenzen' => '',
'hbenzena' => '',
'hbenzenb' => '',
'hbenzenc' => '',
'hbenzend' => '',
'hbenzene' => '',
'hbenzenf' => '',
'hbenzeng' => '',
'hbenzenh' => '',
'hbenzeni' => '',
'hbenzenj' => '',
'hbenzenk' => '',
'hbenzenl' => '',
'hbenzenm' => '',
'hbenzenn' => '',
'hbenzeno' => '',
'hbenzenp' => '',
'hbenzenq' => '',
'hbenzenr' => '',
'hearts' => '♡',
'heartsuit' => '♡',
'hellip' => '…',
'hercon' => '⊹',
'Hfr' => '',
'hfr' => '',
'hksearow' => '',
'hkswarow' => '',
'hoarr' => '',
'homtht' => '∻',
'hookleftarrow' => '↩',
'hookrightarrow' => '↪',
'Hopf' => '',
'horbar' => '―',
'Hscr' => '',
'hscr' => '',
'hslash' => 'ℏ',
'HumpDownHump' => '≎',
'HumpEqual' => '≏',
'hybull' => '⁃',
'hyphen' => '',
'iexcl' => '¡',
'iff' => '',
'Ifr' => '',
'ifr' => '',
'iiiint' => '',
'iiint' => '∭',
'iinfin' => '',
'iiota' => '℩',
'Im' => 'ℑ',
'image' => 'ℑ',
'imath' => 'ı',
'imof' => '⊷',
'imped' => '',
'Implies' => '⇒',
'in' => '∊',
'incare' => '℅',
'infin' => '∞',
'infintie' => '',
'Int' => '∬',
'int' => '∫',
'intcal' => '⊺',
'Integral' => '∫',
'intercal' => '⊺',
'Intersection' => '⋂',
'intlarhk' => '',
'intprod' => '',
'Iopf' => '',
'iota' => 'ι',
'iprod' => '',
'iquest' => '¿',
'Iscr' => '',
'iscr' => '',
'isin' => '∊',
'isindot' => '',
'isinE' => '',
'isins' => '',
'isinsv' => '',
'isinv' => '∈',
'Jfr' => '',
'jfr' => '',
'jmath' => '',
'Jopf' => '',
'Jscr' => '',
'jscr' => '',
'kappa' => 'κ',
'kappav' => 'ϰ',
'Kfr' => '',
'kfr' => '',
'Kopf' => '',
'Kscr' => '',
'kscr' => '',
'lAarr' => '⇚',
'laemptyv' => '',
'lagran' => 'ℒ',
'Lambda' => 'Λ',
'lambda' => 'λ',
'Lang' => '《',
'lang' => '〈',
'langd' => '',
'langle' => '〈',
'lap' => '≲',
'laquo' => '«',
'Larr' => '↞',
'lArr' => '⇐',
'larr' => '←',
'larrbfs' => '',
'larrfs' => '',
'larrhk' => '↩',
'larrlp' => '↫',
'larrpl' => '',
'larrsim' => '',
'larrtl' => '↢',
'lat' => '',
'lAtail' => '',
'latail' => '',
'late' => '',
'lates' => '',
'lBarr' => '',
'lbarr' => '',
'lbbrk' => '〔',
'lbrace' => '{',
'lbrack' => '[',
'lbrke' => '',
'lbrksld' => '',
'lbrkslu' => '',
'lceil' => '⌈',
'lcub' => '{',
'ldca' => '',
'ldquo' => '“',
'ldquor' => '„',
'ldrdhar' => '',
'ldrushar' => '',
'ldsh' => '↲',
'lE' => '≦',
'le' => '≤',
'LeftAngleBracket' => '〈',
'LeftArrow' => '←',
'Leftarrow' => '⇐',
'leftarrow' => '←',
'LeftArrowRightArrow' => '⇆',
'leftarrowtail' => '↢',
'LeftCeiling' => '⌈',
'LeftDownVector' => '⇃',
'LeftFloor' => '⌊',
'leftharpoondown' => '↽',
'leftharpoonup' => '↼',
'leftleftarrows' => '⇇',
'LeftRightArrow' => '↔',
'Leftrightarrow' => '⇔',
'leftrightarrow' => '↔',
'leftrightarrows' => '⇆',
'leftrightharpoons' => '⇋',
'leftrightsquigarrow' => '↭',
'LeftTee' => '⊣',
'leftthreetimes' => '⋋',
'LeftTriangle' => '⊲',
'LeftTriangleEqual' => '⊴',
'LeftUpVector' => '↿',
'LeftVector' => '↼',
'lEg' => '⋚',
'leg' => '⋚',
'leq' => '≤',
'leqq' => '≦',
'leqslant' => '',
'les' => '',
'lescc' => '',
'lesdot' => '',
'lesdoto' => '',
'lesdotor' => '',
'lesg' => '',
'lesges' => '',
'lessapprox' => '≲',
'lessdot' => '⋖',
'lesseqgtr' => '⋚',
'lesseqqgtr' => '⋚',
'LessEqualGreater' => '⋚',
'LessFullEqual' => '≦',
'LessGreater' => '≶',
'lessgtr' => '≶',
'lesssim' => '≲',
'LessSlantEqual' => '',
'LessTilde' => '≲',
'lfisht' => '',
'lfloor' => '⌊',
'Lfr' => '',
'lfr' => '',
'lg' => '≶',
'lgE' => '',
'lHar' => '',
'lhard' => '↽',
'lharu' => '↼',
'lharul' => '',
'lhblk' => '▄',
'Ll' => '⋘',
'll' => '≪',
'llarr' => '⇇',
'llcorner' => '⌞',
'Lleftarrow' => '⇚',
'llhard' => '',
'lltri' => '',
'lmoust' => '',
'lmoustache' => '',
'lnap' => '',
'lnapprox' => '',
'lnE' => '≨',
'lne' => '≨',
'lneq' => '≨',
'lneqq' => '≨',
'lnsim' => '⋦',
'loang' => '〘',
'loarr' => '',
'lobrk' => '〚',
'LongLeftArrow' => '',
'Longleftarrow' => '',
'longleftarrow' => '',
'LongLeftRightArrow' => '',
'Longleftrightarrow' => '',
'longleftrightarrow' => '',
'longmapsto' => '',
'LongRightArrow' => '',
'Longrightarrow' => '',
'longrightarrow' => '',
'looparrowleft' => '↫',
'looparrowright' => '↬',
'lopar' => '',
'Lopf' => '',
'loplus' => '',
'lotimes' => '',
'lowast' => '∗',
'lowbar' => '_',
'LowerLeftArrow' => '↙',
'LowerRightArrow' => '↘',
'loz' => '◊',
'lozenge' => '◊',
'lozf' => '',
'lpar' => '(',
'lparlt' => '',
'lrarr' => '⇆',
'lrcorner' => '⌟',
'lrhar' => '⇋',
'lrhard' => '',
'lrtri' => '',
'Lscr' => '',
'lscr' => '',
'Lsh' => '↰',
'lsh' => '↰',
'lsim' => '≲',
'lsime' => '',
'lsimg' => '',
'lsqb' => '[',
'lsquo' => '‘',
'lsquor' => '‚',
'Lt' => '≪',
'lt' => '<',
'ltcc' => '',
'ltcir' => '',
'ltdot' => '⋖',
'lthree' => '⋋',
'ltimes' => '⋉',
'ltlarr' => '',
'ltquest' => '',
'ltri' => '◃',
'ltrie' => '⊴',
'ltrif' => '◂',
'ltrPar' => '',
'lurdshar' => '',
'luruhar' => '',
'lvertneqq' => '',
'lvnE' => '',
'macr' => '̄',
'male' => '♂',
'malt' => '✠',
'maltese' => '✠',
'Map' => '',
'map' => '↦',
'mapsto' => '↦',
'marker' => '',
'mcomma' => '',
'mdash' => '—',
'mDDot' => '∺',
'measuredangle' => '∡',
'Mfr' => '',
'mfr' => '',
'mho' => '℧',
'micro' => 'µ',
'mid' => '∣',
'midast' => '∗',
'midcir' => '',
'middot' => '·',
'minus' => '−',
'minusb' => '⊟',
'minusd' => '∸',
'minusdu' => '',
'MinusPlus' => '∓',
'mlcp' => '',
'mldr' => '',
'mnplus' => '∓',
'models' => '⊧',
'Mopf' => '',
'mp' => '∓',
'Mscr' => '',
'mscr' => '',
'mstpos' => '∾',
'mu' => 'μ',
'multimap' => '⊸',
'mumap' => '⊸',
'nabla' => '∇',
'nang' => '',
'nap' => '≉',
'napE' => '',
'napid' => '',
'napprox' => '≉',
'natur' => '♮',
'natural' => '♮',
'nbsp' => ' ',
'ncap' => '',
'ncong' => '≇',
'ncongdot' => '',
'ncup' => '',
'ndash' => '–',
'ne' => '≠',
'nearhk' => '',
'neArr' => '⇗',
'nearr' => '↗',
'nearrow' => '↗',
'nedot' => '',
'nequiv' => '≢',
'nesear' => '',
'NestedGreaterGreater' => '≫',
'NestedLessLess' => '≪',
'nexist' => '∄',
'nexists' => '∄',
'Nfr' => '',
'nfr' => '',
'ngE' => '≱',
'nge' => '',
'ngeq' => '',
'ngeqq' => '≱',
'ngeqslant' => '≱',
'nges' => '≱',
'nGg' => '',
'ngsim' => '≵',
'nGt' => '',
'ngt' => '≯',
'ngtr' => '≯',
'nGtv' => '',
'nhArr' => '⇎',
'nharr' => '↮',
'nhpar' => '',
'ni' => '∍',
'nis' => '',
'nisd' => '',
'niv' => '∋',
'nlArr' => '⇍',
'nlarr' => '↚',
'nldr' => '‥',
'nlE' => '≰',
'nle' => '',
'nLeftarrow' => '⇍',
'nleftarrow' => '↚',
'nLeftrightarrow' => '⇎',
'nleftrightarrow' => '↮',
'nleq' => '',
'nleqq' => '≰',
'nleqslant' => '≰',
'nles' => '≰',
'nless' => '≮',
'nLl' => '',
'nlsim' => '≴',
'nLt' => '',
'nlt' => '≮',
'nltri' => '⋪',
'nltrie' => '⋬',
'nLtv' => '',
'nmid' => '∤',
'Nopf' => 'ℕ',
'Not' => '',
'not' => '¬',
'NotCongruent' => '≢',
'NotDoubleVerticalBar' => '∦',
'NotElement' => '∉',
'NotEqual' => '≠',
'NotExists' => '∄',
'NotGreater' => '≯',
'NotGreaterEqual' => '',
'NotGreaterFullEqual' => '≰',
'NotGreaterGreater' => '',
'NotGreaterLess' => '≹',
'NotGreaterSlantEqual' => '≱',
'NotGreaterTilde' => '≵',
'notin' => '∉',
'notindot' => '',
'notinE' => '',
'notinva' => '',
'notinvb' => '',
'notinvc' => '',
'NotLeftTriangle' => '⋪',
'NotLeftTriangleEqual' => '⋬',
'NotLess' => '≮',
'NotLessEqual' => '',
'NotLessFullEqual' => '≰',
'NotLessGreater' => '≸',
'NotLessLess' => '',
'NotLessSlantEqual' => '≰',
'NotLessTilde' => '≴',
'notni' => '∌',
'notniva' => '∌',
'notnivb' => '',
'notnivc' => '',
'NotPrecedes' => '⊀',
'NotPrecedesEqual' => '',
'NotPrecedesSlantEqual' => '⋠',
'NotReverseElement' => '∌',
'NotRightTriangle' => '⋫',
'NotRightTriangleEqual' => '⋭',
'NotSquareSubsetEqual' => '⋢',
'NotSquareSupersetEqual' => '⋣',
'NotSubset' => '⊄',
'NotSucceeds' => '⊁',
'NotSucceedsEqual' => '',
'NotSucceedsSlantEqual' => '⋡',
'NotSuperset' => '⊅',
'NotTilde' => '≁',
'NotTildeEqual' => '≄',
'NotTildeFullEqual' => '≇',
'NotTildeTilde' => '≉',
'NotVerticalBar' => '∤',
'npar' => '∦',
'nparallel' => '∦',
'nparsl' => '',
'npart' => '',
'npolint' => '',
'npr' => '⊀',
'nprcue' => '⋠',
'npre' => '',
'nprec' => '⊀',
'npreceq' => '',
'nrArr' => '⇏',
'nrarr' => '↛',
'nrarrc' => '',
'nrarrw' => '',
'nRightarrow' => '⇏',
'nrightarrow' => '↛',
'nrtri' => '⋫',
'nrtrie' => '⋭',
'nsc' => '⊁',
'nsccue' => '⋡',
'nsce' => '',
'Nscr' => '',
'nscr' => '',
'nshortmid' => '',
'nshortparallel' => '',
'nsim' => '≁',
'nsime' => '≄',
'nsimeq' => '≄',
'nsmid' => '',
'nspar' => '',
'nsqsube' => '⋢',
'nsqsupe' => '⋣',
'nsub' => '⊄',
'nsubE' => '⊈',
'nsube' => '⊈',
'nsubset' => '⊄',
'nsubseteq' => '⊈',
'nsubseteqq' => '⊈',
'nsucc' => '⊁',
'nsucceq' => '',
'nsup' => '⊅',
'nsupE' => '⊉',
'nsupe' => '⊉',
'nsupset' => '⊅',
'nsupseteq' => '⊉',
'nsupseteqq' => '⊉',
'ntgl' => '≹',
'ntlg' => '≸',
'ntriangleleft' => '⋪',
'ntrianglelefteq' => '⋬',
'ntriangleright' => '⋫',
'ntrianglerighteq' => '⋭',
'ntvgl' => '≹',
'ntvlg' => '≸',
'nu' => 'ν',
'num' => '#',
'numsp' => ' ',
'nvap' => '',
'nVDash' => '⊯',
'nVdash' => '⊮',
'nvDash' => '⊭',
'nvdash' => '⊬',
'nvge' => '≱',
'nvgt' => '≯',
'nvhArr*' => '⇎',
'nvinfin' => '',
'nvlArr' => '⇍',
'nvle' => '≰',
'nvlt' => '≮',
'nvltrie' => '',
'nvrArr' => '⇏',
'nvrtrie' => '',
'nvsim' => '',
'nwarhk' => '',
'nwArr' => '⇖',
'nwarr' => '↖',
'nwarrow' => '↖',
'nwnear' => '',
'oast' => '⊛',
'ocir' => '⊚',
'odash' => '⊝',
'odiv' => '',
'odot' => '⊙',
'odsold' => '',
'ofcir' => '',
'Ofr' => '',
'ofr' => '',
'ogon' => '̨',
'ogt' => '',
'ohbar' => '',
'ohm' => 'Ω',
'oint' => '∮',
'olarr' => '↺',
'olcir' => '',
'olcross' => '',
'olt' => '',
'Omega' => 'Ω',
'omega' => 'ω',
'omicron' => 'ξ',
'omid' => '',
'ominus' => '⊖',
'Oopf' => '',
'opar' => '',
'OpenCurlyDoubleQuote' => '“',
'OpenCurlyQuote' => '‘',
'operp' => '',
'oplus' => '⊕',
'Or' => '',
'or' => '∨',
'orarr' => '↻',
'ord' => '',
'order' => 'ℴ',
'ordf' => 'ª',
'ordm' => 'º',
'origof' => '⊶',
'oror' => '',
'orslope' => '',
'orv' => '',
'oS' => '',
'Oscr' => '',
'oscr' => '',
'oslash' => '⊘',
'osol' => '⊘',
'Otimes' => '',
'otimes' => '⊗',
'otimesas' => '',
'ovbar' => '',
'OverLine' => '̅',
'par' => '∥',
'para' => '¶',
'parallel' => '∥',
'parsim' => '',
'parsl' => '',
'part' => '∂',
'PartialD' => '∂',
'percnt' => '%',
'period' => '.',
'permil' => '‰',
'perp' => '⊥',
'pertenk' => '‱',
'Pfr' => '',
'pfr' => '',
'Phi' => 'Φ',
'phi' => 'φ',
'phiv' => 'ϕ',
'phmmat' => 'ℳ',
'phone' => '☎',
'Pi' => 'Π',
'pi' => 'π',
'pitchfork' => '⋔',
'piv' => 'ϖ',
'plank' => '',
'plankv' => 'ℏ',
'plus' => '+',
'plusacir' => '',
'plusb' => '⊞',
'pluscir' => '',
'plusdo' => '∔',
'plusdu' => '',
'pluse' => '',
'PlusMinus' => '±',
'plusmn' => '±',
'plussim' => '',
'plustwo' => '',
'pm' => '±',
'pointint' => '',
'Popf' => 'ℙ',
'pound' => '£',
'Pr' => '',
'pr' => '≺',
'prap' => '≾',
'prcue' => '≼',
'prE' => '≼',
'pre' => '≼',
'prec' => '≺',
'precapprox' => '≾',
'preccurlyeq' => '≼',
'Precedes' => '≺',
'PrecedesEqual' => '≼',
'PrecedesSlantEqual' => '≼',
'PrecedesTilde' => '≾',
'preceq' => '≼',
'precnapprox' => '⋨',
'precneqq' => '',
'precnsim' => '⋨',
'precsim' => '≾',
'Prime' => '″',
'prime' => '′',
'prnap' => '⋨',
'prnE' => '',
'prnsim' => '⋨',
'profalar' => '⌮',
'profline' => '⌒',
'profsurf' => '⌓',
'prop' => '∝',
'Proportion' => '∷',
'Proportional' => '∝',
'propto' => '∝',
'prsim' => '≾',
'prurel' => '⊰',
'Pscr' => '',
'pscr' => '',
'Psi' => 'Ψ',
'psi' => 'ψ',
'puncsp' => ' ',
'Qfr' => '',
'qfr' => '',
'qint' => '',
'Qopf' => 'ℚ',
'qprime' => '',
'Qscr' => '',
'qscr' => '',
'quatint' => '',
'quest' => '?',
'questeq' => '≟',
'quot' => '"',
'rAarr' => '⇛',
'race' => '',
'radic' => '√',
'raemptyv' => '',
'Rang' => '》',
'rang' => '〉',
'rangd' => '',
'range' => '',
'rangle' => '〉',
'raquo' => '»',
'Rarr' => '↠',
'rArr' => '⇒',
'rarr' => '→',
'rarrap' => '',
'rarrbfs' => '',
'rarrc' => '',
'rarrfs' => '',
'rarrhk' => '↪',
'rarrlp' => '↬',
'rarrpl' => '',
'rarrsim' => '',
'Rarrtl' => '',
'rarrtl' => '↣',
'rarrw' => '↝',
'rAtail' => '',
'ratail' => '↣',
'ratio' => '∶',
'RBarr' => '',
'rBarr' => '',
'rbarr' => '',
'rbbrk' => '〕',
'rbrace' => '}',
'rbrack' => ']',
'rbrke' => '',
'rbrksld' => '',
'rbrkslu' => '',
'rceil' => '⌉',
'rcub' => '}',
'rdca' => '',
'rdldhar' => '',
'rdquo' => '”',
'rdquor' => '‛',
'rdsh' => '↳',
'Re' => 'ℜ',
'real' => 'ℜ',
'rect' => '',
'reg' => '¯',
'ReverseElement' => '∋',
'ReverseEquilibrium' => '⇋',
'ReverseUpEquilibrium' => '',
'rfisht' => '',
'rfloor' => '⌋',
'Rfr' => '',
'rfr' => '',
'rHar' => '',
'rhard' => '⇁',
'rharu' => '⇀',
'rharul' => '',
'rho' => 'ρ',
'rhov' => 'ϱ',
'RightAngleBracket' => '〉',
'RightArrow' => '→',
'Rightarrow' => '⇒',
'rightarrow' => '→',
'RightArrowLeftArrow' => '⇄',
'rightarrowtail' => '↣',
'RightCeiling' => '⌉',
'RightDownVector' => '⇂',
'RightFloor' => '⌋',
'rightharpoondown' => '⇁',
'rightharpoonup' => '⇀',
'rightleftarrows' => '⇄',
'rightleftharpoons' => '⇌',
'rightrightarrows' => '⇉',
'rightsquigarrow' => '↝',
'RightTee' => '⊢',
'RightTeeArrow' => '↦',
'rightthreetimes' => '⋌',
'RightTriangle' => '⊳',
'RightTriangleEqual' => '⊵',
'RightUpVector' => '↾',
'RightVector' => '⇀',
'ring' => '̊',
'risingdotseq' => '≓',
'rlarr' => '⇄',
'rlhar' => '⇌',
'rmoust' => '',
'rmoustache' => '',
'rnmid' => '',
'roang' => '〙',
'roarr' => '',
'robrk' => '〛',
'ropar' => '',
'Ropf' => 'ℝ',
'roplus' => '',
'rotimes' => '',
'rpar' => ')',
'rpargt' => '',
'rppolint' => '',
'rrarr' => '⇉',
'Rrightarrow' => '⇛',
'Rscr' => '',
'rscr' => 'ℛ',
'Rsh' => '↱',
'rsh' => '↱',
'rsqb' => ']',
'rsquo' => '’',
'rsquor' => '‟',
'rthree' => '⋌',
'rtimes' => '⋊',
'rtri' => '▹',
'rtrie' => '⊵',
'rtrif' => '▸',
'rtriltri' => '',
'ruluhar' => '',
'rx' => '℞',
'Sc' => '',
'sc' => '≻',
'scap' => '≿',
'sccue' => '≽',
'scE' => '≾',
'sce' => '≽',
'scnap' => '⋩',
'scnE' => '',
'scnsim' => '⋩',
'scpolint' => '',
'scsim' => '≿',
'sdot' => '⋅',
'sdotb' => '⊡',
'sdote' => '',
'searhk' => '',
'seArr' => '⇘',
'searr' => '↘',
'searrow' => '↘',
'sect' => '§',
'semi' => ';',
'seswar' => '',
'setminus' => '∖',
'setmn' => '∖',
'sext' => '',
'Sfr' => '',
'sfr' => '',
'sfrown' => '',
'sharp' => '♯',
'ShortLeftArrow' => '',
'shortmid' => '',
'shortparallel' => '',
'ShortRightArrow' => '',
'shy' => '­',
'Sigma' => 'Σ',
'sigma' => 'σ',
'sigmav' => 'ς',
'sim' => '∼',
'simdot' => '',
'sime' => '≃',
'simeq' => '≃',
'simg' => '',
'simgE' => '',
'siml' => '',
'simlE' => '',
'simne' => '≆',
'simplus' => '',
'simrarr' => '',
'slarr' => '',
'SmallCircle' => '∘',
'smallfrown' => '',
'smallsetminus' => '',
'smallsmile' => '',
'smashp' => '',
'smeparsl' => '',
'smid' => '',
'smile' => '⌣',
'smt' => '',
'smte' => '',
'smtes' => '',
'sol' => '/',
'solb' => '',
'solbar' => '',
'Sopf' => '',
'spades' => '♠',
'spadesuit' => '♠',
'spar' => '',
'sqcap' => '⊓',
'sqcaps' => '',
'sqcup' => '⊔',
'sqcups' => '',
'Sqrt' => '√',
'sqsub' => '⊏',
'sqsube' => '⊑',
'sqsubset' => '⊏',
'sqsubseteq' => '⊑',
'sqsup' => '⊐',
'sqsupe' => '⊒',
'sqsupset' => '⊐',
'sqsupseteq' => '⊒',
'squ' => '□',
'square' => '□',
'SquareIntersection' => '⊓',
'SquareSubset' => '⊏',
'SquareSubsetEqual' => '⊑',
'SquareSuperset' => '⊐',
'SquareSupersetEqual' => '⊒',
'SquareUnion' => '⊔',
'squarf' => '■',
'squarfb' => '',
'squarfbl' => '',
'squarfbr' => '◪',
'squarfl' => '◧',
'squarfr' => '◨',
'squarft' => '',
'squarftl' => '◩',
'squarftr' => '',
'squf' => '▪',
'srarr' => '',
'Sscr' => '',
'sscr' => '',
'ssetmn' => '',
'ssmile' => '',
'sstarf' => '⋆',
'Star' => '⋆',
'star' => '⋆',
'starf' => '★',
'straightepsilon' => '∊',
'straightphi' => 'φ',
'strns' => '',
'Sub' => '⋐',
'sub' => '⊂',
'subdot' => '',
'subE' => '⊆',
'sube' => '⊆',
'subedot' => '',
'submult' => '',
'subnE' => '⊊',
'subne' => '⊊',
'subplus' => '',
'subrarr' => '',
'Subset' => '⋐',
'subset' => '⊂',
'subseteq' => '⊆',
'subseteqq' => '⊆',
'SubsetEqual' => '⊆',
'subsetneq' => '⊊',
'subsetneqq' => '⊊',
'subsim' => '',
'subsub' => '',
'subsup' => '',
'succ' => '≻',
'succapprox' => '≿',
'succcurlyeq' => '≽',
'Succeeds' => '≻',
'SucceedsEqual' => '≽',
'SucceedsSlantEqual' => '≽',
'SucceedsTilde' => '≿',
'succeq' => '≽',
'succnapprox' => '⋩',
'succneqq' => '',
'succnsim' => '⋩',
'succsim' => '≿',
'SuchThat' => '∍',
'Sum' => '∑',
'sum' => '∑',
'sung' => '♩',
'Sup' => '⋑',
'sup' => '⊃',
'sup1' => '¹',
'sup2' => '²',
'sup3' => '³',
'supdot' => '',
'supdsub' => '',
'supE' => '⊇',
'supe' => '⊇',
'supedot' => '',
'Superset' => '⊃',
'SupersetEqual' => '⊇',
'suphsol' => '',
'suphsub' => '',
'suplarr' => '',
'supmult' => '',
'supnE' => '⊋',
'supne' => '⊋',
'supplus' => '',
'Supset' => '⋑',
'supset' => '⊃',
'supseteq' => '⊇',
'supseteqq' => '⊇',
'supsetneq' => '⊋',
'supsetneqq' => '⊋',
'supsim' => '',
'supsub' => '',
'supsup' => '',
'swarhk' => '',
'swArr' => '⇙',
'swarr' => '↙',
'swarrow' => '↙',
'swnwar' => '',
'target' => '⌖',
'tau' => 'τ',
'tbrk' => '',
'tdot' => '⃛',
'telrec' => '⌕',
'Tfr' => '',
'tfr' => '',
'there4' => '∴',
'Therefore' => '∴',
'therefore' => '∴',
'Theta' => 'Θ',
'theta' => 'θ',
'thetav' => 'ϑ',
'thickapprox' => '',
'thicksim' => '',
'thinsp' => ' ',
'thkap' => '',
'thksim' => '',
'Tilde' => '∼',
'tilde' => '̃',
'TildeEqual' => '≃',
'TildeFullEqual' => '≅',
'TildeTilde' => '≈',
'times' => '×',
'timesb' => '⊠',
'timesbar' => '',
'timesd' => '',
'tint' => '∭',
'toea' => '',
'top' => '⊤',
'topbot' => '⌶',
'topcir' => '',
'Topf' => '',
'topfork' => '',
'tosa' => '',
'tprime' => '‴',
'trade' => '™',
'triangle' => '▵',
'triangledown' => '▿',
'triangleleft' => '◃',
'trianglelefteq' => '⊴',
'triangleq' => '≜',
'triangleright' => '▹',
'trianglerighteq' => '⊵',
'tridot' => '◬',
'trie' => '≜',
'triminus' => '',
'TripleDot' => '⃛',
'triplus' => '',
'trisb' => '',
'tritime' => '',
'trpezium' => '',
'Tscr' => '',
'tscr' => '',
'twixt' => '≬',
'twoheadleftarrow' => '↞',
'twoheadrightarrow' => '↠',
'Uarr' => '↟',
'uArr' => '⇑',
'uarr' => '↑',
'Uarrocir' => '',
'udarr' => '⇅',
'udhar' => '',
'ufisht' => '',
'Ufr' => '',
'ufr' => '',
'uHar' => '',
'uharl' => '↿',
'uharr' => '↾',
'uhblk' => '▀',
'ulcorn' => '⌜',
'ulcorner' => '⌜',
'ulcrop' => '⌏',
'ultri' => '',
'uml' => '̈',
'UnderLine' => '̲',
'Union' => '⋃',
'UnionPlus' => '⊎',
'Uopf' => '',
'UpArrow' => '↑',
'Uparrow' => '⇑',
'uparrow' => '↑',
'UpArrowDownArrow' => '⇅',
'UpDownArrow' => '↕',
'Updownarrow' => '⇕',
'updownarrow' => '↕',
'UpEquilibrium' => '',
'upharpoonleft' => '↿',
'upharpoonright' => '↾',
'uplus' => '⊎',
'UpperLeftArrow' => '↖',
'UpperRightArrow' => '↗',
'Upsi' => 'ϒ',
'upsi' => 'υ',
'Upsilon' => 'ϒ',
'upsilon' => 'υ',
'UpTee' => '⊥',
'upuparrows' => '⇈',
'urcorn' => '⌝',
'urcorner' => '⌝',
'urcrop' => '⌎',
'urtri' => '',
'Uscr' => '',
'uscr' => '',
'utdot' => '⋰',
'utri' => '▵',
'utrif' => '▴',
'uuarr' => '⇈',
'uwangle' => '',
'vangrt' => '⊾',
'varepsilon' => 'ε',
'varkappa' => 'ϰ',
'varnothing' => '∅',
'varphi' => 'ϕ',
'varpi' => 'ϖ',
'varpropto' => '∝',
'vArr' => '⇕',
'varr' => '↕',
'varrho' => 'ϱ',
'varsigma' => 'ς',
'varsubsetneq' => '',
'varsubsetneqq' => '',
'varsupsetneq' => '',
'varsupsetneqq' => '',
'vartheta' => 'ϑ',
'vartriangleleft' => '⊲',
'vartriangleright' => '⊳',
'Vbar' => '',
'vBar' => '',
'vBarv' => '',
'VDash' => '⊫',
'Vdash' => '⊩',
'vDash' => '⊨',
'vdash' => '⊢',
'Vdashl' => '',
'Vee' => '⋁',
'vee' => '∨',
'veebar' => '⊻',
'veeeq' => '≚',
'vellip' => '⋮',
'Verbar' => '‖',
'verbar' => '|',
'Vert' => '‖',
'vert' => '|',
'VerticalBar' => '∣',
'VerticalTilde' => '≀',
'Vfr' => '',
'vfr' => '',
'vltri' => '⊲',
'vnsub' => '⊄',
'vnsup' => '⊅',
'Vopf' => '',
'vprop' => '∝',
'vrtri' => '⊳',
'Vscr' => '',
'vscr' => '',
'vsubnE' => '',
'vsubne' => '',
'vsupnE' => '',
'vsupne' => '',
'Vvdash' => '⊪',
'vzigzag' => '',
'wedbar' => '',
'Wedge' => '⋀',
'wedge' => '∧',
'wedgeq' => '≙',
'weierp' => '℘',
'Wfr' => '',
'wfr' => '',
'Wopf' => '',
'wp' => '℘',
'wr' => '≀',
'wreath' => '≀',
'Wscr' => '',
'wscr' => '',
'xcap' => '⋂',
'xcirc' => '○',
'xcup' => '⋃',
'xdtri' => '▽',
'Xfr' => '',
'xfr' => '',
'xhArr' => '',
'xharr' => '',
'Xi' => 'Ξ',
'xi' => 'ξ',
'xlArr' => '',
'xlarr' => '',
'xmap' => '',
'xnis' => '',
'xodot' => '⊙',
'Xopf' => '',
'xoplus' => '⊕',
'xotime' => '⊗',
'xrArr' => '',
'xrarr' => '',
'Xscr' => '',
'xscr' => '',
'xsqcup' => '⊔',
'xuplus' => '⊎',
'xutri' => '△',
'xvee' => '⋁',
'xwedge' => '⋀',
'yen' => '¥',
'Yfr' => '',
'yfr' => '',
'Yopf' => '',
'Yscr' => '',
'yscr' => '',
'zeta' => 'ζ',
'Zfr' => 'ℤ',
'zfr' => '',
'zigrarr' => '',
'Zopf' => '',
'Zscr' => '',
'zscr' => ''
);
return $charset;
// cas particuliers pour la translitteration
case 'translit':
$GLOBALS['CHARSET'][$charset] = array (
// latin
128=>'euro', 131=>'f', 140=>'OE', 147=>'\'\'', 148=>'\'\'', 153=>'TM', 156=>'oe', 159=>'Y', 160=>' ',
161=>'!', 162=>'c', 163=>'L', 164=>'O', 165=>'yen',166=>'|',
167=>'p',169=>'(c)', 171=>'<<',172=>'-',173=>'-',174=>'(R)',
176=>'o',177=>'+-',181=>'mu',182=>'p',183=>'.',187=>'>>', 192=>'A',
193=>'A', 194=>'A', 195=>'A', 196=>'A', 197=>'A', 198=>'AE', 199=>'C',
200=>'E', 201=>'E', 202=>'E', 203=>'E', 204=>'I', 205=>'I', 206=>'I',
207=>'I', 209=>'N', 210=>'O', 211=>'O', 212=>'O', 213=>'O', 214=>'O',
216=>'O', 217=>'U', 218=>'U', 219=>'U', 220=>'U', 223=>'ss', 224=>'a',
225=>'a', 226=>'a', 227=>'a', 228=>'a', 229=>'a', 230=>'ae', 231=>'c',
232=>'e', 233=>'e', 234=>'e', 235=>'e', 236=>'i', 237=>'i', 238=>'i',
239=>'i', 241=>'n', 242=>'o', 243=>'o', 244=>'o', 245=>'o', 246=>'o',
248=>'o', 249=>'u', 250=>'u', 251=>'u', 252=>'u', 255=>'y',
// turc
286=>'G', 287=>'g', 304=>'I', 305=>'i', 350=>'S', 351=>'s',
// esperanto
264 => 'Cx',265 => 'cx',
284 => 'Gx',285 => 'gx',
292 => 'Hx',293 => 'hx',
308 => 'Jx',309 => 'jx',
348 => 'Sx',349 => 'sx',
364 => 'Ux',365 => 'ux',
// cyrillique
1026=>'D%', 1027=>'G%', 8218=>'\'', 1107=>'g%', 8222=>'"', 8230=>'...',
8224=>'/-', 8225=>'/=', 8364=>'EUR', 8240=>'0/00', 1033=>'LJ',
8249=>'<', 1034=>'NJ', 1036=>'KJ', 1035=>'Ts', 1039=>'DZ', 1106=>'d%',
8216=>'`', 8217=>'\'', 8220=>'"', 8221=>'"', 8226=>' o ', 8211=>'-',
8212=>'--', 8212=>'~', 8482=>'(TM)', 1113=>'lj', 8250=>'>', 1114=>'nj',
1116=>'kj', 1115=>'ts', 1119=>'dz', 1038=>'V%', 1118=>'v%', 1032=>'J%',
1168=>'G3', 1025=>'IO', 1028=>'IE', 1031=>'YI', 1030=>'II',
1110=>'ii', 1169=>'g3', 1105=>'io', 8470=>'No.', 1108=>'ie',
1112=>'j%', 1029=>'DS', 1109=>'ds', 1111=>'yi', 1040=>'A', 1041=>'B',
1042=>'V', 1043=>'G', 1044=>'D', 1045=>'E', 1046=>'ZH', 1047=>'Z',
1048=>'I', 1049=>'J', 1050=>'K', 1051=>'L', 1052=>'M', 1053=>'N',
1054=>'O', 1055=>'P', 1056=>'R', 1057=>'S', 1058=>'T', 1059=>'U',
1060=>'F', 1061=>'H', 1062=>'C', 1063=>'CH', 1064=>'SH', 1065=>'SCH',
1066=>'"', 1067=>'Y', 1068=>'\'', 1069=>'`E', 1070=>'YU', 1071=>'YA',
1072=>'a', 1073=>'b', 1074=>'v', 1075=>'g', 1076=>'d', 1077=>'e',
1078=>'zh', 1079=>'z', 1080=>'i', 1081=>'j', 1082=>'k', 1083=>'l',
1084=>'m', 1085=>'n', 1086=>'o', 1087=>'p', 1088=>'r', 1089=>'s',
1090=>'t', 1091=>'u', 1092=>'f', 1093=>'h', 1094=>'c', 1095=>'ch',
1096=>'sh', 1097=>'sch', 1098=>'"', 1099=>'y', 1100=>'\'', 1101=>'`e',
1102=>'yu', 1103=>'ya',
// vietnamien en translitteration de base
7843=>"a",7841=>"a",7845=>"a",7847=>"a",7849=>"a",7851=>"a",7853=>"a",
7855=>"a",7857=>"a",7859=>"a",7861=>"a",7863=>"a",
7842=>"A",7840=>"A",7844=>"A",7846=>"A",7848=>"A",
7850=>"A",7852=>"A",7854=>"A",7856=>"A",7858=>"A",7860=>"A",
7862=>"A",7867=>"e",7869=>"e",7865=>"e",
7871=>"e",7873=>"e",7875=>"e",7877=>"e",7879=>"e",
7866=>"E",7868=>"E",7864=>"E",7870=>"E",7872=>"E",7874=>"E",
7876=>"E",7878=>"E",7881=>"i",7883=>"i",
7880=>"I",7882=>"I",
7887=>"o",7885=>"o",7889=>"o",7891=>"o",7893=>"o",
7895=>"o",7897=>"o",417=>"o",7899=>"o",7901=>"o",7903=>"o",7905=>"o",
7907=>"o",7886=>"O",7884=>"O",
7888=>"O",7890=>"O",7892=>"O",7894=>"O",7896=>"O",416=>"O",7898=>"O",
7900=>"O",7902=>"O",7904=>"O",7906=>"O",7911=>"u",
361=>"u",7909=>"u",432=>"u",7913=>"u",7915=>"u",7917=>"u",7919=>"u",
7921=>"u",7910=>"U",360=>"U",7908=>"U",431=>"U",
7912=>"U",7914=>"U",7916=>"U",7918=>"U",7920=>"U",253=>"y",7923=>"y",
7927=>"y",7929=>"y",7925=>"y",221=>"Y",7922=>"Y",7926=>"Y",7928=>"Y",
7924=>"Y",273=>"d"
);
return $charset;
// translitteration complexe
case 'translitcomplexe':
load_charset('translit');
$trans = $GLOBALS['CHARSET']['translit'];
$translit_c = array (
// vietnamien
225=>"a'", 224=>"a`",7843=>"a?",227=>"a~",7841=>"a.",
226=>"a^",7845=>"a^'",7847=>"a^`",7849=>"a^?",7851=>"a^~",7853=>"a^.",259=>"a(",
7855=>"a('",7857=>"a(`",7859=>"a(?",7861=>"a(~",7863=>"a(.",193=>"A'",192=>"A`",
7842=>"A?",195=>"A~",7840=>"A.",194=>"A^",7844=>"A^'",7846=>"A^`",7848=>"A^?",
7850=>"A^~",7852=>"A^.",258=>"A(",7854=>"A('",7856=>"A(`",7858=>"A(?",7860=>"A(~",
7862=>"A(.",233=>"e'",232=>"e`",7867=>"e?",7869=>"e~",7865=>"e.",234=>"e^",
7871=>"e^'",7873=>"e^`",7875=>"e^?",7877=>"e^~",7879=>"e^.",201=>"E'",200=>"E`",
7866=>"E?",7868=>"E~",7864=>"E.",202=>"E^",7870=>"E^'",7872=>"E^`",7874=>"E^?",
7876=>"E^~",7878=>"E^.",237=>"i'",236=>"i`",7881=>"i?",297=>"i~",7883=>"i.",
205=>"I'",204=>"I`",7880=>"I?",296=>"I~",7882=>"I.",243=>"o'",242=>"o`",
7887=>"o?",245=>"o~",7885=>"o.",244=>"o^",7889=>"o^'",7891=>"o^`",7893=>"o^?",
7895=>"o^~",7897=>"o^.",417=>"o+",7899=>"o+'",7901=>"o+`",7903=>"o+?",7905=>"o+~",
7907=>"o+.",211=>"O'",210=>"O`",7886=>"O?",213=>"O~",7884=>"O.",212=>"O^",
7888=>"O^'",7890=>"O^`",7892=>"O^?",7894=>"O^~",7896=>"O^.",416=>"O+",7898=>"O+'",
7900=>"O+`",7902=>"O+?",7904=>"O+~",7906=>"O+.",250=>"u'",249=>"u`",7911=>"u?",
361=>"u~",7909=>"u.",432=>"u+",7913=>"u+'",7915=>"u+`",7917=>"u+?",7919=>"u+~",
7921=>"u+.",218=>"U'",217=>"U`",7910=>"U?",360=>"U~",7908=>"U.",431=>"U+",
7912=>"U+'",7914=>"U+`",7916=>"U+?",7918=>"U+~",7920=>"U+.",253=>"y'",7923=>"y`",
7927=>"y?",7929=>"y~",7925=>"y.",221=>"Y'",7922=>"Y`",7926=>"Y?",7928=>"Y~",
7924=>"Y.",273=>"d-",208=>"D-",
// allemand
228=>'ae',246=>'oe',252=>'ue',196=>'Ae',214=>'Oe',220=>'Ue'
);
foreach($translit_c as $u=>$t)
$trans[$u] = $t;
$GLOBALS['CHARSET'][$charset] = $trans;
return $charset;
default:
$GLOBALS['CHARSET'][$charset] = array();
return $charset;
}
}
//
// Check whether the mb_string extension can be used
//
// Returns true when every mb_string function listed below exists and
// mb_detect_order() accepts the site charset (meta 'charset').
// The verdict is computed once per request and cached.
//
function init_mb_string() {
    // Cached verdict: not set = not tested yet, 1 = usable, -1 = unusable
    static $etat;

    if (!$etat) {
        // All of these must be present (mb_string functions, php >= 4.0.6)
        $requises = array(
            'mb_internal_encoding',
            'mb_detect_order',
            'mb_substr',
            'mb_strlen',
            'mb_encode_mimeheader',
            'mb_encode_numericentity',
            'mb_decode_numericentity'
        );
        $complet = true;
        foreach ($requises as $fonction) {
            if (!function_exists($fonction)) {
                $complet = false;
                break;
            }
        }

        // ... and the site charset must be known to mb_string
        if ($complet AND mb_detect_order(lire_meta('charset'))) {
            mb_internal_encoding('utf-8');
            $etat = 1;
        } else {
            $etat = -1;
        }
    }

    return ($etat == 1);
}
// Detect buggy versions of iconv
//
// Returns true when iconv() exists and a utf-8 -> utf-32 round trip of a
// plain ASCII sample, decoded by utf_32_to_unicode(), gives the sample back.
// The verdict is cached after the first call.
function test_iconv() {
    // Cached verdict: not set = not tested yet, 1 = ok, -1 = unusable
    static $etat;

    if (!$etat) {
        $etat = -1;
        if (function_exists('iconv')) {
            $temoin = 'chaine de test';
            // @: a failing iconv must not emit anything, it just fails the test
            if (utf_32_to_unicode(@iconv('utf-8', 'utf-32', $temoin)) == $temoin)
                $etat = 1;
        }
    }

    return ($etat == 1);
}
// Check that UTF-8 support works in PCRE
// (workaround for a Debian Woody bug)
//
// Returns true when the "u" modifier lets \w match accented letters.
// The verdict is cached after the first call.
function test_pcre_unicode() {
    // Cached verdict: 0 = not tested yet, 1 = ok, -1 = unusable
    static $etat = 0;

    if (!$etat) {
        // The sample is " ete " with both e's accented, as UTF-8 bytes
        $e_aigu = chr(195).chr(169);
        $essai = " ".$e_aigu."t".$e_aigu." ";
        $etat = preg_match(',\W\w\w\w\W,u', $essai) ? 1 : -1;
    }

    return $etat == 1;
}
// Alphanumeric ranges (incomplete...)
//
// Returns the inside of a PCRE character class matching "letters".
// When PCRE handles UTF-8 the class is widened with Unicode ranges
// (cf. http://www.unicode.org/charts/); otherwise it is only \w.
function pcre_lettres_unicode() {
    static $plage;

    if ($plage)
        return $plage;

    // cheap fallback, also the iso-latin part of the full class
    $plage = '\w';
    if (test_pcre_unicode()) {
        $plage .= '\x{100}-\x{24f}';   // extended european
        $plage .= '\x{300}-\x{1cff}';  // lots of other scripts
    }

    return $plage;
}
// Punctuation range from 0x2000 to 0x206F
// (i.e. UTF-8 bytes 226-128-128 through 226-129-175)
//
// Returns a byte-level PCRE fragment; it does not need the "u" modifier.
function plage_punct_unicode() {
    $premier_octet = '\xE2';
    $octets_suivants = '(\x80[\x80-\xBF]|\x81[\x80-\xAF])';

    return $premier_octet . $octets_suivants;
}
//
// Convert named HTML entities (&eacute;) into numeric ones (&#123;)
//
// Every "&name;" whose name is a key of the 'html' charset table is
// replaced by the value stored for it; the lookup table is built on the
// first call and kept for the following ones.
//
function html2unicode($texte) {
    static $table;

    if (!$table) {
        load_charset('html');
        foreach ($GLOBALS['CHARSET']['html'] as $nom => $valeur)
            $table['&' . $nom . ';'] = $valeur;
    }

    return strtr($texte, $table);
}
//
// Convert named MathML entities (&name;) using the 'mathml' charset table
//
// Same mechanism as for HTML entities: the "&name;" => value lookup table
// is built from the 'mathml' table on the first call, then reused.
//
function mathml2unicode($texte) {
    static $table;

    if ($table)
        return strtr($texte, $table);

    load_charset('mathml');
    foreach ($GLOBALS['CHARSET']['mathml'] as $nom => $valeur) {
        $table['&' . $nom . ';'] = $valeur;
    }

    return strtr($texte, $table);
}
//
// Convert a string from $charset into numeric unicode entities (&#123;)
//
// $texte   : the string to convert
// $charset : source charset; 'AUTO' means the site charset (meta 'charset')
// $forcer  : only used for iso-8859-1; when false, just the stray CP-1252
//            characters are converted and the rest of the text is untouched
// Returns the converted string, or $texte unchanged (after logging) when
// none of the available methods knows the charset.
//
function charset2unicode($texte, $charset='AUTO', $forcer = false) {
    // Per-charset cache of byte => '&#NNN;' translation tables
    static $trans;

    if ($charset == 'AUTO')
        $charset = lire_meta('charset');

    switch ($charset) {
    case 'utf-8':
        return utf_8_to_unicode($texte);

    case 'iso-8859-1':
        // Fix non-conforming characters coming from Windows (CP-1252).
        // The replacements are numeric entities, so the result does not
        // depend on the encoding this source file happens to be saved in.
        $faux_latin = array(
            chr(138) => '&#352;', // Scaron
            chr(140) => '&#338;', // OElig
            chr(142) => '&#381;', // Zcaron
            chr(154) => '&#353;', // scaron
            chr(156) => '&#339;', // oelig
            chr(158) => '&#382;'  // zcaron
        );
        $texte = strtr($texte, $faux_latin);
        // The full conversion stays optional as long as spip v<1.5 sites
        // are around, so that the |entites_unicode filter gives readable
        // backends on those sites.
        if (!$forcer) return $texte;
        // no break: a forced conversion goes on with the generic code

    default:
        // Is mbstring available?
        if (init_mb_string()) {
            $order = mb_detect_order();
            $s = '';
            // Does mb_string know $charset?
            if ($order AND mb_detect_order($charset))
                $s = mb_convert_encoding($texte, 'utf-8', $charset);
            // Put the detection order back before leaving, on every path
            if ($order)
                mb_detect_order($order);
            if ($s && $s != $texte) return utf_8_to_unicode($s);
        }

        // Otherwise, maybe we have our own table for this charset?
        if (!isset($trans[$charset])) {
            global $CHARSET;
            load_charset($charset);
            // Always leave an array behind so that count() below is safe
            $trans[$charset] = array();
            foreach ($CHARSET[$charset] as $key => $val) {
                $trans[$charset][chr($key)] = '&#'.$val.';';
            }
        }
        if (count($trans[$charset]))
            return strtr($texte, $trans[$charset]);

        // Otherwise ask iconv (even though it stops at the first character
        // that does not belong to the charset; that is mostly a problem
        // with utf-8, which is handled above)
        if (test_iconv()) {
            $s = iconv($charset, 'utf-32le', $texte);
            if ($s) return utf_32_to_unicode($s);
        }

        // At worst, do nothing
        spip_log("erreur charset $charset non supporte");
        return $texte;
    }
}
//
// Convert numeric unicode entities (&#123; or &#x7b;) into $charset
//
// $texte   : text containing numeric entities
// $charset : target charset; 'AUTO' means the site charset (meta 'charset')
// Entities below 128 become the matching ASCII character; others are
// converted only when the charset table has a byte for them, and are left
// as entities otherwise.
//
function unicode2charset($texte, $charset='AUTO') {
    // Per-charset cache of reversed tables (unicode code => byte value)
    static $CHARSET_REVERSE;

    if ($charset == 'AUTO')
        $charset = lire_meta('charset');

    if ($charset == 'utf-8')
        return unicode_to_utf_8($texte);

    $charset = load_charset($charset);
    if (!isset($CHARSET_REVERSE[$charset])
    OR !is_array($CHARSET_REVERSE[$charset])) {
        $CHARSET_REVERSE[$charset] = array_flip($GLOBALS['CHARSET'][$charset]);
    }
    $inverse = $CHARSET_REVERSE[$charset];

    // Build the replacement table
    $trans = array();

    // 1. Decimal entities (such as "&#123;")
    if (preg_match_all(',&#([0-9]+);,', $texte, $regs, PREG_PATTERN_ORDER)) {
        // array_flip() only serves to visit each distinct entity once
        foreach (array_flip($regs[1]) as $e => $v) {
            $code = intval($e);
            if ($code < 128)
                $s = $code;
            else if (is_array($inverse) AND isset($inverse[$code]))
                $s = $inverse[$code];
            else
                $s = 0;
            // code 0 and unknown characters are left as entities
            if ($s)
                $trans['&#'.$e.';'] = chr($s);
        }
    }

    // 2. Hexadecimal entities (such as "&#x7b;")
    if (preg_match_all(',&#x([0-9a-fA-F]+);,', $texte, $regs, PREG_PATTERN_ORDER)) {
        foreach (array_flip($regs[1]) as $e => $v) {
            // all-digit keys come back from array_flip() as integers
            $h = hexdec((string) $e);
            if ($h < 128)
                $s = $h;
            // hexdec() gives a float when the value overflows an integer:
            // such a code cannot be in the table
            else if (is_int($h) AND is_array($inverse) AND isset($inverse[$h]))
                $s = $inverse[$h];
            else
                $s = 0;
            if ($s)
                $trans['&#x'.$e.';'] = chr($s);
        }
    }

    return strtr($texte, $trans);
}
// Import a text from an external charset into the site charset
// (characters that cannot be resolved stay as numeric entities, &#123;)
function importer_charset($texte, $charset = 'AUTO') {
    // Step 1: source charset => numeric entities (conversion forced)
    $en_entites = charset2unicode($texte, $charset, true);

    // Step 2: numeric entities => site charset
    return unicode2charset($en_entites);
}
// UTF-8
//
// Convert a UTF-8 string into numeric unicode entities (&#123;).
// $source : the UTF-8 byte string
// Returns the string with every multi-byte sequence replaced by its
// decimal entity.
function utf_8_to_unicode($source) {
    // mb_string: fast method
    if (init_mb_string()) {
        $convmap = array(0x7F, 0xFFFFFF, 0x0, 0xFFFFFF);
        return mb_encode_numericentity($source, $convmap, 'UTF-8');
    }

    // Otherwise decode step by step (after php.net, by Ronen)

    // Value to subtract from the first byte of a sequence, indexed by the
    // number of bytes the sequence is made of
    $decrement = array(1 => 0, 2 => 192, 3 => 224, 4 => 240);

    $pos = 0;
    $len = strlen($source);
    $encodedString = '';

    while ($pos < $len) {
        $lead = ord($source[$pos]);

        // The first byte tells how many bytes encode the character
        if ($lead >= 240)
            $expected = 4;
        else if ($lead >= 224)
            $expected = 3;
        else if ($lead >= 192)
            $expected = 2;
        else {
            // Any byte below 192 is copied through unchanged
            $encodedString .= $source[$pos];
            $pos++;
            continue;
        }

        $sequence = substr($source, $pos, $expected);
        $pos += $expected;

        // A sequence cut short by the end of the string is decoded with
        // the rules of its actual length
        $seqLen = strlen($sequence);

        $decimalCode = 0;
        for ($i = 0; $i < $seqLen; $i++) {
            $octet = ord($sequence[$i]);
            // payload bits: the first byte loses its length marker,
            // the following bytes lose their 10xxxxxx marker
            $charNum = $octet - (($i == 0) ? $decrement[$seqLen] : 128);
            // each following byte carries 6 bits
            $decimalCode += ($charNum << (6 * ($seqLen - 1 - $i)));
        }

        $encodedString .= '&#' . $decimalCode . ';';
    }

    return $encodedString;
}
// UTF-32 is only needed when going through iconv, i.e. when mb_string
// is missing or does not know our charset; it is optimised with
// mb_string all the same
// => all of this becomes obsolete once mb_string is guaranteed
//
// Convert a UTF-32 (little-endian) string into numeric entities (&#123;).
function utf_32_to_unicode($source) {
    // mb_string: fast method
    if (init_mb_string()) {
        $convmap = array(0x7F, 0xFFFFFF, 0x0, 0xFFFFFF);
        $encode = mb_encode_numericentity($source, $convmap, 'UTF-32LE');
        // the result is still UTF-32LE: drop the padding NUL bytes
        return str_replace(chr(0), '', $encode);
    }

    // Otherwise the slow method: unpack 32-bit words, 1024 bytes at a time
    $texte = '';
    while ($source) {
        $bloc = substr($source, 0, 1024);
        $source = substr($source, 1024);

        foreach (unpack("V*", $bloc) as $code) {
            if ($code < 128) {
                $texte .= chr($code);
                continue;
            }
            // skip the BOM - http://www.unicode.org/faq/utf_bom.html
            if ($code == 65279)
                continue;
            $texte .= '&#'.$code.';';
        }
    }

    return $texte;
}
// This block comes from php.net, author Ronen
//
// Encode one unicode code point as a UTF-8 byte string.
// $num : the code point (integer or numeric string)
// Returns 1 to 4 bytes, or '' for a code point of 1114112 (0x110000) or more.
function caractere_utf_8($num) {
    $num = intval($num);

    if ($num < 128)
        return chr($num);

    if ($num < 2048)
        return chr(($num >> 6) + 192)
            . chr(($num & 63) + 128);

    if ($num < 65536)
        return chr(($num >> 12) + 224)
            . chr((($num >> 6) & 63) + 128)
            . chr(($num & 63) + 128);

    // Every shift and mask is parenthesised: "+" binds tighter than ">>"
    // and "&", so "$num>>18+240" would shift by 258 bits
    if ($num < 1114112)
        return chr(($num >> 18) + 240)
            . chr((($num >> 12) & 63) + 128)
            . chr((($num >> 6) & 63) + 128)
            . chr(($num & 63) + 128);

    return '';
}
// Convert decimal numeric entities (&#233;, leading zeros allowed)
// into UTF-8 characters
function unicode_to_utf_8($texte) {
    // Every replacement is shorter than the entity it replaces, so the
    // loop always terminates. Entities that caractere_utf_8() cannot
    // encode are replaced by whatever it returns for them.
    while (preg_match(',&#0*([0-9]+);,', $texte, $regs)) {
        $s = caractere_utf_8($regs[1]);
        $texte = str_replace($regs[0], $s, $texte);
    }
    return $texte;
}
// Convert decimal numeric entities (&#264;) into JavaScript escapes (\u0108)
//
// Leading zeros in the entity are accepted. Code points above 0xFFFF are
// written as a surrogate pair, since a \u escape only holds 4 hex digits.
function unicode_to_javascript($texte) {
    if (!preg_match_all(',&#0*([0-9]+);,', $texte, $regs, PREG_SET_ORDER))
        return $texte;

    // entity as written in the text => its escape sequence
    $trans = array();
    foreach ($regs as $reg) {
        $num = intval($reg[1]);
        if ($num > 0xFFFF AND $num <= 0x10FFFF) {
            $num -= 0x10000;
            $s = sprintf('\u%04x\u%04x',
                0xD800 | ($num >> 10),
                0xDC00 | ($num & 0x3FF));
        } else {
            $s = '\u'.sprintf("%04x", $num);
        }
        $trans[$reg[0]] = $s;
    }

    return strtr($texte, $trans);
}
// Convert the %uXXXX sequences (sent by javascript) into decimal numeric
// entities. Only upper-case hexadecimal digits are recognised.
function javascript_to_unicode ($texte) {
    // PCRE instead of ereg(): the ereg functions are gone since PHP 7
    while (preg_match(',%u([0-9A-F]{4}),', $texte, $regs))
        $texte = str_replace($regs[0], "&#".hexdec($regs[1]).";", $texte);
    return $texte;
}
// Convert the %E9 sequences (sent by the browser) into a (binary) string
// in the site charset. Only upper-case hexadecimal digits are recognised.
function javascript_to_binary ($texte) {
    // PCRE instead of ereg(): the ereg functions are gone since PHP 7.
    // The text is scanned again after each replacement, so a sequence
    // produced by a previous one (%2541 => %41) is decoded as well.
    while (preg_match(',%([0-9A-F]{2}),', $texte, $regs))
        $texte = str_replace($regs[0], chr(hexdec($regs[1])), $texte);
    return $texte;
}
//
// Transliteration charset => ascii (for indexing)
// Beware: characters that are not recognised are returned in utf-8
//
// $texte    : the text to transliterate
// $charset  : its charset; 'AUTO' means the site charset (meta 'charset')
// $complexe : suffix of the table to use: '' for 'translit',
//             'complexe' for 'translitcomplexe'
//
function translitteration($texte, $charset='AUTO', $complexe='') {
    // One utf-8 => ascii lookup table per value of $complexe
    static $trans;

    if ($charset == 'AUTO')
        $charset = lire_meta('charset');
    $table_translit ='translit'.$complexe;

    // 1. Turn the charset and the &eacute;-style entities into utf-8
    $en_entites = charset2unicode($texte, $charset, true);
    $en_entites = html2unicode($en_entites);
    $texte = unicode_to_utf_8($en_entites);

    // 2. Transliterate with the predefined table, re-keyed by utf-8 character
    if (!$trans[$complexe]) {
        load_charset($table_translit);
        foreach ($GLOBALS['CHARSET'][$table_translit] as $code => $ascii) {
            $trans[$complexe][caractere_utf_8($code)] = $ascii;
        }
    }

    return strtr($texte, $trans[$complexe]);
}
// Transliterate a text in the site charset with the 'translitcomplexe' table
function translitteration_complexe($texte) {
    $variante = 'complexe';
    return translitteration($texte, 'AUTO', $variante);
}
// Recognise the utf-8 BOM (0xEFBBBF)
//
// Returns true when $texte starts with the three BOM bytes.
function bom_utf8($texte) {
    $bom = chr(0xEF).chr(0xBB).chr(0xBF);
    $debut = substr($texte, 0, 3);

    return ($debut == $bom);
}
// Transcode a page (probably fetched from the web), trying every
// possible way to guess its charset (HTTP headers included)
//
// $texte   : the raw page
// $headers : the HTTP response headers, if available
// Returns the page converted by importer_charset(); iso-8859-1 is
// assumed when no charset can be detected.
function transcoder_page($texte, $headers='') {
    // Nothing detected yet (also keeps the tests below off an unset variable)
    $charset = '';

    // Recognise the utf-8 BOM (0xEFBBBF)
    if (bom_utf8($texte))
        $charset = 'utf-8';
    // charset given by the content (xml)
    else if (preg_match(',<[?]xml[^>]*encoding[^>]*=[^>]*([-_a-z0-9]+?),Uims', $texte, $regs))
        $charset = trim(strtolower($regs[1]));
    // charset given by the content (html)
    else if (preg_match(',<(meta|html|body)[^>]*charset[^>]*=[^>]*([-_a-z0-9]+?),Uims', $texte, $regs))
        $charset = trim(strtolower($regs[2]));
    // charset of the http response
    else if (preg_match(',charset=([-_a-z0-9]+),i', $headers, $regs))
        $charset = trim(strtolower($regs[1]));

    // normalise the various names of japanese shift-jis
    if (preg_match(',^(x|shift)[_-]s?jis$,i', $charset))
        $charset = 'shift-jis';

    if ($charset) {
        spip_log("charset source detecte : $charset");
    } else {
        // default value
        $charset = 'iso-8859-1';
        spip_log("pas de charset detecte, on suppose : $charset");
    }

    return importer_charset($texte, $charset);
}
// Initialisation: start with an empty registry of charset tables;
// load_charset() stores each table in it the first time it is requested
$GLOBALS['CHARSET'] = array();
?>