*@copyright       Tela-Botanica 2000-2004
*@version         $Revision: 1.5 $
// +------------------------------------------------------------------------------------------------------+
*/

// +------------------------------------------------------------------------------------------------------+
// |                                            PROGRAM HEADER                                            |
// +------------------------------------------------------------------------------------------------------+
// $Id: ezmlm-parser.php,v 1.5 2008-11-19 09:28:46 aperonnet Exp $
//
// NOTE(review): the original line breaks of this header were lost; confirm that the
// require_once of ezmlm.php below was active and not a commented-out line.
require_once("ezmlm.php");
require_once("Mail/mimeDecode.php") ;

// CLASS: ezmlm-parser
/**
 * Parser for messages stored in an ezmlm archive.
 *
 * An instance holds one parsed message: its headers, its decoded body and, for
 * multipart messages, one ezmlm_parser instance per part.
 *
 * The class relies on $this->listdir and $this->getNumArchive(), which are not
 * defined here (presumably inherited from ezmlm_php - confirm against ezmlm.php).
 */
class ezmlm_parser extends ezmlm_php {
    var $headers;       // the full untouched headers of the message
    var $body;          // the full untouched (but decoded) body (this is not $this->parts[0]->body)
    var $parts;         // all the parts, if it is a multipart message. each part is an ezmlm_parser object...

    // Here are the most accessed headers, everything else can be
    // accessed from the $this->headers array.
    var $to;            // To:
    var $from;          // From:
    var $date;          // Date:
    var $subject;       // Subject:
    var $replyto;       // Reply-To:
    var $contenttype;   // Content-Type:

    var $multipart;     // TRUE if the message is a multipart message

    var $msgfile;       // if parsed from a file, this is the filename...

    // functions

    /**
     * Returns the most recent messages of the ezmlm list, newest first.
     *
     * The messages are read from the "index" file of the latest archive
     * sub-directory (archive number / 100). When that index holds fewer than
     * $show messages and an earlier sub-directory exists, the list is completed
     * from the index of the previous sub-directory.
     *
     * The result is keyed by message number; every entry is:
     *   [0] => subject hash
     *   [1] => subject
     *   [2] => date ("day month year", as written in the index)
     *   [3] => author hash
     *   [4] => author
     *
     * @param int    $show  maximum number of messages to return
     * @param string $month unused, kept for interface compatibility
     * @return array the messages, most recent first (at most $show entries)
     * @access public
     */
    function recent_msgs($show = 20, $month = "") {
        $archive_dir = $this->listdir . '/archive/';
        $last_dir = (int) floor($this->getNumArchive() / 100);

        // Reverse so that the latest messages come first (message numbers are kept as keys).
        $recent = array_reverse($this->_read_archive_index($archive_dir . $last_dir . '/index'), true);

        // Not enough messages in the latest index: complete with the previous one.
        if (count($recent) < $show && $last_dir > 0) {
            $older = array_reverse($this->_read_archive_index($archive_dir . ($last_dir - 1) . '/index'), true);
            // Array union appends the older messages after the recent ones.
            $recent = $recent + $older;
        }

        // Never return more than $show messages.
        return array_slice($recent, 0, $show, true);
    }

    /**
     * Reads one archive index file into an array keyed by message number.
     *
     * Every message uses two consecutive lines: the first one carries the message
     * number, the subject hash and the subject; the second one (starting with a
     * tab) carries the date, the author hash and the author.
     *
     * @param string $path path of the index file
     * @return array entries in file order, empty when the file cannot be read
     * @access private
     */
    function _read_archive_index($path) {
        $entries = array();
        if (!is_file($path) || !is_readable($path)) {
            return $entries;
        }
        $lines = file($path);
        if ($lines === FALSE) {
            return $entries;
        }

        $total = count($lines);
        for ($i = 0; $i < $total; $i++) {
            // First line: message number, subject hash, subject
            if (!preg_match('/([0-9]+): ([a-z]+) (.*)/', $lines[$i], $subject_line)) {
                continue;
            }
            // Second line: date, author hash, author
            $author_line = array();
            if ($i + 1 < $total
                && preg_match('/\t([0-9]+) ([a-zA-Z][a-zA-Z][a-zA-Z]) ([0-9][0-9][0-9][0-9]) ([^;]+);([^ ]*) (.*)/', $lines[$i + 1], $author_line)) {
                $i++;
            }
            // Missing fields become empty strings instead of undefined offsets.
            $author_line = $author_line + array(1 => '', 2 => '', 3 => '', 5 => '', 6 => '');

            $entries[$subject_line[1]] = array(
                $subject_line[2],
                $subject_line[3],
                $author_line[1] . ' ' . $author_line[2] . ' ' . $author_line[3],
                $author_line[5],
                $author_line[6]
            );
        }
        return $entries;
    }

    /**
     * Resolves a message file name.
     *
     * The name is tried as given, then relative to the list directory, then
     * relative to the "archive" directory of the list.
     *
     * @param string $file file name to look for
     * @return mixed the first existing path, or FALSE when none exists
     * @access private
     */
    function _resolve_msgfile($file) {
        $candidates = array(
            $file,
            $this->listdir . "/" . $file,
            $this->listdir . "/archive/" . $file
        );
        foreach ($candidates as $candidate) {
            if (is_file($candidate)) {
                return $candidate;
            }
        }
        return FALSE;
    }

    /**
     * parse_file - opens a file and feeds the data to parse(); the file can be
     * relative to the list directory.
     *
     * @param string $file   message file
     * @param bool   $simple passed through to parse()
     * @return bool the result of parse(), FALSE when the file is missing or unreadable
     */
    function parse_file($file,$simple = FALSE) {
        $path = $this->_resolve_msgfile($file);
        if ($path === FALSE) {
            return FALSE;
        }
        $this->msgfile = $path;

        $data = file_get_contents($path);
        if ($data === FALSE) {
            return FALSE;
        }
        return $this->parse($data,$simple);
    }

    /**
     * parse_file_headers - opens a file and decodes it with Mail_mimeDecode.
     *
     * @param string $file   message file, resolved like in parse_file()
     * @param bool   $simple unused, kept for interface compatibility
     * @return mixed whatever Mail_mimeDecode::decode() returns when called without
     *               parameters, or FALSE when the file is missing or unreadable
     */
    function parse_file_headers($file,$simple = FALSE) {
        $path = $this->_resolve_msgfile($file);
        if ($path === FALSE) {
            return FALSE;
        }
        $this->msgfile = $path;

        $message = file_get_contents($path);
        if ($message === FALSE) {
            return FALSE;
        }
        $mimeDecode = new Mail_mimeDecode($message) ;
        return $mimeDecode->decode() ;
    }

    /**
     * Parses a raw message: headers first, then the body.
     * The decoding and part breaking follow RFC2045 (http://www.faqs.org/rfcs/rfc2045.html)
     *
     * @param string $data   the raw message
     * @param bool   $simple when TRUE the Content-Type is left unparsed and the body
     *                       is neither converted to HTML entities nor split into parts
     * @return bool TRUE when both the headers and the body were processed
     */
    function parse($data,$simple = FALSE) {
        return ($this->_get_headers($data,$simple) && $this->_get_body($data,$simple)) ? TRUE : FALSE;
    }

    // all of these are internal functions, you shouldn't call them directly...

    /**
     * _ct_parse: parses the Content-Type header.
     *
     * Sets $this->multipart to TRUE when the type starts with "multipart".
     * Known limitation: a quoted parameter value containing whitespace or ";"
     * is still split, as parameters are tokenised on those characters.
     *
     * @return array [0] = full header, [1] = content type,
     *               [2] ... [n] = parameters as array(name, value)
     */
    function _ct_parse() {
        $raw = $this->headers['content-type'];

        // Strip rfc822 comments, but leave quoted strings alone: a quoted boundary
        // may legitimately contain parentheses.
        $instr = preg_replace('/("[^"]*")|\([^()]*\)/', '$1', $raw);

        if (preg_match('/: /', $instr)) {
            $pieces = preg_split('/:/', trim($instr), 2);
            $segments = preg_split('/;/', trim($pieces[1]));
        } else {
            $segments = preg_split('/;/', trim($instr));
        }

        $ct = array($raw, trim($segments[0]));

        // Every segment after the type may hold one or more name=value parameters.
        $segment_count = count($segments);
        for ($s = 1; $s < $segment_count; $s++) {
            $tokens = preg_split('/\s+/', $segments[$s], -1, PREG_SPLIT_NO_EMPTY);
            foreach ($tokens as $token) {
                $pair = explode('=', $token, 2);
                if (!isset($pair[1])) {
                    $pair[1] = '';
                }
                $pair[1] = trim($pair[1], '"');
                $ct[] = $pair;
            }
        }

        // are we a multipart message?
        if (preg_match('/^multipart/i', $ct[1])) {
            $this->multipart = TRUE;
        }

        return $ct;
    }

    /**
     * _get_headers: pulls the headers out of the data and builds the $this->headers
     * array (keys are the lower-cased header names).
     *
     * The subject is decoded when it holds encoded words, a default Content-Type is
     * set when none is present and, unless $simple is TRUE, the Content-Type is
     * replaced by the array built by _ct_parse().
     *
     * @param string $data   the raw message
     * @param bool   $simple TRUE to keep the Content-Type header as a string
     * @return bool always TRUE
     */
    function _get_headers($data,$simple = FALSE) {
        $last = NULL;
        foreach (preg_split('/\n/', $data) as $line) {
            $line = trim($line);
            if ($line == "") {
                break; // the first empty line ends the headers
            }
            if (preg_match('/^From[^:].*$/', $line)) {
                continue; /* strips out any From lines added by the MTA */
            }

            $hdr = preg_split('/: /', $line, 2);
            if (count($hdr) == 1) {
                // this is a continuation of the last header (like a received from line);
                // it is ignored when no header has been seen yet
                if ($last !== NULL) {
                    $this->headers[$last] .= $line;
                }
            } else {
                $last = strtolower($hdr[0]);
                $this->headers[$last] = $hdr[1];
            }
        }

        // Added to get rid of the ISO... encoded subjects.
        // Should be moved elsewhere and called just before display.
        if (isset($this->headers['subject'])) {
            $this->headers['subject'] = $this->_decode_subject($this->headers['subject']);
        }

        // sanity anyone?
        if (empty($this->headers['content-type'])) {
            $this->headers['content-type'] = "text/plain; charset=us-ascii";
        }

        if (!$simple) {
            $this->headers['content-type'] = $this->_ct_parse();
        }

        return TRUE;
    }

    /**
     * Decodes the encoded words ("=?charset?Q?...?=" or "=?charset?B?...?=") of a subject.
     *
     * Only one charset is handled per subject: UTF-8 when the subject mentions "UTF",
     * else a windows-NNNN charset when one is found, else ISO-8859-1 / ISO-8859-15.
     * UTF-8 text is converted with utf8_decode().
     *
     * @param string $subject the raw subject header
     * @return string the subject with the recognised encoded words replaced
     * @access private
     */
    function _decode_subject($subject) {
        if (preg_match('/windows-[0-9][0-9][0-9][0-9]/i', $subject, $found)) {
            $charset = $found[0] ;
        } else {
            $charset = 'ISO-8859-15?' ;
        }
        if (preg_match ('/UTF/i', $subject)) {
            $charset = 'UTF-8' ;
        }

        preg_match_all ("/=\?$charset\?(Q|B)\?(.*?)\?=/i", $subject, $words, PREG_SET_ORDER) ;
        foreach ($words as $word) {
            // The pattern is case-insensitive, so the encoding letter may be lower case.
            if (strtoupper($word[1]) == 'Q') {
                // In the Q encoding "_" stands for a space; it has to be replaced before
                // decoding so that an encoded "=5F" stays an underscore.
                $decoded = quoted_printable_decode (str_replace ('_', ' ', $word[2])) ;
            } else {
                $decoded = base64_decode ($word[2]) ;
            }
            if ($charset == 'UTF-8') {
                // NOTE(review): utf8_decode() is deprecated as of PHP 8.2.
                $decoded = utf8_decode ($decoded) ;
            }
            $subject = str_replace ($word[0], $decoded, $subject) ;
        }
        return $subject ;
    }

    /**
     * _get_body: pulls the body out of the data and fills $this->body, decoding the
     * data according to the Content-Transfer-Encoding header if necessary.
     *
     * Unless $simple is TRUE, a multipart message is then split into parts with
     * _get_parts(). When no boundary parameter is found, the message is marked as
     * non multipart and an error text is put on top of the body.
     *
     * @param string $data   the raw message
     * @param bool   $simple TRUE to skip the HTML entity conversion and the part splitting
     * @return bool always TRUE
     */
    function _get_body($data,$simple = FALSE) {
        $raw_body = '';
        $in_body = FALSE;
        foreach (preg_split('/\n/', $data) as $line) {
            if ($in_body) {
                $raw_body .= $line . "\n";
            } else if (trim($line) == '') {
                // Same test as in _get_headers(), so a "\r\n" empty line ends the headers too.
                $in_body = TRUE;
            }
        }

        // now here comes the fun part... decoding.
        // The encoding name is case-insensitive and the header is optional.
        $encoding = '';
        if (isset($this->headers['content-transfer-encoding'])) {
            $encoding = strtolower(trim($this->headers['content-transfer-encoding']));
        }
        switch($encoding) {
            case 'binary':
                $this->body = $this->_cte_8bit($this->_cte_binary($raw_body),$simple);
                break;

            case 'base64':
                // Decoded data must not be quoted-printable decoded as well: any "=XX"
                // sequence it contains would be corrupted.
                $this->body = $this->_cte_8bit($this->_cte_base64($raw_body),$simple);
                break;

            case 'quoted-printable':
                $this->body = $this->_cte_8bit($this->_cte_qp($raw_body),$simple);
                break;

            case '8bit':
                $this->body = $this->_cte_8bit($raw_body,$simple);
                break;

            case '7bit':    // 7bit doesn't need to be decoded
            default:        // And the fall through as well...
                $this->body = $raw_body;
                break;
        }

        if ($simple) {
            return TRUE;
        }

        // if we are a multipart message then break up the parts and decode, set the appropriate variables.
        // since each part is just really a little message in itself each part becomes a new parser
        // object and all the wheels turn again... :)
        if ($this->multipart) {
            $boundary = '';
            $content_type = $this->headers['content-type'];
            $entries = is_array($content_type) ? count($content_type) : 0;
            // The parameters start at index 2 (see _ct_parse()).
            for ($i = 2; $i < $entries; $i++) {
                if (is_array($content_type[$i]) && preg_match('/boundary/i', $content_type[$i][0])) {
                    $boundary = $content_type[$i][1];
                }
            }
            if ($boundary != '') {
                $this->_get_parts($this->body,$boundary);
            } else {
                // whoopps... something's not right here. we were told that the message is supposed
                // to be a multipart message, yet the boundary wasn't set in the content type.
                // mark the message as non multipart and add a message to the top of the body.
                $this->multipart = FALSE;
                $this->body = "PARSER ERROR:\nWHILE PARSING THIS MESSAGE AS A MULTIPART MESSAGE AS DEFINED IN RFC2045 THE BOUNDARY IDENTIFIER WAS NOT FOUND!\nTHIS MESSAGE WILL NOT DISPLAY CORRECTLY!\n\n" . $this->body;
            }
        }

        return TRUE;
    }

    /**
     * _get_parts: breaks up $data into parts based on $boundary following the rfc specs
     * detailed in section 5 of RFC2046 (http://www.faqs.org/rfcs/rfc2046.html)
     * After the parts are broken up they are turned into parser objects and stored
     * in $this->parts (indexed from 0).
     *
     * @param string $data     the decoded body of the multipart message
     * @param string $boundary the boundary taken from the Content-Type header
     * @return void
     */
    function _get_parts($data,$boundary) {
        // Chunk -1 collects the text found before the first boundary, i.e. the notice
        // meant for mail clients that do not support multipart; it is not parsed.
        $current = -1;
        $chunks = array();
        foreach (preg_split('/\n/', $data) as $line) {
            // Ignore a trailing "\r" or padding when looking for the boundary lines.
            $marker = rtrim($line);
            if ($marker == "--" . $boundary) { // start of a part
                $current++;
                $chunks[$current] = '';
                continue;
            }
            if ($marker == "--" . $boundary . "--") { // the end of the last part
                break;
            }
            if (!isset($chunks[$current])) {
                $chunks[$current] = '';
            }
            $chunks[$current] .= $line . "\n";
        }

        // Parts 0..$current are the real parts, whether or not a preamble was present.
        for ($i = 0; $i <= $current; $i++) {
            $part = new ezmlm_parser();
            $part->parse($chunks[$i]);
            $this->parts[$i] = $part;
        }
    }

    /**
     * _cte_8bit: decode a content transfer encoding of 8bit
     * NOTE: this function is a little bit special. Since the end result will be displayed in
     * a web browser, every byte above 127 (outside the US-ASCII table) is replaced by its
     * numeric HTML entity, and the parsed content type of the message is changed to
     * text/html if anything was replaced. Bytes are converted one by one, so multi-byte
     * encodings such as UTF-8 are not decoded as characters.
     *
     * @param string $data   the data to convert
     * @param bool   $simple TRUE to return $data untouched
     * @return string the converted data
     */
    function _cte_8bit($data,$simple = FALSE) {
        if ($simple) {
            return $data;
        }

        $data = (string) $data;
        $out = '';
        $changed = FALSE;
        $length = strlen($data);
        for ($i = 0; $i < $length; $i++) {
            $code = ord($data[$i]);
            if ($code > 127) {
                $out .= '&#' . $code . ';';
                $changed = TRUE;
            } else {
                $out .= $data[$i];
            }
        }

        // Only touch the content type once it has been parsed into an array.
        if ($changed && isset($this->headers['content-type']) && is_array($this->headers['content-type'])) {
            $this->headers['content-type'][1] = 'text/html';
        }
        return $out;
    }

    // _cte_binary: decode a content transfer encoding of binary (nothing to do)
    function _cte_binary($data) {
        return $data;
    }

    // _cte_base64: decode a content transfer encoding of base64
    function _cte_base64($data) {
        return base64_decode($data);
    }

    // _cte_qp: decode a content transfer encoding of quoted_printable
    function _cte_qp($data) {
        // For the time being we'll use PHP's function, it seems to work well enough.
        return quoted_printable_decode($data);
    }
}