WebSVN – Applications.papyrus – Path Comparison – / – /trunk/client/projet/classes/ezmlm-php-2.0/ezmlm-parser.php Rev 447 and /trunk/client/projet/classes/ezmlm-php-2.0/ezmlm-parser.php Rev 448

Ignore whitespace Rev 447 → Rev 448

 /trunk/client/projet/classes/ezmlm-php-2.0/ezmlm-parser.php
 New file
 ,0 → 1,389
+<?php
+// $Id: ezmlm-parser.php,v 1.1 2005-09-22 14:02:46 ddelon Exp $
+//
+// ezmlm-parser.php - ezmlm-php v2.0
+// --------------------------------------------------------------
+// Contains all the code for parsing messages.
+// It handles all the necessary decoding, attachments, etc...
+// Note this does all the parsing itself now, removing the dependency
+// on the mailparse library (as it looks like it will never make
+// it into the official inclusion with PHP)...
+// --------------------------------------------------------------
+require_once("ezmlm.php");
+require_once("Mail/mimeDecode.php") ;
+// CLASS: ezmlm-parser
+class ezmlm_parser extends ezmlm_php {
+        var $headers;           // the full untouched headers of the message
+        var $body;              // the full untouched (but decoded) body (this is not $this->parts[0]->body)
+        var $parts;             // all the parts, if it is a multipart message. each part is an ezmlm_parser object...
+        // Here's the most accessed headers, everything else can be
+        // accessed from the $this->headers array.
+        var $to;                // To:
+        var $from;              // From:
+        var $date;              // Date:
+        var $subject;           // Subject:
+        var $replyto;           // Reply-To:
+        var $contenttype;       // Content-Type:
+        var $multipart;         // TRUE if the message is a multipart message
+        var $msgfile;           // if parsed from a file, this is the filename...
+        // functions
+        // recent_msgs - parses and returns an arbitrary number of the most recent messages
+        // Parameters:
+        //   $show  - maximum number of messages to return; '' means "use the count taken
+        //            from the thread index of the month that is read below".
+        //   $month - thread index to start from, as "YYYYMM"; defaults to the current month.
+        // Returns an array of Mail_mimeDecode::decode() results, newest message first.
+        // Each entry gets two extra properties: msgfile (the path that was read) and
+        // nummessage (the archive message number). An empty array is returned when no
+        // thread index or subject file can be opened.
+        function recent_msgs($show = 20, $month = "") {
+                if ($month == "") { $month = date("Ym"); }
+                $threadyear = (int) substr($month, 0, 4);
+                $threadmonth = (int) substr($month, 4, 2);
+                $threadpath = $this->listdir . "/archive/threads/" . $month;
+                if (!is_file($threadpath)) {
+                        // No thread index for this month: step back one month and retry.
+                        // The lower bound guarantees termination when the list has no
+                        // thread index at all (or $month is malformed).
+                        if ($threadyear < 1970) { return array(); }
+                        if ($threadmonth <= 1) { $prevthread = ($threadyear - 1) . "12"; }
+                        else { $prevthread = sprintf("%d%02d", $threadyear, $threadmonth - 1); }
+                        return $this->recent_msgs($show, $prevthread);
+                }
+                // open the thread index of the most recent month
+                $fd = fopen($threadpath, "r");
+                if (!$fd) { return array(); }
+                // keep the last non-empty line found near the end of the file
+                fseek($fd, -256, SEEK_END);
+                $curthread = '';
+                while (!feof($fd)) {
+                        $temp = fgets($fd, 4096);
+                        if ($temp != "") { $curthread = $temp; }
+                }
+                // count the lines of the index; used as the limit when $show is ''
+                $nombre_message = 0;
+                fseek($fd, 0);
+                while (!feof($fd)) {
+                        $nombre_message++;
+                        fgets($fd);
+                }
+                fclose($fd);
+                $subjectfile = preg_replace("/^[0-9]*\:([a-z]*) \[.*/", "\\1", $curthread);
+                // subject files are stored as <first 2 hash letters>/<rest of the hash>
+                $subjectfile = substr($subjectfile, 0, 2) . "/" . substr($subjectfile, 2, 18);
+                // The subject file is laid out as:
+                //   hash original-subject                 (first line)
+                //   msg_number:yearmonth:author_hash Author Name
+                $fd = fopen($this->listdir . "/archive/subjects/" . $subjectfile, "r");
+                if (!$fd) { return array(); }
+                fseek($fd, -512, SEEK_END);
+                // take the last non-empty line
+                $cursubject = '';
+                while (!feof($fd)) {
+                        $temp = fgets($fd, 4096);
+                        if ($temp != "") { $cursubject = $temp; }
+                }
+                fclose($fd);
+                // explode() replaces split(), which no longer exists in PHP 7+
+                $fields = explode(":", $cursubject);
+                $msgnum = (int) $fields[0];
+                // messages are stored 100 per directory: archive/<number / 100>/<number % 100>
+                $msgdir = (int) ($msgnum / 100);
+                $numshown = 0;
+                $msgfiles = array();
+                // find the first slot of that directory that has no message file yet
+                for ($i = 0; $i <= 99; $i++) {
+                        if (($msgdir == 0) and ($i == 0)) { $i++; }
+                        if (!is_file($this->listdir . "/archive/" . $msgdir . "/" . sprintf("%02d", $i))) { break; }
+                }
+                // without an explicit limit, fall back to the count taken from the thread index
+                if ($show == '') { $show = $nombre_message; }
+                // walk backwards from the newest message
+                while ($numshown < $show) {
+                        $i--;
+                        if ($i < 0) {
+                                $i = 99;
+                                $msgdir--;
+                                if ($msgdir < 0) { break; }
+                        }
+                        $msgfile = $this->listdir . "/archive/" . $msgdir . "/" . sprintf("%02d", $i);
+                        // a missing file ends the walk with what was collected so far
+                        if (!is_file($msgfile)) { return $msgfiles; }
+                        $message = file_get_contents($msgfile);
+                        if ($message === FALSE) { return $msgfiles; }
+                        $mimeDecode = new Mail_mimeDecode($message);
+                        $mailDecode = $mimeDecode->decode();
+                        $mailDecode->msgfile = $msgfile;
+                        // rebuild the real message number; plain concatenation of
+                        // directory and index loses the zero padding (1 . 5 != 105)
+                        $mailDecode->nummessage = (string) ($msgdir * 100 + $i);
+                        $msgfiles[] = $mailDecode;
+                        unset($mailDecode);
+                        $numshown++;
+                }
+                return $msgfiles;
+        }
+        // parse_file - opens a file and feeds the data to parse, file can be relative to the listdir
+        // Parameters:
+        //   $file   - path of the message; tried as given, then under the listdir,
+        //             then under <listdir>/archive.
+        //   $simple - passed through unchanged to parse().
+        // Returns the result of parse(), or FALSE when the file cannot be found or read.
+        // Side effect: $this->msgfile is set to the resolved path.
+        function parse_file($file,$simple = FALSE) {
+                if (!is_file($file)) {
+                        if (is_file($this->listdir . "/" . $file)) { $file = $this->listdir . "/" . $file; }
+                        else if (is_file($this->listdir . "/archive/" . $file)) { $file = $this->listdir . "/archive/" . $file; }
+                        else { return FALSE; }
+                }
+                $this->msgfile = $file;
+                // Read in one call and check the result: looping on feof() with a
+                // handle that failed to open never terminates.
+                $data = file_get_contents($file);
+                if ($data === FALSE) { return FALSE; }
+                return $this->parse($data,$simple);
+        }
+    // parse_file_headers - opens a file and decodes it with Mail_mimeDecode, file can be relative to the listdir
+        // Parameters:
+        //   $file   - path of the message; tried as given, then under the listdir,
+        //             then under <listdir>/archive.
+        //   $simple - accepted for signature compatibility; not used.
+        // Returns whatever Mail_mimeDecode::decode() produces for the whole file, or
+        // FALSE when the file cannot be found or read.
+        // Side effect: $this->msgfile is set to the resolved path.
+        function parse_file_headers($file,$simple = FALSE) {
+                if (!is_file($file)) {
+                        if (is_file($this->listdir . "/" . $file)) { $file = $this->listdir . "/" . $file; }
+                        else if (is_file($this->listdir . "/archive/" . $file)) { $file = $this->listdir . "/archive/" . $file; }
+                        else { return FALSE; }
+                }
+                $this->msgfile = $file;
+                // one read is enough; the former second read and the statements
+                // after the return could never influence the result
+                $message = file_get_contents($file);
+                if ($message === FALSE) { return FALSE; }
+                $mimeDecode = new Mail_mimeDecode($message);
+                return $mimeDecode->decode();
+        }
+        // this does all of the work (well it calls two functions that do all the work :)
+        // all the decoding and part breaking follows RFC 2045 (http://www.faqs.org/rfcs/rfc2045.html)
+        // Runs the two parsing stages in order: headers first, then the body.
+        // The body stage is only attempted when the header stage reports success.
+        // Returns TRUE when both stages succeed, FALSE otherwise.
+        function parse($data,$simple = FALSE) {
+                if (!$this->_get_headers($data,$simple)) {
+                        return FALSE;
+                }
+                if (!$this->_get_body($data,$simple)) {
+                        return FALSE;
+                }
+                return TRUE;
+        }
+        // all of these are internal functions, you shouldn't call them directly...
+        // _ct_parse: parse Content-Type headers -> $ct[0] = Full header, $ct[1] = Content-Type, $ct[2] ... $ct[n] = AP's
+        // Reads $this->headers['content-type'] (a string) and returns an array:
+        //   [0]      the header value exactly as stored
+        //   [1]      the media type (text before the first ';')
+        //   [2..n]   one array(name, value) per parameter, surrounding quotes removed
+        // Side effect: sets $this->multipart to TRUE when the media type starts with "multipart".
+        // Known limit (kept from the original tokenizing): parameters are split on ';'
+        // and whitespace, so a quoted value containing either is cut apart.
+        function _ct_parse() {
+                $instr = $this->headers['content-type'];
+                // Strip rfc822 comments, but only outside quoted strings: after
+                // splitting on '"' the even-numbered pieces are the unquoted ones.
+                $pieces = explode('"', $instr);
+                for ($p = 0; $p < count($pieces); $p += 2) {
+                        $pieces[$p] = preg_replace('/\([^()]*\)/', '', $pieces[$p]);
+                }
+                $instr = implode('"', $pieces);
+                // Drop a leading "Content-Type:" label if one is present. Testing for
+                // ": " anywhere would also cut values such as name="Re: x".
+                $instr = preg_replace('/^\s*content-type\s*:/i', '', $instr);
+                $segments = preg_split('/;/', trim($instr));
+                $ct = array();
+                $ct[0] = $this->headers['content-type'];
+                $ct[1] = trim($segments[0]);
+                $i = 2;
+                // Every segment after the media type may carry parameters, not just
+                // the first one (boundary is frequently the second or third).
+                for ($s = 1; $s < count($segments); $s++) {
+                        $tokens = preg_split('/[\s\n]/', $segments[$s]);
+                        foreach ($tokens as $val) {
+                                if ($val == '') continue;
+                                $ap = preg_split('/=/', $val, 2);
+                                // a bare token without '=' gets an empty value
+                                if (!isset($ap[1])) { $ap[1] = ''; }
+                                if (preg_match('/^"/', $ap[1])) { $ap[1] = substr($ap[1], 1, strlen($ap[1]) - 2); }
+                                $ct[$i] = $ap;
+                                $i++;
+                        }
+                }
+                // are we a multipart message?
+                if (preg_match('/^multipart/i', $ct[1])) { $this->multipart = TRUE; }
+                return $ct;
+        }
+        // _get_headers: pulls the headers out of the data and builds the $this->headers array
+        // Parameters:
+        //   $data   - the raw message (headers, blank line, body).
+        //   $simple - when TRUE the Content-Type header is left as a plain string;
+        //             otherwise it is replaced by the array built by _ct_parse().
+        // Fills $this->headers, keyed by lower-cased header name. Lines without ": "
+        // are treated as continuations and appended to the previous header. MIME
+        // encoded-words in the subject are decoded for the UTF-8, ISO-8859-1/15 and
+        // windows-NNNN charsets. Always returns TRUE.
+        function _get_headers($data,$simple = FALSE) {
+                $lines = preg_split('/\n/', $data);
+                $last = null;
+                foreach ($lines as $val) {
+                        $val = trim($val);
+                        if ($val == "") break;                                  // blank line: end of the headers
+                        if (preg_match('/^From[^:].*$/', $val)) continue;       /* strips out any From lines added by the MTA */
+                        $hdr = preg_split('/: /', $val, 2);
+                        if (count($hdr) == 1) {
+                                // this is a continuation of the last header (like a received from line);
+                                // ignored when no header has been seen yet
+                                if ($last !== null) { $this->headers[$last] .= $val; }
+                        } else {
+                                $last = strtolower($hdr[0]);
+                                $this->headers[$last] = $hdr[1];
+                        }
+                }
+                // Decode the encoded-words of the subject. (This would better live in the
+                // display code, called just before output.)
+                if (isset($this->headers['subject'])) {
+                        $subject = $this->headers['subject'];
+                        // groups: 1 = charset, 2 = encoding (Q or B), 3 = encoded text
+                        preg_match_all('/=\?(UTF-8|ISO-8859-15?|windows-[0-9]{4})\?(Q|B)\?(.*?)\?=/i', $subject, $match, PREG_PATTERN_ORDER);
+                        for ($i = 0; $i < count($match[0]); $i++) {
+                                if (strtoupper($match[2][$i]) == 'Q') {
+                                        // in the Q encoding "_" stands for a space; it has to be
+                                        // translated before decoding so that an encoded =5F survives
+                                        $decode = quoted_printable_decode(str_replace('_', ' ', $match[3][$i]));
+                                } else {
+                                        $decode = base64_decode($match[3][$i]);
+                                }
+                                if (strtoupper($match[1][$i]) == 'UTF-8') {
+                                        $decode = utf8_decode($decode);
+                                }
+                                $subject = str_replace($match[0][$i], $decode, $subject);
+                        }
+                        $this->headers['subject'] = $subject;
+                }
+                // sanity anyone?
+                if (empty($this->headers['content-type'])) { $this->headers['content-type'] = "text/plain; charset=us-ascii"; }
+                if (!$simple) { $this->headers['content-type'] = $this->_ct_parse(); }
+                return TRUE;
+        }
+        // _get_body: pulls the body out of the data and fills $this->body, decoding the data if necessary.
+        // Parameters:
+        //   $data   - the raw message (headers, blank line, body).
+        //   $simple - when TRUE only the transfer decoding is done: no conversion of
+        //             high bytes to HTML entities and no splitting into parts.
+        // Fills $this->body with everything after the first blank line, decoded
+        // according to the Content-Transfer-Encoding header. For a multipart message
+        // the body is additionally handed to _get_parts(). Always returns TRUE.
+        function _get_body($data,$simple = FALSE) {
+                $lines = preg_split('/\n/', $data);
+                $doneheaders = FALSE;
+                $data = "";
+                foreach ($lines as $val) {
+                        if (!$doneheaders) {
+                                // same blank-line test as _get_headers (trimmed), so a
+                                // CRLF message ends its header section at the same line
+                                if (trim($val) == '') { $doneheaders = TRUE; }
+                                continue;
+                        }
+                        $data .= $val . "\n";
+                }
+                // now here comes the fun part... decoding.
+                // The encoding name is case-insensitive and the header may be absent.
+                $cte = '';
+                if (isset($this->headers['content-transfer-encoding'])) {
+                        $cte = strtolower(trim($this->headers['content-transfer-encoding']));
+                }
+                switch ($cte) {
+                        case 'binary':
+                                $this->body = $this->_cte_8bit($this->_cte_binary($data),$simple);
+                                break;
+                        case 'base64':
+                                // decoded once only: running the result through the
+                                // quoted-printable decoder as well would mangle any "=XX"
+                                $this->body = $this->_cte_8bit($this->_cte_base64($data),$simple);
+                                break;
+                        case 'quoted-printable':
+                                $this->body = $this->_cte_8bit($this->_cte_qp($data),$simple);
+                                break;
+                        case '8bit':
+                                $this->body = $this->_cte_8bit($data,$simple);
+                                break;
+                        case '7bit':            // 7bit doesn't need to be decoded
+                        default:                // And the fall through as well...
+                                $this->body = $data;
+                                break;
+                }
+                if ($simple) { return TRUE; }
+                // if we are a multipart message then break up the parts and decode, set the appropriate variables.
+                // each part is a little message in itself, so each one becomes a new parser object.
+                if ($this->multipart) {
+                        $boundary = '';
+                        $ct = $this->headers['content-type'];
+                        if (is_array($ct)) {
+                                // parameters live at indexes 2 .. count-1
+                                for ($i = 2; $i < count($ct); $i++) {
+                                        if (is_array($ct[$i]) && preg_match('/boundary/i', $ct[$i][0])) {
+                                                $boundary = $ct[$i][1];
+                                        }
+                                }
+                        }
+                        if ($boundary != '') {
+                                $this->_get_parts($this->body,$boundary);
+                        } else {
+                                // whoopps... something's not right here. we were told that the message is supposed
+                                // to be a multipart message, yet the boundary wasn't set in the content type.
+                                // mark the message as non multipart and add a message to the top of the body.
+                                $this->multipart = FALSE;
+                                $this->body = "PARSER ERROR:\nWHILE PARSING THIS MESSAGE AS A MULTIPART MESSAGE AS DEFINED IN RFC2045 THE BOUNDARY IDENTIFIER WAS NOT FOUND!\nTHIS MESSAGE WILL NOT DISPLAY CORRECTLY!\n\n" . $this->body;
+                        }
+                }
+                return TRUE;
+        }
+        // _get_parts: breaks up $data into parts based on $boundary following the rfc specs
+        // detailed in section 5 of RFC2046 (http://www.faqs.org/rfcs/rfc2046.html)
+        // After the parts are broken up they are then turned into parser objects and the
+        // resulting array of parts is set to $this->parts;
+        // Parameters:
+        //   $data     - the decoded body of a multipart message.
+        //   $boundary - the boundary string from the Content-Type header (without "--").
+        // Text before the first boundary line (the notice for mail clients without
+        // multipart support) and text after the closing boundary are discarded.
+        // Every part in between is parsed by a fresh ezmlm_parser and stored in
+        // $this->parts, indexed from 0. Returns nothing.
+        function _get_parts($data,$boundary) {
+                $inpart = -1;           // -1 while still inside the preamble
+                $parts = array();
+                $lines = preg_split('/\n/', $data);
+                foreach ($lines as $val) {
+                        // a boundary line may end in a CR or padding whitespace
+                        $marker = rtrim($val);
+                        if ($marker === "--" . $boundary) {                     // start of a part
+                                $inpart++;
+                                $parts[$inpart] = '';
+                                continue;
+                        }
+                        if ($marker === "--" . $boundary . "--") { break; }     // the end of the last part
+                        if ($inpart >= 0) { $parts[$inpart] .= $val . "\n"; }
+                }
+                // $parts holds real parts only, so every entry is parsed; the count no
+                // longer depends on whether the message happened to have a preamble
+                for ($i = 0; $i < count($parts); $i++) {
+                        $part = new ezmlm_parser();
+                        $part->parse($parts[$i]);
+                        $this->parts[$i] = $part;
+                }
+        }
+        // _cte_8bit: decode a content transfer encoding of 8bit
+        // NOTE: this function is a little bit special. Since the end result will be displayed in
+        // a web browser _cte_8bit converts bytes > 127 (outside the US-ASCII table) into the
+        // html numeric entity equivalent, it also ensures that the messages content-type is changed
+        // to include text/html if it changes anything...
+        // Parameters:
+        //   $data   - the body text to convert.
+        //   $simple - when TRUE the data is returned untouched.
+        // Returns $data with every byte above 127 replaced by "&#<byte value>;".
+        // Side effect: when at least one byte was replaced, the media type slot
+        // $this->headers['content-type'][1] is set to 'text/html'.
+        function _cte_8bit($data,$simple = FALSE) {
+                if ($simple) { return $data; }
+                $changed = FALSE;
+                $out = '';
+                $length = strlen($data);
+                // plain byte walk; each() is not available any more in PHP 8
+                for ($pos = 0; $pos < $length; $pos++) {
+                        $code = ord($data[$pos]);
+                        if ($code > 127) { $out .= '&#' . $code . ';'; $changed = TRUE; }
+                        else { $out .= $data[$pos]; }
+                }
+                if ($changed) { $this->headers['content-type'][1] = 'text/html'; }
+                return $out;
+        }
+        // _cte_binary: decode a content transfer encoding of binary
+        function _cte_binary($data) {
+                // binary content needs no transformation: hand it back as received
+                return $data;
+        }
+        // _cte_base64: decode a content transfer encoding of base64
+        function _cte_base64($data) {
+                // returns whatever PHP's base64_decode() yields for $data
+                $decoded = base64_decode($data);
+                return $decoded;
+        }
+        // _cte_qp: decode a content transfer encoding of quoted_printable
+        function _cte_qp($data) {
+                // PHP's own decoder is used for now; it has proven good enough.
+                $decoded = quoted_printable_decode($data);
+                return $decoded;
+        }
+}

Subversion Repositories Applications.papyrus

Compare Revisions

Ignore whitespace Rev 447 → Rev 448