2 |
ddelon |
1 |
<?php
|
172 |
alexandre_ |
2 |
// $Id: ezmlm-parser.php,v 1.2.4.1 2007-03-12 11:22:12 alexandre_tb Exp $
|
2 |
ddelon |
3 |
//
|
|
|
4 |
// ezmlm-parser.php - ezmlm-php v2.0
|
|
|
5 |
// --------------------------------------------------------------
|
|
|
6 |
// Contains all the code for parsing messages.
|
|
|
7 |
// It handles all the necessary decoding, attachments, etc...
|
|
|
8 |
// Note this does all the parsing itself now, removing the dependency
|
|
|
9 |
// on the mailparse library (as it looks like it will never make
|
|
|
10 |
// it into the official inclusion with PHP)...
|
|
|
11 |
// --------------------------------------------------------------
|
|
|
12 |
|
|
|
13 |
require_once("ezmlm.php");
|
|
|
14 |
require_once("Mail/mimeDecode.php") ;
|
|
|
15 |
// CLASS: ezmlm-parser
|
|
|
16 |
class ezmlm_parser extends ezmlm_php {
|
172 |
alexandre_ |
17 |
var $headers; // the full untouched headers of the message
|
|
|
18 |
var $body; // the full untouched (but decoded) body (this is not $this->parts[0]->body)
|
2 |
ddelon |
19 |
var $parts; // all the parts, if it is a multipart message. each part is an ezmlm_parser object...
|
|
|
20 |
|
172 |
alexandre_ |
21 |
// Here's the most accessed headers, everything else can be
|
|
|
22 |
// accessed from the $this->headers array.
|
|
|
23 |
var $to; // To:
|
|
|
24 |
var $from; // From:
|
|
|
25 |
var $date; // Date:
|
|
|
26 |
var $subject; // Subject:
|
|
|
27 |
var $replyto; // Reply-To:
|
|
|
28 |
var $contenttype; // Content-Type:
|
2 |
ddelon |
29 |
|
|
|
30 |
var $multipart; // TRUE if the message is a multipart message
|
|
|
31 |
|
|
|
32 |
var $msgfile; // if parsed from a file, this is the filename...
|
|
|
33 |
|
|
|
34 |
// functions
|
|
|
35 |
|
|
|
36 |
// recent_msgs - parses and returns an arbitrary number of the most recent messages
|
|
|
37 |
/**
 * Returns the most recent messages listed in the archive index files.
 *
 * Looks for the highest-numbered sub-directory of $this->listdir/archive/,
 * reads its "index" file and, when that yields fewer than $show messages,
 * tops the list up from the index of the preceding sub-directory.
 * The newest index is always returned in full, even when it lists more
 * than $show messages.
 *
 * @param int    $show  Number of messages the list is topped up to.
 * @param string $month Unused; kept so existing callers keep working.
 * @return array Map of message number => array(subject hash, subject,
 *               date as "day Mon year", author hash, author), newest
 *               message first. Empty when the archive cannot be read.
 */
function recent_msgs($show = 20, $month = "") {
    $archive_dir = $this->listdir . '/archive/';

    $handle = opendir($archive_dir);
    if ($handle === false) {
        return array();
    }

    // Only purely numeric entries are archive sub-directories; keep the highest.
    $latest = 0;
    while (false !== ($item = readdir($handle))) {
        if (preg_match('/^[0-9]+$/', $item) && (int) $item > $latest) {
            $latest = (int) $item;
        }
    }
    closedir($handle);

    // Index files list the oldest message first; reverse to get the newest first.
    $messages = array_reverse(
        $this->_read_index_entries($archive_dir . $latest . '/index'),
        true
    );

    // Not enough messages and an older index exists: append its newest entries.
    if (count($messages) < $show && $latest > 0) {
        $older = array_reverse(
            $this->_read_index_entries($archive_dir . ($latest - 1) . '/index'),
            true
        );
        foreach ($older as $number => $row) {
            if (count($messages) >= $show) {
                break;
            }
            $messages[$number] = $row;
        }
    }

    return $messages;
}

/**
 * Internal helper for recent_msgs(): parses one archive index file.
 *
 * Each message occupies two consecutive lines: "<number>: <subject hash>
 * <subject>" followed by a tab-indented "<day> <Mon> <year> <time>;<author
 * hash> <author>" line. Pairs whose first line does not match are skipped;
 * when only the second line is missing or malformed the date, author hash
 * and author are returned as empty strings.
 *
 * @param string $path Full path of the index file.
 * @return array Map of message number => array(subject hash, subject,
 *               date, author hash, author) in file order; empty when the
 *               file cannot be read.
 */
function _read_index_entries($path) {
    $entries = array();
    if (!is_readable($path)) {
        return $entries;
    }
    $lines = file($path, FILE_IGNORE_NEW_LINES);
    if ($lines === false) {
        return $entries;
    }

    $total = count($lines);
    for ($i = 0; $i < $total; $i += 2) {
        if (!preg_match('/([0-9]+): ([a-z]+) (.*)/', $lines[$i], $subject)) {
            continue;
        }

        $date = '';
        $author_hash = '';
        $author = '';
        // The author hash is a single token; everything after the first
        // space is the author, which may itself contain spaces.
        if (isset($lines[$i + 1])
            && preg_match('/\t([0-9]+) ([a-zA-Z]{3}) ([0-9]{4}) ([^;]+);([^ ]*) (.*)/', $lines[$i + 1], $info)) {
            $date = $info[1] . ' ' . $info[2] . ' ' . $info[3];
            $author_hash = $info[5];
            $author = $info[6];
        }

        $entries[$subject[1]] = array($subject[2], $subject[3], $date, $author_hash, $author);
    }

    return $entries;
}
|
|
|
110 |
|
|
|
111 |
|
|
|
112 |
// parse_file - opens a file and feeds the data to parse, file can be relative to the listdir
|
|
|
113 |
/**
 * Reads a message file and parses it with parse().
 *
 * $file is tried as given, then relative to $this->listdir, then relative
 * to $this->listdir/archive. The resolved path is stored in $this->msgfile.
 *
 * @param string $file   Path of the message file.
 * @param bool   $simple Passed through unchanged to parse().
 * @return bool The result of parse(), or FALSE when the file cannot be
 *              found or read.
 */
function parse_file($file, $simple = FALSE) {
    if (!is_file($file)) {
        if (is_file($this->listdir . "/" . $file)) {
            $file = $this->listdir . "/" . $file;
        } else if (is_file($this->listdir . "/archive/" . $file)) {
            $file = $this->listdir . "/archive/" . $file;
        } else {
            return FALSE;
        }
    }

    $this->msgfile = $file;

    // A failed read is reported as FALSE instead of looping on an invalid handle.
    $data = file_get_contents($file);
    if ($data === false) {
        return FALSE;
    }

    return $this->parse($data, $simple);
}
|
|
|
127 |
|
|
|
128 |
// parse_file_headers - opens a file and parses its headers
|
|
|
129 |
/**
 * Reads a message file and decodes it with PEAR's Mail_mimeDecode.
 *
 * $file is tried as given, then relative to $this->listdir, then relative
 * to $this->listdir/archive. The resolved path is stored in $this->msgfile.
 *
 * @param string $file   Path of the message file.
 * @param bool   $simple Unused; kept so existing callers keep working.
 * @return mixed Whatever Mail_mimeDecode::decode() returns when called
 *               without arguments, or FALSE when the file cannot be found
 *               or read.
 */
function parse_file_headers($file, $simple = FALSE) {
    if (!is_file($file)) {
        if (is_file($this->listdir . "/" . $file)) {
            $file = $this->listdir . "/" . $file;
        } else if (is_file($this->listdir . "/archive/" . $file)) {
            $file = $this->listdir . "/archive/" . $file;
        } else {
            return FALSE;
        }
    }

    $this->msgfile = $file;

    // Read the file once only, and report a failed read as FALSE.
    $message = file_get_contents($file);
    if ($message === false) {
        return FALSE;
    }

    $mimeDecode = new Mail_mimeDecode($message);
    return $mimeDecode->decode();
}
|
|
|
148 |
|
|
|
149 |
// this does all of the work (well it calls two functions that do all the work :)
|
|
|
150 |
// all the decoding and part breaking follows RFC2045 (http://www.faqs.org/rfcs/rfc2045.html)
|
|
|
151 |
/**
 * Parses a raw message: the headers first, then the body.
 *
 * The body is only processed when the header step reported success.
 *
 * @param string $data   Raw message text (headers, blank line, body).
 * @param bool   $simple Passed through to _get_headers() and _get_body().
 * @return bool TRUE when both steps succeeded, FALSE otherwise.
 */
function parse($data, $simple = FALSE) {
    if (!$this->_get_headers($data, $simple)) {
        return FALSE;
    }
    if (!$this->_get_body($data, $simple)) {
        return FALSE;
    }
    return TRUE;
}
|
|
|
156 |
|
|
|
157 |
// all of these are internal functions, you shouldn't call them directly...
|
|
|
158 |
|
|
|
159 |
// _ct_parse: parse Content-Type headers -> $ct[0] = Full header, $ct[1] = Content-Type, $ct[2] ... $ct[n] = AP's
|
|
|
160 |
/**
 * Splits $this->headers['content-type'] into its media type and parameters.
 *
 * Every ";"-separated segment after the media type is scanned for
 * whitespace-separated "name=value" tokens, so a boundary that follows
 * another parameter is no longer lost. A value starting with a double
 * quote has its first and last character removed. RFC 822 comments in
 * parentheses are not stripped, and quoted values that contain ";" or
 * whitespace are not supported.
 *
 * Side effect: sets $this->multipart to TRUE when the media type starts
 * with "multipart" (it is never reset to FALSE here).
 *
 * @return array [0] the header value as stored, [1] the media type,
 *               [2..n] one array(name, value) per parameter; a token
 *               without "=" yields array(name).
 */
function _ct_parse() {
    $raw = $this->headers['content-type'];

    // Drop a leading "Content-Type:" label if the stored value still has one.
    if (preg_match('/: /', $raw)) {
        $pieces = preg_split('/:/', trim($raw), 2);
        $segments = preg_split('/;/', trim($pieces[1]));
    } else {
        $segments = preg_split('/;/', trim($raw));
    }

    $ct = array($raw, $segments[0]);

    $total = count($segments);
    for ($s = 1; $s < $total; $s++) {
        foreach (preg_split('/[\s\n]/', $segments[$s]) as $token) {
            if ($token === '') {
                continue;
            }
            $ap = preg_split('/=/', $token, 2);
            if (isset($ap[1]) && preg_match('/^"/', $ap[1])) {
                // strip the surrounding quotes
                $ap[1] = substr($ap[1], 1, strlen($ap[1]) - 2);
            }
            $ct[] = $ap;
        }
    }

    // are we a multipart message?
    if (preg_match('/^multipart/i', $ct[1])) {
        $this->multipart = TRUE;
    }

    return $ct;
}
|
|
|
187 |
|
|
|
188 |
// _get_headers: pulls the headers out of the data and builds the $this->headers array
|
|
|
189 |
/**
 * Extracts the header block from $data into $this->headers.
 *
 * Header names are lower-cased and used as array keys. Reading stops at
 * the first blank (or whitespace-only) line. "From " envelope lines added
 * by the MTA are skipped. A line that starts with a space or tab, or that
 * is not of the form "Name: value", is treated as a continuation and is
 * appended to the previous header, separated by a single space.
 *
 * RFC 2047 encoded-words in the Subject header are decoded when their
 * charset is windows-NNNN, ISO-8859-1, ISO-8859-15 or UTF-8; UTF-8 text is
 * passed through utf8_decode(). Other charsets are left untouched.
 *
 * When no Content-Type header is present "text/plain; charset=us-ascii" is
 * assumed. Unless $simple is set, the Content-Type value is then replaced
 * by the array returned from _ct_parse().
 *
 * @param string $data   Raw message text.
 * @param bool   $simple When TRUE the Content-Type header is left as a string.
 * @return bool Always TRUE.
 */
function _get_headers($data, $simple = FALSE) {
    $last = null; // lower-cased name of the header stored most recently

    foreach (preg_split('/\n/', $data) as $raw) {
        $line = trim($raw);
        if ($line == "") {
            break; // end of the header block
        }

        $folded = (bool) preg_match('/^[ \t]/', $raw);
        if (!$folded) {
            if (preg_match('/^From[^:].*$/', $line)) {
                continue; /* strips out any From lines added by the MTA */
            }
            if (preg_match('/^([^\s:]+):\s*(.*)$/', $line, $hdr)) {
                $last = strtolower($hdr[1]);
                $this->headers[$last] = $hdr[2];
                continue;
            }
        }

        // Continuation of the previous header (like a Received line).
        // Ignored when no header has been seen yet.
        if ($last !== null) {
            $this->headers[$last] = ltrim($this->headers[$last] . ' ' . $line);
        }
    }

    if (isset($this->headers['subject'])) {
        $subject = $this->headers['subject'];

        // Whitespace between two adjacent encoded-words is not part of the text.
        $subject = preg_replace('/(\?=)\s+(=\?)/', '$1$2', $subject);

        preg_match_all(
            '/=\?(windows-[0-9]{4}|ISO-8859-15?|UTF-8)\?(Q|B)\?(.*?)\?=/i',
            $subject,
            $words,
            PREG_SET_ORDER
        );
        foreach ($words as $word) {
            if (strtoupper($word[2]) == 'Q') {
                // In the Q encoding "_" stands for a space; this must be
                // undone before decoding so that "=5F" stays an underscore.
                $decoded = quoted_printable_decode(str_replace('_', ' ', $word[3]));
            } else {
                $decoded = base64_decode($word[3]);
            }
            if (strtoupper($word[1]) == 'UTF-8') {
                // NOTE: utf8_decode() is deprecated as of PHP 8.2.
                $decoded = utf8_decode($decoded);
            }
            $subject = str_replace($word[0], $decoded, $subject);
        }

        $this->headers['subject'] = $subject;
    }

    // sanity anyone?
    if (empty($this->headers['content-type'])) {
        $this->headers['content-type'] = "text/plain; charset=us-ascii";
    }
    if (!$simple) {
        $this->headers['content-type'] = $this->_ct_parse();
    }

    return TRUE;
}
|
|
|
237 |
|
|
|
238 |
// _get_body: pulls the body out of the data and fills $this->body, decoding the data if necessary.
|
|
|
239 |
/**
 * Extracts the body from $data, decodes it and stores it in $this->body.
 *
 * The body is everything after the first blank (or whitespace-only) line,
 * the same rule _get_headers() uses to end the header block. The
 * Content-Transfer-Encoding header is compared case-insensitively and
 * defaults to 7bit. Each body is decoded once, with the decoder matching
 * its encoding, and every encoding except 7bit/unknown is then passed
 * through _cte_8bit().
 *
 * Unless $simple is set and the message was flagged as multipart, the body
 * is split into $this->parts using the boundary parameter of the parsed
 * Content-Type header. When that parameter is missing the message is
 * downgraded to non-multipart and an error text is prepended to the body.
 *
 * @param string $data   Raw message text (headers, blank line, body).
 * @param bool   $simple When TRUE, 8-bit conversion and multipart
 *                       splitting are skipped.
 * @return bool Always TRUE.
 */
function _get_body($data, $simple = FALSE) {
    // Collect every line that follows the header block.
    $body = "";
    $doneheaders = FALSE;
    foreach (preg_split('/\n/', $data) as $line) {
        if ($doneheaders) {
            $body .= $line . "\n";
        } elseif (trim($line) == '') {
            $doneheaders = TRUE;
        }
    }

    $encoding = '7bit';
    if (isset($this->headers['content-transfer-encoding'])) {
        $encoding = strtolower(trim($this->headers['content-transfer-encoding']));
    }

    // now here comes the fun part... decoding.
    switch ($encoding) {
        case 'binary':
            $this->body = $this->_cte_8bit($this->_cte_binary($body), $simple);
            break;

        case 'base64':
            $this->body = $this->_cte_8bit($this->_cte_base64($body), $simple);
            break;

        case 'quoted-printable':
            $this->body = $this->_cte_8bit($this->_cte_qp($body), $simple);
            break;

        case '8bit':
            $this->body = $this->_cte_8bit($body, $simple);
            break;

        case '7bit':    // 7bit doesn't need to be decoded
        default:        // And the fall through as well...
            $this->body = $body;
            break;
    }

    if ($simple) {
        return TRUE;
    }

    // If we are a multipart message then break up the parts and decode them.
    // Each part is a little message in itself, so each one becomes a new
    // parser object.
    if ($this->multipart) {
        $boundary = '';
        $ct = $this->headers['content-type'];
        $total = count($ct);
        for ($i = 2; $i < $total; $i++) {
            if (isset($ct[$i]) && is_array($ct[$i]) && isset($ct[$i][1])
                && preg_match('/boundary/i', $ct[$i][0])) {
                $boundary = $ct[$i][1];
            }
        }

        if ($boundary != '') {
            $this->_get_parts($this->body, $boundary);
        } else {
            // We were told that the message is supposed to be a multipart
            // message, yet the boundary wasn't set in the content type.
            // Mark the message as non multipart and add a message to the
            // top of the body.
            $this->multipart = FALSE;
            $this->body = "PARSER ERROR:\nWHILE PARSING THIS MESSAGE AS A MULTIPART MESSAGE AS DEFINED IN RFC2045 THE BOUNDARY IDENTIFIER WAS NOT FOUND!\nTHIS MESSAGE WILL NOT DISPLAY CORRECTLY!\n\n" . $this->body;
        }
    }

    return TRUE;
}
|
|
|
309 |
|
|
|
310 |
// _get_parts: breaks up $data into parts based on $boundary following the rfc specs
|
|
|
311 |
// detailed in section 5 of RFC2046 (http://www.faqs.org/rfcs/rfc2046.html)
|
|
|
312 |
// After the parts are broken up they are then turned into parser objects and the
|
|
|
313 |
// resulting array of parts is set to $this->parts;
|
|
|
314 |
/**
 * Splits a multipart body into its parts and parses each one.
 *
 * A line equal to "--" . $boundary (ignoring trailing whitespace such as a
 * carriage return) starts a new part; "--" . $boundary . "--" ends the
 * last one. Text before the first delimiter, i.e. the notice meant for
 * mail clients without multipart support, is discarded. Every part is fed
 * to a new ezmlm_parser object, which is stored in $this->parts starting
 * at index 0.
 *
 * @param string $data     Decoded body of the multipart message.
 * @param string $boundary Boundary value taken from the Content-Type header.
 * @return void
 */
function _get_parts($data, $boundary) {
    $delimiter = "--" . $boundary;
    $closing = $delimiter . "--";

    $chunks = array();
    $inpart = -1; // -1 while still inside the preamble

    foreach (preg_split('/\n/', $data) as $line) {
        $marker = rtrim($line);
        if ($marker === $delimiter) {
            // start of a part
            $inpart++;
            $chunks[$inpart] = '';
            continue;
        }
        if ($marker === $closing) {
            // the end of the last part
            break;
        }
        if ($inpart >= 0) {
            $chunks[$inpart] .= $line . "\n";
        }
    }

    foreach ($chunks as $i => $chunk) {
        $part = new ezmlm_parser();
        $part->parse($chunk);
        $this->parts[$i] = $part;
    }
}
|
|
|
333 |
|
|
|
334 |
// _cte_8bit: decode a content transfer encoding of 8bit
|
|
|
335 |
// NOTE: this function is a little bit special. Since the end result will be displayed in
|
|
|
336 |
// a web browser _cte_8bit decodes ASCII characters > 127 (the US-ASCII table) into the
|
|
|
337 |
// html ordinal equivalent, it also ensures that the messages content-type is changed
|
|
|
338 |
// to include text/html if it changes anything...
|
|
|
339 |
/**
 * Converts every byte above 127 into an HTML numeric character reference.
 *
 * The data is processed byte by byte, so a byte with value N > 127 becomes
 * "&#N;". When at least one byte was converted, element [1] of
 * $this->headers['content-type'] is set to 'text/html'.
 *
 * @param string $data   Data to convert.
 * @param bool   $simple When TRUE the data is returned unchanged.
 * @return string The converted data.
 */
function _cte_8bit($data, $simple = FALSE) {
    if ($simple) {
        return $data;
    }

    $data = (string) $data;
    $out = '';
    $changed = FALSE;

    $length = strlen($data);
    for ($i = 0; $i < $length; $i++) {
        $code = ord($data[$i]);
        if ($code > 127) {
            $out .= '&#' . $code . ';';
            $changed = TRUE;
        } else {
            $out .= $data[$i];
        }
    }

    if ($changed) {
        $this->headers['content-type'][1] = 'text/html';
    }

    return $out;
}
|
|
|
350 |
|
|
|
351 |
// _cte_binary: decode a content transfer encoding of binary
|
|
|
352 |
/**
 * Decodes a "binary" content transfer encoding, which needs no decoding.
 *
 * @param string $data Body data.
 * @return string $data, unchanged.
 */
function _cte_binary($data) {
    // binary content is passed through as-is
    return $data;
}
|
|
|
353 |
|
|
|
354 |
// _cte_base64: decode a content transfer encoding of base64
|
|
|
355 |
/**
 * Decodes a "base64" content transfer encoding.
 *
 * @param string $data Base64 encoded body data.
 * @return string|false The result of base64_decode().
 */
function _cte_base64($data) {
    $decoded = base64_decode($data);
    return $decoded;
}
|
|
|
356 |
|
|
|
357 |
// _cte_qp: decode a content transfer encoding of quoted_printable
|
|
|
358 |
/**
 * Decodes a "quoted-printable" content transfer encoding.
 *
 * Delegates to PHP's quoted_printable_decode().
 *
 * @param string $data Quoted-printable encoded body data.
 * @return string The decoded data.
 */
function _cte_qp($data) {
    $decoded = quoted_printable_decode($data);
    return $decoded;
}
|
|
|
362 |
|
|
|
363 |
}
|