Subversion Repositories Applications.papyrus

Rev

Rev 1087 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
448 ddelon 1
<?php
1253 alexandre_ 2
// $Id: ezmlm-parser.php,v 1.2.4.1 2007-03-12 11:22:12 alexandre_tb Exp $
448 ddelon 3
//
4
// ezmlm-parser.php - ezmlm-php v2.0
5
// --------------------------------------------------------------
6
// Contains all the code for parsing messages.
7
// It handles all the necessary decoding, attachments, etc...
8
// Note this does all the parsing itself now, removing the dependency
9
// on the mailparse library (as it looks like it will never make
10
// it into the official inclusion with PHP)...
11
// --------------------------------------------------------------
12
 
13
require_once("ezmlm.php");
14
require_once("Mail/mimeDecode.php") ;
15
// CLASS: ezmlm-parser
16
class ezmlm_parser extends ezmlm_php {
1253 alexandre_ 17
    // Full, untouched headers of the message.
    var $headers;

    // Full, untouched (but decoded) body; this is not $this->parts[0]->body.
    var $body;

    // All the parts of a multipart message; each part is an ezmlm_parser object.
    var $parts;

    // The most frequently accessed headers follow; everything else can be
    // read from the $this->headers array.

    // To:
    var $to;

    // From:
    var $from;

    // Date:
    var $date;

    // Subject:
    var $subject;

    // Reply-To:
    var $replyto;

    // Content-Type:
    var $contenttype;

    // TRUE if the message is a multipart message.
    var $multipart;

    // Name of the file the message was parsed from, when parsed from a file.
    var $msgfile;
33
 
34
	// functions
35
 
36
	// recent_msgs - parses and returns an arbitrary number of the most recent messages
37
	/**
	 * Returns index information about the most recent messages of the archive.
	 *
	 * Looks in $this->listdir/archive/ for the highest-numbered sub-directory and
	 * reads its "index" file. When that file describes fewer than $show messages,
	 * the index of the preceding directory is used to top the result up to $show.
	 * The newest index is always returned in full, so the result may hold more
	 * than $show entries.
	 *
	 * @param int    $show  number of messages wanted (default 20)
	 * @param string $month not used by this implementation; kept for compatibility
	 * @return array message number => array(subject hash, subject, "day month year",
	 *               author hash, author), newest message first; empty when the
	 *               archive cannot be read
	 */
	function recent_msgs($show = 20, $month = "") {
		$archive = $this->listdir . '/archive/';
		$tableau_message = array();

		$repertoire_archive = is_dir($archive) ? opendir($archive) : FALSE;
		if ($repertoire_archive === FALSE) {
			return $tableau_message;
		}

		// Only purely numeric directory names are archive buckets; keep the highest one.
		$dernier_repertoire = 0;
		while (false !== ($item = readdir($repertoire_archive))) {
			if (preg_match('/^[0-9]+$/', $item) && (int) $item > $dernier_repertoire) {
				$dernier_repertoire = (int) $item;
			}
		}
		closedir($repertoire_archive);

		// Each message occupies two consecutive lines of the index file.
		$index = $archive . $dernier_repertoire . '/index';
		$contenu = is_file($index) ? file_get_contents($index) : FALSE;
		if ($contenu !== FALSE) {
			$lignes = explode("\n", $contenu);
			$total = count($lignes);
			for ($i = 0; $i < $total; $i += 2) {
				$entree = $this->_index_entry($lignes[$i], isset($lignes[$i + 1]) ? $lignes[$i + 1] : '');
				if ($entree !== FALSE) {
					$tableau_message[$entree[0]] = $entree[1];
				}
			}
		}

		// Newest messages first.
		$tableau_message = array_reverse($tableau_message, true);

		// Not enough messages and an older index exists: read it backwards and
		// append its newest entries until $show messages have been collected.
		if (count($tableau_message) < $show && $dernier_repertoire > 0) {
			$index = $archive . ($dernier_repertoire - 1) . '/index';
			$contenu = is_file($index) ? file_get_contents($index) : FALSE;
			if ($contenu !== FALSE) {
				$lignes = array_reverse(explode("\n", preg_replace('/\n$/', '', $contenu)));
				$total = count($lignes);
				// Reversed file: the author/date line now comes before the subject line.
				for ($i = 0; $i + 1 < $total && count($tableau_message) < $show; $i += 2) {
					$entree = $this->_index_entry($lignes[$i + 1], $lignes[$i]);
					if ($entree !== FALSE) {
						$tableau_message[$entree[0]] = $entree[1];
					}
				}
			}
		}

		return ($tableau_message) ;
	}

	/**
	 * Parses the two index lines describing one message.
	 *
	 * @param string $ligne_sujet  first line: message number, subject hash and subject
	 * @param string $ligne_auteur second line: date, author hash and author
	 * @return array|false array(message number, entry) or FALSE when the first line
	 *                     does not describe a message
	 */
	function _index_entry($ligne_sujet, $ligne_auteur) {
		if (!preg_match('/([0-9]+): ([a-z]+) (.*)/', (string) $ligne_sujet, $match)) {
			return FALSE;
		}
		// Missing captures default to empty strings instead of undefined offsets.
		$second = array('', '', '', '', '', '', '');
		if (preg_match('/\t([0-9]+) ([a-zA-Z][a-zA-Z][a-zA-Z]) ([0-9][0-9][0-9][0-9]) ([^;]+);(.*) (.*)/',
				(string) $ligne_auteur, $trouve)) {
			$second = $trouve;
		}
		return array($match[1], array($match[2], $match[3],
				$second[1].' '.$second[2].' '.$second[3],
				$second[5],
				$second[6]));
	}
110
 
111
 
112
	// parse_file - opens a file and feeds the data to parse, file can be relative to the listdir
113
	/**
	 * Reads a message file and hands its contents to parse().
	 *
	 * $file is tried as given, then relative to $this->listdir, then relative to
	 * $this->listdir/archive/.
	 *
	 * @param string $file   path of the message file
	 * @param bool   $simple passed through to parse()
	 * @return bool FALSE when the file cannot be found or read, otherwise the result of parse()
	 */
	function parse_file($file,$simple = FALSE) {
		if (!is_file($file)) {
			if (is_file($this->listdir . "/" . $file)) { $file = $this->listdir . "/" . $file; }
			else if (is_file($this->listdir . "/archive/" . $file)) { $file = $this->listdir . "/archive/" . $file; }
			else { return FALSE; }
		}

		$this->msgfile = $file;

		// A failed fopen() followed by a feof() loop could spin forever; read the
		// file in one call and bail out cleanly when that fails.
		$data = file_get_contents($file);
		if ($data === FALSE) { return FALSE; }

		return $this->parse($data,$simple);
	}
127
 
128
    // parse_file_headers - opens a file and decodes it with Mail_mimeDecode
129
	/**
	 * Reads a message file and decodes it with Mail_mimeDecode.
	 *
	 * $file is tried as given, then relative to $this->listdir, then relative to
	 * $this->listdir/archive/. Note that this method returns the result of
	 * Mail_mimeDecode::decode(), not a boolean, and does not fill $this->headers.
	 *
	 * @param string $file   path of the message file
	 * @param bool   $simple not used; kept for compatibility with parse_file()
	 * @return mixed FALSE when the file cannot be found or read, otherwise the
	 *               value returned by Mail_mimeDecode::decode()
	 */
	function parse_file_headers($file,$simple = FALSE) {
		if (!is_file($file)) {
			if (is_file($this->listdir . "/" . $file)) { $file = $this->listdir . "/" . $file; }
			else if (is_file($this->listdir . "/archive/" . $file)) { $file = $this->listdir . "/archive/" . $file; }
			else { return FALSE; }
		}

		$this->msgfile = $file;

		// The file used to be read twice, and the code after the return was unreachable.
		$message = file_get_contents($file) ;
		if ($message === FALSE) { return FALSE; }

		$mimeDecode = new Mail_mimeDecode($message) ;
		return $mimeDecode->decode() ;
	}
148
 
149
	// this does all of the work (well it calls two functions that do all the work :)
150
	// all the decoding and part breaking follows RFC2045 (http://www.faqs.org/rfcs/rfc2045.html)
151
	/**
	 * Parses a complete raw message: headers first, then the body.
	 *
	 * @param string $data   the raw message
	 * @param bool   $simple passed through to _get_headers() and _get_body()
	 * @return bool TRUE when both steps succeed, FALSE otherwise
	 */
	function parse($data,$simple = FALSE) {
		// The body is only looked at once the headers have been accepted.
		if (!$this->_get_headers($data, $simple)) {
			return FALSE;
		}
		if (!$this->_get_body($data, $simple)) {
			return FALSE;
		}
		return TRUE;
	}
156
 
157
	// all of these are internal functions, you shouldn't call them directly...
158
 
159
	// _ct_parse: parse Content-Type headers -> $ct[0] = Full header, $ct[1] = Content-Type, $ct[2] ... $ct[n] = AP's
160
	/**
	 * Parses $this->headers['content-type'] into its components.
	 *
	 * Sets $this->multipart to TRUE when the media type starts with "multipart".
	 *
	 * @return array [0] = the full header as stored, [1] = the media type,
	 *               [2..n] = one array(name, value) per parameter, with the
	 *               surrounding double quotes removed from quoted values
	 */
	function _ct_parse() {
		$full = $this->headers['content-type'];

		// Strip rfc822 comments, but only outside quoted strings: a quoted
		// boundary may legitimately contain parentheses. With DELIM_CAPTURE the
		// quoted strings end up at the odd indexes and are left untouched.
		$pieces = preg_split('/("[^"]*")/', $full, -1, PREG_SPLIT_DELIM_CAPTURE);
		foreach ($pieces as $k => $piece) {
			if ($k % 2 == 0) { $pieces[$k] = preg_replace('/\([^()]*\)/', '', $piece); }
		}
		$instr = trim(implode('', $pieces));

		// Drop a leading "Content-Type: " if the header name is still present.
		if (preg_match('/: /', $instr)) {
			$halves = preg_split('/:/', $instr, 2);
			$instr = trim($halves[1]);
		}

		// Media type first, then every parameter (not just the first one).
		$segments = explode(';', $instr, 2);
		$ct = array($full, trim($segments[0]));
		$params = isset($segments[1]) ? $segments[1] : '';

		// name=value pairs separated by ';' or whitespace; a quoted value may
		// itself contain spaces or semicolons.
		if (preg_match_all('/([^\s;=]+)\s*=\s*("[^"]*"|[^\s;]*)/', $params, $found, PREG_SET_ORDER)) {
			foreach ($found as $attr) {
				$value = $attr[2];
				if (substr($value, 0, 1) == '"') { $value = trim($value, '"'); }
				$ct[] = array($attr[1], $value);
			}
		}

		// are we a multipart message?
		if (preg_match('/^multipart/i', $ct[1])) { $this->multipart = TRUE; }

		return $ct;
	}
187
 
188
	// _get_headers: pulls the headers out of the data and builds the $this->headers array
189
	/**
	 * Extracts the header block of a raw message into $this->headers.
	 *
	 * Header names are lower-cased and used as keys. Parsing stops at the first
	 * blank line. MIME encoded words in the subject are decoded in place. A
	 * missing Content-Type defaults to "text/plain; charset=us-ascii" and, unless
	 * $simple is set, the Content-Type is replaced by the array from _ct_parse().
	 *
	 * @param string $data   the raw message
	 * @param bool   $simple when TRUE the Content-Type header is left as a string
	 * @return bool always TRUE
	 */
	function _get_headers($data,$simple = FALSE) {
		$last = NULL;
		foreach (preg_split('/\n/', $data) as $raw) {
			$val = trim($raw);
			if ($val == "") break;

			// A line starting with whitespace continues the previous header even
			// when it happens to contain ": " itself.
			$folded = preg_match('/^[ \t]/', $raw);
			if (!$folded && preg_match('/^From[^:].*$/', $val)) continue;	/* strips out any From lines added by the MTA */

			$hdr = preg_split('/: /', $val, 2);
			if ($folded || count($hdr) == 1) {
				// continuation of the last header (like a received-from line);
				// ignored when no header has been seen yet
				if ($last !== NULL) { $this->headers[$last] .= $val; }
			} else {
				$last = strtolower($hdr[0]);
				$this->headers[$last] = $hdr[1];
			}
		}

		// Decode MIME encoded words (=?charset?Q|B?text?=) found in the subject.
		// NOTE: this would be better done by the caller just before display.
		$subject = isset($this->headers['subject']) ? $this->headers['subject'] : '';
		if ($subject != '') {
			if (preg_match ('/windows-[0-9][0-9][0-9][0-9]/', $subject, $nombre)) {
				$reg_exp = $nombre[0] ;
			} else {
				$reg_exp = 'ISO-8859-15?' ;	// regex fragment: matches ISO-8859-1 and ISO-8859-15
			}
			if (preg_match ('/UTF/i', $subject)) $reg_exp = 'UTF-8' ;

			preg_match_all ("/=\?$reg_exp\?(Q|B)\?(.*?)\?=/i", $subject, $match, PREG_PATTERN_ORDER)  ;
			$total = count ($match[0]) ;
			for ($i = 0; $i < $total; $i++ ) {
				// The pattern is case-insensitive, so the encoding letter may be lower case.
				if (strtoupper ($match[1][$i]) == 'Q') {
					// In Q encoding "_" stands for a space; translate it before
					// decoding so that an encoded underscore (=5F) survives.
					$decode = quoted_printable_decode (str_replace ('_', ' ', $match[2][$i])) ;
				} else {
					$decode = base64_decode ($match[2][$i]) ;
				}
				if ($reg_exp == 'UTF-8') {
					// utf8_decode() is deprecated as of PHP 8.2; use mbstring when it is loaded.
					$decode = function_exists ('mb_convert_encoding')
						? mb_convert_encoding ($decode, 'ISO-8859-1', 'UTF-8')
						: utf8_decode ($decode) ;
				}
				$subject = str_replace ($match[0][$i], $decode, $subject) ;
			}
			$this->headers['subject'] = $subject ;
		}

		// sanity anyone?
		if (empty($this->headers['content-type'])) { $this->headers['content-type'] = "text/plain; charset=us-ascii"; }
		if (!$simple) { $this->headers['content-type'] = $this->_ct_parse(); }

		return TRUE;
	}
237
 
238
	// _get_body: pulls the body out of the data and fills $this->body, decoding the data if necessary.
239
	/**
	 * Extracts the body of a raw message into $this->body.
	 *
	 * The body is everything after the first blank line. It is decoded according
	 * to the Content-Transfer-Encoding header and, unless $simple is set, a
	 * multipart message is split into $this->parts via _get_parts().
	 *
	 * @param string $data   the raw message (headers included)
	 * @param bool   $simple when TRUE, 8bit-to-HTML conversion and multipart
	 *                       splitting are skipped
	 * @return bool always TRUE
	 */
	function _get_body($data,$simple = FALSE) {
		$doneheaders = FALSE;
		$body = "";
		foreach (preg_split('/\n/', $data) as $val) {
			if (!$doneheaders) {
				// trim() so that the blank line of a CRLF message ("\r") is recognised,
				// in agreement with _get_headers()
				if (trim($val) == '') { $doneheaders = TRUE; }
				continue;
			}
			$body .= $val . "\n";
		}

		// now here comes the fun part... decoding.
		// The encoding name is case-insensitive and the header may be absent.
		$encoding = isset($this->headers['content-transfer-encoding'])
			? strtolower(trim($this->headers['content-transfer-encoding']))
			: '';
		switch($encoding) {
			case 'binary':
				// Encodings are never nested: running quoted-printable decoding over
				// binary or base64-decoded data would corrupt any literal "=XX" sequence.
				$this->body = $this->_cte_8bit($this->_cte_binary($body),$simple);
				break;

			case 'base64':
				$this->body = $this->_cte_8bit($this->_cte_base64($body),$simple);
				break;

			case 'quoted-printable':
				$this->body = $this->_cte_8bit($this->_cte_qp($body),$simple);
				break;

			case '8bit':
				$this->body = $this->_cte_8bit($body,$simple);
				break;

			case '7bit':		// 7bit doesn't need to be decoded
			default:		// And the fall through as well...
				$this->body = $body;
				break;
		}

		if ($simple) { return TRUE; }

		// if we are a multipart message then break up the parts and decode, set the appropriate variables.
		// since each part is really a little message in itself, each part becomes a new parser object.
		if ($this->multipart) {

			// Parameters live at index 2 and above of the parsed Content-Type.
			$boundary = '';
			$ct = is_array($this->headers['content-type']) ? $this->headers['content-type'] : array();
			foreach ($ct as $i => $attr) {
				if ($i >= 2 && is_array($attr) && isset($attr[1]) && preg_match('/boundary/i', $attr[0])) {
					$boundary = $attr[1];
				}
			}

			if ($boundary != '') {
				$this->_get_parts($this->body,$boundary);
			} else {
				// we were told that the message is supposed to be a multipart message,
				// yet the boundary wasn't set in the content type. mark the message as
				// non multipart and add a message to the top of the body.
				$this->multipart = FALSE;
				$this->body = "PARSER ERROR:\nWHILE PARSING THIS MESSAGE AS A MULTIPART MESSAGE AS DEFINED IN RFC2045 THE BOUNDARY IDENTIFIER WAS NOT FOUND!\nTHIS MESSAGE WILL NOT DISPLAY CORRECTLY!\n\n" . $this->body;
			}
		}

		return TRUE;
	}
309
 
310
	// _get_parts: breaks up $data into parts based on $boundary following the rfc specs
311
	// detailed in section 5 of RFC2046 (http://www.faqs.org/rfcs/rfc2046.html)
312
	// After the parts are broken up they are then turned into parser objects and the
313
	// resulting array of parts is set to $this->parts;
314
	/**
	 * Splits a multipart body into its parts and parses each one.
	 *
	 * Lines before the first boundary (the notice for mail clients without
	 * multipart support) and lines after the closing boundary are discarded.
	 * Every part becomes an ezmlm_parser object stored in $this->parts, indexed
	 * from 0 in order of appearance.
	 *
	 * @param string $data     the decoded body of the multipart message
	 * @param string $boundary the boundary parameter of the Content-Type header
	 * @return void
	 */
	function _get_parts($data,$boundary) {
		$delimiter = "--" . $boundary;
		$closing = $delimiter . "--";

		$inpart = -1;
		$parts = array();
		foreach (preg_split('/\n/', $data) as $val) {
			// Boundary lines may carry a trailing CR or padding whitespace.
			$marker = rtrim($val);
			if ($marker === $delimiter) { $inpart++; $parts[$inpart] = ''; continue; } // start of a part
			if ($marker === $closing) { break; } // the end of the last part
			if ($inpart < 0) { continue; } // still inside the preamble
			$parts[$inpart] .= $val . "\n";
		}

		// Iterate over the parts actually collected, so the last part is kept
		// whether or not the message had a preamble.
		$this->parts = array();
		foreach ($parts as $i => $raw) {
			$part = new ezmlm_parser();
			$part->parse($raw);
			$this->parts[$i] = $part;
		}
	}
333
 
334
	// _cte_8bit: decode a content transfer encoding of 8bit
335
	// NOTE: this function is a little bit special. Since the end result will be displayed in
336
	// a web browser _cte_8bit decodes ASCII characters > 127 (the US-ASCII table) into the
337
	// html ordinal equivalent, it also ensures that the message's content-type is changed
338
	// to include text/html if it changes anything...
339
	/**
	 * Converts every byte above 127 into its numeric HTML entity (&#NNN;).
	 *
	 * When at least one byte was converted and the Content-Type header has been
	 * parsed into an array, its media type is switched to text/html.
	 *
	 * @param string $data   the data to convert
	 * @param bool   $simple when TRUE the data is returned unchanged
	 * @return string the converted data
	 */
	function _cte_8bit($data,$simple = FALSE) {
		if ($simple) { return $data; }

		$data = (string) $data;
		$out = '';
		$changed = FALSE;
		$length = strlen($data);
		// Plain byte-wise walk; no need to split the string into an array first.
		for ($i = 0; $i < $length; $i++) {
			$code = ord($data[$i]);
			if ($code > 127) { $out .= '&#' . $code . ';'; $changed = TRUE; }
			else { $out .= $data[$i]; }
		}

		// Never write into the header while it is still a plain string: that would
		// overwrite one of its characters instead of setting the media type.
		if ($changed && (!isset($this->headers['content-type']) || is_array($this->headers['content-type']))) {
			$this->headers['content-type'][1] = 'text/html';
		}
		return $out;
	}
350
 
351
	// _cte_binary: decode a content transfer encoding of binary
352
	/**
	 * Decodes a body whose content transfer encoding is "binary".
	 *
	 * @param string $data the body
	 * @return string the body, unchanged
	 */
	function _cte_binary($data) {
		// Nothing to undo: the data is handed back exactly as received.
		return $data;
	}
353
 
354
	// _cte_base64: decode a content transfer encoding of base64
355
	/**
	 * Decodes a body whose content transfer encoding is "base64".
	 *
	 * @param string $data the base64 encoded body
	 * @return string|false the result of base64_decode()
	 */
	function _cte_base64($data) {
		$decoded = base64_decode($data);
		return $decoded;
	}
356
 
357
	// _cte_qp: decode a content transfer encoding of quoted_printable
358
	/**
	 * Decodes a body whose content transfer encoding is "quoted-printable".
	 *
	 * @param string $data the quoted-printable encoded body
	 * @return string the result of quoted_printable_decode()
	 */
	function _cte_qp($data) {
		// PHP's built-in decoder is used for now; it seems to work well enough.
		$decoded = quoted_printable_decode($data);
		return $decoded;
	}
362
 
363
}