Subversion Repositories Applications.papyrus

Rev

Rev 448 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
448 ddelon 1
<?php
474 alexandre_ 2
// $Id: ezmlm-parser.php,v 1.2 2005-09-27 16:43:08 alexandre_tb Exp $
448 ddelon 3
//
4
// ezmlm-parser.php - ezmlm-php v2.0
5
// --------------------------------------------------------------
6
// Contains all the code for parsing messages.
7
// It handles all the necessary decoding, attachments, etc...
8
// Note this does all the parsing itself now, removing the dependency
9
// on the mailparse library (as it looks like it will never make
10
// it into the official inclusion with PHP)...
11
// --------------------------------------------------------------
12
 
13
require_once("ezmlm.php");
14
require_once("Mail/mimeDecode.php") ;
15
// CLASS: ezmlm-parser
16
class ezmlm_parser extends ezmlm_php {
17
        var $headers;           // headers keyed by lower-cased name, filled by _get_headers(); 'subject' is stored decoded and, unless $simple, 'content-type' holds the _ct_parse() array
18
        var $body;              // the full (decoded) body, set by _get_body() (this is not $this->parts[0]->body)
19
	var $parts;		// the parts of a multipart message, set by _get_parts(); each part is an ezmlm_parser object
20
 
21
        // Convenience slots for the most accessed headers; everything else can be
22
        // read from the $this->headers array.
        // NOTE(review): no method of this class assigns these six properties - confirm they are filled elsewhere.
23
        var $to;                // To:
24
        var $from;              // From:
25
        var $date;              // Date:
26
        var $subject;           // Subject:
27
        var $replyto;           // Reply-To:
28
        var $contenttype;       // Content-Type:
29
 
30
	var $multipart;		// TRUE if the message is a multipart message (set by _ct_parse(), reset to FALSE by _get_body() when no boundary is found)
31
 
32
	var $msgfile;		// if parsed from a file, this is the filename (set by parse_file() and parse_file_headers())
33
 
34
	// functions
35
 
36
	// recent_msgs - parses and returns an arbitrary number of the most recent messages
37
	// Returns up to $show of the most recent archived messages, newest first.
	//
	// How the newest message is located:
	//   1. archive/threads/<YYYYMM> is the thread index of a month; if the requested
	//      month has none, we step back one month at a time until one exists.
	//   2. the last line of that index ("num:hash [count] subject") yields a subject hash.
	//   3. the last line of archive/subjects/<hh>/<rest of hash>
	//      ("msgnum:yearmonth:authorhash Author Name") yields a message number.
	//   4. messages live in archive/<msgnum div 100>/<two digit file>; we look for the
	//      first free slot of that directory and read backwards from there.
	//
	// $show   maximum number of messages; '' means "as many as the thread index has lines".
	// $month  thread index to start from (YYYYMM); '' means the current month.
	//
	// Returns an array of objects produced by Mail_mimeDecode::decode(), each with two
	// extra properties: msgfile (path of the message) and nummessage (directory number
	// followed by the two-digit file name). Returns an empty array when no usable
	// thread/subject index can be found.
	function recent_msgs($show = 20, $month = "") {
		if ($month == "") { $month = date("Ym"); }
		$threadyear = (int) substr($month, 0, 4);
		$threadmonth = (int) substr($month, 4, 2);
		$threaddir = $this->listdir . "/archive/threads/";
		$archivedir = $this->listdir . "/archive/";

		// Step back month by month until a thread index exists. The year floor keeps
		// an empty or missing archive from walking back forever.
		while (!is_file($threaddir . $month)) {
			$threadmonth--;
			if ($threadmonth < 1) {
				$threadmonth = 12;
				$threadyear--;
			}
			if ($threadyear < 1990) { return array(); }
			$month = sprintf("%04d%02d", $threadyear, $threadmonth);
		}

		// One pass over the thread index gives both its last entry and its line count.
		list($curthread, $linecount) = $this->_read_last_line($threaddir . $month);
		if (!preg_match("/^[0-9]*\:([a-z]*) \[.*/", $curthread, $found) || strlen($found[1]) < 3) {
			return array();
		}
		// keep the first two letters of the hash, a slash, then the rest of the hash
		$subjectfile = substr($found[1], 0, 2) . "/" . substr($found[1], 2, 18);

		list($cursubject, $ignored) = $this->_read_last_line($archivedir . "subjects/" . $subjectfile);
		if ($cursubject === '') { return array(); }

		$fields = explode(":", $cursubject);
		$msgnum = (int) $fields[0];
		$msgdir = (int) ($msgnum / 100); // messages are stored 100 per directory

		// Find the first free slot of that directory (message 0 does not exist, so
		// directory 0 starts at file "01").
		for ($i = 0; $i <= 99; $i++) {
			if (($msgdir == 0) and ($i == 0)) { $i++; }
			if (!is_file($archivedir . $msgdir . "/" . sprintf("%02d", $i))) { break; }
		}

		// No limit given: fall back to the number of lines of the thread index.
		if ($show == '') { $show = $linecount; }

		$msgfiles = array();
		$numshown = 0;
		while ($numshown < $show) {
			$i--;
			if ($i < 0) {
				// wrapped below file "00": continue with the previous directory
				$i = 99;
				$msgdir--;
				if ($msgdir < 0) { break; }
			}
			$slot = sprintf("%02d", $i);
			$msgfile = $archivedir . $msgdir . "/" . $slot;
			if (!is_file($msgfile)) { return $msgfiles; }

			$message = file_get_contents($msgfile);
			if ($message === FALSE) { continue; } // unreadable message: skip it

			$mimeDecode = new Mail_mimeDecode($message);
			$mailDecode = $mimeDecode->decode();
			$mailDecode->msgfile = $msgfile;
			// directory number + zero padded file name, e.g. directory 1, file 05 -> "105"
			$mailDecode->nummessage = $msgdir . $slot;
			$msgfiles[] = $mailDecode;
			$numshown++;
		}

		return $msgfiles;
	}

	// _read_last_line: internal helper of recent_msgs.
	// Reads $path line by line and returns array(last non-blank line with surrounding
	// whitespace removed, number of lines read). Returns array('', 0) when the file
	// cannot be opened.
	function _read_last_line($path) {
		$last = '';
		$count = 0;
		$fd = is_file($path) ? fopen($path, "r") : FALSE;
		if (!$fd) { return array($last, $count); }
		while (($line = fgets($fd)) !== FALSE) {
			$count++;
			$trimmed = trim($line);
			if ($trimmed !== '') { $last = $trimmed; }
		}
		fclose($fd);
		return array($last, $count);
	}
136
 
137
 
138
	// parse_file - opens a file and feeds the data to parse, file can be relative to the listdir
139
	// Reads a message file and hands its content to parse().
	// $file may be a usable path as given, or relative to $this->listdir, or relative
	// to $this->listdir . "/archive" (tried in that order).
	// Returns FALSE when the file cannot be found or read, otherwise the result of parse().
	function parse_file($file,$simple = FALSE) {
		$candidates = array(
			$file,
			$this->listdir . "/" . $file,
			$this->listdir . "/archive/" . $file
		);
		$found = FALSE;
		foreach ($candidates as $candidate) {
			if (is_file($candidate)) { $found = $candidate; break; }
		}
		if ($found === FALSE) { return FALSE; }

		$this->msgfile = $found;
		$data = file_get_contents($found);
		// an unreadable file must not reach the parser
		if ($data === FALSE) { return FALSE; }
		return $this->parse($data,$simple);
	}
153
 
154
    // parse_file_headers - ouvre un fichier et analyse les entêtes
155
	// Opens a message file and decodes it with Mail_mimeDecode.
	// $file is resolved like in parse_file(): as given, then relative to
	// $this->listdir, then relative to $this->listdir . "/archive".
	// $simple is accepted for signature compatibility but not used.
	// Returns FALSE when the file cannot be found or read, otherwise whatever
	// Mail_mimeDecode::decode() returns (despite the method name this is the decoded
	// message structure, not a boolean).
	function parse_file_headers($file,$simple = FALSE) {
		if (!is_file($file)) {
			if (is_file($this->listdir . "/" . $file)) { $file = $this->listdir . "/" . $file; }
			else if (is_file($this->listdir . "/archive/" . $file)) { $file = $this->listdir . "/archive/" . $file; }
			else { return FALSE; }
		}

		$this->msgfile = $file;
		$message = file_get_contents($file);
		if ($message === FALSE) { return FALSE; }

		$mimeDecode = new Mail_mimeDecode($message);
		return $mimeDecode->decode();
	}
174
 
175
	// this does all of the work (well it calls two functions that do all the work :)
176
	// all the decoding and part breaking follows RFC 2045 (http://www.faqs.org/rfcs/rfc2045.html)
177
	// Parses a raw message: headers first, then (only if that succeeded) the body.
	// Returns TRUE when both steps report success, FALSE otherwise.
	function parse($data,$simple = FALSE) {
		if (!$this->_get_headers($data, $simple)) {
			return FALSE;
		}
		if (!$this->_get_body($data, $simple)) {
			return FALSE;
		}
		return TRUE;
	}
182
 
183
	// all of these are internal functions, you shouldn't call them directly...
184
 
185
	// _ct_parse: parse Content-Type headers -> $ct[0] = Full header, $ct[1] = Content-Type, $ct[2] ... $ct[n] = AP's
186
	// Parses $this->headers['content-type'] and returns an array:
	//   [0]      the header value exactly as stored
	//   [1]      the media type (e.g. "multipart/mixed")
	//   [2..n]   one array(name, value) per parameter, in order of appearance;
	//            surrounding double quotes are removed from the value and a
	//            parameter without "=" gets an empty value
	// Side effect: sets $this->multipart to TRUE when the media type starts with "multipart".
	// Limitation kept from the original: parameters are split on ";" and whitespace,
	// so a quoted value containing either of them is not handled.
	function _ct_parse() {
		$raw = $this->headers['content-type'];

		// strip rfc822 comments, but only outside of quoted strings: "(" and ")" are
		// legal inside a quoted boundary and must survive
		$pieces = preg_split('/("[^"]*")/', $raw, -1, PREG_SPLIT_DELIM_CAPTURE);
		for ($p = 0; $p < count($pieces); $p += 2) {	// even slots are the unquoted text
			$pieces[$p] = preg_replace('/\([^()]*\)/', '', $pieces[$p]);
		}
		$instr = implode('', $pieces);

		// the value may still carry its "Content-Type: " prefix
		if (preg_match('/: /', $instr)) {
			$halves = preg_split('/:/', trim($instr), 2);
			$segments = preg_split('/;/', trim($halves[1]));
		} else {
			$segments = preg_split('/;/', trim($instr));
		}

		$ct = array($raw, trim($segments[0]));

		// every segment after the media type may hold parameters
		for ($s = 1; $s < count($segments); $s++) {
			foreach (preg_split('/[\s\n]/', $segments[$s]) as $val) {
				if ($val === '') { continue; }
				$ap = preg_split('/=/', $val, 2);
				$value = isset($ap[1]) ? $ap[1] : '';
				if (substr($value, 0, 1) == '"') {
					$value = (string) substr($value, 1);
					// drop the closing quote only if it is really there
					if (substr($value, -1) == '"') { $value = (string) substr($value, 0, -1); }
				}
				$ct[] = array($ap[0], $value);
			}
		}

		// are we a multipart message?
		if (preg_match('/^multipart/i', $ct[1])) { $this->multipart = TRUE; }

		return $ct;
	}
213
 
214
	// _get_headers: pulls the headers out of the data and builds the $this->headers array
215
	// Fills $this->headers (keyed by lower-cased header name) from the header section
	// of $data, i.e. everything up to the first blank line.
	//  - "From " envelope lines added by the MTA are skipped.
	//  - a line starting with a space or tab (or a line that is not "Name: value")
	//    continues the previous header and is appended to it after a single space.
	//  - RFC 2047 encoded words of the subject are decoded in place (see below).
	//  - a missing content-type defaults to "text/plain; charset=us-ascii"; unless
	//    $simple is set the content-type entry is replaced by the _ct_parse() array.
	// Always returns TRUE.
	function _get_headers($data,$simple = FALSE) {
		$last = '';
		foreach (preg_split('/\n/', $data) as $raw) {
			$val = trim($raw);
			if ($val === "") break;

			$folded = ($raw[0] == ' ' || $raw[0] == "\t");
			if (!$folded) {
				if (preg_match('/^From[^:].*$/', $val)) continue;	/* strips out any From lines added by the MTA */
				if (preg_match('/^([^\s:]+):\s*(.*)$/', $val, $hdr)) {
					$last = strtolower($hdr[1]);
					$this->headers[$last] = $hdr[2];
					continue;
				}
			}
			// this is a continuation of the last header (like a received from line);
			// nothing to continue if no header has been seen yet
			if ($last !== '') { $this->headers[$last] .= ' ' . $val; }
		}

		// Decode the encoded words of the subject ("=?charset?Q|B?text?=").
		// Only one charset per subject is handled: UTF-8 if the subject mentions UTF,
		// else a windows-NNNN code page if present, else ISO-8859-1/ISO-8859-15.
		// NOTE(review): this belongs to the display layer and should move there.
		if (isset($this->headers['subject'])) {
			$subject = $this->headers['subject'];
			if (preg_match('/windows-[0-9][0-9][0-9][0-9]/i', $subject, $nombre)) {
				$reg_exp = $nombre[0];
			} else {
				$reg_exp = 'ISO-8859-15?';
			}
			if (preg_match('/UTF/i', $subject)) $reg_exp = 'UTF-8';

			preg_match_all("/=\?$reg_exp\?(Q|B)\?(.*?)\?=/i", $subject, $match, PREG_PATTERN_ORDER);
			for ($i = 0; $i < count($match[0]); $i++) {
				// the encoding letter may be lower case
				if (strtoupper($match[1][$i]) == 'Q') {
					// in Q encoding "_" stands for a space; translate it before decoding
					// so that an encoded "=5F" stays an underscore
					$decode = quoted_printable_decode(str_replace('_', ' ', $match[2][$i]));
				} else {
					$decode = base64_decode($match[2][$i]);
				}
				if ($reg_exp == 'UTF-8') {
					// NOTE(review): utf8_decode() is deprecated as of PHP 8.2
					$decode = utf8_decode($decode);
				}
				$subject = str_replace($match[0][$i], $decode, $subject);
			}
			$this->headers['subject'] = $subject;
		}

		// sanity anyone?
		if (empty($this->headers['content-type'])) { $this->headers['content-type'] = "text/plain; charset=us-ascii"; }
		if (!$simple) { $this->headers['content-type'] = $this->_ct_parse(); }

		return TRUE;
	}
263
 
264
	// _get_body: pulls the body out of the data and fills $this->body, decoding the data if necessary.
265
	// Extracts the body (everything after the first blank line of $data), decodes it
	// according to the content-transfer-encoding header and stores it in $this->body.
	// Unless $simple is set and if the message is multipart, the body is then split
	// into parts with _get_parts(); when no boundary parameter is present the message
	// is marked as non multipart and an error notice is prepended to the body.
	// Expects _get_headers() to have run before. Always returns TRUE.
	function _get_body($data,$simple = FALSE) {
		$body = "";
		$doneheaders = FALSE;
		foreach (preg_split('/\n/', $data) as $val) {
			if ($doneheaders) {
				$body .= $val . "\n";
			} else if (trim($val) === '') {
				// same end-of-headers test as _get_headers(), so "\r\n" messages work too
				$doneheaders = TRUE;
			}
		}

		// now here comes the fun part... decoding.
		// encoding names are case-insensitive (RFC 2045), a missing header means 7bit
		$encoding = '';
		if (isset($this->headers['content-transfer-encoding'])) {
			$encoding = strtolower(trim($this->headers['content-transfer-encoding']));
		}
		switch ($encoding) {
			case 'binary':
				$this->body = $this->_cte_8bit($this->_cte_binary($body),$simple);
				break;

			case 'base64':
				// the decoded payload is final: it must not be quoted-printable decoded again
				$this->body = $this->_cte_8bit($this->_cte_base64($body),$simple);
				break;

			case 'quoted-printable':
				$this->body = $this->_cte_8bit($this->_cte_qp($body),$simple);
				break;

			case '8bit':
				$this->body = $this->_cte_8bit($body,$simple);
				break;

			case '7bit':		// 7bit doesn't need to be decoded
			default:		// And the fall through as well...
				$this->body = $body;
				break;
		}
		// NOTE(review): _cte_8bit() is also applied to decoded non-text parts, which
		// rewrites their bytes > 127 - confirm this is wanted for attachments.

		if ($simple) { return TRUE; }

		// if we are a multipart message then break up the parts and decode, set the appropriate variables.
		// since each part is just really a little message in itself, each part becomes a new
		// parser object and all the wheels turn again... :)
		if ($this->multipart) {
			$boundary = '';
			$ct = $this->headers['content-type'];
			// slots 2..n-1 hold the array(name, value) parameters
			for ($i = 2; $i < count($ct); $i++) {
				if (is_array($ct[$i]) && isset($ct[$i][1]) && preg_match('/boundary/i', $ct[$i][0])) {
					$boundary = $ct[$i][1];
				}
			}
			if ($boundary != '') {
				$this->_get_parts($this->body,$boundary);
			} else {
				// whoopps... something's not right here. we were told that the message is supposed
				// to be a multipart message, yet the boundary wasn't set in the content type.
				// mark the message as non multipart and add a message to the top of the body.
				$this->multipart = FALSE;
				$this->body = "PARSER ERROR:\nWHILE PARSING THIS MESSAGE AS A MULTIPART MESSAGE AS DEFINED IN RFC2045 THE BOUNDARY IDENTIFIER WAS NOT FOUND!\nTHIS MESSAGE WILL NOT DISPLAY CORRECTLY!\n\n" . $this->body;
			}
		}

		return TRUE;
	}
335
 
336
	// _get_parts: breaks up $data into parts based on $boundary following the rfc specs
337
	// detailed in section 5 of RFC2046 (http://www.faqs.org/rfcs/rfc2046.html)
338
	// After the parts are broken up they are then turned into parser objects and the
339
	// resulting array of parts is set to $this->parts;
340
	// Splits $data on the MIME $boundary and stores one ezmlm_parser object per part
	// in $this->parts[0..n-1].
	// Text found before the first boundary line (the notice for mail clients that do
	// not support multipart) is collected under index -1 and not turned into a part.
	// Everything after the closing "--boundary--" line is ignored.
	function _get_parts($data,$boundary) {
		$inpart = -1;
		$parts = array();
		$open = "--" . $boundary;
		$close = $open . "--";

		foreach (preg_split('/\n/', $data) as $val) {
			// boundary lines may carry a "\r" or trailing padding
			$marker = rtrim($val);
			if ($marker === $open) { $inpart++; continue; } // start of a part
			if ($marker === $close) { break; } // the end of the last part
			if (!isset($parts[$inpart])) { $parts[$inpart] = ''; }
			$parts[$inpart] .= $val . "\n";
		}

		// $inpart is the index of the last part that was opened; iterating on it
		// (rather than on count($parts)) keeps the last part whether or not the
		// message had text before the first boundary
		for ($i = 0; $i <= $inpart; $i++) {
			$part = new ezmlm_parser();
			$part->parse(isset($parts[$i]) ? $parts[$i] : '');
			$this->parts[$i] = $part;
		}
	}
359
 
360
	// _cte_8bit: decode a content transfer encoding of 8bit
361
	// NOTE: this function is a little bit special. Since the end result will be displayed in
362
	// a web browser _cte_8bit decodes ASCII characters > 127 (the US-ASCII table) into the
363
	// html ordinal equivalent; it also ensures that the message's content-type is changed
364
	// to include text/html if it changes anything...
365
	// Makes 8bit data safe for display in a web page: every byte above 127 is
	// replaced by the numeric entity "&#<byte value>;".
	// With $simple set the data is returned untouched.
	// When at least one byte was replaced, $this->headers['content-type'][1] is set
	// to 'text/html'.
	// NOTE(review): the conversion works byte by byte, so the entities are only
	// meaningful for single-byte charsets - confirm how UTF-8 bodies should be handled.
	function _cte_8bit($data,$simple = FALSE) {
		if ($simple) { return $data; }

		$data = (string) $data;
		$out = '';
		$changed = FALSE;
		$length = strlen($data);
		for ($pos = 0; $pos < $length; $pos++) {
			$code = ord($data[$pos]);
			if ($code > 127) {
				$out .= '&#' . $code . ';';
				$changed = TRUE;
			} else {
				$out .= $data[$pos];
			}
		}

		if ($changed) { $this->headers['content-type'][1] = 'text/html'; }
		return $out;
	}
376
 
377
	// _cte_binary: decode a content transfer encoding of binary
378
	// Binary content is not encoded at all, so the data is handed back unchanged.
	function _cte_binary($data) {
		return $data;
	}
379
 
380
	// _cte_base64: decode a content transfer encoding of base64
381
	// Returns the base64 decoded form of $data (PHP's base64_decode()).
	function _cte_base64($data) {
		$decoded = base64_decode($data);
		return $decoded;
	}
382
 
383
	// _cte_qp: decode a content transfer encoding of quoted_printable
384
	// Returns the quoted-printable decoded form of $data.
	// PHP's own quoted_printable_decode() is used; it has proven good enough so far.
	function _cte_qp($data) {
		$decoded = quoted_printable_decode($data);
		return $decoded;
	}
388
 
389
}