WebSVN – Applications.papyrus – Blame – /branches/livraison_menes/client/projet/classes/ezmlm-php-2.0/ezmlm-parser.php

Rev	Author	Line No.	Line
448	ddelon	1	`<?php`
1253	alexandre_	2	`// $Id: ezmlm-parser.php,v 1.2.4.1 2007-03-12 11:22:12 alexandre_tb Exp $`
448	ddelon	3	`//`
		4	`// ezmlm-parser.php - ezmlm-php v2.0`
		5	`// --------------------------------------------------------------`
		6	`// Contains all the code for parsing messages.`
		7	`// It handles all the necessary decoding, attachments, etc...`
		8	`// Note this does all the parsing itself now, removing the dependency`
		9	`// on the mailparse library (as it looks like it will never make`
		10	`// it into the official inclusion with PHP)...`
		11	`// --------------------------------------------------------------`
		12
		13	`require_once("ezmlm.php");`
		14	`require_once("Mail/mimeDecode.php") ;`
		15	`// CLASS: ezmlm-parser`
		16	`class ezmlm_parser extends ezmlm_php {`
1253	alexandre_	17	`var $headers; // the full untouched headers of the message`
		18	`var $body; // the full untouched (but decoded) body (this is not $this->parts[0]->body)`
448	ddelon	19	`var $parts; // all the parts, if it is a multipart message. each part is an ezmlm_parser object...`
		20
1253	alexandre_	21	`// Here's the most accessed headers, everything else can be`
		22	`// accessed from the $this->headers array.`
		23	`var $to; // To:`
		24	`var $from; // From:`
		25	`var $date; // Date:`
		26	`var $subject; // Subject:`
		27	`var $replyto; // Reply-To:`
		28	`var $contenttype; // Content-Type:`
448	ddelon	29
		30	`var $multipart; // TRUE if the message is a multipart message`
		31
		32	`var $msgfile; // if parsed from a file, this is the filename...`
		33
		34	`// functions`
		35
		36	`// recent_msgs - parses and returns an arbitrary number of the most recent messages`
		37	`function recent_msgs($show = 20, $month = "") {`
1253	alexandre_	38
		39	`$repertoire_archive = opendir($this->listdir . "/archive/");`
448	ddelon	40
1253	alexandre_	41	`$repertoire_message = array() ;`
		42	`$dernier_repertoire = 0 ;`
		43	`while (false !== ($item = readdir($repertoire_archive))) {`
		44	`// $item contient les noms des repertoires`
		45	`// on ne garde que ceux qui sont des chiffres`
		46
		47	`if (preg_match('/[0-9]+/', $item)) {`
		48	`// on ouvre le fichier d index de chaque repertoire`
		49	`if ((int) $item > $dernier_repertoire) $dernier_repertoire = (int) $item;`
		50
		51	`}`
448	ddelon	52	`}`
1253	alexandre_	53	`$tableau_message = array() ;`
		54	`$compteur_message = 0 ;`
		55	`$fichier_index = fopen ($this->listdir.'/archive/'.$dernier_repertoire.'/index', 'r') ;`
		56	`while (!feof($fichier_index)) {`
		57	`// Recuperation du numero de message, du hash du sujet et du sujet`
		58	`$temp = fgets($fichier_index, 4096);`
		59	`preg_match('/([0-9]+): ([a-z]+) (.*)/', $temp, $match) ;`
		60
		61	`// dans la seconde on recupere la date, hash auteur et auteur`
		62	`$temp = fgets($fichier_index, 4096);`
		63	`preg_match('/\t([0-9]+) ([a-zA-Z][a-zA-Z][a-zA-Z]) ([0-9][0-9][0-9][0-9]) ([^;]+);(.) (.)/', $temp, $match_deuxieme_ligne) ;`
		64	`if ($match[1] != '') {`
		65	`$tableau_message[$match[1]] = array ($match[2], $match[3],`
		66	`$match_deuxieme_ligne[1].' '.$match_deuxieme_ligne[2].' '.$match_deuxieme_ligne[3],`
		67	`$match_deuxieme_ligne[5],`
		68	`$match_deuxieme_ligne[6]);`
		69	`}`
		70	`}`
		71	`fclose ($fichier_index);`
		72	`// on renverse le tableau pour afficher les derniers messages en premier`
		73	`$tableau_message = array_reverse($tableau_message, true);`
448	ddelon	74
1253	alexandre_	75	`// On compte le nombre de message, s il est inferieur $show et que l on est`
		76	`// pas dans le premier index, on ouvre le fichier precedent et recupere`
		77	`// le n dernier message`
		78
		79	`if (count ($tableau_message) < $show && $dernier_repertoire != '0') {`
		80	`$avant_dernier_repertoire = $dernier_repertoire - 1 ;`
		81	`// On utilise file_get_contents pour renverser le fichier`
		82	`$fichier_index = array_reverse(`
		83	`explode ("\n",`
		84	`preg_replace ('/\n$/', '',`
		85	`file_get_contents ($this->listdir.'/archive/'.$avant_dernier_repertoire.'/index')) ), true) ;`
		86	`reset ($fichier_index);`
		87	`//var_dump ($fichier_index);`
		88
		89	`for ($i = count ($tableau_message); $i <= $show; $i++) {`
		90	`// Recuperation du numero de message, du hash du sujet et du sujet`
		91	`// dans la seconde on recupere la date, hash auteur et auteur`
		92
		93	`preg_match('/\t([0-9]+) ([a-zA-Z][a-zA-Z][a-zA-Z]) ([0-9][0-9][0-9][0-9]) ([^;]+);(.) (.)/',`
		94	`current ($fichier_index), $match_deuxieme_ligne) ;`
		95	`preg_match('/([0-9]+): ([a-z]+) (.*)/', next($fichier_index), $match) ;`
		96	`next ($fichier_index);`
		97
		98	`if ($match[1] != '') {`
		99	`$tableau_message[$match[1]] = array ($match[2], $match[3],`
		100	`$match_deuxieme_ligne[1].' '.$match_deuxieme_ligne[2].' '.$match_deuxieme_ligne[3],`
		101	`$match_deuxieme_ligne[5],`
		102	`$match_deuxieme_ligne[6]);`
		103	`}`
448	ddelon	104	`}`
		105	`}`
1253	alexandre_	106
		107
		108	`return ($tableau_message) ;`
448	ddelon	109	`}`
		110
		111
		112	`// parse_file - opens a file and feeds the data to parse, file can be relative to the listdir`
		113	`function parse_file($file,$simple = FALSE) {`
		114	`if (!is_file($file)) {`
		115	`if (is_file($this->listdir . "/" . $file)) { $file = $this->listdir . "/" . $file; }`
		116	`else if (is_file($this->listdir . "/archive/" . $file)) { $file = $this->listdir . "/archive/" . $file; }`
		117	`else { return FALSE; }`
		118	`}`
		119
		120	`$this->msgfile = $file;`
		121	`$data = '' ;`
		122	`$fd = fopen($file, "r");`
		123	`while (!feof($fd)) { $data .= fgets($fd,4096); }`
		124	`fclose($fd);`
		125	`return $this->parse($data,$simple);`
		126	`}`
		127
		128	`// parse_file_headers - ouvre un fichier et analyse les entêtes`
		129	`function parse_file_headers($file,$simple = FALSE) {`
		130	`if (!is_file($file)) {`
		131	`if (is_file($this->listdir . "/" . $file)) { $file = $this->listdir . "/" . $file; }`
		132	`else if (is_file($this->listdir . "/archive/" . $file)) { $file = $this->listdir . "/archive/" . $file; }`
		133	`else { return FALSE; }`
		134	`}`
		135
		136	`$this->msgfile = $file;`
		137	`$data = file_get_contents ($file) ;`
		138	`$message = file_get_contents($file) ;`
		139	`$mimeDecode = new Mail_mimeDecode($message) ;`
		140	`$mailDecode = $mimeDecode->decode() ;`
		141	`return $mailDecode ;`
		142	`/*$fd = fopen($file, "r");`
		143	`while (!feof($fd)) { $data .= fgets($fd,4096); }`
		144	`fclose($fd);*/`
		145	`if ($this->_get_headers($data, $simple)) return true ;`
		146	`return false ;`
		147	`}`
		148
		149	`// this does all of the work (well it calls two functions that do all the work :)`
		150	`// all the decoding a part breaking follows RFC2045 (http://www.faqs.org/rfcs/rfc2045.html)`
		151	`function parse($data,$simple = FALSE) {`
		152
		153	`if (($this->_get_headers($data,$simple)) && $this->_get_body($data,$simple)) { return TRUE; }`
		154	`return FALSE;`
		155	`}`
		156
		157	`// all of these are internal functions, you shouldn't call them directly...`
		158
		159	`// _ct_parse: parse Content-Type headers -> $ct[0] = Full header, $ct[1] = Content-Type, $ct[2] ... $ct[n] = AP's`
		160	`function _ct_parse() {`
		161	`$instr = $this->headers['content-type'];`
		162	`preg_replace('/$.*$/','',$instr); // strip rfc822 comments`
		163	`if (preg_match('/: /', $instr)) {`
		164	`$ct = preg_split('/:/',trim($instr),2);`
		165	`$ct = preg_split('/;/',trim($ct[1]));`
		166	`} else {`
		167	`$ct = preg_split('/;/',trim($instr));`
		168	`}`
		169	`if (isset($ct[1])) $attrs = preg_split('/[\s\n]/',$ct[1]);`
		170	`$i = 2;`
		171	`$ct[1] = $ct[0];`
		172	`$ct[0] = $this->headers['content-type'];`
		173	`if (isset($attrs) && is_array($attrs)) {`
		174	`while (list($key, $val) = each($attrs)) {`
		175	`if ($val == '') continue;`
		176	`$ap = preg_split('/=/',$val,2);`
		177	`if (preg_match('/^"/',$ap[1])) { $ap[1] = substr($ap[1],1,strlen($ap[1])-2); }`
		178	`$ct[$i] = $ap;`
		179	`$i++;`
		180	`}`
		181	`}`
		182	`// are we a multipart message?`
		183	`if (preg_match('/^multipart/i', $ct[1])) { $this->multipart = TRUE; }`
		184
		185	`return $ct;`
		186	`}`
		187
		188	`// _get_headers: pulls the headers out of the data and builds the $this->headers array`
		189	`function _get_headers($data,$simple = FALSE) {`
		190	`$lines = preg_split('/\n/', $data);`
		191	`while (list($key, $val) = each($lines)) {`
		192	`$val = trim($val);`
		193	`if ($val == "") break;`
		194	`if (preg_match('/^From[^:].$/', $val)) continue; / strips out any From lines added by the MTA */`
		195
		196	`$hdr = preg_split('/: /', $val, 2);`
		197	`if (count($hdr) == 1) {`
		198	`// this is a continuation of the last header (like a recieved from line)`
		199	`$this->headers[$last] .= $val;`
		200	`} else {`
		201	`$this->headers[strtolower($hdr[0])] = $hdr[1];`
		202	`//echo htmlspecialchars($this->headers['from'])."<br />" ;`
		203	`$last = strtolower($hdr[0]);`
		204	`}`
		205	`}`
		206	`// ajout alex`
		207	`// pour supprimer le problème des ISO...`
		208	`// a déplacer ailleur, et appelé avant affichage`
		209
		210	`if (preg_match ('/windows-[0-9][0-9][0-9][0-9]/', $this->headers['subject'], $nombre)) {`
		211	`$reg_exp = $nombre[0] ;`
		212	`} else {`
		213	`$reg_exp = 'ISO-8859-15?' ;`
		214	`}`
		215	`if (preg_match ('/UTF/i', $this->headers['subject'])) $reg_exp = 'UTF-8' ;`
		216	`preg_match_all ("/=\?$reg_exp\?(Q\|B)\?(.*?)\?=/i", $this->headers['subject'], $match, PREG_PATTERN_ORDER) ;`
		217	`for ($i = 0; $i < count ($match[0]); $i++ ) {`
		218
		219	`if ($match[1][$i] == 'Q') {`
		220	`$decode = quoted_printable_decode ($match[2][$i]) ;`
		221	`} elseif ($match[1][$i] == 'B') {`
		222	`$decode = base64_decode ($match[2][$i]) ;`
		223	`}`
		224	`$decode = preg_replace ("/_/", " ", $decode) ;`
		225	`if ($reg_exp == 'UTF-8') {`
		226	`$decode = utf8_decode ($decode) ;`
		227	`}`
		228	`$this->headers['subject'] = str_replace ($match[0][$i], $decode, $this->headers['subject']) ;`
		229	`}`
		230	`// sanity anyone?`
		231	`if (!$this->headers['content-type']) { $this->headers['content-type'] = "text/plain; charset=us-ascii"; }`
		232	`if (!$simple) { $this->headers['content-type'] = $this->_ct_parse(); }`
		233
		234
		235	`return TRUE;`
		236	`}`
		237
		238	`// _get_body: pulls the body out of the data and fills $this->body, decoding the data if nessesary.`
		239	`function _get_body($data,$simple = FALSE) {`
		240	`$lines = preg_split('/\n/', $data);`
		241	`$doneheaders = FALSE;`
		242
		243	`$data = "";`
		244	`while (list($key,$val) = each($lines)) {`
		245	`//echo htmlspecialchars($val)."<br>";`
		246	`if (($val == '') and (!$doneheaders)) {`
		247	`$doneheaders = TRUE;`
		248	`continue;`
		249	`} else if ($doneheaders) {`
		250	`$data .= $val . "\n";`
		251	`}`
		252	`}`
		253
		254	`// now here comes the fun part... decoding.`
		255	`switch($this->headers['content-transfer-encoding']) {`
		256	`case 'binary':`
		257	`$this->body = $this->_cte_8bit($this->_cte_qp($this->_cte_binary($data)),$simple);`
		258	`break;`
		259
		260	`case 'base64':`
		261	`$this->body = $this->_cte_8bit($this->_cte_qp($this->_cte_base64($data)),$simple);`
		262	`break;`
		263
		264	`case 'quoted-printable':`
		265	`$this->body = $this->_cte_8bit($this->_cte_qp($data),$simple);`
		266	`break;`
		267
		268	`case '8bit':`
		269	`$this->body = $this->_cte_8bit($data,$simple);`
		270	`break;`
		271
		272	`case '7bit': // 7bit doesn't need to be decoded`
		273	`default: // And the fall through as well...`
		274	`$this->body = $data;`
		275	`break;`
		276	`}`
		277	`//echo $this->headers['content-type'][2][1];`
		278	`if (isset($this->headers['content-type'][2][1]) && $this->headers['content-type'][2][1] == 'UTF-8') {`
		279	`//$this->body = utf8_decode ($this->body) ;`
		280	`//echo quoted_printable_decode(utf8_decode ($this->body)) ;`
		281	`}`
		282	`if ($simple) { return TRUE; }`
		283
		284	`// if we are a multipart message then break up the parts and decode, set the appropriate variables.`
		285	`// here comes the best part about making ezmlm-php OOP. since each part is just really a little message`
		286	`// in itself each part becomes a new parser object and all the wheels turn again... :)`
		287	`if ($this->multipart) {`
		288
		289	`$boundary = '';`
		290	`for ($i = 2; $i <= count($this->headers['content-type']); $i++) {`
		291	`if (preg_match('/boundary/i', $this->headers['content-type'][$i][0])) {`
		292	`$boundary = $this->headers['content-type'][$i][1];`
		293
		294	`}`
		295	`}`
		296	`if ($boundary != '') {`
		297	`$this->_get_parts($this->body,$boundary);`
		298	`} else {`
		299	`// whoopps... something's not right here. we were told that the message is supposed`
		300	`// to be a multipart message, yet the boundary wasn't set in the content type.`
		301	`// mark the message as non multipart and add a message to the top of the body.`
		302	`$this->multipart = FALSE;`
		303	`$this->body = "PARSER ERROR:\nWHILE PARSING THIS MESSAGE AS A MULTIPART MESSAGE AS DEFINED IN RFC2045 THE BOUNDARY IDENTIFIER WAS NOT FOUND!\nTHIS MESSAGE WILL NOT DISPLAY CORRECTLY!\n\n" . $this->body;`
		304	`}`
		305	`}`
		306
		307	`return TRUE;`
		308	`}`
		309
		310	`// _get_parts: breaks up $data into parts based on $boundary following the rfc specs`
		311	`// detailed in section 5 of RFC2046 (http://www.faqs.org/rfcs/rfc2046.html)`
		312	`// After the parts are broken up they are then turned into parser objects and the`
		313	`// resulting array of parts is set to $this->parts;`
		314	`function _get_parts($data,$boundary) {`
		315	`$inpart = -1;`
		316	`$lines = preg_split('/\n/', $data);`
		317	`// La première partie contient l'avertissement pour les client mail ne supportant pas`
		318	`// multipart, elle est stocké dans parts[-1]`
		319	`while(list($key,$val) = each($lines)) {`
		320	`if ($val == "--" . $boundary) { $inpart++; continue; } // start of a part`
		321	`else if ($val == "--" . $boundary . "--") { break; } // the end of the last part`
		322	`else { $parts[$inpart] .= $val . "\n"; }`
		323	`}`
		324
		325	`for ($i = 0; $i < count($parts) - 1; $i++) { // On saute la première partie`
		326	`$part[$i] = new ezmlm_parser();`
		327	`$part[$i]->parse($parts[$i]);`
		328	`$this->parts[$i] = $part[$i];`
		329	`//echo $this->parts[$i]."<br>" ;`
		330	`}`
		331
		332	`}`
		333
		334	`// _cte_8bit: decode a content transfer encoding of 8bit`
		335	`// NOTE: this function is a little bit special. Since the end result will be displayed in`
		336	`// a web browser _cte_8bit decodes ASCII characters > 127 (the US-ASCII table) into the`
		337	`// html ordinal equivilant, it also ensures that the messages content-type is changed`
		338	`// to include text/html if it changes anything...`
		339	`function _cte_8bit($data,$simple = FALSE) {`
		340	`if ($simple) { return $data; }`
		341	`$changed = FALSE;`
		342	`$chars = preg_split('//',$data);`
		343	`while (list($key,$val) = each($chars)) {`
		344	`if (ord($val) > 127) { $out .= '&#' . ord($val) . ';'; $changed = TRUE; }`
		345	`else { $out .= $val; }`
		346	`}`
		347	`if ($changed) { $this->headers['content-type'][1] = 'text/html'; }`
		348	`return $out;`
		349	`}`
		350
		351	`// _cte_binary: decode a content transfer encoding of binary`
		352	`function _cte_binary($data) { return $data; }`
		353
		354	`// _cte_base64: decode a content transfer encoding of base64`
		355	`function _cte_base64($data) { return base64_decode($data); }`
		356
		357	`// _cte_qp: decode a content transfer encoding of quoted_printable`
		358	`function _cte_qp($data) {`
		359	`// For the time being we'll use PHP's function, it seems to work well enough.`
		360	`return quoted_printable_decode($data);`
		361	`}`
		362
		363	`}`

Subversion Repositories Applications.papyrus

(root)/branches/livraison_menes/client/projet/classes/ezmlm-php-2.0/ezmlm-parser.php – Rev 1253