WebSVN – Applications.papyrus – Blame – /trunk/client/projet/classes/ezmlm-php-2.0/ezmlm-parser.php

Rev	Author	Line No.	Line
448	ddelon	1	`<?php`
474	alexandre_	2	`// $Id: ezmlm-parser.php,v 1.2 2005-09-27 16:43:08 alexandre_tb Exp $`
448	ddelon	3	`//`
		4	`// ezmlm-parser.php - ezmlm-php v2.0`
		5	`// --------------------------------------------------------------`
		6	`// Contains all the code for parsing messages.`
		7	`// It handles all the necessary decoding, attachments, etc...`
		8	`// Note this does all the parsing itself now, removing the dependency`
		9	`// on the mailparse library (as it looks like it will never make`
		10	`// it into the official inclusion with PHP)...`
		11	`// --------------------------------------------------------------`
		12
		13	`require_once("ezmlm.php");`
		14	`require_once("Mail/mimeDecode.php") ;`
		15	`// CLASS: ezmlm-parser`
		16	`class ezmlm_parser extends ezmlm_php {`
		17	`var $headers; // the full untouched headers of the message`
		18	`var $body; // the full untouched (but decoded) body (this is not $this->parts[0]->body)`
		19	`var $parts; // all the parts, if it is a multipart message. each part is an ezmlm_parser object...`
		20
		21	`// Here's the most accessed headers, everything else can be`
		22	`// accessed from the $this->headers array.`
		23	`var $to; // To:`
		24	`var $from; // From:`
		25	`var $date; // Date:`
		26	`var $subject; // Subject:`
		27	`var $replyto; // Reply-To:`
		28	`var $contenttype; // Content-Type:`
		29
		30	`var $multipart; // TRUE if the message is a multipart message`
		31
		32	`var $msgfile; // if parsed from a file, this is the filename...`
		33
		34	`// functions`
		35
		36	`// recent_msgs - parses and returns an arbitrary number of the most recent messages`
		37	`function recent_msgs($show = 20, $month = "") {`
		38	`if ($month == "") { $month = date("Ym"); }`
		39	`$threadyear = substr($month,0,4);`
		40	`$threadmonth = substr($month,4,2);`
		41
		42	`if (!is_file($this->listdir . "/archive/threads/" . $month)) {`
		43	`if ($threadmonth == '01') { $prevthread = ($threadyear - 1) . "12"; }`
		44	`else if ($threadmonth >= 11) { $prevthread = $threadyear . ($threadmonth - 1); }`
		45	`else { $prevthread = $threadyear . "0" . ($threadmonth - 1); }`
		46	`return $this->recent_msgs($show,$prevthread);`
		47	`}`
		48	`// on ouvre les fichiers de threads du dernier mois`
		49	`$fd = fopen($this->listdir . "/archive/threads/" . $month, "r");`
		50	`fseek($fd,-256,SEEK_END);`
		51
		52	`// on récupère la dernière ligne`
		53
		54	`while (!feof($fd)) {`
		55	`$temp = fgets($fd,4096);`
		56	`if ($temp != "") { $curthread = $temp; }`
		57	`}`
		58	`$nombre_message = 0 ;`
		59	`fseek ($fd, 0) ;`
		60	`while (!feof($fd)) {`
		61	`$nombre_message++;`
		62	`fgets($fd);`
		63	`}`
		64	`fclose($fd);`
		65	`///echo "<br />".$curthread."<br />" ;`
		66	`$subjectfile = preg_replace("/^[0-9]\:([a-z]) \[.*/", "\\1", $curthread);`
		67	`$subjectfile = substr($subjectfile,0,2) . "/" . substr($subjectfile,2,18); // on ne garde que les 2 1ère lettre du hash, slash et le reste du hash`
		68
		69	`// on ouvre le fichier des sujets`
		70	`// présenté comme suit :`
		71	`// hash sujet originel (sur la première ligne)`
		72	`// num_message:annéemois:hash_auteur Nom Auteur`
		73	`$fd = fopen($this->listdir . "/archive/subjects/" . $subjectfile, "r");`
		74	`fseek($fd,-512,SEEK_END);`
		75
		76	`// on prend la dernière ligne`
		77	`while (!feof($fd)) {`
		78	`$temp = fgets($fd,4096);`
		79	`if ($temp != "") { $cursubject = $temp; }`
		80	`}`
		81	`fclose($fd);`
		82
		83	`list($msgnum,$fromthread,$authorid) = split(":",$cursubject);`
		84	`$msgdir = (int)($msgnum / 100); // on reconstruit le répertoire du message en divisant son numéro par 100`
		85
		86	`$numshown = 0;`
		87
		88	`$msgfiles = array();`
		89	`// on boucle 100 fois`
		90	`for ($i = 0; $i <= 99; $i++) {`
		91	`if (($msgdir == 0) and ($i == 0)) { $i++; };`
		92	`if ($i < 10) { $msgfile = "0" . $i; }`
		93	`else { $msgfile = $i; }`
		94	`if (!is_file($this->listdir . "/archive/" . $msgdir . "/" . $msgfile)) { break; }`
		95
		96	`}`
		97	`if ($show == '') $show = $nombre_message ; // Si aucun paramètre n'est passé on renvoie tous les fichiers du mois`
		98	`while ($numshown < $show) {`
		99	`$i--;`
		100	`if ($i < 0) {`
		101	`$i = 99;`
		102	`$msgdir--;`
		103	`if ($msgdir < 0) { break; }`
		104	`}`
		105	`if ($i < 10) {`
		106	`$msgfile = $this->listdir . "/archive/" . $msgdir . "/0" . $i;`
		107	`} else {`
		108	`$msgfile = $this->listdir . "/archive/" . $msgdir . "/" . $i;`
		109	`}`
		110	`/*`
		111	`$msg = new ezmlm_parser();`
		112	`$msg->parse_file($msgfile);`
		113	`*/`
		114	`if (!is_file($msgfile)) {`
		115	`if (is_file($this->listdir . "/" . $msgfile)) {`
		116	`$msgfile = $this->listdir . "/" . $msgfile;`
		117	`} else if (is_file($this->listdir . "/archive/" . $msgfile)) {`
		118	`$msgfile = $this->listdir . "/archive/" . $msgfile;`
		119	`} else {`
		120	`return $msgfiles;`
		121	`}`
		122	`}`
		123	`$message = file_get_contents($msgfile) ;`
		124	`$mimeDecode = new Mail_mimeDecode($message) ;`
		125	`$mailDecode = $mimeDecode->decode() ;`
		126	`$mailDecode->msgfile = $msgfile ;`
		127	`$mailDecode->nummessage = $msgdir.$i ;`
		128	`$msgfiles[] = $mailDecode ;`
		129
		130	`unset($mailDecode);`
		131	`$numshown++;`
		132	`}`
		133
		134	`return $msgfiles;`
		135	`}`
		136
		137
		138	`// parse_file - opens a file and feeds the data to parse, file can be relative to the listdir`
		139	`function parse_file($file,$simple = FALSE) {`
		140	`if (!is_file($file)) {`
		141	`if (is_file($this->listdir . "/" . $file)) { $file = $this->listdir . "/" . $file; }`
		142	`else if (is_file($this->listdir . "/archive/" . $file)) { $file = $this->listdir . "/archive/" . $file; }`
		143	`else { return FALSE; }`
		144	`}`
		145
		146	`$this->msgfile = $file;`
		147	`$data = '' ;`
		148	`$fd = fopen($file, "r");`
		149	`while (!feof($fd)) { $data .= fgets($fd,4096); }`
		150	`fclose($fd);`
		151	`return $this->parse($data,$simple);`
		152	`}`
		153
		154	`// parse_file_headers - ouvre un fichier et analyse les entêtes`
		155	`function parse_file_headers($file,$simple = FALSE) {`
		156	`if (!is_file($file)) {`
		157	`if (is_file($this->listdir . "/" . $file)) { $file = $this->listdir . "/" . $file; }`
		158	`else if (is_file($this->listdir . "/archive/" . $file)) { $file = $this->listdir . "/archive/" . $file; }`
		159	`else { return FALSE; }`
		160	`}`
		161
		162	`$this->msgfile = $file;`
		163	`$data = file_get_contents ($file) ;`
		164	`$message = file_get_contents($file) ;`
		165	`$mimeDecode = new Mail_mimeDecode($message) ;`
		166	`$mailDecode = $mimeDecode->decode() ;`
		167	`return $mailDecode ;`
		168	`/*$fd = fopen($file, "r");`
		169	`while (!feof($fd)) { $data .= fgets($fd,4096); }`
		170	`fclose($fd);*/`
		171	`if ($this->_get_headers($data, $simple)) return true ;`
		172	`return false ;`
		173	`}`
		174
		175	`// this does all of the work (well it calls two functions that do all the work :)`
		176	`// all the decoding a part breaking follows RFC2045 (http://www.faqs.org/rfcs/rfc2045.html)`
		177	`function parse($data,$simple = FALSE) {`
		178
		179	`if (($this->_get_headers($data,$simple)) && $this->_get_body($data,$simple)) { return TRUE; }`
		180	`return FALSE;`
		181	`}`
		182
		183	`// all of these are internal functions, you shouldn't call them directly...`
		184
		185	`// _ct_parse: parse Content-Type headers -> $ct[0] = Full header, $ct[1] = Content-Type, $ct[2] ... $ct[n] = AP's`
		186	`function _ct_parse() {`
		187	`$instr = $this->headers['content-type'];`
		188	`preg_replace('/$.*$/','',$instr); // strip rfc822 comments`
		189	`if (preg_match('/: /', $instr)) {`
		190	`$ct = preg_split('/:/',trim($instr),2);`
		191	`$ct = preg_split('/;/',trim($ct[1]));`
		192	`} else {`
		193	`$ct = preg_split('/;/',trim($instr));`
		194	`}`
		195	`if (isset($ct[1])) $attrs = preg_split('/[\s\n]/',$ct[1]);`
		196	`$i = 2;`
		197	`$ct[1] = $ct[0];`
		198	`$ct[0] = $this->headers['content-type'];`
		199	`if (isset($attrs) && is_array($attrs)) {`
		200	`while (list($key, $val) = each($attrs)) {`
		201	`if ($val == '') continue;`
		202	`$ap = preg_split('/=/',$val,2);`
		203	`if (preg_match('/^"/',$ap[1])) { $ap[1] = substr($ap[1],1,strlen($ap[1])-2); }`
		204	`$ct[$i] = $ap;`
		205	`$i++;`
		206	`}`
		207	`}`
		208	`// are we a multipart message?`
		209	`if (preg_match('/^multipart/i', $ct[1])) { $this->multipart = TRUE; }`
		210
		211	`return $ct;`
		212	`}`
		213
		214	`// _get_headers: pulls the headers out of the data and builds the $this->headers array`
		215	`function _get_headers($data,$simple = FALSE) {`
		216	`$lines = preg_split('/\n/', $data);`
		217	`while (list($key, $val) = each($lines)) {`
		218	`$val = trim($val);`
		219	`if ($val == "") break;`
		220	`if (preg_match('/^From[^:].$/', $val)) continue; / strips out any From lines added by the MTA */`
		221
		222	`$hdr = preg_split('/: /', $val, 2);`
		223	`if (count($hdr) == 1) {`
		224	`// this is a continuation of the last header (like a recieved from line)`
		225	`$this->headers[$last] .= $val;`
		226	`} else {`
		227	`$this->headers[strtolower($hdr[0])] = $hdr[1];`
		228	`//echo htmlspecialchars($this->headers['from'])."<br />" ;`
		229	`$last = strtolower($hdr[0]);`
		230	`}`
		231	`}`
		232	`// ajout alex`
		233	`// pour supprimer le problème des ISO...`
		234	`// a déplacer ailleur, et appelé avant affichage`
		235
		236	`if (preg_match ('/windows-[0-9][0-9][0-9][0-9]/', $this->headers['subject'], $nombre)) {`
		237	`$reg_exp = $nombre[0] ;`
		238	`} else {`
		239	`$reg_exp = 'ISO-8859-15?' ;`
		240	`}`
		241	`if (preg_match ('/UTF/i', $this->headers['subject'])) $reg_exp = 'UTF-8' ;`
		242	`preg_match_all ("/=\?$reg_exp\?(Q\|B)\?(.*?)\?=/i", $this->headers['subject'], $match, PREG_PATTERN_ORDER) ;`
		243	`for ($i = 0; $i < count ($match[0]); $i++ ) {`
		244
		245	`if ($match[1][$i] == 'Q') {`
		246	`$decode = quoted_printable_decode ($match[2][$i]) ;`
		247	`} elseif ($match[1][$i] == 'B') {`
		248	`$decode = base64_decode ($match[2][$i]) ;`
		249	`}`
		250	`$decode = preg_replace ("/_/", " ", $decode) ;`
		251	`if ($reg_exp == 'UTF-8') {`
		252	`$decode = utf8_decode ($decode) ;`
		253	`}`
		254	`$this->headers['subject'] = str_replace ($match[0][$i], $decode, $this->headers['subject']) ;`
		255	`}`
		256	`// sanity anyone?`
		257	`if (!$this->headers['content-type']) { $this->headers['content-type'] = "text/plain; charset=us-ascii"; }`
		258	`if (!$simple) { $this->headers['content-type'] = $this->_ct_parse(); }`
		259
		260
		261	`return TRUE;`
		262	`}`
		263
		264	`// _get_body: pulls the body out of the data and fills $this->body, decoding the data if nessesary.`
		265	`function _get_body($data,$simple = FALSE) {`
		266	`$lines = preg_split('/\n/', $data);`
		267	`$doneheaders = FALSE;`
		268
		269	`$data = "";`
		270	`while (list($key,$val) = each($lines)) {`
		271	`//echo htmlspecialchars($val)."<br>";`
		272	`if (($val == '') and (!$doneheaders)) {`
		273	`$doneheaders = TRUE;`
		274	`continue;`
		275	`} else if ($doneheaders) {`
		276	`$data .= $val . "\n";`
		277	`}`
		278	`}`
		279
		280	`// now here comes the fun part... decoding.`
		281	`switch($this->headers['content-transfer-encoding']) {`
		282	`case 'binary':`
		283	`$this->body = $this->_cte_8bit($this->_cte_qp($this->_cte_binary($data)),$simple);`
		284	`break;`
		285
		286	`case 'base64':`
		287	`$this->body = $this->_cte_8bit($this->_cte_qp($this->_cte_base64($data)),$simple);`
		288	`break;`
		289
		290	`case 'quoted-printable':`
		291	`$this->body = $this->_cte_8bit($this->_cte_qp($data),$simple);`
		292	`break;`
		293
		294	`case '8bit':`
		295	`$this->body = $this->_cte_8bit($data,$simple);`
		296	`break;`
		297
		298	`case '7bit': // 7bit doesn't need to be decoded`
		299	`default: // And the fall through as well...`
		300	`$this->body = $data;`
		301	`break;`
		302	`}`
		303	`//echo $this->headers['content-type'][2][1];`
		304	`if (isset($this->headers['content-type'][2][1]) && $this->headers['content-type'][2][1] == 'UTF-8') {`
		305	`//$this->body = utf8_decode ($this->body) ;`
		306	`//echo quoted_printable_decode(utf8_decode ($this->body)) ;`
		307	`}`
		308	`if ($simple) { return TRUE; }`
		309
		310	`// if we are a multipart message then break up the parts and decode, set the appropriate variables.`
		311	`// here comes the best part about making ezmlm-php OOP. since each part is just really a little message`
		312	`// in itself each part becomes a new parser object and all the wheels turn again... :)`
		313	`if ($this->multipart) {`
		314
		315	`$boundary = '';`
		316	`for ($i = 2; $i <= count($this->headers['content-type']); $i++) {`
		317	`if (preg_match('/boundary/i', $this->headers['content-type'][$i][0])) {`
		318	`$boundary = $this->headers['content-type'][$i][1];`
		319
		320	`}`
		321	`}`
		322	`if ($boundary != '') {`
		323	`$this->_get_parts($this->body,$boundary);`
		324	`} else {`
		325	`// whoopps... something's not right here. we were told that the message is supposed`
		326	`// to be a multipart message, yet the boundary wasn't set in the content type.`
		327	`// mark the message as non multipart and add a message to the top of the body.`
		328	`$this->multipart = FALSE;`
		329	`$this->body = "PARSER ERROR:\nWHILE PARSING THIS MESSAGE AS A MULTIPART MESSAGE AS DEFINED IN RFC2045 THE BOUNDARY IDENTIFIER WAS NOT FOUND!\nTHIS MESSAGE WILL NOT DISPLAY CORRECTLY!\n\n" . $this->body;`
		330	`}`
		331	`}`
		332
		333	`return TRUE;`
		334	`}`
		335
		336	`// _get_parts: breaks up $data into parts based on $boundary following the rfc specs`
		337	`// detailed in section 5 of RFC2046 (http://www.faqs.org/rfcs/rfc2046.html)`
		338	`// After the parts are broken up they are then turned into parser objects and the`
		339	`// resulting array of parts is set to $this->parts;`
		340	`function _get_parts($data,$boundary) {`
		341	`$inpart = -1;`
		342	`$lines = preg_split('/\n/', $data);`
		343	`// La première partie contient l'avertissement pour les client mail ne supportant pas`
		344	`// multipart, elle est stocké dans parts[-1]`
		345	`while(list($key,$val) = each($lines)) {`
		346	`if ($val == "--" . $boundary) { $inpart++; continue; } // start of a part`
		347	`else if ($val == "--" . $boundary . "--") { break; } // the end of the last part`
		348	`else { $parts[$inpart] .= $val . "\n"; }`
		349	`}`
		350
		351	`for ($i = 0; $i < count($parts) - 1; $i++) { // On saute la première partie`
		352	`$part[$i] = new ezmlm_parser();`
		353	`$part[$i]->parse($parts[$i]);`
		354	`$this->parts[$i] = $part[$i];`
		355	`//echo $this->parts[$i]."<br>" ;`
		356	`}`
		357
		358	`}`
		359
		360	`// _cte_8bit: decode a content transfer encoding of 8bit`
		361	`// NOTE: this function is a little bit special. Since the end result will be displayed in`
		362	`// a web browser _cte_8bit decodes ASCII characters > 127 (the US-ASCII table) into the`
		363	`// html ordinal equivilant, it also ensures that the messages content-type is changed`
		364	`// to include text/html if it changes anything...`
		365	`function _cte_8bit($data,$simple = FALSE) {`
		366	`if ($simple) { return $data; }`
		367	`$changed = FALSE;`
		368	`$chars = preg_split('//',$data);`
		369	`while (list($key,$val) = each($chars)) {`
		370	`if (ord($val) > 127) { $out .= '&#' . ord($val) . ';'; $changed = TRUE; }`
		371	`else { $out .= $val; }`
		372	`}`
		373	`if ($changed) { $this->headers['content-type'][1] = 'text/html'; }`
		374	`return $out;`
		375	`}`
		376
		377	`// _cte_binary: decode a content transfer encoding of binary`
		378	`function _cte_binary($data) { return $data; }`
		379
		380	`// _cte_base64: decode a content transfer encoding of base64`
		381	`function _cte_base64($data) { return base64_decode($data); }`
		382
		383	`// _cte_qp: decode a content transfer encoding of quoted_printable`
		384	`function _cte_qp($data) {`
		385	`// For the time being we'll use PHP's function, it seems to work well enough.`
		386	`return quoted_printable_decode($data);`
		387	`}`
		388
		389	`}`

Subversion Repositories Applications.papyrus

(root)/trunk/client/projet/classes/ezmlm-php-2.0/ezmlm-parser.php @ 584 – Rev