WebSVN – Applications.projet – Blame – /trunk/classes/ezmlm-php-2.0/ezmlm-parser.php

Rev	Author	Line No.	Line
2	ddelon	1	`<?php`
208	neiluj	2	`/* vim: set expandtab tabstop=4 shiftwidth=4: */`
		3	`// +------------------------------------------------------------------------------------------------------+`
		4	`// \| PHP version 4.1 \|`
		5	`// +------------------------------------------------------------------------------------------------------+`
		6	`// \| Copyright (C) 2004 Tela Botanica (accueil@tela-botanica.org) \|`
		7	`// +------------------------------------------------------------------------------------------------------+`
		8	`// \| This library is free software; you can redistribute it and/or \|`
		9	`// \| modify it under the terms of the GNU General Public \|`
		10	`// \| License as published by the Free Software Foundation; either \|`
		11	`// \| version 2.1 of the License, or (at your option) any later version. \|`
		12	`// \| \|`
		13	`// \| This library is distributed in the hope that it will be useful, \|`
		14	`// \| but WITHOUT ANY WARRANTY; without even the implied warranty of \|`
		15	`// \| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU \|`
		16	`// \| General Public License for more details. \|`
		17	`// \| \|`
		18	`// \| You should have received a copy of the GNU General Public \|`
		19	`// \| License along with this library; if not, write to the Free Software \|`
		20	`// \| Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA \|`
		21	`// +------------------------------------------------------------------------------------------------------+`
310	alexandre_	22	`// CVS : $Id: ezmlm-parser.php,v 1.4 2008-08-25 15:20:54 alexandre_tb Exp $`
208	neiluj	23	`/**`
		24	`* Application projet`
		25	`*`
		26	`* classe ezmlm_parser pour lire les fichiers d index de ezmlm-idx`
		27	`*`
		28	`*@package projet`
		29	`//Auteur original : ?? recupere dans ezmlm-php`
		30	`*@author Alexandre Granier <alexandre@tela-botanica.org>`
		31	`*@copyright Tela-Botanica 2000-2004`
310	alexandre_	32	`*@version $Revision: 1.4 $`
208	neiluj	33	`// +------------------------------------------------------------------------------------------------------+`
		34	`*/`
		35
		36
		37	`// +------------------------------------------------------------------------------------------------------+`
		38	`// \| ENTETE du PROGRAMME \|`
		39	`// +------------------------------------------------------------------------------------------------------+`
		40
310	alexandre_	41	`// $Id: ezmlm-parser.php,v 1.4 2008-08-25 15:20:54 alexandre_tb Exp $`
2	ddelon	42	`//`
		43
		44	`require_once("ezmlm.php");`
		45	`require_once("Mail/mimeDecode.php") ;`
		46	`// CLASS: ezmlm-parser`
		47	`class ezmlm_parser extends ezmlm_php {`
208	neiluj	48	`var $headers; // the full untouched headers of the message`
		49	`var $body; // the full untouched (but decoded) body (this is not $this->parts[0]->body)`
2	ddelon	50	`var $parts; // all the parts, if it is a multipart message. each part is an ezmlm_parser object...`
		51
208	neiluj	52	`// Here's the most accessed headers, everything else can be`
		53	`// accessed from the $this->headers array.`
		54	`var $to; // To:`
		55	`var $from; // From:`
		56	`var $date; // Date:`
		57	`var $subject; // Subject:`
		58	`var $replyto; // Reply-To:`
		59	`var $contenttype; // Content-Type:`
2	ddelon	60
		61	`var $multipart; // TRUE if the message is a multipart message`
		62
		63	`var $msgfile; // if parsed from a file, this is the filename...`
		64
		65	`// functions`
		66
/**
 * Returns the most recent messages of the ezmlm mailing list, newest first.
 *
 * The ezmlm-idx "index" file of the newest archive sub-directory is read
 * first; as long as fewer than $show messages were collected, the index of
 * the preceding sub-directory is read as well.
 *
 * Each index entry spans two lines: "<number>: <subject hash> <subject>"
 * followed by "\t<day> <month> <year> <time...>;<author hash> <author>".
 *
 * Result layout (keyed by message number):
 *   array(
 *       <message number> => array(
 *           [0] => subject hash
 *           [1] => subject
 *           [2] => date in English ("day month year")
 *           [3] => author hash
 *           [4] => author
 *       ),
 *       ...
 *   )
 *
 * @param int    $show  maximum number of messages to return
 * @param string $month unused, kept so existing callers keep working
 * @return array the messages, most recent first (at most $show entries)
 * @access public
 */
function recent_msgs($show = 20, $month = "") {
    $motif_sujet  = '/([0-9]+): ([a-z]+) (.*)/';
    $motif_auteur = '/\t([0-9]+) ([a-zA-Z][a-zA-Z][a-zA-Z]) ([0-9][0-9][0-9][0-9]) ([^;]+);([^ ]*) (.*)/';

    $tableau_message = array();

    // NOTE(review): getNumArchive() is not defined in this file; the division
    // by 100 assumes it yields the number of the latest archived message and
    // that each archive sub-directory holds 100 messages - confirm.
    $repertoire = (int) floor($this->getNumArchive() / 100);

    while ($repertoire >= 0 && count($tableau_message) < $show) {
        $chemin_index = $this->listdir . '/archive/' . $repertoire . '/index';
        $lignes = is_readable($chemin_index) ? file($chemin_index, FILE_IGNORE_NEW_LINES) : false;
        if ($lignes === false) {
            // A missing or unreadable index contributes no message.
            $lignes = array();
        }

        // Entries are stored in ascending order, two lines per message.
        $messages_repertoire = array();
        $nombre_lignes = count($lignes);
        for ($i = 0; $i < $nombre_lignes; $i += 2) {
            if (!preg_match($motif_sujet, $lignes[$i], $match)) {
                continue;
            }
            $auteur = array();
            if (isset($lignes[$i + 1])) {
                preg_match($motif_auteur, $lignes[$i + 1], $auteur);
            }
            $messages_repertoire[$match[1]] = array(
                $match[2],
                $match[3],
                isset($auteur[3]) ? $auteur[1] . ' ' . $auteur[2] . ' ' . $auteur[3] : '',
                isset($auteur[5]) ? $auteur[5] : '',
                isset($auteur[6]) ? $auteur[6] : ''
            );
        }

        // Newest first; the union keeps the message numbers as keys.
        $tableau_message = $tableau_message + array_reverse($messages_repertoire, true);
        $repertoire--;
    }

    // Never hand back more than what was asked for.
    return array_slice($tableau_message, 0, $show, true);
}
		154
		155
/**
 * Returns the number of messages posted during the last seven days.
 *
 * The archive sub-directories are walked from the highest-numbered one
 * downwards; in each one the index entries are examined from the newest to
 * the oldest, and counting stops at the first message that is older than
 * one week (or whose date cannot be parsed).
 *
 * @return int number of messages of the last seven days (0 when the archive
 *             or its newest index is missing or empty)
 * @access public
 */
function num_msg_last_week() {
    $chemin_archive = $this->listdir . '/archive/';
    if (!is_dir($chemin_archive)) {
        return 0;
    }
    $repertoire_archive = opendir($chemin_archive);
    if ($repertoire_archive === false) {
        return 0;
    }

    // Look for the highest-numbered sub-directory; only purely numeric
    // names are archive directories.
    $dernier_repertoire = 0;
    while (false !== ($item = readdir($repertoire_archive))) {
        if (preg_match('/^[0-9]+$/', $item) && (int) $item > $dernier_repertoire) {
            $dernier_repertoire = (int) $item;
        }
    }
    closedir($repertoire_archive);

    $motif_sujet = '/([0-9]+): ([a-z]+) (.*)/';
    // Only the date and time are needed from the second line of an entry.
    $motif_date  = '/\t([0-9]+) ([a-zA-Z][a-zA-Z][a-zA-Z]) ([0-9][0-9][0-9][0-9]) ([0-9][0-9]:[0-9][0-9]:[0-9][0-9])/';

    $limite = time() - 7 * 24 * 60 * 60;
    $compteur_message = 0;

    while ($dernier_repertoire >= 0) {
        $chemin_index = $chemin_archive . $dernier_repertoire . '/index';

        // A missing or empty index (probably a brand new list) ends the count.
        if (!is_file($chemin_index) || filesize($chemin_index) == 0) {
            return $compteur_message;
        }
        $lignes = file($chemin_index, FILE_IGNORE_NEW_LINES);
        if ($lignes === false) {
            return $compteur_message;
        }

        // Dates of this directory only, in file (ascending) order.
        $tableau_date = array();
        $nombre_lignes = count($lignes);
        for ($i = 0; $i < $nombre_lignes; $i += 2) {
            if (!preg_match($motif_sujet, $lignes[$i])) {
                continue;
            }
            $date = '';
            if (isset($lignes[$i + 1]) && preg_match($motif_date, $lignes[$i + 1], $champs)) {
                $date = $champs[1] . ' ' . $champs[2] . ' ' . $champs[3] . ' ' . $champs[4];
            }
            $tableau_date[] = $date;
        }

        // NOTE(review): the time zone stored after the time in the index is
        // ignored, so strtotime() uses the server's default zone - confirm.
        foreach (array_reverse($tableau_date) as $date) {
            $horodatage = strtotime($date);
            if ($horodatage === false || $horodatage <= $limite) {
                return $compteur_message;
            }
            $compteur_message++;
        }

        // Every message of this directory is recent: look at the previous one.
        $dernier_repertoire--;
    }

    return $compteur_message;
}
		222
// parse_file - reads a message file and feeds its content to parse().
// $file may be an existing path, or a path relative to the list directory or
// to its "archive" sub-directory. $simple is passed through to parse().
// Returns FALSE when the file cannot be found or read, otherwise the value
// returned by parse().
function parse_file($file,$simple = FALSE) {
    if (!is_file($file)) {
        if (is_file($this->listdir . "/" . $file)) {
            $file = $this->listdir . "/" . $file;
        } else if (is_file($this->listdir . "/archive/" . $file)) {
            $file = $this->listdir . "/archive/" . $file;
        } else {
            return FALSE;
        }
    }

    $this->msgfile = $file;

    // Read the whole message at once; a failed read must not reach parse().
    $data = file_get_contents($file);
    if ($data === FALSE) {
        return FALSE;
    }

    return $this->parse($data,$simple);
}
		238
// parse_file_headers - opens a message file and decodes it with Mail_mimeDecode.
// $file is resolved like in parse_file(): as given, then relative to the list
// directory, then relative to its "archive" sub-directory.
// $simple is not used; it is kept so existing callers keep working.
// Returns FALSE when the file cannot be found or read, otherwise the value
// returned by Mail_mimeDecode::decode() called without options.
function parse_file_headers($file,$simple = FALSE) {
    if (!is_file($file)) {
        if (is_file($this->listdir . "/" . $file)) {
            $file = $this->listdir . "/" . $file;
        } else if (is_file($this->listdir . "/archive/" . $file)) {
            $file = $this->listdir . "/archive/" . $file;
        } else {
            return FALSE;
        }
    }

    $this->msgfile = $file;

    // The file is read once only.
    $message = file_get_contents($file);
    if ($message === FALSE) {
        return FALSE;
    }

    $mimeDecode = new Mail_mimeDecode($message);
    return $mimeDecode->decode();
}
		254
		255	`// this does all of the work (well it calls two functions that do all the work :)`
		256	`// all the decoding a part breaking follows RFC2045 (http://www.faqs.org/rfcs/rfc2045.html)`
		257	`function parse($data,$simple = FALSE) {`
		258
		259	`if (($this->_get_headers($data,$simple)) && $this->_get_body($data,$simple)) { return TRUE; }`
		260	`return FALSE;`
		261	`}`
		262
		263	`// all of these are internal functions, you shouldn't call them directly...`
		264
		265	`// _ct_parse: parse Content-Type headers -> $ct[0] = Full header, $ct[1] = Content-Type, $ct[2] ... $ct[n] = AP's`
		266	`function _ct_parse() {`
		267	`$instr = $this->headers['content-type'];`
		268	`preg_replace('/$.*$/','',$instr); // strip rfc822 comments`
		269	`if (preg_match('/: /', $instr)) {`
		270	`$ct = preg_split('/:/',trim($instr),2);`
		271	`$ct = preg_split('/;/',trim($ct[1]));`
		272	`} else {`
		273	`$ct = preg_split('/;/',trim($instr));`
		274	`}`
		275	`if (isset($ct[1])) $attrs = preg_split('/[\s\n]/',$ct[1]);`
		276	`$i = 2;`
		277	`$ct[1] = $ct[0];`
		278	`$ct[0] = $this->headers['content-type'];`
		279	`if (isset($attrs) && is_array($attrs)) {`
		280	`while (list($key, $val) = each($attrs)) {`
		281	`if ($val == '') continue;`
		282	`$ap = preg_split('/=/',$val,2);`
		283	`if (preg_match('/^"/',$ap[1])) { $ap[1] = substr($ap[1],1,strlen($ap[1])-2); }`
		284	`$ct[$i] = $ap;`
		285	`$i++;`
		286	`}`
		287	`}`
		288	`// are we a multipart message?`
		289	`if (preg_match('/^multipart/i', $ct[1])) { $this->multipart = TRUE; }`
		290
		291	`return $ct;`
		292	`}`
		293
		294	`// _get_headers: pulls the headers out of the data and builds the $this->headers array`
		295	`function _get_headers($data,$simple = FALSE) {`
		296	`$lines = preg_split('/\n/', $data);`
		297	`while (list($key, $val) = each($lines)) {`
		298	`$val = trim($val);`
		299	`if ($val == "") break;`
		300	`if (preg_match('/^From[^:].$/', $val)) continue; / strips out any From lines added by the MTA */`
		301
		302	`$hdr = preg_split('/: /', $val, 2);`
		303	`if (count($hdr) == 1) {`
		304	`// this is a continuation of the last header (like a recieved from line)`
		305	`$this->headers[$last] .= $val;`
		306	`} else {`
		307	`$this->headers[strtolower($hdr[0])] = $hdr[1];`
		308	`//echo htmlspecialchars($this->headers['from'])."<br />" ;`
		309	`$last = strtolower($hdr[0]);`
		310	`}`
		311	`}`
		312	`// ajout alex`
		313	`// pour supprimer le problème des ISO...`
		314	`// a déplacer ailleur, et appelé avant affichage`
		315
		316	`if (preg_match ('/windows-[0-9][0-9][0-9][0-9]/', $this->headers['subject'], $nombre)) {`
		317	`$reg_exp = $nombre[0] ;`
		318	`} else {`
		319	`$reg_exp = 'ISO-8859-15?' ;`
		320	`}`
		321	`if (preg_match ('/UTF/i', $this->headers['subject'])) $reg_exp = 'UTF-8' ;`
		322	`preg_match_all ("/=\?$reg_exp\?(Q\|B)\?(.*?)\?=/i", $this->headers['subject'], $match, PREG_PATTERN_ORDER) ;`
		323	`for ($i = 0; $i < count ($match[0]); $i++ ) {`
		324
		325	`if ($match[1][$i] == 'Q') {`
		326	`$decode = quoted_printable_decode ($match[2][$i]) ;`
		327	`} elseif ($match[1][$i] == 'B') {`
		328	`$decode = base64_decode ($match[2][$i]) ;`
		329	`}`
		330	`$decode = preg_replace ("/_/", " ", $decode) ;`
		331	`if ($reg_exp == 'UTF-8') {`
		332	`$decode = utf8_decode ($decode) ;`
		333	`}`
		334	`$this->headers['subject'] = str_replace ($match[0][$i], $decode, $this->headers['subject']) ;`
		335	`}`
		336	`// sanity anyone?`
		337	`if (!$this->headers['content-type']) { $this->headers['content-type'] = "text/plain; charset=us-ascii"; }`
		338	`if (!$simple) { $this->headers['content-type'] = $this->_ct_parse(); }`
		339
		340
		341	`return TRUE;`
		342	`}`
		343
		344	`// _get_body: pulls the body out of the data and fills $this->body, decoding the data if nessesary.`
		345	`function _get_body($data,$simple = FALSE) {`
		346	`$lines = preg_split('/\n/', $data);`
		347	`$doneheaders = FALSE;`
		348
		349	`$data = "";`
		350	`while (list($key,$val) = each($lines)) {`
		351	`//echo htmlspecialchars($val)."<br>";`
		352	`if (($val == '') and (!$doneheaders)) {`
		353	`$doneheaders = TRUE;`
		354	`continue;`
		355	`} else if ($doneheaders) {`
		356	`$data .= $val . "\n";`
		357	`}`
		358	`}`
		359
		360	`// now here comes the fun part... decoding.`
		361	`switch($this->headers['content-transfer-encoding']) {`
		362	`case 'binary':`
		363	`$this->body = $this->_cte_8bit($this->_cte_qp($this->_cte_binary($data)),$simple);`
		364	`break;`
		365
		366	`case 'base64':`
		367	`$this->body = $this->_cte_8bit($this->_cte_qp($this->_cte_base64($data)),$simple);`
		368	`break;`
		369
		370	`case 'quoted-printable':`
		371	`$this->body = $this->_cte_8bit($this->_cte_qp($data),$simple);`
		372	`break;`
		373
		374	`case '8bit':`
		375	`$this->body = $this->_cte_8bit($data,$simple);`
		376	`break;`
		377
		378	`case '7bit': // 7bit doesn't need to be decoded`
		379	`default: // And the fall through as well...`
		380	`$this->body = $data;`
		381	`break;`
		382	`}`
		383	`//echo $this->headers['content-type'][2][1];`
		384	`if (isset($this->headers['content-type'][2][1]) && $this->headers['content-type'][2][1] == 'UTF-8') {`
		385	`//$this->body = utf8_decode ($this->body) ;`
		386	`//echo quoted_printable_decode(utf8_decode ($this->body)) ;`
		387	`}`
		388	`if ($simple) { return TRUE; }`
		389
		390	`// if we are a multipart message then break up the parts and decode, set the appropriate variables.`
		391	`// here comes the best part about making ezmlm-php OOP. since each part is just really a little message`
		392	`// in itself each part becomes a new parser object and all the wheels turn again... :)`
		393	`if ($this->multipart) {`
		394
		395	`$boundary = '';`
		396	`for ($i = 2; $i <= count($this->headers['content-type']); $i++) {`
		397	`if (preg_match('/boundary/i', $this->headers['content-type'][$i][0])) {`
		398	`$boundary = $this->headers['content-type'][$i][1];`
		399
		400	`}`
		401	`}`
		402	`if ($boundary != '') {`
		403	`$this->_get_parts($this->body,$boundary);`
		404	`} else {`
		405	`// whoopps... something's not right here. we were told that the message is supposed`
		406	`// to be a multipart message, yet the boundary wasn't set in the content type.`
		407	`// mark the message as non multipart and add a message to the top of the body.`
		408	`$this->multipart = FALSE;`
		409	`$this->body = "PARSER ERROR:\nWHILE PARSING THIS MESSAGE AS A MULTIPART MESSAGE AS DEFINED IN RFC2045 THE BOUNDARY IDENTIFIER WAS NOT FOUND!\nTHIS MESSAGE WILL NOT DISPLAY CORRECTLY!\n\n" . $this->body;`
		410	`}`
		411	`}`
		412
		413	`return TRUE;`
		414	`}`
		415
		416	`// _get_parts: breaks up $data into parts based on $boundary following the rfc specs`
		417	`// detailed in section 5 of RFC2046 (http://www.faqs.org/rfcs/rfc2046.html)`
		418	`// After the parts are broken up they are then turned into parser objects and the`
		419	`// resulting array of parts is set to $this->parts;`
		420	`function _get_parts($data,$boundary) {`
		421	`$inpart = -1;`
		422	`$lines = preg_split('/\n/', $data);`
		423	`// La première partie contient l'avertissement pour les client mail ne supportant pas`
		424	`// multipart, elle est stocké dans parts[-1]`
		425	`while(list($key,$val) = each($lines)) {`
		426	`if ($val == "--" . $boundary) { $inpart++; continue; } // start of a part`
		427	`else if ($val == "--" . $boundary . "--") { break; } // the end of the last part`
		428	`else { $parts[$inpart] .= $val . "\n"; }`
		429	`}`
		430
		431	`for ($i = 0; $i < count($parts) - 1; $i++) { // On saute la première partie`
		432	`$part[$i] = new ezmlm_parser();`
		433	`$part[$i]->parse($parts[$i]);`
		434	`$this->parts[$i] = $part[$i];`
		435	`//echo $this->parts[$i]."<br>" ;`
		436	`}`
		437
		438	`}`
		439
		440	`// _cte_8bit: decode a content transfer encoding of 8bit`
		441	`// NOTE: this function is a little bit special. Since the end result will be displayed in`
		442	`// a web browser _cte_8bit decodes ASCII characters > 127 (the US-ASCII table) into the`
		443	`// html ordinal equivilant, it also ensures that the messages content-type is changed`
		444	`// to include text/html if it changes anything...`
		445	`function _cte_8bit($data,$simple = FALSE) {`
		446	`if ($simple) { return $data; }`
		447	`$changed = FALSE;`
		448	`$chars = preg_split('//',$data);`
		449	`while (list($key,$val) = each($chars)) {`
		450	`if (ord($val) > 127) { $out .= '&#' . ord($val) . ';'; $changed = TRUE; }`
		451	`else { $out .= $val; }`
		452	`}`
		453	`if ($changed) { $this->headers['content-type'][1] = 'text/html'; }`
		454	`return $out;`
		455	`}`
		456
		457	`// _cte_binary: decode a content transfer encoding of binary`
		458	`function _cte_binary($data) { return $data; }`
		459
		460	`// _cte_base64: decode a content transfer encoding of base64`
		461	`function _cte_base64($data) { return base64_decode($data); }`
		462
		463	`// _cte_qp: decode a content transfer encoding of quoted_printable`
		464	`function _cte_qp($data) {`
		465	`// For the time being we'll use PHP's function, it seems to work well enough.`
		466	`return quoted_printable_decode($data);`
		467	`}`
		468
		469	`}`

Subversion Repositories Applications.projet

(root)/trunk/classes/ezmlm-php-2.0/ezmlm-parser.php @ 208 – Rev 310