1665 |
raphael |
1 |
<?php
|
|
|
2 |
//============================================================+
|
|
|
3 |
// File name : tcpdf_parser.php
|
|
|
4 |
// Version : 1.0.003
|
|
|
5 |
// Begin : 2011-05-23
|
|
|
6 |
// Last Update : 2013-03-17
|
|
|
7 |
// Author : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - info@tecnick.com
|
|
|
8 |
// License : GNU-LGPL v3 (http://www.gnu.org/copyleft/lesser.html)
|
|
|
9 |
// -------------------------------------------------------------------
|
|
|
10 |
// Copyright (C) 2011-2013 Nicola Asuni - Tecnick.com LTD
|
|
|
11 |
//
|
|
|
12 |
// This file is part of TCPDF software library.
|
|
|
13 |
//
|
|
|
14 |
// TCPDF is free software: you can redistribute it and/or modify it
|
|
|
15 |
// under the terms of the GNU Lesser General Public License as
|
|
|
16 |
// published by the Free Software Foundation, either version 3 of the
|
|
|
17 |
// License, or (at your option) any later version.
|
|
|
18 |
//
|
|
|
19 |
// TCPDF is distributed in the hope that it will be useful, but
|
|
|
20 |
// WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
21 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
22 |
// See the GNU Lesser General Public License for more details.
|
|
|
23 |
//
|
|
|
24 |
// You should have received a copy of the License
|
|
|
25 |
// along with TCPDF. If not, see
|
|
|
26 |
// <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
|
|
|
27 |
//
|
|
|
28 |
// See LICENSE.TXT file for more information.
|
|
|
29 |
// -------------------------------------------------------------------
|
|
|
30 |
//
|
|
|
31 |
// Description : This is a PHP class for parsing PDF documents.
|
|
|
32 |
//
|
|
|
33 |
//============================================================+
|
|
|
34 |
|
|
|
35 |
/**
|
|
|
36 |
* @file
|
|
|
37 |
* This is a PHP class for parsing PDF documents.<br>
|
|
|
38 |
* @package com.tecnick.tcpdf
|
|
|
39 |
* @author Nicola Asuni
|
|
|
40 |
* @version 1.0.003
|
|
|
41 |
*/
|
|
|
42 |
|
|
|
43 |
// include class for decoding filters
|
|
|
44 |
require_once(dirname(__FILE__).'/include/tcpdf_filters.php');
|
|
|
45 |
|
|
|
46 |
/**
|
|
|
47 |
* @class TCPDF_PARSER
|
|
|
48 |
* This is a PHP class for parsing PDF documents.<br>
|
|
|
49 |
* @package com.tecnick.tcpdf
|
|
|
50 |
* @brief This is a PHP class for parsing PDF documents.
|
|
|
51 |
* @version 1.0.003
|
|
|
52 |
* @author Nicola Asuni - info@tecnick.com
|
|
|
53 |
*/
|
|
|
54 |
class TCPDF_PARSER {
|
|
|
55 |
|
|
|
56 |
/**
|
|
|
57 |
* Raw content of the PDF document.
|
|
|
58 |
* @private
|
|
|
59 |
*/
|
|
|
60 |
private $pdfdata = '';
|
|
|
61 |
|
|
|
62 |
/**
|
|
|
63 |
* XREF data.
|
|
|
64 |
* @protected
|
|
|
65 |
*/
|
|
|
66 |
protected $xref = array();
|
|
|
67 |
|
|
|
68 |
/**
|
|
|
69 |
* Array of PDF objects.
|
|
|
70 |
* @protected
|
|
|
71 |
*/
|
|
|
72 |
protected $objects = array();
|
|
|
73 |
|
|
|
74 |
/**
|
|
|
75 |
* Class object for decoding filters.
|
|
|
76 |
* @private
|
|
|
77 |
*/
|
|
|
78 |
private $FilterDecoders;
|
|
|
79 |
|
|
|
80 |
// -----------------------------------------------------------------------------
|
|
|
81 |
|
|
|
82 |
/**
 * Parse a PDF document and build the arrays of xref data and objects.
 *
 * The constructor stores the raw data, creates the filter decoder, reads the
 * cross-reference and trailer data, and then parses every indirect object that
 * is listed in the xref table with a positive offset. The raw data is released
 * once parsing has finished; use getParsedData() to read the result.
 * @param $data (string) PDF data to parse.
 * @public
 * @since 1.0.000 (2011-05-24)
 */
public function __construct($data) {
	if (empty($data)) {
		$this->Error('Empty PDF data.');
	}
	$this->pdfdata = $data;
	// initialize class for decoding filters
	$this->FilterDecoders = new TCPDF_FILTERS();
	// get xref and trailer data
	$this->xref = $this->getXrefData();
	// parse all document objects
	$this->objects = array();
	// the 'xref' key only exists when at least one in-use entry was found
	$entries = (isset($this->xref['xref']) AND is_array($this->xref['xref'])) ? $this->xref['xref'] : array();
	foreach ($entries as $obj => $offset) {
		// decode only objects with positive offset
		// (entries stored with a negative offset are skipped here)
		if (!isset($this->objects[$obj]) AND ($offset > 0)) {
			$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
		}
	}
	// release some memory
	$this->pdfdata = '';
}
|
|
|
111 |
|
|
|
112 |
/**
 * Return the data collected while parsing the PDF document.
 * @return (array) Two-element array: the xref/trailer data at index 0 and the array of parsed objects at index 1.
 * @public
 * @since 1.0.000 (2011-06-26)
 */
public function getParsedData() {
	$parsed = array();
	// index 0: cross-reference and trailer data
	$parsed[0] = $this->xref;
	// index 1: parsed document objects
	$parsed[1] = $this->objects;
	return $parsed;
}
|
|
|
121 |
|
|
|
122 |
/**
 * Get Cross-Reference (xref) table and trailer data from PDF document data.
 *
 * With a zero offset the last "startxref ... %%EOF" marker of the document is
 * used to locate the cross-reference data. With a non-zero offset the data is
 * looked up starting from that position. The located position is then decoded
 * either as a classic xref table (when it starts with the keyword "xref") or
 * as a cross-reference stream.
 * @param $offset (int) xref offset (if known).
 * @param $xref (array) previous xref array (if any).
 * @return Array containing xref and trailer data.
 * @protected
 * @since 1.0.000 (2011-05-24)
 */
protected function getXrefData($offset=0, $xref=array()) {
	$startxref_pattern = '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i';
	if ($offset == 0) {
		// find last startxref
		if (preg_match_all($startxref_pattern, $this->pdfdata, $matches, PREG_SET_ORDER, $offset) == 0) {
			$this->Error('Unable to find startxref');
		}
		$matches = array_pop($matches);
		$startxref = $matches[1];
	} else {
		if (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
			// Cross-Reference Stream object
			$startxref = $offset;
		} elseif (preg_match($startxref_pattern, $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
			// startxref found
			$startxref = $matches[1][0];
		} else {
			$this->Error('Unable to find startxref');
		}
	}
	// the offset may be a numeric string captured by a regular expression
	$startxref = intval($startxref);
	if (($startxref < 0) OR ($startxref >= strlen($this->pdfdata))) {
		// the declared position lies outside the document data
		$this->Error('Unable to find xref');
	}
	// check xref position (strict test: the keyword must be exactly at $startxref)
	if (substr($this->pdfdata, $startxref, 4) === 'xref') {
		// Cross-Reference
		$xref = $this->decodeXref($startxref, $xref);
	} else {
		// Cross-Reference Stream
		$xref = $this->decodeXrefStream($startxref, $xref);
	}
	if (empty($xref)) {
		$this->Error('Unable to find xref');
	}
	return $xref;
}
|
|
|
162 |
|
|
|
163 |
/**
 * Decode the Cross-Reference section
 *
 * Reads the subsection headers and the in-use ('n') / free ('f') entries that
 * follow the "xref" keyword, then reads the trailer dictionary. Offsets of
 * in-use objects are stored in $xref['xref'] using the key
 * "[object number]_[generation number]"; entries already present are kept, so
 * the data of the section decoded first takes precedence. Trailer values
 * (size, root, encrypt, info, id) are stored only if no trailer was stored yet.
 * When the trailer contains a /Prev entry the previous section is decoded too.
 * @param $startxref (int) Offset at which the xref section starts.
 * @param $xref (array) Previous xref array (if any).
 * @return Array containing xref and trailer data.
 * @protected
 * @since 1.0.000 (2011-06-20)
 */
protected function decodeXref($startxref, $xref=array()) {
	// extract xref data (object indexes and offsets):
	// skip the "xref" keyword and the first end-of-line character
	$xoffset = $startxref + 5;
	// initialize object number
	$obj_num = 0;
	$offset = $xoffset;
	while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
		// The pattern is searched forward from $offset, so it could also match
		// a line located after the end of this section. Accept the match only
		// when nothing but white space separates it from the previous entry.
		if (trim(substr($this->pdfdata, $offset, ($matches[0][1] - $offset))) !== '') {
			break;
		}
		// continue right after the matched text; the end-of-line characters
		// that follow are covered by the white space check above
		$offset = (strlen($matches[0][0]) + $matches[0][1]);
		if ($matches[3][0] == 'n') {
			// create unique object index: [object number]_[generation number]
			$index = $obj_num.'_'.intval($matches[2][0]);
			// check if object already exist
			if (!isset($xref['xref'][$index])) {
				// store object offset position
				$xref['xref'][$index] = intval($matches[1][0]);
			}
			++$obj_num;
		} elseif ($matches[3][0] == 'f') {
			// free entry: only the object counter advances
			++$obj_num;
		} else {
			// subsection header: first number is the object number (index)
			$obj_num = intval($matches[1][0]);
		}
	}
	// get trailer data
	if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $xoffset) > 0) {
		$trailer_data = $matches[1][0];
		if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
			// get only the last updated version
			$xref['trailer'] = array();
			// parse trailer_data
			if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
				$xref['trailer']['size'] = intval($matches[1]);
			}
			if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
				$xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
			}
			if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
				$xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
			}
			if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
				$xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
			}
			if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
				$xref['trailer']['id'] = array();
				$xref['trailer']['id'][0] = $matches[1];
				$xref['trailer']['id'][1] = $matches[2];
			}
		}
		if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
			$prevxref = intval($matches[1]);
			// A zero offset would restart from the last startxref and an offset
			// equal to this section would decode it again: both would recurse
			// without end, so such values are ignored.
			if (($prevxref > 0) AND ($prevxref != $startxref)) {
				// get previous xref
				$xref = $this->getXrefData($prevxref, $xref);
			}
		}
	} else {
		$this->Error('Unable to find trailer');
	}
	return $xref;
}
|
|
|
231 |
|
|
|
232 |
/**
 * Decode the Cross-Reference Stream section
 *
 * Reads the cross-reference stream object located at $startxref, collects the
 * relevant dictionary entries (/Type, /Index, /W, /Prev, /DecodeParms and, when
 * no trailer has been stored yet, /Size, /Root, /Info, /Encrypt, /ID), undoes
 * the PNG predictor when one is declared, splits every row into the three
 * fields whose byte widths are given by /W and fills $xref['xref']:
 * - type 1 entries are stored as "[object number]_[generation]" => offset
 *   (existing keys are kept);
 * - type 2 entries are stored as "[stream object number]_0_[index]" => -1.
 * When a /Prev entry exists the previous cross-reference data is decoded too.
 * NOTE: predictor 2 (TIFF) is not handled; such data is used as is.
 * @param $startxref (int) Offset at which the xref section starts.
 * @param $xref (array) Previous xref array (if any).
 * @return Array containing xref and trailer data.
 * @protected
 * @since 1.0.003 (2013-03-16)
 */
protected function decodeXrefStream($startxref, $xref=array()) {
	// try to read Cross-Reference Stream
	$xrefobj = $this->getRawObject($startxref);
	$xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
	if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
		// get only the last updated version
		$xref['trailer'] = array();
		$filltrailer = true;
	} else {
		$filltrailer = false;
	}
	$valid_crs = false;
	$index_pairs = array(); // subsections: (first object number, number of entries)
	$wb = null; // number of bytes of each of the three fields (/W)
	$columns = 1; // /Columns (1 when not declared)
	$predictor = 1; // /Predictor (1 = no prediction when not declared)
	// the stream dictionary is expected to be the first element of the object
	$sarr = array();
	if (is_array($xrefcrs) AND isset($xrefcrs[0]) AND is_array($xrefcrs[0]) AND isset($xrefcrs[0][1]) AND is_array($xrefcrs[0][1])) {
		$sarr = $xrefcrs[0][1];
	}
	foreach ($sarr as $k => $v) {
		// only a name followed by another element is of interest
		if (($v[0] != '/') OR !isset($sarr[($k + 1)])) {
			continue;
		}
		$next = $sarr[($k + 1)];
		switch ($v[1]) {
			case 'Type': {
				if (($next[0] == '/') AND ($next[1] == 'XRef')) {
					$valid_crs = true;
				}
				break;
			}
			case 'Index': {
				// pairs of: first object number in the subsection, number of entries in the subsection
				if (($next[0] == '[') AND is_array($next[1])) {
					$numidx = count($next[1]);
					for ($p = 0; ($p + 1) < $numidx; $p += 2) {
						$index_pairs[] = array(intval($next[1][$p][1]), intval($next[1][($p + 1)][1]));
					}
				}
				break;
			}
			case 'Prev': {
				if ($next[0] == 'numeric') {
					// get previous xref offset
					$prevxref = intval($next[1]);
				}
				break;
			}
			case 'W': {
				// number of bytes (in the decoded stream) of the corresponding field
				if (($next[0] == '[') AND is_array($next[1]) AND (count($next[1]) >= 3)) {
					$wb = array(intval($next[1][0][1]), intval($next[1][1][1]), intval($next[1][2][1]));
				}
				break;
			}
			case 'DecodeParms': {
				if (isset($next[1]) AND is_array($next[1])) {
					$decpar = $next[1];
					foreach ($decpar as $kdc => $vdc) {
						if (($vdc[0] != '/') OR !isset($decpar[($kdc + 1)]) OR ($decpar[($kdc + 1)][0] != 'numeric')) {
							continue;
						}
						if ($vdc[1] == 'Columns') {
							$columns = intval($decpar[($kdc + 1)][1]);
						} elseif ($vdc[1] == 'Predictor') {
							$predictor = intval($decpar[($kdc + 1)][1]);
						}
					}
				}
				break;
			}
			case 'Size': {
				if ($filltrailer AND ($next[0] == 'numeric')) {
					$xref['trailer']['size'] = $next[1];
				}
				break;
			}
			// 'ojbref' is the type tag produced by getRawObject() for indirect references
			case 'Root': {
				if ($filltrailer AND ($next[0] == 'ojbref')) {
					$xref['trailer']['root'] = $next[1];
				}
				break;
			}
			case 'Info': {
				if ($filltrailer AND ($next[0] == 'ojbref')) {
					$xref['trailer']['info'] = $next[1];
				}
				break;
			}
			case 'Encrypt': {
				// stored like decodeXref() does for classic trailers
				if ($filltrailer AND ($next[0] == 'ojbref')) {
					$xref['trailer']['encrypt'] = $next[1];
				}
				break;
			}
			case 'ID': {
				if ($filltrailer AND isset($next[1][0][1]) AND isset($next[1][1][1])) {
					$xref['trailer']['id'] = array();
					$xref['trailer']['id'][0] = $next[1][0][1];
					$xref['trailer']['id'][1] = $next[1][1][1];
				}
				break;
			}
			default: {
				break;
			}
		}
	}
	// number of bytes of one cross-reference entry
	$entrylen = is_array($wb) ? ($wb[0] + $wb[1] + $wb[2]) : 0;
	// decode data (index 1 of the object is expected to hold the stream, index 3 of the stream its decoded form)
	if ($valid_crs AND ($entrylen > 0) AND isset($xrefcrs[1][3][0]) AND is_string($xrefcrs[1][3][0]) AND ($xrefcrs[1][3][0] !== '')) {
		// convert the stream into a zero-based array of integers
		$bytes = array_values(unpack('C*', $xrefcrs[1][3][0]));
		// decoded rows
		$ddata = array();
		if ($predictor >= 10) {
			// PNG prediction: every row starts with one byte selecting the filter
			if ($columns < 1) {
				$columns = 1;
			}
			// number of bytes in a row
			$rowlen = ($columns + 1);
			// initialize first row with zeros
			$prev_row = array_fill(0, $columns, 0);
			// for each row apply PNG unpredictor
			foreach (array_chunk($bytes, $rowlen) as $row) {
				if (count($row) < $rowlen) {
					// truncated last row: it cannot hold a complete entry
					break;
				}
				// get PNG predictor value of this row
				$rowpred = (10 + $row[0]);
				$decrow = array();
				// for each byte on the row
				for ($i = 1; $i <= $columns; ++$i) {
					// new index
					$j = ($i - 1);
					$row_up = $prev_row[$j];
					if ($j == 0) {
						$row_left = 0;
						$row_upleft = 0;
					} else {
						// the predictors work on already decoded bytes
						$row_left = $decrow[($j - 1)];
						$row_upleft = $prev_row[($j - 1)];
					}
					switch ($rowpred) {
						case 10: { // PNG prediction (on encoding, PNG None on all rows)
							$decrow[$j] = $row[$i];
							break;
						}
						case 11: { // PNG prediction (on encoding, PNG Sub on all rows)
							$decrow[$j] = (($row[$i] + $row_left) & 0xff);
							break;
						}
						case 12: { // PNG prediction (on encoding, PNG Up on all rows)
							$decrow[$j] = (($row[$i] + $row_up) & 0xff);
							break;
						}
						case 13: { // PNG prediction (on encoding, PNG Average on all rows)
							// integer average, rounded down
							$decrow[$j] = (($row[$i] + (($row_left + $row_up) >> 1)) & 0xff);
							break;
						}
						case 14: { // PNG prediction (on encoding, PNG Paeth on all rows)
							// initial estimate
							$p = ($row_left + $row_up - $row_upleft);
							// distances
							$pa = abs($p - $row_left);
							$pb = abs($p - $row_up);
							$pc = abs($p - $row_upleft);
							// use the value with the minimum distance (ties: left, then up, then up-left)
							if (($pa <= $pb) AND ($pa <= $pc)) {
								$nearest = $row_left;
							} elseif ($pb <= $pc) {
								$nearest = $row_up;
							} else {
								$nearest = $row_upleft;
							}
							$decrow[$j] = (($row[$i] + $nearest) & 0xff);
							break;
						}
						default: { // PNG prediction (on encoding, PNG optimum)
							$this->Error('Unknown PNG predictor');
							break;
						}
					}
				}
				$ddata[] = $decrow;
				$prev_row = $decrow;
			} // end for each row
		} else {
			// no prediction declared: the stream is a plain sequence of entries
			$ddata = array_chunk($bytes, $entrylen);
		}
		// complete decoding: build the three integer fields of every entry
		$sdata = array();
		// for every row
		foreach ($ddata as $row) {
			if (count($row) < $entrylen) {
				// not enough bytes for a complete entry
				break;
			}
			$fields = array(0, 0, 0);
			if ($wb[0] == 0) {
				// default type field
				$fields[0] = 1;
			}
			$i = 0; // count bytes on the row
			// for every column
			for ($c = 0; $c < 3; ++$c) {
				// for every byte on the column (most significant byte first)
				for ($b = 0; $b < $wb[$c]; ++$b) {
					$fields[$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
					++$i;
				}
			}
			$sdata[] = $fields;
		}
		// fill xref
		$numrows = count($sdata);
		if (empty($index_pairs)) {
			// without /Index the entries are numbered starting from zero
			$index_pairs[] = array(0, $numrows);
		}
		$lastpair = (count($index_pairs) - 1);
		$rownum = 0;
		foreach ($index_pairs as $pk => $pair) {
			$obj_num = $pair[0];
			// rows in excess of the declared counts extend the last subsection
			$rowend = ($pk == $lastpair) ? $numrows : min($numrows, ($rownum + $pair[1]));
			// every entry, whatever its type, takes one object number
			for (; $rownum < $rowend; ++$rownum, ++$obj_num) {
				$row = $sdata[$rownum];
				switch ($row[0]) {
					case 0: { // (f) linked list of free objects
						break;
					}
					case 1: { // (n) objects that are in use but are not compressed
						// create unique object index: [object number]_[generation number]
						$index = $obj_num.'_'.$row[2];
						// check if object already exist
						if (!isset($xref['xref'][$index])) {
							// store object offset position
							$xref['xref'][$index] = $row[1];
						}
						break;
					}
					case 2: { // compressed objects
						// $row[1] = object number of the object stream in which this object is stored
						// $row[2] = index of this object within the object stream
						$index = $row[1].'_0_'.$row[2];
						$xref['xref'][$index] = -1;
						break;
					}
					default: { // null objects
						break;
					}
				}
			}
		}
	} // end decoding data
	// a zero offset or an offset equal to this section would recurse without end
	if (isset($prevxref) AND ($prevxref > 0) AND ($prevxref != $startxref)) {
		// get previous xref
		$xref = $this->getXrefData($prevxref, $xref);
	}
	return $xref;
}
|
|
|
436 |
|
|
|
437 |
/**
 * Get object type, raw value and offset to next object
 *
 * Skips leading white space and comments, then reads one token or composite
 * object starting at the given offset. The returned type is one of:
 * '/' (name), '(' (literal string), ')' , '[' (array, value = list of
 * elements), ']', '<<' (dictionary, value = list of elements), '>>',
 * '<' (hexadecimal string), '>', 'endobj', 'null', 'boolean', 'stream',
 * 'endstream', 'ojbref' (indirect reference, value "[number]_[generation]"),
 * 'ojb' (object start, value "[number]_[generation]"), 'numeric', or an empty
 * string when nothing was recognized (the offset is then not advanced beyond
 * the skipped white space).
 * The spelling of 'ojbref' and 'ojb' is kept as is because it is part of the
 * data returned to callers.
 * For 'stream' the value starts with the line feed that ends the "stream"
 * keyword line (only the carriage return of a CR+LF pair is skipped):
 * getIndirectObject() removes the first byte of the value before decoding.
 * @param $offset (int) Object offset.
 * @return array containing object type, raw value and offset to next object
 * @protected
 * @since 1.0.000 (2011-06-20)
 */
protected function getRawObject($offset=0) {
	$objtype = ''; // object type to be returned
	$objval = ''; // object value to be returned
	$offset = intval($offset);
	$pdflen = strlen($this->pdfdata);
	if (($offset < 0) OR ($offset >= $pdflen)) {
		// nothing to read outside the document data
		return array($objtype, $objval, $offset);
	}
	// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
	$offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
	if ($offset >= $pdflen) {
		// only white space was left
		return array($objtype, $objval, $offset);
	}
	// get first char
	$char = $this->pdfdata[$offset];
	// get object type
	switch ($char) {
		case '%': { // \x25 PERCENT SIGN
			// skip comment and search for next token
			$next = strcspn($this->pdfdata, "\r\n", $offset);
			if ($next > 0) {
				$offset += $next;
				return $this->getRawObject($offset);
			}
			break;
		}
		case '/': { // \x2F SOLIDUS
			// name object
			$objtype = $char;
			++$offset;
			if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
				$objval = $matches[1]; // unescaped value
				$offset += strlen($objval);
			}
			break;
		}
		case '(': // \x28 LEFT PARENTHESIS
		case ')': { // \x29 RIGHT PARENTHESIS
			// literal string object
			$objtype = $char;
			++$offset;
			$strpos = $offset;
			if ($char == '(') {
				// count nested parentheses until the opening one is balanced
				$open_bracket = 1;
				while ($open_bracket > 0) {
					if (!isset($this->pdfdata[$strpos])) {
						// end of data reached
						break;
					}
					$ch = $this->pdfdata[$strpos];
					switch ($ch) {
						case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
							// skip next character
							++$strpos;
							break;
						}
						case '(': { // LEFT PARENTHESIS (28h)
							++$open_bracket;
							break;
						}
						case ')': { // RIGHT PARENTHESIS (29h)
							--$open_bracket;
							break;
						}
					}
					++$strpos;
				}
				$objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
				$offset = $strpos;
			}
			break;
		}
		case '[': // \x5B LEFT SQUARE BRACKET
		case ']': { // \x5D RIGHT SQUARE BRACKET
			// array object
			$objtype = $char;
			++$offset;
			if ($char == '[') {
				// get array content
				$objval = array();
				do {
					$oldoffset = $offset;
					// get element
					$element = $this->getRawObject($offset);
					$offset = $element[2];
					$objval[] = $element;
					// stop also when no progress is made (unknown token or end of data)
				} while (($element[0] != ']') AND ($offset != $oldoffset));
				// remove closing delimiter (or the unrecognized last element)
				array_pop($objval);
			}
			break;
		}
		case '<': // \x3C LESS-THAN SIGN
		case '>': { // \x3E GREATER-THAN SIGN
			if (isset($this->pdfdata[($offset + 1)]) AND ($this->pdfdata[($offset + 1)] == $char)) {
				// dictionary object
				$objtype = $char.$char;
				$offset += 2;
				if ($char == '<') {
					// get array content
					$objval = array();
					do {
						$oldoffset = $offset;
						// get element
						$element = $this->getRawObject($offset);
						$offset = $element[2];
						$objval[] = $element;
						// stop also when no progress is made (unknown token or end of data)
					} while (($element[0] != '>>') AND ($offset != $oldoffset));
					// remove closing delimiter (or the unrecognized last element)
					array_pop($objval);
				}
			} else {
				// hexadecimal string object
				$objtype = $char;
				++$offset;
				// \G anchors the match at $offset without copying the remaining data;
				// white space between the digits is allowed and removed from the value
				if (($char == '<') AND (preg_match('/\G([0-9A-Fa-f\x00\x09\x0a\x0c\x0d\x20]*)>/', $this->pdfdata, $matches, 0, $offset) == 1)) {
					$objval = preg_replace('/[^0-9A-Fa-f]/', '', $matches[1]);
					$offset += strlen($matches[0]);
				}
			}
			break;
		}
		default: {
			if (substr($this->pdfdata, $offset, 6) == 'endobj') {
				// indirect object
				$objtype = 'endobj';
				$offset += 6;
			} elseif (substr($this->pdfdata, $offset, 4) == 'null') {
				// null object
				$objtype = 'null';
				$offset += 4;
				$objval = 'null';
			} elseif (substr($this->pdfdata, $offset, 4) == 'true') {
				// boolean true object
				$objtype = 'boolean';
				$offset += 4;
				$objval = 'true';
			} elseif (substr($this->pdfdata, $offset, 5) == 'false') {
				// boolean false object
				$objtype = 'boolean';
				$offset += 5;
				$objval = 'false';
			} elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
				// start stream object
				$objtype = 'stream';
				$offset += 6;
				// Skip only the carriage return of a CR+LF pair, so that the value
				// always starts with the single end-of-line byte that the caller
				// removes, whether the keyword is followed by CR+LF or by LF alone.
				if (substr($this->pdfdata, $offset, 2) == "\r\n") {
					++$offset;
				}
				// search the end of the stream starting from the current offset
				if (preg_match('/([\r\n]*endstream)/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) == 1) {
					$objval = substr($this->pdfdata, $offset, ($matches[0][1] - $offset));
					$offset = $matches[0][1];
				}
			} elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
				// end stream object
				$objtype = 'endstream';
				$offset += 9;
			} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
				// indirect object reference
				$objtype = 'ojbref';
				$offset += strlen($matches[0]);
				$objval = intval($matches[1]).'_'.intval($matches[2]);
			} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
				// object start
				$objtype = 'ojb';
				$objval = intval($matches[1]).'_'.intval($matches[2]);
				$offset += strlen ($matches[0]);
			} elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
				// numeric object
				$objtype = 'numeric';
				$objval = substr($this->pdfdata, $offset, $numlen);
				$offset += $numlen;
			}
			break;
		}
	}
	return array($objtype, $objval, $offset);
}
|
|
|
611 |
|
|
|
612 |
/**
 * Get content of indirect object.
 *
 * The text "[number] [generation] obj" must be found exactly at the given
 * offset; otherwise the null object is returned. The elements that follow are
 * read one by one until 'endobj' (which is not included in the result). When
 * decoding is enabled and a 'stream' element directly follows a dictionary,
 * the decoded stream (as returned by decodeStream()) is added to the stream
 * element at index 3.
 * @param $obj_ref (string) Object number and generation number separated by underscore character.
 * @param $offset (int) Object offset.
 * @param $decoding (boolean) If true decode streams.
 * @return array containing object data.
 * @protected
 * @since 1.0.000 (2011-05-24)
 */
protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
	$obj = explode('_', $obj_ref);
	if (($obj === false) OR (count($obj) != 2)) {
		$this->Error('Invalid object reference: '.$obj_ref);
		return;
	}
	$objref = $obj[0].' '.$obj[1].' obj';
	$offset = intval($offset);
	// strict test: the object header must start exactly at the given offset
	if (($offset < 0) OR (substr($this->pdfdata, $offset, strlen($objref)) !== $objref)) {
		// an indirect reference to an undefined object shall be considered a reference to the null object
		return array('null', 'null', $offset);
	}
	// starting position of object content
	$offset += strlen($objref);
	// get array of object content
	$objdata = array();
	$i = 0; // object main index
	do {
		$oldoffset = $offset;
		// get element
		$element = $this->getRawObject($offset);
		$offset = $element[2];
		// decode stream using stream's dictionary information
		if ($decoding AND ($element[0] == 'stream') AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == '<<')) {
			// the first byte of the raw stream value is dropped before decoding
			$element[3] = $this->decodeStream($objdata[($i - 1)][1], substr($element[1], 1));
		}
		$objdata[$i] = $element;
		++$i;
		// stop also when no progress is made (unknown token or end of data)
	} while (($element[0] != 'endobj') AND ($offset != $oldoffset));
	// remove closing delimiter (or the unrecognized last element)
	array_pop($objdata);
	// return raw object content
	return $objdata;
}
|
|
|
653 |
|
|
|
654 |
/**
 * Get the content of object, resolving indirect object reference if necessary.
 *
 * When $obj is an indirect reference, the referenced object is returned: from
 * the array of already parsed objects if present, otherwise it is parsed
 * (without decoding streams) from the offset stored in the xref table and
 * then cached. Any other element, and any reference that cannot be resolved,
 * is returned unchanged.
 * @param $obj (string) Object value.
 * @return array containing object data.
 * @protected
 * @since 1.0.000 (2011-06-26)
 */
protected function getObjectVal($obj) {
	// getRawObject() tags indirect references as 'ojbref'; 'objref' is accepted as well
	if (($obj[0] == 'ojbref') OR ($obj[0] == 'objref')) {
		$key = $obj[1];
		// reference to indirect object
		if (isset($this->objects[$key])) {
			// this object has been already parsed
			return $this->objects[$key];
		}
		// object offsets are stored under the 'xref' key of the xref data
		if (isset($this->xref['xref'][$key]) AND ($this->xref['xref'][$key] > 0)) {
			// parse new object
			$this->objects[$key] = $this->getIndirectObject($key, $this->xref['xref'][$key], false);
			return $this->objects[$key];
		}
	}
	return $obj;
}
|
|
|
675 |
|
|
|
676 |
/**
 * Decode the specified stream.
 *
 * The stream dictionary is scanned for /Length (the stream is cut to the
 * declared length when this is shorter than the available data) and for
 * /Filter (a single name or an array of names). The filters are applied in the
 * declared order for as long as a decoder is available; from the first filter
 * without a decoder onwards, the filters are not applied and are returned as
 * remaining filters.
 * @param $sdic (array) Stream's dictionary array.
 * @param $stream (string) Stream to decode.
 * @return array containing decoded stream data and remaining filters.
 * @protected
 * @since 1.0.000 (2011-06-22)
 */
protected function decodeStream($sdic, $stream) {
	// get stream length and filters
	$slength = strlen($stream);
	if ($slength <= 0) {
		return array('', array());
	}
	$filters = array();
	foreach ($sdic as $k => $v) {
		// only a name followed by another element is of interest
		if (($v[0] != '/') OR !isset($sdic[($k + 1)])) {
			continue;
		}
		if (($v[1] == 'Length') AND ($sdic[($k + 1)][0] == 'numeric')) {
			// get declared stream length
			$declength = intval($sdic[($k + 1)][1]);
			if (($declength >= 0) AND ($declength < $slength)) {
				$stream = substr($stream, 0, $declength);
				$slength = $declength;
			}
		} elseif ($v[1] == 'Filter') {
			// resolve indirect object
			$objval = $this->getObjectVal($sdic[($k + 1)]);
			if (isset($objval[0]) AND is_array($objval[0])) {
				// a resolved object is a list of elements: use the first one
				$objval = $objval[0];
			}
			if ($objval[0] == '/') {
				// single filter
				$filters[] = $objval[1];
			} elseif ($objval[0] == '[') {
				// array of filters
				foreach ($objval[1] as $flt) {
					if ($flt[0] == '/') {
						$filters[] = $flt[1];
					}
				}
			}
		}
	}
	// decode the stream
	// NOTE(review): assumes getAvailableFilters() returns the same list on every call - confirm
	$available = $this->FilterDecoders->getAvailableFilters();
	$remaining_filters = array();
	foreach ($filters as $filter) {
		// once a filter cannot be decoded the data is still encoded with it,
		// so none of the following filters can be applied either
		if (empty($remaining_filters) AND in_array($filter, $available)) {
			$stream = $this->FilterDecoders->decodeFilter($filter, $stream);
		} else {
			// add missing filter to array
			$remaining_filters[] = $filter;
		}
	}
	return array($stream, $remaining_filters);
}
|
|
|
729 |
|
|
|
730 |
/**
 * Fatal error handler: prints the given message, preceded by a
 * "TCPDF_PARSER ERROR" label in HTML bold, and terminates the script.
 * @param $msg (string) The error message
 * @public
 * @since 1.0.000 (2011-05-23)
 */
public function Error($msg) {
	// compose the text to be printed
	$output = '<strong>TCPDF_PARSER ERROR: </strong>'.$msg;
	// print it and halt the execution
	exit($output);
}
|
|
|
740 |
|
|
|
741 |
} // END OF TCPDF_PARSER CLASS
|
|
|
742 |
|
|
|
743 |
//============================================================+
|
|
|
744 |
// END OF FILE
|
|
|
745 |
//============================================================+
|