From f6ce576e7f7269d93668e45c0d30e4b2787c073e Mon Sep 17 00:00:00 2001 From: wlx Date: Thu, 3 Nov 2011 02:33:37 +0000 Subject: [PATCH] upgrade to version 1.4.2 --- include/fpdi/filters/FilterASCII85.php | 101 ++ include/fpdi/filters/FilterASCII85_FPDI.php | 33 + include/fpdi/filters/FilterLZW.php | 157 +++ include/fpdi/filters/FilterLZW_FPDI.php | 33 + include/fpdi/fpdi.php | 219 ++-- include/fpdi/fpdi2tcpdf_bridge.php | 54 +- include/fpdi/fpdi_pdf_parser.php | 116 +- include/fpdi/pdf_context.php | 157 +-- include/fpdi/pdf_parser.php | 1273 ++++++++++--------- 9 files changed, 1293 insertions(+), 850 deletions(-) create mode 100644 include/fpdi/filters/FilterASCII85.php create mode 100644 include/fpdi/filters/FilterASCII85_FPDI.php create mode 100644 include/fpdi/filters/FilterLZW.php create mode 100644 include/fpdi/filters/FilterLZW_FPDI.php diff --git a/include/fpdi/filters/FilterASCII85.php b/include/fpdi/filters/FilterASCII85.php new file mode 100644 index 00000000..01402ba1 --- /dev/null +++ b/include/fpdi/filters/FilterASCII85.php @@ -0,0 +1,101 @@ + ORD_u) { + return $this->error('Illegal character in ASCII85Decode.'); + } + + $chn[$state++] = $ch - ORD_exclmark; + + if ($state == 5) { + $state = 0; + $r = 0; + for ($j = 0; $j < 5; ++$j) + $r = $r * 85 + $chn[$j]; + $out .= chr($r >> 24); + $out .= chr($r >> 16); + $out .= chr($r >> 8); + $out .= chr($r); + } + } + $r = 0; + + if ($state == 1) + return $this->error('Illegal length in ASCII85Decode.'); + if ($state == 2) { + $r = $chn[0] * 85 * 85 * 85 * 85 + ($chn[1]+1) * 85 * 85 * 85; + $out .= chr($r >> 24); + } + else if ($state == 3) { + $r = $chn[0] * 85 * 85 * 85 * 85 + $chn[1] * 85 * 85 * 85 + ($chn[2]+1) * 85 * 85; + $out .= chr($r >> 24); + $out .= chr($r >> 16); + } + else if ($state == 4) { + $r = $chn[0] * 85 * 85 * 85 * 85 + $chn[1] * 85 * 85 * 85 + $chn[2] * 85 * 85 + ($chn[3]+1) * 85 ; + $out .= chr($r >> 24); + $out .= chr($r >> 16); + $out .= chr($r >> 8); + } + + return $out; + } + + function encode($in) { + return $this->error("ASCII85 encoding not implemented."); + } + } +} diff --git a/include/fpdi/filters/FilterASCII85_FPDI.php b/include/fpdi/filters/FilterASCII85_FPDI.php new file mode 100644 index 00000000..fb560011 --- /dev/null +++ b/include/fpdi/filters/FilterASCII85_FPDI.php @@ -0,0 +1,33 @@ +fpdi =& $fpdi; + } + + function error($msg) { + $this->fpdi->error($msg); + } +} \ No newline at end of file diff --git a/include/fpdi/filters/FilterLZW.php b/include/fpdi/filters/FilterLZW.php new file mode 100644 index 00000000..292461d2 --- /dev/null +++ b/include/fpdi/filters/FilterLZW.php @@ -0,0 +1,157 @@ +error('LZW flavour not supported.'); + } + + $this->initsTable(); + + $this->data = $data; + $this->dataLength = strlen($data); + + // Initialize pointers + $this->bytePointer = 0; + $this->bitPointer = 0; + + $this->nextData = 0; + $this->nextBits = 0; + + $oldCode = 0; + + $string = ''; + $uncompData = ''; + + while (($code = $this->getNextCode()) != 257) { + if ($code == 256) { + $this->initsTable(); + $code = $this->getNextCode(); + + if ($code == 257) { + break; + } + + $uncompData .= $this->sTable[$code]; + $oldCode = $code; + + } else { + + if ($code < $this->tIdx) { + $string = $this->sTable[$code]; + $uncompData .= $string; + + $this->addStringToTable($this->sTable[$oldCode], $string[0]); + $oldCode = $code; + } else { + $string = $this->sTable[$oldCode]; + $string = $string . $string[0]; + $uncompData .= $string; + + $this->addStringToTable($string); + $oldCode = $code; + } + } + } + + return $uncompData; + } + + + /** + * Initialize the string table. + */ + function initsTable() { + $this->sTable = array(); + + for ($i = 0; $i < 256; $i++) + $this->sTable[$i] = chr($i); + + $this->tIdx = 258; + $this->bitsToGet = 9; + } + + /** + * Add a new string to the string table. + */ + function addStringToTable ($oldString, $newString='') { + $string = $oldString.$newString; + + // Add this new String to the table + $this->sTable[$this->tIdx++] = $string; + + if ($this->tIdx == 511) { + $this->bitsToGet = 10; + } else if ($this->tIdx == 1023) { + $this->bitsToGet = 11; + } else if ($this->tIdx == 2047) { + $this->bitsToGet = 12; + } + } + + // Returns the next 9, 10, 11 or 12 bits + function getNextCode() { + if ($this->bytePointer == $this->dataLength) { + return 257; + } + + $this->nextData = ($this->nextData << 8) | (ord($this->data[$this->bytePointer++]) & 0xff); + $this->nextBits += 8; + + if ($this->nextBits < $this->bitsToGet) { + $this->nextData = ($this->nextData << 8) | (ord($this->data[$this->bytePointer++]) & 0xff); + $this->nextBits += 8; + } + + $code = ($this->nextData >> ($this->nextBits - $this->bitsToGet)) & $this->andTable[$this->bitsToGet-9]; + $this->nextBits -= $this->bitsToGet; + + return $code; + } + + function encode($in) { + $this->error("LZW encoding not implemented."); + } + } +} diff --git a/include/fpdi/filters/FilterLZW_FPDI.php b/include/fpdi/filters/FilterLZW_FPDI.php new file mode 100644 index 00000000..e7a88119 --- /dev/null +++ b/include/fpdi/filters/FilterLZW_FPDI.php @@ -0,0 +1,33 @@ +fpdi =& $fpdi; + } + + function error($msg) { + $this->fpdi->error($msg); + } +} \ No newline at end of file diff --git a/include/fpdi/fpdi.php b/include/fpdi/fpdi.php index c1ebb89c..0ae2c637 100644 --- a/include/fpdi/fpdi.php +++ b/include/fpdi/fpdi.php @@ -1,8 +1,8 @@ current_filename = $filename; - $fn =& $this->current_filename; - - if (!isset($this->parsers[$fn])) - $this->parsers[$fn] = new fpdi_pdf_parser($fn, $this); - $this->current_parser =& $this->parsers[$fn]; - return $this->parsers[$fn]->getPageCount(); + if (!isset($this->parsers[$filename])) + $this->parsers[$filename] = $this->_getPdfParser($filename); + $this->current_parser =& $this->parsers[$filename]; + + return $this->parsers[$filename]->getPageCount(); } + /** + * Returns a PDF parser object + * + * @param string $filename + * @return fpdi_pdf_parser + */ + function _getPdfParser($filename) { + return new fpdi_pdf_parser($filename, $this); + } + + /** + * Get the current PDF version + * + * @return string + */ + function getPDFVersion() { + return $this->PDFVersion; + } + + /** + * Set the PDF version + * + * @return string + */ + function setPDFVersion($version = '1.3') { + $this->PDFVersion = $version; + } + /** * Import a page * * @param int $pageno pagenumber * @return int Index of imported page - to use with fpdf_tpl::useTemplate() */ - function importPage($pageno, $boxName='/CropBox') { + function importPage($pageno, $boxName = '/CropBox') { if ($this->_intpl) { return $this->error('Please import the desired pages before creating a new template.'); } - $fn =& $this->current_filename; + $fn = $this->current_filename; // check if page already imported - $pageKey = $fn.((int)$pageno).$boxName; + $pageKey = $fn . '-' . ((int)$pageno) . $boxName; if (isset($this->_importedPages[$pageKey])) return $this->_importedPages[$pageKey]; $parser =& $this->parsers[$fn]; $parser->setPageno($pageno); - $this->tpl++; - $this->tpls[$this->tpl] = array(); - $tpl =& $this->tpls[$this->tpl]; - $tpl['parser'] =& $parser; - $tpl['resources'] = $parser->getPageResources(); - $tpl['buffer'] = $parser->getContent(); - if (!in_array($boxName, $parser->availableBoxes)) return $this->Error(sprintf('Unknown box: %s', $boxName)); - $pageboxes = $parser->getPageBoxes($pageno); + + $pageboxes = $parser->getPageBoxes($pageno, $this->k); /** * MediaBox @@ -137,32 +161,41 @@ class FPDI extends FPDF_TPL { if (!isset($pageboxes[$boxName])) return false; + $this->lastUsedPageBox = $boxName; $box = $pageboxes[$boxName]; + + $this->tpl++; + $this->tpls[$this->tpl] = array(); + $tpl =& $this->tpls[$this->tpl]; + $tpl['parser'] =& $parser; + $tpl['resources'] = $parser->getPageResources(); + $tpl['buffer'] = $parser->getContent(); $tpl['box'] = $box; // To build an array that can be used by PDF_TPL::useTemplate() - $this->tpls[$this->tpl] = array_merge($this->tpls[$this->tpl],$box); + $this->tpls[$this->tpl] = array_merge($this->tpls[$this->tpl], $box); // An imported page will start at 0,0 everytime. Translation will be set in _putformxobjects() $tpl['x'] = 0; $tpl['y'] = 0; - $page =& $parser->pages[$parser->pageno]; - // handle rotated pages $rotation = $parser->getPageRotation($pageno); $tpl['_rotationAngle'] = 0; if (isset($rotation[1]) && ($angle = $rotation[1] % 360) != 0) { - $steps = $angle / 90; + $steps = $angle / 90; $_w = $tpl['w']; $_h = $tpl['h']; $tpl['w'] = $steps % 2 == 0 ? $_w : $_h; $tpl['h'] = $steps % 2 == 0 ? $_h : $_w; - $tpl['_rotationAngle'] = $angle*-1; + if ($angle < 0) + $angle += 360; + + $tpl['_rotationAngle'] = $angle * -1; } $this->_importedPages[$pageKey] = $this->tpl; @@ -170,28 +203,54 @@ class FPDI extends FPDF_TPL { return $this->tpl; } + /** + * Returns the last used page box + * + * @return string + */ function getLastUsedPageBox() { return $this->lastUsedPageBox; } - function useTemplate($tplidx, $_x=null, $_y=null, $_w=0, $_h=0, $adjustPageSize=false) { + + function useTemplate($tplidx, $_x = null, $_y = null, $_w = 0, $_h = 0, $adjustPageSize = false) { if ($adjustPageSize == true && is_null($_x) && is_null($_y)) { $size = $this->getTemplateSize($tplidx, $_w, $_h); - $format = array($size['w'], $size['h']); - if ($format[0]!=$this->CurPageFormat[0] || $format[1]!=$this->CurPageFormat[1]) { - $this->w=$format[0]; - $this->h=$format[1]; - $this->wPt=$this->w*$this->k; - $this->hPt=$this->h*$this->k; - $this->PageBreakTrigger=$this->h-$this->bMargin; - $this->CurPageFormat=$format; - $this->PageSizes[$this->page]=array($this->wPt, $this->hPt); - } + $orientation = $size['w'] > $size['h'] ? 'L' : 'P'; + $size = array($size['w'], $size['h']); + + if (is_subclass_of($this, 'TCPDF')) { + $this->setPageFormat($size, $orientation); + } else { + $size = $this->_getpagesize($size); + + if($orientation!=$this->CurOrientation || $size[0]!=$this->CurPageSize[0] || $size[1]!=$this->CurPageSize[1]) + { + // New size or orientation + if($orientation=='P') + { + $this->w = $size[0]; + $this->h = $size[1]; + } + else + { + $this->w = $size[1]; + $this->h = $size[0]; + } + $this->wPt = $this->w*$this->k; + $this->hPt = $this->h*$this->k; + $this->PageBreakTrigger = $this->h-$this->bMargin; + $this->CurOrientation = $orientation; + $this->CurPageSize = $size; + $this->PageSizes[$this->page] = array($this->wPt, $this->hPt); + } + } } $this->_out('q 0 J 1 w 0 j 0 G 0 g'); // reset standard values $s = parent::useTemplate($tplidx, $_x, $_y, $_w, $_h); $this->_out('Q'); + return $s; } @@ -204,14 +263,14 @@ class FPDI extends FPDF_TPL { $this->current_parser =& $this->parsers[$filename]; if (isset($this->_obj_stack[$filename]) && is_array($this->_obj_stack[$filename])) { while(($n = key($this->_obj_stack[$filename])) !== null) { - $nObj = $this->current_parser->pdf_resolve_object($this->current_parser->c,$this->_obj_stack[$filename][$n][1]); + $nObj = $this->current_parser->pdf_resolve_object($this->current_parser->c, $this->_obj_stack[$filename][$n][1]); $this->_newobj($this->_obj_stack[$filename][$n][0]); if ($nObj[0] == PDF_TYPE_STREAM) { - $this->pdf_write_value ($nObj); + $this->pdf_write_value($nObj); } else { - $this->pdf_write_value ($nObj[1]); + $this->pdf_write_value($nObj[1]); } $this->_out('endobj'); @@ -237,15 +296,15 @@ class FPDI extends FPDF_TPL { $cN = $this->n; // TCPDF/Protection: rem current "n" $this->tpls[$tplidx]['n'] = $this->n; - $this->_out('<<'.$filter.'/Type /XObject'); + $this->_out('<<' . $filter . '/Type /XObject'); $this->_out('/Subtype /Form'); $this->_out('/FormType 1'); $this->_out(sprintf('/BBox [%.2F %.2F %.2F %.2F]', - (isset($tpl['box']['llx']) ? $tpl['box']['llx'] : $tpl['x'])*$this->k, - (isset($tpl['box']['lly']) ? $tpl['box']['lly'] : -$tpl['y'])*$this->k, - (isset($tpl['box']['urx']) ? $tpl['box']['urx'] : $tpl['w'] + $tpl['x'])*$this->k, - (isset($tpl['box']['ury']) ? $tpl['box']['ury'] : $tpl['h']-$tpl['y'])*$this->k + (isset($tpl['box']['llx']) ? $tpl['box']['llx'] : $tpl['x']) * $this->k, + (isset($tpl['box']['lly']) ? $tpl['box']['lly'] : -$tpl['y']) * $this->k, + (isset($tpl['box']['urx']) ? $tpl['box']['urx'] : $tpl['w'] + $tpl['x']) * $this->k, + (isset($tpl['box']['ury']) ? $tpl['box']['ury'] : $tpl['h'] - $tpl['y']) * $this->k )); $c = 1; @@ -255,7 +314,7 @@ class FPDI extends FPDF_TPL { if (isset($tpl['box'])) { $tx = -$tpl['box']['llx']; - $ty = -$tpl['box']['lly']; + $ty = -$tpl['box']['lly']; if ($tpl['_rotationAngle'] <> 0) { $angle = $tpl['_rotationAngle'] * M_PI/180; @@ -272,14 +331,14 @@ class FPDI extends FPDF_TPL { $ty = $tpl['box']['ury']; break; case -270: - $tx = $tpl['box']['ury']; - $ty = 0; + $tx = $tpl['box']['ury']; + $ty = -$tpl['box']['llx']; break; } } } else if ($tpl['x'] != 0 || $tpl['y'] != 0) { - $tx = -$tpl['x']*2; - $ty = $tpl['y']*2; + $tx = -$tpl['x'] * 2; + $ty = $tpl['y'] * 2; } $tx *= $this->k; @@ -301,7 +360,7 @@ class FPDI extends FPDF_TPL { if (isset($this->_res['tpl'][$tplidx]['fonts']) && count($this->_res['tpl'][$tplidx]['fonts'])) { $this->_out('/Font <<'); foreach($this->_res['tpl'][$tplidx]['fonts'] as $font) - $this->_out('/F'.$font['i'].' '.$font['n'].' 0 R'); + $this->_out('/F' . $font['i'] . ' ' . $font['n'] . ' 0 R'); $this->_out('>>'); } if(isset($this->_res['tpl'][$tplidx]['images']) && count($this->_res['tpl'][$tplidx]['images']) || @@ -310,11 +369,11 @@ class FPDI extends FPDF_TPL { $this->_out('/XObject <<'); if (isset($this->_res['tpl'][$tplidx]['images']) && count($this->_res['tpl'][$tplidx]['images'])) { foreach($this->_res['tpl'][$tplidx]['images'] as $image) - $this->_out('/I'.$image['i'].' '.$image['n'].' 0 R'); + $this->_out('/I' . $image['i'] . ' ' . $image['n'] . ' 0 R'); } if (isset($this->_res['tpl'][$tplidx]['tpls']) && count($this->_res['tpl'][$tplidx]['tpls'])) { foreach($this->_res['tpl'][$tplidx]['tpls'] as $i => $tpl) - $this->_out($this->tplprefix.$i.' '.$tpl['n'].' 0 R'); + $this->_out($this->tplprefix . $i . ' ' . $tpl['n'] . ' 0 R'); } $this->_out('>>'); } @@ -323,8 +382,14 @@ class FPDI extends FPDF_TPL { $nN = $this->n; // TCPDF: rem new "n" $this->n = $cN; // TCPDF: reset to current "n" - $this->_out('/Length '.strlen($p).' >>'); - $this->_putstream($p); + if (is_subclass_of($this, 'TCPDF')) { + $p = $this->_getrawstream($p); + $this->_out('/Length ' . strlen($p) . ' >>'); + $this->_out("stream\n" . $p . "\nendstream"); + } else { + $this->_out('/Length ' . strlen($p) . ' >>'); + $this->_putstream($p); + } $this->_out('endobj'); $this->n = $nN; // TCPDF: reset to new "n" } @@ -335,7 +400,7 @@ class FPDI extends FPDF_TPL { /** * Rewritten to handle existing own defined objects */ - function _newobj($obj_id=false,$onlynewobj=false) { + function _newobj($obj_id = false, $onlynewobj = false) { if (!$obj_id) { $obj_id = ++$this->n; } @@ -343,9 +408,11 @@ class FPDI extends FPDF_TPL { //Begin a new object if (!$onlynewobj) { $this->offsets[$obj_id] = is_subclass_of($this, 'TCPDF') ? $this->bufferlen : strlen($this->buffer); - $this->_out($obj_id.' 0 obj'); + $this->_out($obj_id . ' 0 obj'); $this->_current_obj_id = $obj_id; // for later use with encryption } + + return $obj_id; } /** @@ -362,19 +429,19 @@ class FPDI extends FPDF_TPL { switch ($value[0]) { - case PDF_TYPE_TOKEN : + case PDF_TYPE_TOKEN: $this->_straightOut($value[1] . ' '); break; - case PDF_TYPE_NUMERIC : - case PDF_TYPE_REAL : + case PDF_TYPE_NUMERIC: + case PDF_TYPE_REAL: if (is_float($value[1]) && $value[1] != 0) { - $this->_straightOut(rtrim(rtrim(sprintf('%F', $value[1]), '0'), '.') .' '); + $this->_straightOut(rtrim(rtrim(sprintf('%F', $value[1]), '0'), '.') . ' '); } else { $this->_straightOut($value[1] . ' '); } break; - case PDF_TYPE_ARRAY : + case PDF_TYPE_ARRAY: // An array. Output the proper // structure and move on. @@ -387,7 +454,7 @@ class FPDI extends FPDF_TPL { $this->_out(']'); break; - case PDF_TYPE_DICTIONARY : + case PDF_TYPE_DICTIONARY: // A dictionary. $this->_straightOut('<<'); @@ -402,30 +469,30 @@ class FPDI extends FPDF_TPL { $this->_straightOut('>>'); break; - case PDF_TYPE_OBJREF : + case PDF_TYPE_OBJREF: // An indirect object reference // Fill the object stack if needed $cpfn =& $this->current_parser->filename; if (!isset($this->_don_obj_stack[$cpfn][$value[1]])) { - $this->_newobj(false,true); + $this->_newobj(false, true); $this->_obj_stack[$cpfn][$value[1]] = array($this->n, $value); $this->_don_obj_stack[$cpfn][$value[1]] = array($this->n, $value); // Value is maybee obsolete!!! } $objid = $this->_don_obj_stack[$cpfn][$value[1]][0]; - $this->_out($objid.' 0 R'); + $this->_out($objid . ' 0 R'); break; - case PDF_TYPE_STRING : + case PDF_TYPE_STRING: // A string. - $this->_straightOut('('.$value[1].')'); + $this->_straightOut('(' . $value[1] . ')'); break; - case PDF_TYPE_STREAM : + case PDF_TYPE_STREAM: // A stream. First, output the // stream dictionary, then the @@ -435,15 +502,16 @@ class FPDI extends FPDF_TPL { $this->_out($value[2][1]); $this->_out('endstream'); break; - case PDF_TYPE_HEX : - $this->_straightOut('<'.$value[1].'>'); + + case PDF_TYPE_HEX: + $this->_straightOut('<' . $value[1] . '>'); break; - case PDF_TYPE_BOOLEAN : + case PDF_TYPE_BOOLEAN: $this->_straightOut($value[1] ? 'true ' : 'false '); break; - case PDF_TYPE_NULL : + case PDF_TYPE_NULL: // The null object. $this->_straightOut('null '); @@ -467,7 +535,7 @@ class FPDI extends FPDF_TPL { // puts data before page footer $page = substr($this->getPageBuffer($this->page), 0, -$this->footerlen[$this->page]); $footer = substr($this->getPageBuffer($this->page), -$this->footerlen[$this->page]); - $this->setPageBuffer($this->page, $page.' '.$s."\n".$footer); + $this->setPageBuffer($this->page, $page . ' ' . $s . "\n" . $footer); } else { $this->setPageBuffer($this->page, $s, true); } @@ -500,5 +568,4 @@ class FPDI extends FPDF_TPL { } return false; } - } \ No newline at end of file diff --git a/include/fpdi/fpdi2tcpdf_bridge.php b/include/fpdi/fpdi2tcpdf_bridge.php index 20a5cccd..8a031a38 100644 --- a/include/fpdi/fpdi2tcpdf_bridge.php +++ b/include/fpdi/fpdi2tcpdf_bridge.php @@ -1,8 +1,8 @@ PDFVersion; - case 'k': - return $this->k; - default: - // Error handling - $this->Error('Cannot access protected property '.get_class($this).':$'.$name.' / Undefined property: '.get_class($this).'::$'.$name); + function _putstream($s) { + $this->_out($this->_getstream($s)); + } + + function _getxobjectdict() { + $out = parent::_getxobjectdict(); + if (count($this->tpls)) { + foreach($this->tpls as $tplidx => $tpl) { + $out .= sprintf('%s%d %d 0 R', $this->tplprefix, $tplidx, $tpl['n']); + } } + + return $out; } - - function __set($name, $value) { - switch ($name) { - case 'PDFVersion': - $this->PDFVersion = $value; - break; - default: - // Error handling - $this->Error('Cannot access protected property '.get_class($this).':$'.$name.' / Undefined property: '.get_class($this).'::$'.$name); - } - } - + /** * Encryption of imported data by FPDI * @@ -58,24 +50,28 @@ class FPDF extends TCPDF { */ function pdf_write_value(&$value) { switch ($value[0]) { - case PDF_TYPE_STRING : + case PDF_TYPE_STRING: if ($this->encrypted) { $value[1] = $this->_unescape($value[1]); - $value[1] = $this->_RC4($this->_objectkey($this->_current_obj_id), $value[1]); + $value[1] = $this->_encrypt_data($this->_current_obj_id, $value[1]); $value[1] = $this->_escape($value[1]); } break; - case PDF_TYPE_STREAM : + case PDF_TYPE_STREAM: if ($this->encrypted) { - $value[2][1] = $this->_RC4($this->_objectkey($this->_current_obj_id), $value[2][1]); + $value[2][1] = $this->_encrypt_data($this->_current_obj_id, $value[2][1]); + $value[1][1]['/Length'] = array( + PDF_TYPE_NUMERIC, + strlen($value[2][1]) + ); } break; - case PDF_TYPE_HEX : + case PDF_TYPE_HEX: if ($this->encrypted) { $value[1] = $this->hex2str($value[1]); - $value[1] = $this->_RC4($this->_objectkey($this->_current_obj_id), $value[1]); + $value[1] = $this->_encrypt_data($this->_current_obj_id, $value[1]); // remake hexstring of encrypted string $value[1] = $this->str2hex($value[1]); diff --git a/include/fpdi/fpdi_pdf_parser.php b/include/fpdi/fpdi_pdf_parser.php index b1fb5626..fd2b4414 100644 --- a/include/fpdi/fpdi_pdf_parser.php +++ b/include/fpdi/fpdi_pdf_parser.php @@ -1,8 +1,8 @@ pages[$this->pageno][1][1]['/Contents'])) { $contents = $this->_getPageContent($this->pages[$this->pageno][1][1]['/Contents']); foreach($contents AS $tmp_content) { - $buffer .= $this->_rebuildContentStream($tmp_content).' '; + $buffer .= $this->_rebuildContentStream($tmp_content) . ' '; } } @@ -213,6 +213,11 @@ class fpdi_pdf_parser extends pdf_parser { if (isset($obj[1][1]['/Filter'])) { $_filter = $obj[1][1]['/Filter']; + if ($_filter[0] == PDF_TYPE_OBJREF) { + $tmpFilter = $this->pdf_resolve_object($this->c, $_filter); + $_filter = $tmpFilter[1]; + } + if ($_filter[0] == PDF_TYPE_TOKEN) { $filters[] = $_filter; } else if ($_filter[0] == PDF_TYPE_ARRAY) { @@ -225,30 +230,35 @@ class fpdi_pdf_parser extends pdf_parser { foreach ($filters AS $_filter) { switch ($_filter[1]) { case '/FlateDecode': - if (function_exists('gzuncompress')) { - $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : ''; + case '/Fl': + // $stream .= "\x0F\x0D"; // in an errorious stream this suffix could work + // $stream .= "\x0A"; + // $stream .= "\x0D"; + if (function_exists('gzuncompress')) { + $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : ''; } else { - $this->fpdi->error(sprintf('To handle %s filter, please compile php with zlib support.',$_filter[1])); + $this->error(sprintf('To handle %s filter, please compile php with zlib support.',$_filter[1])); } + if ($stream === false) { - $this->fpdi->error('Error while decompressing stream.'); + $this->error('Error while decompressing stream.'); } break; + case '/LZWDecode': + include_once('filters/FilterLZW_FPDI.php'); + $decoder = new FilterLZW_FPDI($this->fpdi); + $stream = $decoder->decode($stream); + break; + case '/ASCII85Decode': + include_once('filters/FilterASCII85_FPDI.php'); + $decoder = new FilterASCII85_FPDI($this->fpdi); + $stream = $decoder->decode($stream); + break; case null: $stream = $stream; break; default: - if (preg_match('/^\/[a-z85]*$/i', $_filter[1], $filterName) && @include_once('decoders'.$_filter[1].'.php')) { - $filterName = substr($_filter[1],1); - if (class_exists($filterName)) { - $decoder = new $filterName($this->fpdi); - $stream = $decoder->decode($stream); - } else { - $this->fpdi->error(sprintf('Unsupported Filter: %s',$_filter[1])); - } - } else { - $this->fpdi->error(sprintf('Unsupported Filter: %s',$_filter[1])); - } + $this->error(sprintf('Unsupported Filter: %s',$_filter[1])); } } @@ -262,52 +272,60 @@ class fpdi_pdf_parser extends pdf_parser { * * @param array $page a /Page * @param string $box_index Type of Box @see $availableBoxes + * @param float Scale factor from user space units to points * @return array */ - function getPageBox($page, $box_index) { - $page = $this->pdf_resolve_object($this->c,$page); + function getPageBox($page, $box_index, $k) { + $page = $this->pdf_resolve_object($this->c, $page); $box = null; if (isset($page[1][1][$box_index])) $box =& $page[1][1][$box_index]; if (!is_null($box) && $box[0] == PDF_TYPE_OBJREF) { - $tmp_box = $this->pdf_resolve_object($this->c,$box); + $tmp_box = $this->pdf_resolve_object($this->c, $box); $box = $tmp_box[1]; } if (!is_null($box) && $box[0] == PDF_TYPE_ARRAY) { $b =& $box[1]; - return array('x' => $b[0][1]/$this->fpdi->k, - 'y' => $b[1][1]/$this->fpdi->k, - 'w' => abs($b[0][1]-$b[2][1])/$this->fpdi->k, - 'h' => abs($b[1][1]-$b[3][1])/$this->fpdi->k, - 'llx' => $b[0][1]/$this->fpdi->k, - 'lly' => $b[1][1]/$this->fpdi->k, - 'urx' => $b[2][1]/$this->fpdi->k, - 'ury' => $b[3][1]/$this->fpdi->k, + return array('x' => $b[0][1] / $k, + 'y' => $b[1][1] / $k, + 'w' => abs($b[0][1] - $b[2][1]) / $k, + 'h' => abs($b[1][1] - $b[3][1]) / $k, + 'llx' => min($b[0][1], $b[2][1]) / $k, + 'lly' => min($b[1][1], $b[3][1]) / $k, + 'urx' => max($b[0][1], $b[2][1]) / $k, + 'ury' => max($b[1][1], $b[3][1]) / $k, ); } else if (!isset ($page[1][1]['/Parent'])) { return false; } else { - return $this->getPageBox($this->pdf_resolve_object($this->c, $page[1][1]['/Parent']), $box_index); + return $this->getPageBox($this->pdf_resolve_object($this->c, $page[1][1]['/Parent']), $box_index, $k); } } - function getPageBoxes($pageno) { - return $this->_getPageBoxes($this->pages[$pageno-1]); + /** + * Get all page boxes by page no + * + * @param int The page number + * @param float Scale factor from user space units to points + * @return array + */ + function getPageBoxes($pageno, $k) { + return $this->_getPageBoxes($this->pages[$pageno - 1], $k); } /** - * Get all Boxes from /Page + * Get all boxes from /Page * * @param array a /Page * @return array */ - function _getPageBoxes($page) { + function _getPageBoxes($page, $k) { $boxes = array(); foreach($this->availableBoxes AS $box) { - if ($_box = $this->getPageBox($page,$box)) { + if ($_box = $this->getPageBox($page, $box, $k)) { $boxes[$box] = $_box; } } @@ -322,10 +340,10 @@ class fpdi_pdf_parser extends pdf_parser { * @return array */ function getPageRotation($pageno) { - return $this->_getPageRotation($this->pages[$pageno-1]); + return $this->_getPageRotation($this->pages[$pageno - 1]); } - function _getPageRotation ($obj) { // $obj = /Page + function _getPageRotation($obj) { // $obj = /Page $obj = $this->pdf_resolve_object($this->c, $obj); if (isset ($obj[1][1]['/Rotate'])) { $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Rotate']); @@ -351,18 +369,25 @@ class fpdi_pdf_parser extends pdf_parser { * @param array /Pages * @param array the result-array */ - function read_pages (&$c, &$pages, &$result) { + function read_pages(&$c, &$pages, &$result) { // Get the kids dictionary - $kids = $this->pdf_resolve_object ($c, $pages[1][1]['/Kids']); - - if (!is_array($kids)) - $this->fpdi->Error('Cannot find /Kids in current /Page-Dictionary'); - foreach ($kids[1] as $v) { + $_kids = $this->pdf_resolve_object ($c, $pages[1][1]['/Kids']); + + if (!is_array($_kids)) + $this->error('Cannot find /Kids in current /Page-Dictionary'); + + if ($_kids[1][0] == PDF_TYPE_ARRAY) { + $kids = $_kids[1][1]; + } else { + $kids = $_kids[1]; + } + + foreach ($kids as $v) { $pg = $this->pdf_resolve_object ($c, $v); if ($pg[1][1]['/Type'][1] === '/Pages') { // If one of the kids is an embedded // /Pages array, resolve it as well. - $this->read_pages ($c, $pg, $result); + $this->read_pages($c, $pg, $result); } else { $result[] = $pg; } @@ -378,7 +403,6 @@ class fpdi_pdf_parser extends pdf_parser { */ function getPDFVersion() { parent::getPDFVersion(); - $this->fpdi->PDFVersion = max($this->fpdi->PDFVersion, $this->pdfVersion); + $this->fpdi->setPDFVersion(max($this->fpdi->getPDFVersion(), $this->pdfVersion)); } - } \ No newline at end of file diff --git a/include/fpdi/pdf_context.php b/include/fpdi/pdf_context.php index cddc9fb9..435df1e8 100644 --- a/include/fpdi/pdf_context.php +++ b/include/fpdi/pdf_context.php @@ -1,8 +1,8 @@ file =& $f; - if (is_string($this->file)) - $this->_mode = 1; - $this->reset(); - } - - // Optionally move the file - // pointer to a new location - // and reset the buffered data - - function reset($pos = null, $l = 100) { - if ($this->_mode == 0) { - if (!is_null ($pos)) { - fseek ($this->file, $pos); + class pdf_context { + + /** + * Modi + * + * @var integer 0 = file | 1 = string + */ + var $_mode = 0; + + var $file; + var $buffer; + var $offset; + var $length; + + var $stack; + + // Constructor + + function pdf_context(&$f) { + $this->file =& $f; + if (is_string($this->file)) + $this->_mode = 1; + $this->reset(); + } + + // Optionally move the file + // pointer to a new location + // and reset the buffered data + + function reset($pos = null, $l = 100) { + if ($this->_mode == 0) { + if (!is_null ($pos)) { + fseek ($this->file, $pos); + } + + $this->buffer = $l > 0 ? fread($this->file, $l) : ''; + $this->length = strlen($this->buffer); + if ($this->length < $l) + $this->increase_length($l - $this->length); + } else { + $this->buffer = $this->file; + $this->length = strlen($this->buffer); + } + $this->offset = 0; + $this->stack = array(); + } + + // Make sure that there is at least one + // character beyond the current offset in + // the buffer to prevent the tokenizer + // from attempting to access data that does + // not exist + + function ensure_content() { + if ($this->offset >= $this->length - 1) { + return $this->increase_length(); + } else { + return true; } + } - $this->buffer = $l > 0 ? fread($this->file, $l) : ''; - $this->length = strlen($this->buffer); - if ($this->length < $l) - $this->increase_length($l - $this->length); - } else { - $this->buffer = $this->file; - $this->length = strlen($this->buffer); - } - $this->offset = 0; - $this->stack = array(); - } - - // Make sure that there is at least one - // character beyond the current offset in - // the buffer to prevent the tokenizer - // from attempting to access data that does - // not exist - - function ensure_content() { - if ($this->offset >= $this->length - 1) { - return $this->increase_length(); - } else { - return true; + // Forcefully read more data into the buffer + + function increase_length($l = 100) { + if ($this->_mode == 0 && feof($this->file)) { + return false; + } else if ($this->_mode == 0) { + $totalLength = $this->length + $l; + do { + $toRead = $totalLength - $this->length; + if ($toRead < 1) + break; + + $this->buffer .= fread($this->file, $toRead); + } while ((($this->length = strlen($this->buffer)) != $totalLength) && !feof($this->file)); + + return true; + } else { + return false; + } } - } - - // Forcefully read more data into the buffer - - function increase_length($l=100) { - if ($this->_mode == 0 && feof($this->file)) { - return false; - } else if ($this->_mode == 0) { - $totalLength = $this->length + $l; - do { - $this->buffer .= fread($this->file, $totalLength-$this->length); - } while ((($this->length = strlen($this->buffer)) != $totalLength) && !feof($this->file)); - - return true; - } else { - return false; - } - } + } } \ No newline at end of file diff --git a/include/fpdi/pdf_parser.php b/include/fpdi/pdf_parser.php index 77186519..4c6dd8e8 100644 --- a/include/fpdi/pdf_parser.php +++ b/include/fpdi/pdf_parser.php @@ -1,8 +1,8 @@ filename = $filename; + class pdf_parser { + + /** + * Filename + * @var string + */ + var $filename; - $this->f = @fopen($this->filename, 'rb'); - - if (!$this->f) - $this->error(sprintf('Cannot open %s !', $filename)); - - $this->getPDFVersion(); - - $this->c = new pdf_context($this->f); + /** + * File resource + * @var resource + */ + var $f; - // Read xref-Data - $this->pdf_read_xref($this->xref, $this->pdf_find_xref()); + /** + * PDF Context + * @var object pdf_context-Instance + */ + var $c; - // Check for Encryption - $this->getEncryption(); - - // Read root - $this->pdf_read_root(); - } + /** + * xref-Data + * @var array + */ + var $xref; - /** - * Close the opened file - */ - function closeFile() { - if (isset($this->f)) { - fclose($this->f); - unset($this->f); - } - } - - /** - * Print Error and die - * - * @param string $msg Error-Message - */ - function error($msg) { - die('PDF-Parser Error: '.$msg); - } - - /** - * Check Trailer for Encryption - */ - function getEncryption() { - if (isset($this->xref['trailer'][1]['/Encrypt'])) { - $this->error('File is encrypted!'); - } - } - - /** - * Find/Return /Root - * - * @return array - */ - function pdf_find_root() { - if ($this->xref['trailer'][1]['/Root'][0] != PDF_TYPE_OBJREF) { - $this->error('Wrong Type of Root-Element! Must be an indirect reference'); - } - return $this->xref['trailer'][1]['/Root']; - } - - /** - * Read the /Root - */ - function pdf_read_root() { - // read root - $this->root = $this->pdf_resolve_object($this->c, $this->pdf_find_root()); - } - - /** - * Get PDF-Version - * - * And reset the PDF Version used in FPDI if needed - */ - function getPDFVersion() { - fseek($this->f, 0); - preg_match('/\d\.\d/',fread($this->f,16),$m); - if (isset($m[0])) - $this->pdfVersion = $m[0]; - return $this->pdfVersion; - } - - /** - * Find the xref-Table - */ - function pdf_find_xref() { - $toRead = 1500; - - $stat = fseek ($this->f, -$toRead, SEEK_END); - if ($stat === -1) { - fseek ($this->f, 0); - } - $data = fread($this->f, $toRead); + /** + * root-Object + * @var array + */ + var $root; + + /** + * PDF version of the loaded document + * @var string + */ + var $pdfVersion; - $pos = strlen($data) - strpos(strrev($data), strrev('startxref')); - $data = substr($data, $pos); - - if (!preg_match('/\s*(\d+).*$/s', $data, $matches)) { - $this->error('Unable to find pointer to xref table'); - } - - return (int) $matches[1]; - } - - /** - * Read xref-table - * - * @param array $result Array of xref-table - * @param integer $offset of xref-table - */ - function pdf_read_xref(&$result, $offset) { - - fseek($this->f, $o_pos = $offset-20); // set some bytes backwards to fetch errorious docs + /** + * For reading encrypted documents and xref/objectstreams are in use + * + * @var boolean + */ + var $readPlain = true; + + /** + * Constructor + * + * @param string $filename Source-Filename + */ + function pdf_parser($filename) { + $this->filename = $filename; - $data = fread($this->f, 100); - - $xrefPos = strpos($data, 'xref'); - - if ($xrefPos === false) { - $this->error('Unable to find xref table.'); + $this->f = @fopen($this->filename, 'rb'); + + if (!$this->f) + $this->error(sprintf('Cannot open %s !', $filename)); + + $this->getPDFVersion(); + + $this->c = new pdf_context($this->f); + + // Read xref-Data + $this->xref = array(); + $this->pdf_read_xref($this->xref, $this->pdf_find_xref()); + + // Check for Encryption + $this->getEncryption(); + + // Read root + $this->pdf_read_root(); } - if (!isset($result['xref_location'])) { - $result['xref_location'] = $o_pos+$xrefPos; - $result['max_object'] = 0; - } - - $cylces = -1; - $bytesPerCycle = 100; - - fseek($this->f, $o_pos = $o_pos+$xrefPos+4); // set the handle directly after the "xref"-keyword - $data = fread($this->f, $bytesPerCycle); - - while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle*$cylces++, 0))) === false && !feof($this->f)) { - $data .= fread($this->f, $bytesPerCycle); + /** + * Close the opened file + */ + function closeFile() { + if (isset($this->f) && is_resource($this->f)) { + fclose($this->f); + unset($this->f); + } } - if ($trailerPos === false) { - $this->error('Trailer keyword not found after xref table'); + /** + * Print Error and die + * + * @param string $msg Error-Message + */ + function error($msg) { + die('PDF-Parser Error: ' . $msg); } - $data = substr($data, 0, $trailerPos); - - // get Line-Ending - preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m); // check the first 100 bytes for linebreaks - - $differentLineEndings = count(array_unique($m[0])); - if ($differentLineEndings > 1) { - $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY); - } else { - $lines = explode($m[0][1], $data); - } - - $data = $differentLineEndings = $m = null; - unset($data, $differentLineEndings, $m); - - $linesCount = count($lines); - - $start = 1; - - for ($i = 0; $i < $linesCount; $i++) { - $line = trim($lines[$i]); - if ($line) { - $pieces = explode(' ', $line); - $c = count($pieces); - switch($c) { - case 2: - $start = (int)$pieces[0]; - $end = $start+(int)$pieces[1]; - if ($end > $result['max_object']) - $result['max_object'] = $end; - break; - case 3: - if (!isset($result['xref'][$start])) - $result['xref'][$start] = array(); - - if (!array_key_exists($gen = (int) $pieces[1], $result['xref'][$start])) { - $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null; - } - $start++; - break; - default: - $this->error('Unexpected data in xref table'); - } + /** + * Check Trailer for Encryption + */ + function getEncryption() { + if (isset($this->xref['trailer'][1]['/Encrypt'])) { + $this->error('File is encrypted!'); } } - $lines = $pieces = $line = $start = $end = $gen = null; - unset($lines, $pieces, $line, $start, $end, $gen); - - fseek($this->f, $o_pos+$trailerPos+7); - - $c = new pdf_context($this->f); - $trailer = $this->pdf_read_value($c); - - $c = null; - unset($c); - - if (!isset($result['trailer'])) { - $result['trailer'] = $trailer; - } - - if (isset($trailer[1]['/Prev'])) { - $this->pdf_read_xref($result, $trailer[1]['/Prev'][1]); - } - - $trailer = null; - unset($trailer); - - return true; - } + /** + * Find/Return /Root + * + * @return array + */ + function pdf_find_root() { + if ($this->xref['trailer'][1]['/Root'][0] != PDF_TYPE_OBJREF) { + $this->error('Wrong Type of Root-Element! Must be an indirect reference'); + } + + return $this->xref['trailer'][1]['/Root']; + } - /** - * Reads an Value - * - * @param object $c pdf_context - * @param string $token a Token - * @return mixed - */ - function pdf_read_value(&$c, $token = null) { - if (is_null($token)) { - $token = $this->pdf_read_token($c); - } - - if ($token === false) { - return false; - } - - switch ($token) { - case '<': - // This is a hex string. - // Read the value, then the terminator - - $pos = $c->offset; - - while(1) { - - $match = strpos ($c->buffer, '>', $pos); - - // If you can't find it, try - // reading more data from the stream - - if ($match === false) { - if (!$c->increase_length()) { - return false; - } else { - continue; - } - } - - $result = substr ($c->buffer, $c->offset, $match - $c->offset); - $c->offset = $match + 1; - - return array (PDF_TYPE_HEX, $result); + /** + * Read the /Root + */ + function pdf_read_root() { + // read root + $this->root = $this->pdf_resolve_object($this->c, $this->pdf_find_root()); + } + + /** + * Get PDF-Version + * + * And reset the PDF Version used in FPDI if needed + */ + function getPDFVersion() { + fseek($this->f, 0); + preg_match('/\d\.\d/',fread($this->f, 16), $m); + if (isset($m[0])) + $this->pdfVersion = $m[0]; + return $this->pdfVersion; + } + + /** + * Find the xref-Table + */ + function pdf_find_xref() { + $toRead = 1500; + + $stat = fseek ($this->f, -$toRead, SEEK_END); + if ($stat === -1) { + fseek ($this->f, 0); + } + $data = fread($this->f, $toRead); + + $pos = strlen($data) - strpos(strrev($data), strrev('startxref')); + $data = substr($data, $pos); + + if (!preg_match('/\s*(\d+).*$/s', $data, $matches)) { + $this->error('Unable to find pointer to xref table'); + } + + return (int) $matches[1]; + } + + /** + * Read xref-table + * + * @param array $result Array of xref-table + * @param integer $offset of xref-table + */ + function pdf_read_xref(&$result, $offset) { + $o_pos = $offset-min(20, $offset); + fseek($this->f, $o_pos); // set some bytes backwards to fetch errorious docs + + $data = fread($this->f, 100); + + $xrefPos = strrpos($data, 'xref'); + + if ($xrefPos === false) { + fseek($this->f, $offset); + $c = new pdf_context($this->f); + $xrefStreamObjDec = $this->pdf_read_value($c); + + if (is_array($xrefStreamObjDec) && isset($xrefStreamObjDec[0]) && $xrefStreamObjDec[0] == PDF_TYPE_OBJDEC) { + $this->error(sprintf('This document (%s) probably uses a compression technique which is not supported by the free parser shipped with FPDI.', $this->filename)); + } else { + $this->error('Unable to find xref table.'); } - - break; - case '<<': - // This is a dictionary. - - $result = array(); - - // Recurse into this function until we reach - // the end of the dictionary. - while (($key = $this->pdf_read_token($c)) !== '>>') { - if ($key === false) { - return false; - } - - if (($value = $this->pdf_read_value($c)) === false) { - return false; - } - - // Catch missing value - if ($value[0] == PDF_TYPE_TOKEN && $value[1] == '>>') { - $result[$key] = array(PDF_TYPE_NULL); - break; - } - - $result[$key] = $value; - } - - return array (PDF_TYPE_DICTIONARY, $result); - - case '[': - // This is an array. - - $result = array(); - - // Recurse into this function until we reach - // the end of the array. - while (($token = $this->pdf_read_token($c)) !== ']') { - if ($token === false) { - return false; - } - - if (($value = $this->pdf_read_value($c, $token)) === false) { - return false; - } - - $result[] = $value; - } - - return array (PDF_TYPE_ARRAY, $result); - - case '(' : - // This is a string - $pos = $c->offset; - - $openBrackets = 1; - do { - for (; $openBrackets != 0 && $pos < $c->length; $pos++) { - switch (ord($c->buffer[$pos])) { - case 0x28: // '(' - $openBrackets++; - break; - case 0x29: // ')' - $openBrackets--; - break; - case 0x5C: // backslash - $pos++; - } + } + + if (!isset($result['xref_location'])) { + $result['xref_location'] = $o_pos + $xrefPos; + $result['max_object'] = 0; + } + + $cylces = -1; + $bytesPerCycle = 100; + + fseek($this->f, $o_pos = $o_pos + $xrefPos + 4); // set the handle directly after the "xref"-keyword + $data = fread($this->f, $bytesPerCycle); + + while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle * $cylces++, 0))) === false && !feof($this->f)) { + $data .= fread($this->f, $bytesPerCycle); + } + + if ($trailerPos === false) { + $this->error('Trailer keyword not found after xref table'); + } + + $data = substr($data, 0, $trailerPos); + + // get Line-Ending + preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m); // check the first 100 bytes for linebreaks + + $differentLineEndings = count(array_unique($m[0])); + if ($differentLineEndings > 1) { + $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY); + } else { + $lines = explode($m[0][1], $data); + } + + $data = $differentLineEndings = $m = null; + unset($data, $differentLineEndings, $m); + + $linesCount = count($lines); + + $start = 1; + + for ($i = 0; $i < $linesCount; $i++) { + $line = trim($lines[$i]); + if ($line) { + $pieces = explode(' ', $line); + $c = count($pieces); + switch($c) { + case 2: + $start = (int)$pieces[0]; + $end = $start + (int)$pieces[1]; + if ($end > $result['max_object']) + $result['max_object'] = $end; + break; + case 3: + if (!isset($result['xref'][$start])) + $result['xref'][$start] = array(); + + if (!array_key_exists($gen = (int) $pieces[1], $result['xref'][$start])) { + $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null; + } + $start++; + break; + default: + $this->error('Unexpected data in xref table'); } - } while($openBrackets != 0 && $c->increase_length()); - - $result = substr($c->buffer, $c->offset, $pos - $c->offset - 1); - $c->offset = $pos; - - return array (PDF_TYPE_STRING, $result); - - - case 'stream': - $o_pos = ftell($c->file)-strlen($c->buffer); - $o_offset = $c->offset; - - $c->reset($startpos = $o_pos + $o_offset); - - $e = 0; // ensure line breaks in front of the stream - if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13)) - $e++; - if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10)) - $e++; - - if ($this->actual_obj[1][1]['/Length'][0] == PDF_TYPE_OBJREF) { - $tmp_c = new pdf_context($this->f); - $tmp_length = $this->pdf_resolve_object($tmp_c,$this->actual_obj[1][1]['/Length']); - $length = $tmp_length[1][1]; - } else { - $length = $this->actual_obj[1][1]['/Length'][1]; - } - - if ($length > 0) { - $c->reset($startpos+$e,$length); - $v = $c->buffer; - } else { - $v = ''; - } - $c->reset($startpos+$e+$length+9); // 9 = strlen("endstream") - - return array(PDF_TYPE_STREAM, $v); - - default : - if (is_numeric ($token)) { - // A numeric token. Make sure that - // it is not part of something else. - if (($tok2 = $this->pdf_read_token ($c)) !== false) { - if (is_numeric ($tok2)) { - - // Two numeric tokens in a row. - // In this case, we're probably in - // front of either an object reference - // or an object specification. - // Determine the case and return the data - if (($tok3 = $this->pdf_read_token ($c)) !== false) { - switch ($tok3) { - case 'obj' : - return array (PDF_TYPE_OBJDEC, (int) $token, (int) $tok2); - case 'R' : - return array (PDF_TYPE_OBJREF, (int) $token, (int) $tok2); - } - // If we get to this point, that numeric value up - // there was just a numeric value. Push the extra - // tokens back into the stack and return the value. - array_push ($c->stack, $tok3); - } - } - - array_push ($c->stack, $tok2); - } - - if ($token === (string)((int)$token)) - return array (PDF_TYPE_NUMERIC, (int)$token); - else - return array (PDF_TYPE_REAL, (float)$token); - } else if ($token == 'true' || $token == 'false') { - return array (PDF_TYPE_BOOLEAN, $token == 'true'); - } else if ($token == 'null') { - return array (PDF_TYPE_NULL); - } else { - - // Just a token. Return it. - return array (PDF_TYPE_TOKEN, $token); - } - - } - } - - /** - * Resolve an object - * - * @param object $c pdf_context - * @param array $obj_spec The object-data - * @param boolean $encapsulate Must set to true, cause the parsing and fpdi use this method only without this para - */ - function pdf_resolve_object(&$c, $obj_spec, $encapsulate = true) { - // Exit if we get invalid data - if (!is_array($obj_spec)) { - return false; - } - - if ($obj_spec[0] == PDF_TYPE_OBJREF) { - - // This is a reference, resolve it - if (isset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]])) { - - // Save current file position - // This is needed if you want to resolve - // references while you're reading another object - // (e.g.: if you need to determine the length - // of a stream) - - $old_pos = ftell($c->file); - - // Reposition the file pointer and - // load the object header. - - $c->reset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]]); - - $header = $this->pdf_read_value($c); - - if ($header[0] != PDF_TYPE_OBJDEC || $header[1] != $obj_spec[1] || $header[2] != $obj_spec[2]) { - $this->error("Unable to find object ({$obj_spec[1]}, {$obj_spec[2]}) at expected location"); - } - - // If we're being asked to store all the information - // about the object, we add the object ID and generation - // number for later use - $this->actual_obj =& $result; - if ($encapsulate) { - $result = array ( - PDF_TYPE_OBJECT, - 'obj' => $obj_spec[1], - 'gen' => $obj_spec[2] - ); - } else { - $result = array(); - } - - // Now simply read the object data until - // we encounter an end-of-object marker - while(1) { - $value = $this->pdf_read_value($c); - if ($value === false || count($result) > 4) { - // in this case the parser coudn't find an endobj so we break here - break; - } - - if ($value[0] == PDF_TYPE_TOKEN && $value[1] === 'endobj') { - break; - } - - $result[] = $value; - } - - $c->reset($old_pos); - - if (isset($result[2][0]) && $result[2][0] == PDF_TYPE_STREAM) { - $result[0] = PDF_TYPE_STREAM; } - - return $result; - } - } else { - return $obj_spec; - } - } - + } + + $lines = $pieces = $line = $start = $end = $gen = null; + unset($lines, $pieces, $line, $start, $end, $gen); + + fseek($this->f, $o_pos + $trailerPos + 7); + + $c = new pdf_context($this->f); + $trailer = $this->pdf_read_value($c); + + $c = null; + unset($c); + + if (!isset($result['trailer'])) { + $result['trailer'] = $trailer; + } + + if (isset($trailer[1]['/Prev'])) { + $this->pdf_read_xref($result, $trailer[1]['/Prev'][1]); + } + + $trailer = null; + unset($trailer); + + return true; + } + + /** + * Reads an Value + * + * @param object $c pdf_context + * @param string $token a Token + * @return mixed + */ + function pdf_read_value(&$c, $token = null) { + if (is_null($token)) { + $token = $this->pdf_read_token($c); + } + + if ($token === false) { + return false; + } + switch ($token) { + case '<': + // This is a hex string. + // Read the value, then the terminator - /** - * Reads a token from the file - * - * @param object $c pdf_context - * @return mixed - */ - function pdf_read_token(&$c) - { - // If there is a token available - // on the stack, pop it out and - // return it. - - if (count($c->stack)) { - return array_pop($c->stack); - } - - // Strip away any whitespace - - do { - if (!$c->ensure_content()) { - return false; - } - $c->offset += strspn($c->buffer, " \n\r\t", $c->offset); - } while ($c->offset >= $c->length - 1); - - // Get the first character in the stream - - $char = $c->buffer[$c->offset++]; - - switch ($char) { - - case '[' : - case ']' : - case '(' : - case ')' : - - // This is either an array or literal string - // delimiter, Return it - - return $char; - - case '<' : - case '>' : - - // This could either be a hex string or - // dictionary delimiter. Determine the - // appropriate case and return the token - - if ($c->buffer[$c->offset] == $char) { - if (!$c->ensure_content()) { - return false; - } - $c->offset++; - return $char . $char; - } else { - return $char; - } - - case '%' : - - // This is a comment - jump over it! - - $pos = $c->offset; - while(1) { - $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos); - if ($match === 0) { - if (!$c->increase_length()) { - return false; - } else { - continue; - } - } - - $c->offset = $m[0][1]+strlen($m[0][0]); + $pos = $c->offset; + + while(1) { + + $match = strpos ($c->buffer, '>', $pos); - return $this->pdf_read_token($c); - } - - default : - - // This is "another" type of token (probably - // a dictionary entry or a numeric value) - // Find the end and return it. - - if (!$c->ensure_content()) { - return false; - } - - while(1) { - - // Determine the length of the token - - $pos = strcspn($c->buffer, " %[]<>()\r\n\t/", $c->offset); - if ($c->offset + $pos <= $c->length - 1) { - break; - } else { - // If the script reaches this point, - // the token may span beyond the end - // of the current buffer. Therefore, - // we increase the size of the buffer - // and try again--just to be safe. - - $c->increase_length(); - } - } - - $result = substr($c->buffer, $c->offset - 1, $pos + 1); - - $c->offset += $pos; - return $result; - } + // If you can't find it, try + // reading more data from the stream + + if ($match === false) { + if (!$c->increase_length()) { + return false; + } else { + continue; + } + } + + $result = substr ($c->buffer, $c->offset, $match - $c->offset); + $c->offset = $match + 1; + + return array (PDF_TYPE_HEX, $result); + } + + break; + case '<<': + // This is a dictionary. + + $result = array(); + + // Recurse into this function until we reach + // the end of the dictionary. + while (($key = $this->pdf_read_token($c)) !== '>>') { + if ($key === false) { + return false; + } + + if (($value = $this->pdf_read_value($c)) === false) { + return false; + } + + // Catch missing value + if ($value[0] == PDF_TYPE_TOKEN && $value[1] == '>>') { + $result[$key] = array(PDF_TYPE_NULL); + break; + } + + $result[$key] = $value; + } + + return array (PDF_TYPE_DICTIONARY, $result); + + case '[': + // This is an array. + + $result = array(); + + // Recurse into this function until we reach + // the end of the array. + while (($token = $this->pdf_read_token($c)) !== ']') { + if ($token === false) { + return false; + } + + if (($value = $this->pdf_read_value($c, $token)) === false) { + return false; + } + + $result[] = $value; + } + + return array (PDF_TYPE_ARRAY, $result); + + case '(' : + // This is a string + $pos = $c->offset; + + $openBrackets = 1; + do { + for (; $openBrackets != 0 && $pos < $c->length; $pos++) { + switch (ord($c->buffer[$pos])) { + case 0x28: // '(' + $openBrackets++; + break; + case 0x29: // ')' + $openBrackets--; + break; + case 0x5C: // backslash + $pos++; + } + } + } while($openBrackets != 0 && $c->increase_length()); + + $result = substr($c->buffer, $c->offset, $pos - $c->offset - 1); + $c->offset = $pos; + + return array (PDF_TYPE_STRING, $result); + + case 'stream': + $o_pos = ftell($c->file)-strlen($c->buffer); + $o_offset = $c->offset; + + $c->reset($startpos = $o_pos + $o_offset); + + $e = 0; // ensure line breaks in front of the stream + if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13)) + $e++; + if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10)) + $e++; + + if ($this->actual_obj[1][1]['/Length'][0] == PDF_TYPE_OBJREF) { + $tmp_c = new pdf_context($this->f); + $tmp_length = $this->pdf_resolve_object($tmp_c, $this->actual_obj[1][1]['/Length']); + $length = $tmp_length[1][1]; + } else { + $length = $this->actual_obj[1][1]['/Length'][1]; + } + + if ($length > 0) { + $c->reset($startpos + $e,$length); + $v = $c->buffer; + } else { + $v = ''; + } + $c->reset($startpos + $e + $length + 9); // 9 = strlen("endstream") + + return array(PDF_TYPE_STREAM, $v); + + default : + if (is_numeric ($token)) { + // A numeric token. Make sure that + // it is not part of something else. + if (($tok2 = $this->pdf_read_token ($c)) !== false) { + if (is_numeric ($tok2)) { + + // Two numeric tokens in a row. + // In this case, we're probably in + // front of either an object reference + // or an object specification. + // Determine the case and return the data + if (($tok3 = $this->pdf_read_token ($c)) !== false) { + switch ($tok3) { + case 'obj': + return array (PDF_TYPE_OBJDEC, (int) $token, (int) $tok2); + case 'R': + return array (PDF_TYPE_OBJREF, (int) $token, (int) $tok2); + } + // If we get to this point, that numeric value up + // there was just a numeric value. Push the extra + // tokens back into the stack and return the value. + array_push ($c->stack, $tok3); + } + } + + array_push ($c->stack, $tok2); + } + + if ($token === (string)((int)$token)) + return array (PDF_TYPE_NUMERIC, (int)$token); + else + return array (PDF_TYPE_REAL, (float)$token); + } else if ($token == 'true' || $token == 'false') { + return array (PDF_TYPE_BOOLEAN, $token == 'true'); + } else if ($token == 'null') { + return array (PDF_TYPE_NULL); + } else { + // Just a token. Return it. + return array (PDF_TYPE_TOKEN, $token); + } + } + } + + /** + * Resolve an object + * + * @param object $c pdf_context + * @param array $obj_spec The object-data + * @param boolean $encapsulate Must set to true, cause the parsing and fpdi use this method only without this para + */ + function pdf_resolve_object(&$c, $obj_spec, $encapsulate = true) { + // Exit if we get invalid data + if (!is_array($obj_spec)) { + $ret = false; + return $ret; + } + + if ($obj_spec[0] == PDF_TYPE_OBJREF) { + + // This is a reference, resolve it + if (isset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]])) { + + // Save current file position + // This is needed if you want to resolve + // references while you're reading another object + // (e.g.: if you need to determine the length + // of a stream) + + $old_pos = ftell($c->file); + + // Reposition the file pointer and + // load the object header. + + $c->reset($this->xref['xref'][$obj_spec[1]][$obj_spec[2]]); + + $header = $this->pdf_read_value($c); + + if ($header[0] != PDF_TYPE_OBJDEC || $header[1] != $obj_spec[1] || $header[2] != $obj_spec[2]) { + $toSearchFor = $obj_spec[1] . ' ' . $obj_spec[2] . ' obj'; + if (preg_match('/' . $toSearchFor . '/', $c->buffer)) { + $c->offset = strpos($c->buffer, $toSearchFor) + strlen($toSearchFor); + // reset stack + $c->stack = array(); + } else { + $this->error("Unable to find object ({$obj_spec[1]}, {$obj_spec[2]}) at expected location"); + } + } + + // If we're being asked to store all the information + // about the object, we add the object ID and generation + // number for later use + $result = array(); + $this->actual_obj =& $result; + if ($encapsulate) { + $result = array ( + PDF_TYPE_OBJECT, + 'obj' => $obj_spec[1], + 'gen' => $obj_spec[2] + ); + } + + // Now simply read the object data until + // we encounter an end-of-object marker + while(1) { + $value = $this->pdf_read_value($c); + if ($value === false || count($result) > 4) { + // in this case the parser coudn't find an endobj so we break here + break; + } + + if ($value[0] == PDF_TYPE_TOKEN && $value[1] === 'endobj') { + break; + } + + $result[] = $value; + } + + $c->reset($old_pos); + + if (isset($result[2][0]) && $result[2][0] == PDF_TYPE_STREAM) { + $result[0] = PDF_TYPE_STREAM; + } + + return $result; + } + } else { + return $obj_spec; + } + } + + + + /** + * Reads a token from the file + * + * @param object $c pdf_context + * @return mixed + */ + function pdf_read_token(&$c) + { + // If there is a token available + // on the stack, pop it out and + // return it. + + if (count($c->stack)) { + return array_pop($c->stack); + } + + // Strip away any whitespace + + do { + if (!$c->ensure_content()) { + return false; + } + $c->offset += strspn($c->buffer, " \n\r\t", $c->offset); + } while ($c->offset >= $c->length - 1); + + // Get the first character in the stream + + $char = $c->buffer[$c->offset++]; + + switch ($char) { + + case '[': + case ']': + case '(': + case ')': + + // This is either an array or literal string + // delimiter, Return it + + return $char; + + case '<': + case '>': + + // This could either be a hex string or + // dictionary delimiter. Determine the + // appropriate case and return the token + + if ($c->buffer[$c->offset] == $char) { + if (!$c->ensure_content()) { + return false; + } + $c->offset++; + return $char . $char; + } else { + return $char; + } + + case '%': + + // This is a comment - jump over it! + + $pos = $c->offset; + while(1) { + $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos); + if ($match === 0) { + if (!$c->increase_length()) { + return false; + } else { + continue; + } + } + + $c->offset = $m[0][1]+strlen($m[0][0]); + + return $this->pdf_read_token($c); + } + + default: + + // This is "another" type of token (probably + // a dictionary entry or a numeric value) + // Find the end and return it. + + if (!$c->ensure_content()) { + return false; + } + + while(1) { + + // Determine the length of the token + + $pos = strcspn($c->buffer, " %[]<>()\r\n\t/", $c->offset); + + if ($c->offset + $pos <= $c->length - 1) { + break; + } else { + // If the script reaches this point, + // the token may span beyond the end + // of the current buffer. Therefore, + // we increase the size of the buffer + // and try again--just to be safe. + + $c->increase_length(); + } + } + + $result = substr($c->buffer, $c->offset - 1, $pos + 1); + + $c->offset += $pos; + return $result; + } + } } -} \ No newline at end of file +}