westdc-zf1/include/fpdi/fpdi_pdf_parser.php

408 lines
12 KiB
PHP
Raw Permalink Normal View History

2009-03-06 03:20:46 +00:00
<?php
//
2011-11-03 02:33:37 +00:00
// FPDI - Version 1.4.2
2009-03-06 03:20:46 +00:00
//
2011-11-03 02:33:37 +00:00
// Copyright 2004-2011 Setasign - Jan Slabon
2009-03-06 03:20:46 +00:00
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
require_once('pdf_parser.php');
2009-03-06 03:20:46 +00:00
class fpdi_pdf_parser extends pdf_parser {
/**
* Pages
* Index beginns at 0
*
* @var array
*/
var $pages;
/**
* Page count
* @var integer
*/
var $page_count;
/**
* actual page number
* @var integer
*/
var $pageno;
/**
* PDF Version of imported Document
* @var string
*/
var $pdfVersion;
/**
* FPDI Reference
* @var object
*/
var $fpdi;
/**
* Available BoxTypes
*
* @var array
*/
var $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox');
2009-03-06 03:20:46 +00:00
/**
* Constructor
*
* @param string $filename Source-Filename
* @param object $fpdi Object of type fpdi
*/
function fpdi_pdf_parser($filename, &$fpdi) {
2009-03-06 03:20:46 +00:00
$this->fpdi =& $fpdi;
parent::pdf_parser($filename);
// resolve Pages-Dictonary
$pages = $this->pdf_resolve_object($this->c, $this->root[1][1]['/Pages']);
// Read pages
$this->read_pages($this->c, $pages, $this->pages);
// count pages;
$this->page_count = count($this->pages);
}
/**
* Overwrite parent::error()
*
* @param string $msg Error-Message
*/
function error($msg) {
$this->fpdi->error($msg);
}
/**
* Get pagecount from sourcefile
*
* @return int
*/
function getPageCount() {
return $this->page_count;
}
/**
* Set pageno
*
* @param int $pageno Pagenumber to use
*/
function setPageno($pageno) {
$pageno = ((int) $pageno) - 1;
if ($pageno < 0 || $pageno >= $this->getPageCount()) {
$this->fpdi->error('Pagenumber is wrong!');
2009-03-06 03:20:46 +00:00
}
$this->pageno = $pageno;
}
/**
* Get page-resources from current page
*
* @return array
*/
function getPageResources() {
return $this->_getPageResources($this->pages[$this->pageno]);
}
/**
* Get page-resources from /Page
*
* @param array $obj Array of pdf-data
*/
function _getPageResources ($obj) { // $obj = /Page
$obj = $this->pdf_resolve_object($this->c, $obj);
// If the current object has a resources
// dictionary associated with it, we use
// it. Otherwise, we move back to its
// parent object.
if (isset ($obj[1][1]['/Resources'])) {
$res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Resources']);
if ($res[0] == PDF_TYPE_OBJECT)
return $res[1];
return $res;
} else {
if (!isset ($obj[1][1]['/Parent'])) {
return false;
} else {
$res = $this->_getPageResources($obj[1][1]['/Parent']);
if ($res[0] == PDF_TYPE_OBJECT)
return $res[1];
return $res;
}
}
}
/**
* Get content of current page
*
* If more /Contents is an array, the streams are concated
*
* @return string
*/
function getContent() {
$buffer = '';
2009-03-06 03:20:46 +00:00
if (isset($this->pages[$this->pageno][1][1]['/Contents'])) {
$contents = $this->_getPageContent($this->pages[$this->pageno][1][1]['/Contents']);
foreach($contents AS $tmp_content) {
2011-11-03 02:33:37 +00:00
$buffer .= $this->_rebuildContentStream($tmp_content) . ' ';
2009-03-06 03:20:46 +00:00
}
}
return $buffer;
}
/**
* Resolve all content-objects
*
* @param array $content_ref
* @return array
*/
function _getPageContent($content_ref) {
$contents = array();
if ($content_ref[0] == PDF_TYPE_OBJREF) {
$content = $this->pdf_resolve_object($this->c, $content_ref);
if ($content[1][0] == PDF_TYPE_ARRAY) {
$contents = $this->_getPageContent($content[1]);
} else {
$contents[] = $content;
}
} else if ($content_ref[0] == PDF_TYPE_ARRAY) {
foreach ($content_ref[1] AS $tmp_content_ref) {
$contents = array_merge($contents,$this->_getPageContent($tmp_content_ref));
}
}
return $contents;
}
/**
* Rebuild content-streams
*
* @param array $obj
* @return string
*/
function _rebuildContentStream($obj) {
$filters = array();
if (isset($obj[1][1]['/Filter'])) {
$_filter = $obj[1][1]['/Filter'];
2011-11-03 02:33:37 +00:00
if ($_filter[0] == PDF_TYPE_OBJREF) {
$tmpFilter = $this->pdf_resolve_object($this->c, $_filter);
$_filter = $tmpFilter[1];
}
2009-03-06 03:20:46 +00:00
if ($_filter[0] == PDF_TYPE_TOKEN) {
$filters[] = $_filter;
} else if ($_filter[0] == PDF_TYPE_ARRAY) {
$filters = $_filter[1];
}
}
$stream = $obj[2][1];
foreach ($filters AS $_filter) {
switch ($_filter[1]) {
case '/FlateDecode':
2011-11-03 02:33:37 +00:00
case '/Fl':
// $stream .= "\x0F\x0D"; // in an errorious stream this suffix could work
// $stream .= "\x0A";
// $stream .= "\x0D";
if (function_exists('gzuncompress')) {
$stream = (strlen($stream) > 0) ? @gzuncompress($stream) : '';
2009-03-06 03:20:46 +00:00
} else {
2011-11-03 02:33:37 +00:00
$this->error(sprintf('To handle %s filter, please compile php with zlib support.',$_filter[1]));
2009-03-06 03:20:46 +00:00
}
2011-11-03 02:33:37 +00:00
2009-03-06 03:20:46 +00:00
if ($stream === false) {
2011-11-03 02:33:37 +00:00
$this->error('Error while decompressing stream.');
2009-03-06 03:20:46 +00:00
}
break;
2011-11-03 02:33:37 +00:00
case '/LZWDecode':
include_once('filters/FilterLZW_FPDI.php');
$decoder = new FilterLZW_FPDI($this->fpdi);
$stream = $decoder->decode($stream);
break;
case '/ASCII85Decode':
include_once('filters/FilterASCII85_FPDI.php');
$decoder = new FilterASCII85_FPDI($this->fpdi);
$stream = $decoder->decode($stream);
break;
2009-03-06 03:20:46 +00:00
case null:
$stream = $stream;
break;
default:
2011-11-03 02:33:37 +00:00
$this->error(sprintf('Unsupported Filter: %s',$_filter[1]));
2009-03-06 03:20:46 +00:00
}
}
return $stream;
}
/**
* Get a Box from a page
* Arrayformat is same as used by fpdf_tpl
*
* @param array $page a /Page
* @param string $box_index Type of Box @see $availableBoxes
2011-11-03 02:33:37 +00:00
* @param float Scale factor from user space units to points
2009-03-06 03:20:46 +00:00
* @return array
*/
2011-11-03 02:33:37 +00:00
function getPageBox($page, $box_index, $k) {
$page = $this->pdf_resolve_object($this->c, $page);
2009-03-06 03:20:46 +00:00
$box = null;
if (isset($page[1][1][$box_index]))
$box =& $page[1][1][$box_index];
if (!is_null($box) && $box[0] == PDF_TYPE_OBJREF) {
2011-11-03 02:33:37 +00:00
$tmp_box = $this->pdf_resolve_object($this->c, $box);
2009-03-06 03:20:46 +00:00
$box = $tmp_box[1];
}
if (!is_null($box) && $box[0] == PDF_TYPE_ARRAY) {
$b =& $box[1];
2011-11-03 02:33:37 +00:00
return array('x' => $b[0][1] / $k,
'y' => $b[1][1] / $k,
'w' => abs($b[0][1] - $b[2][1]) / $k,
'h' => abs($b[1][1] - $b[3][1]) / $k,
'llx' => min($b[0][1], $b[2][1]) / $k,
'lly' => min($b[1][1], $b[3][1]) / $k,
'urx' => max($b[0][1], $b[2][1]) / $k,
'ury' => max($b[1][1], $b[3][1]) / $k,
);
2009-03-06 03:20:46 +00:00
} else if (!isset ($page[1][1]['/Parent'])) {
return false;
} else {
2011-11-03 02:33:37 +00:00
return $this->getPageBox($this->pdf_resolve_object($this->c, $page[1][1]['/Parent']), $box_index, $k);
2009-03-06 03:20:46 +00:00
}
}
2011-11-03 02:33:37 +00:00
/**
* Get all page boxes by page no
*
* @param int The page number
* @param float Scale factor from user space units to points
* @return array
*/
function getPageBoxes($pageno, $k) {
return $this->_getPageBoxes($this->pages[$pageno - 1], $k);
2009-03-06 03:20:46 +00:00
}
/**
2011-11-03 02:33:37 +00:00
* Get all boxes from /Page
2009-03-06 03:20:46 +00:00
*
* @param array a /Page
* @return array
*/
2011-11-03 02:33:37 +00:00
function _getPageBoxes($page, $k) {
2009-03-06 03:20:46 +00:00
$boxes = array();
foreach($this->availableBoxes AS $box) {
2011-11-03 02:33:37 +00:00
if ($_box = $this->getPageBox($page, $box, $k)) {
2009-03-06 03:20:46 +00:00
$boxes[$box] = $_box;
}
}
return $boxes;
}
/**
* Get the page rotation by pageno
*
* @param integer $pageno
* @return array
*/
function getPageRotation($pageno) {
2011-11-03 02:33:37 +00:00
return $this->_getPageRotation($this->pages[$pageno - 1]);
2009-03-06 03:20:46 +00:00
}
2011-11-03 02:33:37 +00:00
function _getPageRotation($obj) { // $obj = /Page
2009-03-06 03:20:46 +00:00
$obj = $this->pdf_resolve_object($this->c, $obj);
if (isset ($obj[1][1]['/Rotate'])) {
$res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Rotate']);
if ($res[0] == PDF_TYPE_OBJECT)
return $res[1];
return $res;
} else {
if (!isset ($obj[1][1]['/Parent'])) {
return false;
} else {
$res = $this->_getPageRotation($obj[1][1]['/Parent']);
if ($res[0] == PDF_TYPE_OBJECT)
return $res[1];
return $res;
}
}
}
/**
* Read all /Page(es)
*
* @param object pdf_context
* @param array /Pages
* @param array the result-array
*/
2011-11-03 02:33:37 +00:00
function read_pages(&$c, &$pages, &$result) {
2009-03-06 03:20:46 +00:00
// Get the kids dictionary
2011-11-03 02:33:37 +00:00
$_kids = $this->pdf_resolve_object ($c, $pages[1][1]['/Kids']);
if (!is_array($_kids))
$this->error('Cannot find /Kids in current /Page-Dictionary');
if ($_kids[1][0] == PDF_TYPE_ARRAY) {
$kids = $_kids[1][1];
} else {
$kids = $_kids[1];
}
foreach ($kids as $v) {
2009-03-06 03:20:46 +00:00
$pg = $this->pdf_resolve_object ($c, $v);
if ($pg[1][1]['/Type'][1] === '/Pages') {
// If one of the kids is an embedded
// /Pages array, resolve it as well.
2011-11-03 02:33:37 +00:00
$this->read_pages($c, $pg, $result);
2009-03-06 03:20:46 +00:00
} else {
$result[] = $pg;
}
}
}
/**
* Get PDF-Version
*
* And reset the PDF Version used in FPDI if needed
*/
function getPDFVersion() {
parent::getPDFVersion();
2011-11-03 02:33:37 +00:00
$this->fpdi->setPDFVersion(max($this->fpdi->getPDFVersion(), $this->pdfVersion));
2009-03-06 03:20:46 +00:00
}
}