westdc-zf1/include/fpdi/fpdi_pdf_parser.php

354 lines
9.5 KiB
PHP
Raw Normal View History

2009-03-06 03:20:46 +00:00
<?php
//
// FPDI - Version 1.5.1
//
// Copyright 2004-2014 Setasign - Jan Slabon
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
require_once('pdf_parser.php');

/**
 * Class fpdi_pdf_parser
 *
 * Extends pdf_parser with access to the pages of a document: page count,
 * page resources, content streams, boundary boxes and rotation.
 */
class fpdi_pdf_parser extends pdf_parser
{
    /**
     * Pages
     *
     * Flat list of the resolved page objects collected by _readPages().
     * Index begins at 0
     *
     * @var array
     */
    protected $_pages;

    /**
     * Page count
     *
     * @var integer
     */
    protected $_pageCount;

    /**
     * Current page number
     *
     * Holds the zero-based index into $_pages; setPageNo() accepts a
     * one-based page number and stores it decremented by one.
     *
     * @var integer
     */
    public $pageNo;

    /**
     * PDF version of imported document
     *
     * @var string
     */
    public $_pdfVersion;

    /**
     * Available BoxTypes
     *
     * @var array
     */
    public $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox');

    /**
     * The constructor.
     *
     * Lets the parent parser process the file, then walks the page tree and
     * caches every page object together with the resulting page count.
     *
     * @param string $filename The source filename
     */
    public function __construct($filename)
    {
        parent::__construct($filename);

        // _readPages() only appends to the list it is given. Start from an
        // empty array so that a page tree without leaf pages yields a count
        // of 0 instead of calling count() on null.
        $this->_pages = array();

        // Resolve the /Pages dictionary referenced by $this->_root
        $pages = $this->resolveObject($this->_root[1][1]['/Pages']);

        // Read pages
        $this->_readPages($pages, $this->_pages);

        // Count pages
        $this->_pageCount = count($this->_pages);
    }

    /**
     * Get page count from source file.
     *
     * @return int
     */
    public function getPageCount()
    {
        return $this->_pageCount;
    }

    /**
     * Set the page number.
     *
     * @param int $pageNo Page number to use (one-based)
     * @throws InvalidArgumentException If the number is outside 1..getPageCount()
     */
    public function setPageNo($pageNo)
    {
        // Convert the one-based page number into a zero-based index
        $pageNo = ((int) $pageNo) - 1;

        if ($pageNo < 0 || $pageNo >= $this->getPageCount()) {
            throw new InvalidArgumentException('Invalid page number!');
        }

        $this->pageNo = $pageNo;
    }

    /**
     * Get page-resources from current page
     *
     * @return array|boolean The resources, or false if no page is selected
     *                       or no resources could be found
     */
    public function getPageResources()
    {
        // No (valid) current page selected: nothing to look up
        if (!isset($this->_pages[$this->pageNo])) {
            return false;
        }

        return $this->_getPageResources($this->_pages[$this->pageNo]);
    }

    /**
     * Get page-resources from a /Page dictionary.
     *
     * @param array $obj Array of pdf-data
     * @return array|boolean The resources, or false if neither the object
     *                       nor any of its ancestors has a /Resources entry
     */
    protected function _getPageResources($obj)
    {
        $obj = $this->resolveObject($obj);

        // If the current object has a resources
        // dictionary associated with it, we use
        // it. Otherwise, we move back to its
        // parent object.
        if (isset($obj[1][1]['/Resources'])) {
            return $this->_unwrapIndirectObject(
                $this->resolveObject($obj[1][1]['/Resources'])
            );
        }

        if (!isset($obj[1][1]['/Parent'])) {
            return false;
        }

        return $this->_unwrapIndirectObject(
            $this->_getPageResources($obj[1][1]['/Parent'])
        );
    }

    /**
     * Get content of current page.
     *
     * If /Contents is an array, the streams are concatenated
     * (each stream is followed by a single space).
     *
     * @return string Empty string if the current page has no /Contents entry
     */
    public function getContent()
    {
        $buffer = '';

        if (isset($this->_pages[$this->pageNo][1][1]['/Contents'])) {
            $contents = $this->_getPageContent($this->_pages[$this->pageNo][1][1]['/Contents']);
            foreach ($contents as $tmpContent) {
                $buffer .= $this->_unFilterStream($tmpContent) . ' ';
            }
        }

        return $buffer;
    }

    /**
     * Resolve all content objects.
     *
     * Follows object references and nested arrays recursively and returns
     * the resolved content objects as a flat list.
     *
     * @param array $contentRef
     * @return array
     */
    protected function _getPageContent($contentRef)
    {
        $contents = array();

        if ($contentRef[0] == self::TYPE_OBJREF) {
            $content = $this->resolveObject($contentRef);
            if ($content[1][0] == self::TYPE_ARRAY) {
                // The referenced object is itself an array of content entries
                $contents = $this->_getPageContent($content[1]);
            } else {
                $contents[] = $content;
            }
        } else if ($contentRef[0] == self::TYPE_ARRAY) {
            foreach ($contentRef[1] as $tmp_content_ref) {
                $contents = array_merge($contents, $this->_getPageContent($tmp_content_ref));
            }
        }

        return $contents;
    }

    /**
     * Get a boundary box from a page
     *
     * Array format is same as used by FPDF_TPL.
     *
     * If the page does not define the box itself, the lookup continues
     * with the object referenced by its /Parent entry.
     *
     * @param array $page a /Page dictionary
     * @param string $boxIndex Type of box (see {@link $availableBoxes})
     * @param float $k Scale factor from user space units to points
     *
     * @return array|boolean The box values divided by $k, or false if the
     *                       box is not defined on the page or any ancestor
     */
    protected function _getPageBox($page, $boxIndex, $k)
    {
        $page = $this->resolveObject($page);
        $box = null;

        if (isset($page[1][1][$boxIndex])) {
            $box = $page[1][1][$boxIndex];
        }

        // The box may be stored as a reference to another object
        if (!is_null($box) && $box[0] == self::TYPE_OBJREF) {
            $tmp_box = $this->resolveObject($box);
            $box = $tmp_box[1];
        }

        if (!is_null($box) && $box[0] == self::TYPE_ARRAY) {
            $b = $box[1];

            return array(
                'x' => $b[0][1] / $k,
                'y' => $b[1][1] / $k,
                'w' => abs($b[0][1] - $b[2][1]) / $k,
                'h' => abs($b[1][1] - $b[3][1]) / $k,
                // min()/max() normalise the corners regardless of the
                // order in which they are stored
                'llx' => min($b[0][1], $b[2][1]) / $k,
                'lly' => min($b[1][1], $b[3][1]) / $k,
                'urx' => max($b[0][1], $b[2][1]) / $k,
                'ury' => max($b[1][1], $b[3][1]) / $k,
            );
        }

        if (!isset($page[1][1]['/Parent'])) {
            return false;
        }

        return $this->_getPageBox($this->resolveObject($page[1][1]['/Parent']), $boxIndex, $k);
    }

    /**
     * Get all page boundary boxes by page number
     *
     * @param int $pageNo The page number (one-based)
     * @param float $k Scale factor from user space units to points
     * @return array
     * @throws InvalidArgumentException If the page does not exist
     */
    public function getPageBoxes($pageNo, $k)
    {
        // Same conversion as setPageNo(): cast first so that a non-numeric
        // value ends up in the documented exception below.
        $index = ((int) $pageNo) - 1;

        if (!isset($this->_pages[$index])) {
            throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
        }

        return $this->_getPageBoxes($this->_pages[$index], $k);
    }

    /**
     * Get all boxes from /Page dictionary
     *
     * @param array $page A /Page dictionary
     * @param float $k Scale factor from user space units to points
     * @return array Found boxes keyed by box name; boxes that cannot be
     *               found are omitted
     */
    protected function _getPageBoxes($page, $k)
    {
        $boxes = array();

        foreach ($this->availableBoxes as $box) {
            if ($_box = $this->_getPageBox($page, $box, $k)) {
                $boxes[$box] = $_box;
            }
        }

        return $boxes;
    }

    /**
     * Get the page rotation by page number
     *
     * @param integer $pageNo The page number (one-based)
     * @throws InvalidArgumentException If the page does not exist
     * @return array|bool The /Rotate value, or false if none is defined
     */
    public function getPageRotation($pageNo)
    {
        // Same conversion as setPageNo(): cast first so that a non-numeric
        // value ends up in the documented exception below.
        $index = ((int) $pageNo) - 1;

        if (!isset($this->_pages[$index])) {
            throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
        }

        return $this->_getPageRotation($this->_pages[$index]);
    }

    /**
     * Get the rotation value of a page
     *
     * If the object has no /Rotate entry, the lookup continues with the
     * object referenced by its /Parent entry.
     *
     * @param array $obj A /Page dictionary
     * @return array|bool The /Rotate value, or false if neither the object
     *                    nor any of its ancestors defines one
     */
    protected function _getPageRotation($obj)
    {
        $obj = $this->resolveObject($obj);

        if (isset($obj[1][1]['/Rotate'])) {
            return $this->_unwrapIndirectObject(
                $this->resolveObject($obj[1][1]['/Rotate'])
            );
        }

        if (!isset($obj[1][1]['/Parent'])) {
            return false;
        }

        return $this->_unwrapIndirectObject(
            $this->_getPageRotation($obj[1][1]['/Parent'])
        );
    }

    /**
     * Read all pages
     *
     * Walks the /Kids of the given dictionary; kids whose /Type is /Pages
     * are descended into, all other kids are appended to $result.
     *
     * @param array $pages /Pages dictionary
     * @param array $result The result array
     * @throws Exception If the dictionary has no usable /Kids entry
     */
    protected function _readPages(&$pages, &$result)
    {
        // Check for the entry first so that a malformed dictionary leads
        // straight to the exception instead of an undefined index notice.
        if (!isset($pages[1][1]['/Kids'])) {
            throw new Exception('Cannot find /Kids in current /Page-Dictionary');
        }

        // Get the kids dictionary
        $_kids = $this->resolveObject($pages[1][1]['/Kids']);
        if (!is_array($_kids)) {
            throw new Exception('Cannot find /Kids in current /Page-Dictionary');
        }

        // /Kids was stored as a separate object: step into its value
        if ($_kids[0] === self::TYPE_OBJECT) {
            $_kids = $_kids[1];
        }

        $kids = $_kids[1];

        foreach ($kids as $v) {
            $pg = $this->resolveObject($v);
            if (isset($pg[1][1]['/Type'][1]) && $pg[1][1]['/Type'][1] === '/Pages') {
                // If one of the kids is an embedded
                // /Pages array, resolve it as well.
                $this->_readPages($pg, $result);
            } else {
                $result[] = $pg;
            }
        }
    }

    /**
     * Return the value of a TYPE_OBJECT entry, or the input unchanged.
     *
     * Non-array input (e.g. false from a failed lookup) is passed through
     * untouched so callers can forward it without indexing into it.
     *
     * @param mixed $value A parsed value or false
     * @return mixed
     */
    private function _unwrapIndirectObject($value)
    {
        if (is_array($value) && isset($value[0]) && $value[0] == self::TYPE_OBJECT) {
            return $value[1];
        }

        return $value;
    }
}