fpdi_pdf_parser.php 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. <?php
  2. //
  3. // FPDI - Version 1.2
  4. //
  5. // Copyright 2004-2007 Setasign - Jan Slabon
  6. //
  7. // Licensed under the Apache License, Version 2.0 (the "License");
  8. // you may not use this file except in compliance with the License.
  9. // You may obtain a copy of the License at
  10. //
  11. // http://www.apache.org/licenses/LICENSE-2.0
  12. //
  13. // Unless required by applicable law or agreed to in writing, software
  14. // distributed under the License is distributed on an "AS IS" BASIS,
  15. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. // See the License for the specific language governing permissions and
  17. // limitations under the License.
  18. //
  /**
   * PDF parser used by FPDI to read pages, resources, boxes and content
   * streams from an existing PDF document.
   *
   * The low-level parsing (pdf_resolve_object(), $c, $root, $success,
   * $errormsg) is provided by the parent class pdf_parser.
   */
  class fpdi_pdf_parser extends pdf_parser {

      /**
       * Pages of the source document, in the order read_pages() found them.
       * Index begins at 0.
       *
       * @var array
       */
      var $pages;

      /**
       * Number of entries in $pages.
       *
       * @var integer
       */
      var $page_count;

      /**
       * Zero-based index of the currently selected page.
       *
       * @var integer
       */
      var $pageno;

      /**
       * FPDI reference (used for error reporting and for its $k property).
       *
       * @var object
       */
      var $fpdi;

      /**
       * Available box types.
       *
       * @var array
       */
      var $availableBoxes = array("/MediaBox","/CropBox","/BleedBox","/TrimBox","/ArtBox");

      /**
       * Constructor.
       *
       * PHP 8 no longer treats a method named after the class as a
       * constructor, so the real work is delegated to the legacy
       * class-named method, which is kept for explicit callers.
       *
       * @param string $filename Source filename
       * @param object $fpdi     Object of type fpdi
       */
      function __construct($filename, &$fpdi) {
          $this->fpdi_pdf_parser($filename, $fpdi);
      }

      /**
       * Legacy (PHP 4 style) constructor.
       *
       * Runs the parent parser on the file, resolves the /Pages dictionary
       * referenced by the document root, collects every page into
       * $this->pages and stores the page count. Stops early whenever the
       * parent's $success flag is false.
       *
       * @param string $filename Source filename
       * @param object $fpdi     Object of type fpdi
       * @return false|null false when parsing failed, nothing otherwise
       */
      function fpdi_pdf_parser($filename, &$fpdi) {
          $this->fpdi =& $fpdi;
          $this->filename = $filename;

          parent::pdf_parser($filename);
          if (!$this->success) {
              return false;
          }

          // Resolve the /Pages dictionary
          $pages = $this->pdf_resolve_object($this->c, $this->root[1][1]['/Pages']);
          if (!$this->success) {
              return false;
          }

          // Start from an empty list so that a document without any page
          // still gives count() an array (count(null) is a TypeError on PHP 8).
          $this->pages = array();

          // Read pages
          $this->read_pages($this->c, $pages, $this->pages);
          if (!$this->success) {
              return false;
          }

          // Count pages
          $this->page_count = count($this->pages);
      }

      /**
       * Get the page count of the source file.
       *
       * @return int
       */
      function getPageCount() {
          return $this->page_count;
      }

      /**
       * Select the page that getPageResources() and getContent() work on.
       *
       * @param int $pageno One-based page number to use
       */
      function setPageno($pageno) {
          $index = ((int) $pageno) - 1;

          if ($index < 0 || $index >= $this->getPageCount()) {
              $this->fpdi->error("Pagenumber is wrong!");
              // If error() returns instead of aborting, do not store an
              // out-of-range index.
              return;
          }

          $this->pageno = $index;
      }

      /**
       * Get the page resources of the current page.
       *
       * @return array|false
       */
      function getPageResources() {
          return $this->_getPageResources($this->pages[$this->pageno]);
      }

      /**
       * Get the page resources of a /Page.
       *
       * If the object has a /Resources dictionary it is used; otherwise the
       * lookup moves up to the /Parent object.
       *
       * @param array $obj Array of pdf-data (a /Page or /Pages object)
       * @return array|false The resources, or false when neither the object
       *                     nor any ancestor provides them
       */
      function _getPageResources($obj) { // $obj = /Page
          $obj = $this->pdf_resolve_object($this->c, $obj);

          if (isset($obj[1][1]['/Resources'])) {
              $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Resources']);
          } else if (isset($obj[1][1]['/Parent'])) {
              $res = $this->_getPageResources($obj[1][1]['/Parent']);
          } else {
              return false;
          }

          // $res is false when the lookup failed further up the tree;
          // only unwrap it when it really is an object wrapper.
          if (is_array($res) && $res[0] == PDF_TYPE_OBJECT) {
              return $res[1];
          }
          return $res;
      }

      /**
       * Get the content of the current page.
       *
       * If /Contents is an array, the streams are concatenated. Every
       * decoded stream is followed by a single space.
       *
       * @return string
       */
      function getContent() {
          $buffer = "";

          if (isset($this->pages[$this->pageno][1][1]['/Contents'])) {
              $contents = $this->_getPageContent($this->pages[$this->pageno][1][1]['/Contents']);
              foreach ($contents as $tmp_content) {
                  $buffer .= $this->_rebuildContentStream($tmp_content).' ';
              }
          }

          return $buffer;
      }

      /**
       * Resolve all content objects.
       *
       * Follows object references and flattens (nested) arrays of
       * references into one list of resolved content objects.
       *
       * @param array $content_ref
       * @return array
       */
      function _getPageContent($content_ref) {
          $contents = array();

          if ($content_ref[0] == PDF_TYPE_OBJREF) {
              $content = $this->pdf_resolve_object($this->c, $content_ref);
              if ($content[1][0] == PDF_TYPE_ARRAY) {
                  $contents = $this->_getPageContent($content[1]);
              } else {
                  $contents[] = $content;
              }
          } else if ($content_ref[0] == PDF_TYPE_ARRAY) {
              foreach ($content_ref[1] as $tmp_content_ref) {
                  $contents = array_merge($contents, $this->_getPageContent($tmp_content_ref));
              }
          }

          return $contents;
      }

      /**
       * Rebuild a content stream by applying its /Filter chain.
       *
       * @param array $obj Resolved stream object
       * @return string The decoded stream
       */
      function _rebuildContentStream($obj) {
          $filters = array();

          if (isset($obj[1][1]['/Filter'])) {
              $_filter = $obj[1][1]['/Filter'];

              // The filter entry may itself be an indirect reference;
              // resolve it the same way getPageBox() resolves boxes.
              if ($_filter[0] == PDF_TYPE_OBJREF) {
                  $tmp_filter = $this->pdf_resolve_object($this->c, $_filter);
                  $_filter = $tmp_filter[1];
              }

              if ($_filter[0] == PDF_TYPE_TOKEN) {
                  $filters[] = $_filter;
              } else if ($_filter[0] == PDF_TYPE_ARRAY) {
                  $filters = $_filter[1];
              }
          }

          $stream = $obj[2][1];

          foreach ($filters as $_filter) {
              switch ($_filter[1]) {
                  case "/FlateDecode":
                      if (function_exists('gzuncompress')) {
                          // Warnings are suppressed on purpose; a failed
                          // inflate is reported through error() below.
                          $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : '';
                      } else {
                          $this->fpdi->error(sprintf("To handle %s filter, please compile php with zlib support.",$_filter[1]));
                      }
                      if ($stream === false) {
                          $this->fpdi->error("Error while decompressing stream.");
                      }
                      break;
                  // mPDF 4.2.003
                  case '/LZWDecode':
                      include_once(_MPDF_PATH.'mpdfi/filters/FilterLZW.php');
                      // Plain assignment: "=& new" is a parse error since PHP 7.0.
                      $decoder = new FilterLZW();
                      $stream = $decoder->decode($stream);
                      break;
                  case '/ASCII85Decode':
                      include_once(_MPDF_PATH.'mpdfi/filters/FilterASCII85.php');
                      $decoder = new FilterASCII85();
                      $stream = $decoder->decode($stream);
                      break;
                  case null:
                      // No filter name: the stream is passed through unchanged.
                      break;
                  default:
                      $this->fpdi->error(sprintf("Unsupported Filter: %s",$_filter[1]));
              }
          }

          return $stream;
      }

      /**
       * Get a box from a page.
       * Array format is the same as used by fpdf_tpl.
       *
       * All values are divided by $this->fpdi->k. If the page does not
       * define the box, the lookup continues on its /Parent.
       *
       * @param array  $page      a /Page
       * @param string $box_index Type of box, see $availableBoxes
       * @return array|false Array with the keys "x", "y", "w" and "h", or
       *                     false when neither the page nor any ancestor
       *                     defines the box
       */
      function getPageBox($page, $box_index) {
          $page = $this->pdf_resolve_object($this->c, $page);

          $box = null;
          if (isset($page[1][1][$box_index])) {
              $box = $page[1][1][$box_index];
          }

          if (!is_null($box) && $box[0] == PDF_TYPE_OBJREF) {
              $tmp_box = $this->pdf_resolve_object($this->c, $box);
              $box = $tmp_box[1];
          }

          if (!is_null($box) && $box[0] == PDF_TYPE_ARRAY) {
              $b = $box[1];
              $k = $this->fpdi->k;

              return array("x" => $b[0][1]/$k,
                           "y" => $b[1][1]/$k,
                           "w" => abs($b[0][1]-$b[2][1])/$k,
                           "h" => abs($b[1][1]-$b[3][1])/$k);
          } else if (!isset($page[1][1]['/Parent'])) {
              return false;
          } else {
              return $this->getPageBox($this->pdf_resolve_object($this->c, $page[1][1]['/Parent']), $box_index);
          }
      }

      /**
       * Get all boxes of a page.
       *
       * @param int $pageno One-based page number
       * @return array Boxes keyed by box type, see _getPageBoxes()
       */
      function getPageBoxes($pageno) {
          return $this->_getPageBoxes($this->pages[$pageno-1]);
      }

      /**
       * Get all boxes from a /Page.
       *
       * Box types for which getPageBox() finds nothing are left out.
       *
       * @param array $page a /Page
       * @return array Boxes keyed by box type (e.g. "/MediaBox")
       */
      function _getPageBoxes($page) {
          $boxes = array();

          foreach ($this->availableBoxes as $box) {
              $_box = $this->getPageBox($page, $box);
              if ($_box) {
                  $boxes[$box] = $_box;
              }
          }

          return $boxes;
      }

      /**
       * Get the /Rotate entry that applies to a page.
       *
       * @param int $pageno One-based page number
       * @return mixed See _getPageRotation()
       */
      function getPageRotation($pageno) {
          return $this->_getPageRotation($this->pages[$pageno-1]);
      }

      /**
       * Get the /Rotate entry of a /Page.
       *
       * If the object has no /Rotate entry the lookup moves up to the
       * /Parent object.
       *
       * @param array $obj Array of pdf-data (a /Page or /Pages object)
       * @return mixed The resolved /Rotate value, or false when neither the
       *               object nor any ancestor defines one
       */
      function _getPageRotation($obj) { // $obj = /Page
          $obj = $this->pdf_resolve_object($this->c, $obj);

          if (isset($obj[1][1]['/Rotate'])) {
              $res = $this->pdf_resolve_object($this->c, $obj[1][1]['/Rotate']);
          } else if (isset($obj[1][1]['/Parent'])) {
              $res = $this->_getPageRotation($obj[1][1]['/Parent']);
          } else {
              return false;
          }

          // $res is false when the lookup failed further up the tree;
          // only unwrap it when it really is an object wrapper.
          if (is_array($res) && $res[0] == PDF_TYPE_OBJECT) {
              return $res[1];
          }
          return $res;
      }

      /**
       * Read all /Page(s).
       *
       * Walks the /Kids of the given /Pages node; nested /Pages nodes are
       * read recursively, everything else is appended to the result array.
       * On failure $success is set to false and $errormsg is filled.
       *
       * @param object $c      pdf_context
       * @param array  $pages  /Pages
       * @param array  $result the result array
       * @return false|null false when /Kids could not be read, nothing otherwise
       */
      function read_pages(&$c, &$pages, &$result) {
          // Get the kids dictionary (a missing entry is passed on as null
          // so that no notice is raised for malformed documents)
          $kids_ref = isset($pages[1][1]['/Kids']) ? $pages[1][1]['/Kids'] : null;
          $kids = $this->pdf_resolve_object($c, $kids_ref);

          if (!is_array($kids)) {
              // mPDF 4.0
              $this->success = false;
              $this->errormsg = "Cannot find /Kids in current /Page-Dictionary";
              return false;
          }

          foreach ($kids[1] as $v) {
              $pg = $this->pdf_resolve_object($c, $v);

              if (isset($pg[1][1]['/Type'][1]) && $pg[1][1]['/Type'][1] === '/Pages') {
                  // If one of the kids is an embedded
                  // /Pages array, resolve it as well.
                  if ($this->read_pages($c, $pg, $result) === false) {
                      // The nested node already recorded the error.
                      return false;
                  }
              } else {
                  $result[] = $pg;
              }
          }
      }
  }
  307. ?>