TransformDoc.inc 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. <?php
  2. /**
  3. * Transform DOCX to PDF or XHTML
  4. *
  5. * @category Phpdocx
  6. * @package elements
  7. * @copyright Copyright (c) 2009-2011 Narcea Producciones Multimedia S.L.
  8. * (http://www.2mdc.com)
  9. * @license LGPL
  10. * @version 1.0
  11. * @link http://www.phpdocx.com
  12. * @since File available since Release 1.0
  13. */
  // Report every error category except E_NOTICE. error_reporting() changes the
  // level for the rest of the running script, not only for this file.
  14. error_reporting(E_ALL & ~E_NOTICE);
  // Load the AutoLoader definition that sits in the same directory as this file.
  15. require_once dirname(__FILE__) . '/AutoLoader.inc';
  // NOTE(review): presumably makes the PDF library classes available (the DOMPDF
  // class used by TransformDoc::generatePDF) - confirm in AutoLoader.inc.
  16. AutoLoader::loadPDF();
  /**
   * Transform DOCX to PDF or XHTML
   *
   * The DOCX package is opened as a ZIP archive, its main document part is run
   * through the docx2html.xsl stylesheet to obtain XHTML, and that XHTML can
   * then be tidied, cleaned and rendered to PDF.
   *
   * @category Phpdocx
   * @package elements
   * @copyright Copyright (c) 2009-2011 Narcea Producciones Multimedia S.L.
   * (http://www.2mdc.com)
   * @license http://www.phpdocx.com/wp-content/themes/lightword/pro_license.php
   * @version 1.0
   * @link http://www.phpdocx.com
   * @since Class available since Release 1.0
   */
  class TransformDoc
  {
      const SCHEMA_IMAGEDOCUMENT =
          'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image';
      const SCHEMA_OFFICEDOCUMENT =
          'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';

      /**
       * XHTML produced from the DOCX file
       *
       * @access private
       * @var string
       */
      private $_xhtml;

      /**
       * Path of the DOCX file to transform.
       *
       * Declared explicitly instead of being created on first assignment.
       * Kept public so code that read the property directly keeps working.
       *
       * @access public
       * @var string
       */
      public $strFile;

      /**
       * Output file, as given to setStrOutputFile().
       *
       * Kept public for the same reason as $strFile.
       *
       * @access public
       * @var string
       */
      public $strOutputFile;

      /**
       * Construct
       *
       * @access public
       */
      public function __construct()
      {
      }

      /**
       * Destruct
       *
       * @access public
       */
      public function __destruct()
      {
      }

      /**
       * Getter. File
       *
       * @access public
       * @return string Path of the DOCX file
       */
      public function getstrFile()
      {
          return $this->strFile;
      }

      /**
       * Getter. XHTML
       *
       * @access public
       * @return string Current XHTML
       */
      public function getStrXHTML()
      {
          return $this->_xhtml;
      }

      /**
       * Getter. Output file
       *
       * @access public
       * @return string Output file
       */
      public function getStrOutputFile()
      {
          return $this->strOutputFile;
      }

      /**
       * Setter. File
       *
       * @access public
       * @param string $file Path of the DOCX file
       */
      public function setstrFile($file)
      {
          $this->strFile = $file;
      }

      /**
       * Setter. XHTML
       *
       * @access public
       * @param string $strXHTML XHTML to work on
       */
      public function setStrXHTML($strXHTML)
      {
          $this->_xhtml = $strXHTML;
      }

      /**
       * Setter. Output file
       *
       * @access public
       * @param string $outputFile
       */
      public function setStrOutputFile($outputFile)
      {
          $this->strOutputFile = $outputFile;
      }

      /**
       * Return zip path
       *
       * Normalises a path for use as a ZIP entry name: both slash styles are
       * accepted, empty and '.' segments are dropped, '..' removes the
       * previous segment, and the result is joined with '/'.
       *
       * @access public
       * @param string $path Path to normalise
       * @return string
       */
      public function absoluteZipPath($path)
      {
          $path = str_replace(array('/', '\\'), DIRECTORY_SEPARATOR, $path);
          // 'strlen' keeps every non-empty segment, including the segment '0'.
          $parts = array_filter(
              explode(DIRECTORY_SEPARATOR, $path), 'strlen'
          );
          $arrAbsolutes = array();
          foreach ($parts as $segment) {
              if ('.' === $segment) {
                  continue;
              }
              if ('..' === $segment) {
                  array_pop($arrAbsolutes);
              } else {
                  $arrAbsolutes[] = $segment;
              }
          }
          return implode('/', $arrAbsolutes);
      }

      /**
       * Clean HTML
       *
       * Replaces the <head>...</head> section with a head that only declares
       * the UTF-8 content type, and points image tags whose relationship id is
       * empty (src="?image=") at a placeholder image.
       *
       * When no complete <head>...</head> pair is found, the new head is put in
       * front of the unchanged document, so the content is never duplicated.
       *
       * @access public
       */
      public function cleanXHTML()
      {
          $xhtml = (string) $this->_xhtml;
          $closeTag = '</head>';
          $openPos = strpos($xhtml, '<head>');
          $closePos = strpos($xhtml, $closeTag);

          if ($openPos !== false && $closePos !== false && $closePos > $openPos) {
              $before = substr($xhtml, 0, $openPos);
              $after = (string) substr($xhtml, $closePos + strlen($closeTag));
          } else {
              $before = '';
              $after = $xhtml;
          }

          $after = str_replace(
              'src="?image="', 'src="imagen_not_found.jpg"', $after
          );

          $this->_xhtml = $before .
              '<head><meta http-equiv="Content-Type" ' .
              'content="text/html; charset=utf-8" /></head>' . $after;
      }

      /**
       * Return file name
       *
       * Takes the last '/'-separated segment of the file path and returns the
       * part before its first dot. Falls back to 'file' when that part is
       * empty (no file set, or a name starting with a dot).
       *
       * @access public
       * @return string
       */
      public function getFileName()
      {
          $partsFile = explode('/', (string) $this->strFile);
          $divideFile = explode('.', array_pop($partsFile));
          $fileName = array_shift($divideFile);
          if ($fileName === '') {
              return 'file';
          }
          return $fileName;
      }

      /**
       * Convert DOCX to XHTML
       *
       * Opens the DOCX package, transforms its main document part to XHTML,
       * rewrites the image references to files/files_<file>/media/word/<target>
       * and extracts the referenced images under files/files_<file>/media.
       *
       * If the package cannot be opened, a message is echoed and the script
       * exits.
       *
       * @access public
       */
      public function generateXHTML()
      {
          $package = new ZipArchive();
          // ZipArchive::open() returns true on success and an integer error
          // code (which is truthy) on failure, so compare strictly with true.
          if ($package->open($this->strFile) !== true) {
              echo 'Unable to find the DOCX file';
              exit();
          }

          $this->_transformMainDocument($package);

          $idImgs = $this->_findImageRelationshipIds();
          $pathImgs = $this->_readImageTargets($package);

          foreach ($idImgs as $idImg) {
              // An id without a known target still gets its src rewritten,
              // with an empty target.
              $target = isset($pathImgs[$idImg]) ? $pathImgs[$idImg] : '';
              $this->_xhtml = str_replace(
                  'src="?image=' . $idImg . '"',
                  'src="files/files_' . $this->strFile .
                  '/media/word/' . $target . '"',
                  $this->_xhtml
              );
          }

          $pathZip = array();
          foreach ($pathImgs as $target) {
              $pathZip[] = 'word/' . $target;
          }
          if (!empty($pathZip)) {
              $package->extractTo(
                  'files/files_' . $this->strFile . '/media', $pathZip
              );
          }
          // Always release the archive handle, with or without images.
          $package->close();
      }

      /**
       * Transform the main document part of the package into XHTML and store
       * the result in $_xhtml.
       *
       * @access private
       * @param ZipArchive $package Opened DOCX package
       */
      private function _transformMainDocument(ZipArchive $package)
      {
          $relsXml = $package->getFromName('_rels/.rels');
          if ($relsXml === false) {
              return;
          }
          $relations = simplexml_load_string($relsXml);
          if ($relations === false) {
              return;
          }

          foreach ($relations->Relationship as $rel) {
              if ((string) $rel['Type'] !== self::SCHEMA_OFFICEDOCUMENT) {
                  continue;
              }
              $target = (string) $rel['Target'];
              $xml = $package->getFromName(
                  $this->absoluteZipPath(
                      dirname($target) . '/' . basename($target)
                  )
              );
              if ($xml === false) {
                  // Part listed in the relationships but missing in the zip.
                  continue;
              }

              // Drop a <w:wordDocument ...> wrapper, if there is one, so the
              // markup handed to the stylesheet starts at <w:body>.
              $xml = str_replace('</w:wordDocument>', '', $xml);
              $xml = preg_replace(
                  '/(<w:wordDocument)+(.)*(><w:body>)/', '<w:body>', $xml
              );

              $xmlDOM = new DOMDocument();
              // Parser warnings are silenced on purpose here.
              @$xmlDOM->loadXML($xml);

              $xsl = new DOMDocument();
              $xsl->load(dirname(__FILE__) . '/../xsl/docx2html.xsl');
              $xsltProc = new XSLTProcessor();
              $xsltProc->importStylesheet($xsl);
              $this->_xhtml = $xsltProc->transformToXML($xmlDOM);
          }
      }

      /**
       * Collect the relationship ids used by image tags of the current XHTML.
       *
       * Only double-quoted attributes of the form src="?image=rId..." are
       * taken into account.
       *
       * @access private
       * @return array Relationship ids, e.g. array('rId5')
       */
      private function _findImageRelationshipIds()
      {
          $pattern = "'src\s*=\s*([\"\'])?(?(1) (.*?)\\1 | ([^\s\>]+))'isx";
          $prefix = '?image=';
          $idImgs = array();

          if (!preg_match_all($pattern, (string) $this->_xhtml, $domImgs)) {
              return $idImgs;
          }
          // Group 1 is the quote character, group 2 the quoted value.
          foreach ($domImgs[2] as $index => $value) {
              if ($domImgs[1][$index] === '"'
                  && strpos($value, $prefix . 'rId') === 0
              ) {
                  $idImgs[] = substr($value, strlen($prefix));
              }
          }
          // One replacement per id is enough, so duplicates are dropped.
          return array_unique($idImgs);
      }

      /**
       * Read the image relationships of word/document.xml.
       *
       * @access private
       * @param ZipArchive $package Opened DOCX package
       * @return array Image target paths indexed by relationship id
       */
      private function _readImageTargets(ZipArchive $package)
      {
          $pathImgs = array();
          $relsXml = $package->getFromName('word/_rels/document.xml.rels');
          if ($relsXml === false) {
              return $pathImgs;
          }
          $relationsImgs = simplexml_load_string($relsXml);
          if ($relationsImgs === false) {
              return $pathImgs;
          }
          // Element names are case-sensitive: the element is "Relationship".
          foreach ($relationsImgs->Relationship as $relImg) {
              if ((string) $relImg['Type'] === self::SCHEMA_IMAGEDOCUMENT) {
                  $pathImgs[(string) $relImg['Id']] =
                      (string) $relImg['Target'];
              }
          }
          return $pathImgs;
      }

      /**
       * Convert DOCX to PDF, using dompdf. DOCX->XHTML->PDF
       *
       * The PDF is streamed under the name returned by getFileName() plus
       * '.pdf'. If an exception is raised, a message and the exception are
       * echoed.
       *
       * @access public
       */
      public function generatePDF()
      {
          $this->generateXHTML();
          $this->cleanXHTML();
          try {
              $domPDF = new DOMPDF();
              $domPDF->load_html($this->_xhtml);
              $domPDF->render();
              $fileName = $this->getFileName() . '.pdf';
              $domPDF->stream($fileName);
          }
          catch (Exception $err) {
              echo 'Unable to generate PDF file. ';
              echo $err;
          }
      }

      /**
       * Validate HTML using tidy
       *
       * Runs the current XHTML through tidy and stores the repaired markup
       * back as a string.
       *
       * @access public
       */
      public function validatorXHTML()
      {
          $config = array(
              'indent' => true,
              'output-xhtml' => true,
              'wrap' => 200);
          $tidy = new tidy();
          $tidy->parseString((string) $this->_xhtml, $config, 'utf8');
          $tidy->cleanRepair();
          // Store the markup, not the tidy object, so $_xhtml stays a string.
          $this->_xhtml = tidy_get_output($tidy);
      }
  }