<?php
/**
 * Transform DOCX to PDF or XHTML
 *
 * @category   Phpdocx
 * @package    elements
 * @copyright  Copyright (c) 2009-2011 Narcea Producciones Multimedia S.L.
 *             (http://www.2mdc.com)
 * @license    LGPL
 * @version    1.0
 * @link       http://www.phpdocx.com
 * @since      File available since Release 1.0
 */
// Report every error category except notices.
error_reporting(E_ALL ^ E_NOTICE);

// Pull in the autoloader definition that sits next to this file.
require_once __DIR__ . '/AutoLoader.inc';

// Ask the autoloader to make the PDF support available
// (presumably the DOMPDF class used by generatePDF() -- confirm in AutoLoader.inc).
AutoLoader::loadPDF();
/**
 * Transform DOCX to PDF or XHTML
 *
 * Reads the main document part of a DOCX package, converts it to XHTML with
 * the XSL stylesheet at ../xsl/docx2html.xsl, and can optionally tidy the
 * markup or render it to PDF through DOMPDF.
 *
 * @category   Phpdocx
 * @package    elements
 * @copyright  Copyright (c) 2009-2011 Narcea Producciones Multimedia S.L.
 *             (http://www.2mdc.com)
 * @license    http://www.phpdocx.com/wp-content/themes/lightword/pro_license.php
 * @version    1.0
 * @link       http://www.phpdocx.com
 * @since      Class available since Release 1.0
 */
class TransformDoc
{
    const SCHEMA_IMAGEDOCUMENT =
        'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image';
    const SCHEMA_OFFICEDOCUMENT =
        'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';

    /**
     * Path of the DOCX file to transform.
     *
     * Declared explicitly (it used to be created dynamically by the setter)
     * and kept public so code that read it directly keeps working.
     *
     * @access public
     * @var string|null
     */
    public $strFile;

    /**
     * Output file path. Only stored and returned by the accessors of this
     * class; none of the methods here reads it.
     *
     * @access public
     * @var string|null
     */
    public $strOutputFile;

    /**
     * XHTML produced by generateXHTML() and refined by the other methods.
     *
     * @access private
     * @var string
     */
    private $_xhtml;

    /**
     * Construct
     *
     * @access public
     */
    public function __construct()
    {
    }

    /**
     * Destruct
     *
     * @access public
     */
    public function __destruct()
    {
    }

    /**
     * Getter. File
     *
     * @access public
     * @return string|null Path of the DOCX file, or null when none was set
     */
    public function getstrFile()
    {
        return $this->strFile;
    }

    /**
     * Getter. XHTML
     *
     * @access public
     * @return string Current XHTML markup
     */
    public function getStrXHTML()
    {
        return $this->_xhtml;
    }

    /**
     * Getter. Output file
     *
     * @access public
     * @return string|null Output file path, or null when none was set
     */
    public function getStrOutputFile()
    {
        return $this->strOutputFile;
    }

    /**
     * Setter. File
     *
     * @access public
     * @param string $file Path of the DOCX file to transform
     */
    public function setstrFile($file)
    {
        $this->strFile = $file;
    }

    /**
     * Setter. XHTML
     *
     * @access public
     * @param string $strXHTML XHTML markup to work on
     */
    public function setStrXHTML($strXHTML)
    {
        $this->_xhtml = $strXHTML;
    }

    /**
     * Setter. Output file
     *
     * @access public
     * @param string $outputFile
     */
    public function setStrOutputFile($outputFile)
    {
        $this->strOutputFile = $outputFile;
    }

    /**
     * Return zip path
     *
     * Normalises a path for use as a ZIP entry name: both slash styles are
     * accepted, empty and "." segments are dropped, ".." removes the previous
     * segment, and the result is joined with "/" without a leading slash.
     *
     * @access public
     * @param string $path Path to normalise
     * @return string
     */
    public function absoluteZipPath($path)
    {
        $path = str_replace(array('/', '\\'), DIRECTORY_SEPARATOR, $path);
        $segments = array_filter(
            explode(DIRECTORY_SEPARATOR, $path), 'strlen'
        );
        $resolved = array();
        foreach ($segments as $segment) {
            if ('.' == $segment) {
                continue;
            }
            if ('..' == $segment) {
                array_pop($resolved);
            } else {
                $resolved[] = $segment;
            }
        }
        return implode('/', $resolved);
    }

    /**
     * Clean HTML
     *
     * Replaces the <head>...</head> section with a head that only declares
     * the UTF-8 content type, and points image references that have an empty
     * id at a placeholder file. When the markup has no <head>...</head> pair
     * the new head is placed in front of the unchanged markup.
     *
     * @access public
     */
    public function cleanXHTML()
    {
        $xhtml = (string) $this->_xhtml;
        $newHead = '<head><meta http-equiv="Content-Type" ' .
            'content="text/html; charset=utf-8" /></head>';

        $openPos = strpos($xhtml, '<head>');
        $closePos = strpos($xhtml, '</head>');
        if ($openPos !== false && $closePos !== false && $openPos < $closePos) {
            $before = (string) substr($xhtml, 0, $openPos);
            // Keep everything after the first </head>, even if the tag
            // shows up again later in the document.
            $after = (string) substr($xhtml, $closePos + strlen('</head>'));
        } else {
            // No complete head section: emit the document once, not twice.
            $before = '';
            $after = $xhtml;
        }

        $after = str_replace(
            'src="?image="', 'src="imagen_not_found.jpg"', $after
        );
        $this->_xhtml = $before . $newHead . $after;
    }

    /**
     * Return file name
     *
     * Returns the last path segment of the DOCX path, cut at its first dot.
     * Falls back to 'file' when that yields an empty name.
     *
     * @access public
     * @return string
     */
    public function getFileName()
    {
        // Accept both slash styles, as absoluteZipPath() does.
        $path = str_replace('\\', '/', (string) $this->strFile);
        $segments = explode('/', $path);
        $nameParts = explode('.', (string) array_pop($segments));
        $fileName = array_shift($nameParts);
        if ($fileName === null || $fileName === '') {
            return 'file';
        }
        return $fileName;
    }

    /**
     * Convert DOCX to XHTML
     *
     * Transforms the main document part to XHTML, rewrites the image
     * references so that they point at the extracted media files and extracts
     * those files to "files/files_<docx path>/media".
     *
     * If the package cannot be opened a message is printed and the script
     * terminates.
     *
     * @access public
     */
    public function generateXHTML()
    {
        $package = new ZipArchive();
        // open() returns TRUE on success and a non-zero error code on
        // failure, so only a strict comparison detects a failure.
        if ($package->open($this->strFile) !== true) {
            echo 'Unable to find the DOCX file';
            exit();
        }

        foreach ($this->_readRelationships($package, '_rels/.rels') as $rel) {
            if ($rel['Type'] !== self::SCHEMA_OFFICEDOCUMENT) {
                continue;
            }
            $xml = $package->getFromName(
                $this->absoluteZipPath(
                    dirname($rel['Target']) . '/' . basename($rel['Target'])
                )
            );
            if ($xml === false || $xml === '') {
                // Referenced part is missing from the package.
                continue;
            }
            $this->_xhtml = $this->_transformDocumentXml($xml);
        }

        // Map relationship ids to image targets and collect the zip entries
        // that have to be extracted.
        $imageTargets = array();
        $zipEntries = array();
        $imageRels = $this->_readRelationships(
            $package, 'word/_rels/document.xml.rels'
        );
        foreach ($imageRels as $rel) {
            if ($rel['Type'] === self::SCHEMA_IMAGEDOCUMENT) {
                $imageTargets[$rel['Id']] = $rel['Target'];
                $zipEntries[] = 'word/' . $rel['Target'];
            }
        }

        // The stylesheet emits image references as src="?image=rIdN".
        preg_match_all(
            '/src="\?image=(rId[^"]*)"/', (string) $this->_xhtml, $matches
        );
        foreach (array_unique($matches[1]) as $imageId) {
            if (!isset($imageTargets[$imageId])) {
                // Unknown id: leave the reference untouched.
                continue;
            }
            $this->_xhtml = str_replace(
                'src="?image=' . $imageId . '"',
                'src="files/files_' . $this->strFile . '/media/word/' .
                $imageTargets[$imageId] . '"',
                $this->_xhtml
            );
        }

        if (!empty($zipEntries)) {
            $package->extractTo(
                'files/files_' . $this->strFile . '/media', $zipEntries
            );
        }
        // Always release the archive, not only when images were extracted.
        $package->close();
    }

    /**
     * Convert DOCX to PDF, using dompdf. DOCX->XHTML->PDF
     *
     * Generates and cleans the XHTML, renders it with DOMPDF and streams the
     * result under the name returned by getFileName() plus ".pdf". Exceptions
     * raised while rendering are printed instead of being propagated.
     *
     * @access public
     */
    public function generatePDF()
    {
        $this->generateXHTML();
        $this->cleanXHTML();
        try {
            $domPDF = new DOMPDF();
            $domPDF->load_html($this->_xhtml);
            $domPDF->render();
            $fileName = $this->getFileName() . '.pdf';
            $domPDF->stream($fileName);
        } catch (Exception $err) {
            echo 'Unable to generate PDF file. ';
            echo $err;
        }
    }

    /**
     * Validate HTML using tidy
     *
     * Runs the current markup through tidy (indented XHTML output, lines
     * wrapped at 200 columns, UTF-8) and stores the repaired markup as a
     * string.
     *
     * @access public
     */
    public function validatorXHTML()
    {
        $config = array(
            'indent' => true,
            'output-xhtml' => true,
            'wrap' => 200);
        $tidy = new tidy();
        $tidy->parseString((string) $this->_xhtml, $config, 'utf8');
        $tidy->cleanRepair();
        // Store the markup itself; the property is documented as a string.
        $this->_xhtml = (string) $tidy;
    }

    /**
     * Read a relationships part of the package.
     *
     * @access private
     * @param ZipArchive $package Opened DOCX package
     * @param string $entryName Name of the .rels entry inside the package
     * @return array List of arrays with the keys 'Id', 'Type' and 'Target';
     *               empty when the entry is missing or is not valid XML
     */
    private function _readRelationships(ZipArchive $package, $entryName)
    {
        $raw = $package->getFromName($entryName);
        if ($raw === false || $raw === '') {
            return array();
        }

        $previous = libxml_use_internal_errors(true);
        $document = simplexml_load_string($raw);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if ($document === false) {
            return array();
        }

        $relationships = array();
        // Element names are case-sensitive: the element is "Relationship".
        foreach ($document->Relationship as $node) {
            $relationships[] = array(
                'Id' => (string) $node['Id'],
                'Type' => (string) $node['Type'],
                'Target' => (string) $node['Target'],
            );
        }
        return $relationships;
    }

    /**
     * Transform the XML of a document part to XHTML with docx2html.xsl.
     *
     * @access private
     * @param string $xml Raw XML of the document part
     * @return string|false Result of XSLTProcessor::transformToXML()
     */
    private function _transformDocumentXml($xml)
    {
        $xml = str_replace('</w:wordDocument>', '', $xml);
        $xml = preg_replace(
            '/(<w:wordDocument)+(.)*(><w:body>)/', '<w:body>', $xml
        );

        // Collect parser diagnostics internally instead of silencing the
        // call with "@"; the previous libxml setting is restored afterwards.
        $previous = libxml_use_internal_errors(true);
        $xmlDOM = new DOMDocument();
        if ((string) $xml !== '') {
            $xmlDOM->loadXML($xml);
        }
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $xsl = new DOMDocument();
        $xsl->load(dirname(__FILE__) . '/../xsl/docx2html.xsl');
        $xsltProc = new XSLTProcessor();
        $xsltProc->importStylesheet($xsl);

        return $xsltProc->transformToXML($xmlDOM);
    }
}
|