OLE.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570
  1. <?php
  2. /* vim: set expandtab tabstop=4 shiftwidth=4: */
  3. // +----------------------------------------------------------------------+
  4. // | PHP Version 4 |
  5. // +----------------------------------------------------------------------+
  6. // | Copyright (c) 1997-2002 The PHP Group |
  7. // +----------------------------------------------------------------------+
  8. // | This source file is subject to version 2.02 of the PHP license, |
  9. // | that is bundled with this package in the file LICENSE, and is |
  10. // | available at through the world-wide-web at |
  11. // | http://www.php.net/license/2_02.txt. |
  12. // | If you did not receive a copy of the PHP license and are unable to |
  13. // | obtain it through the world-wide-web, please send a note to |
  14. // | license@php.net so we can mail you a copy immediately. |
  15. // +----------------------------------------------------------------------+
  16. // | Author: Xavier Noguer <xnoguer@php.net> |
  17. // | Based on OLE::Storage_Lite by Kawai, Takanori |
  18. // +----------------------------------------------------------------------+
  19. //
  20. // $Id: OLE.php,v 1.15 2007/12/18 20:59:11 schmidt Exp $
  21. /**
  22. * Constants for OLE package
  23. */
  24. define('OLE_PPS_TYPE_ROOT', 5);
  25. define('OLE_PPS_TYPE_DIR', 1);
  26. define('OLE_PPS_TYPE_FILE', 2);
  27. define('OLE_DATA_SIZE_SMALL', 0x1000);
  28. define('OLE_LONG_INT_SIZE', 4);
  29. define('OLE_PPS_SIZE', 0x80);
  30. require_once 'PEAR.php';
  31. /**
  32. * Array for storing OLE instances that are accessed from
  33. * OLE_ChainedBlockStream::stream_open().
  34. * @var array
  35. */
  36. $GLOBALS['_OLE_INSTANCES'] = array();
  37. /**
  38. * OLE package base class.
  39. *
  40. * @category Structures
  41. * @package OLE
  42. * @author Xavier Noguer <xnoguer@php.net>
  43. * @author Christian Schmidt <schmidt@php.net>
  44. */
  45. class OLE extends PEAR
  46. {
  47. /**
  48. * The file handle for reading an OLE container
  49. * @var resource
  50. */
  51. var $_file_handle;
  52. /**
  53. * Array of PPS's found on the OLE container
  54. * @var array
  55. */
  56. var $_list;
  57. /**
  58. * Root directory of OLE container
  59. * @var OLE_PPS_Root
  60. */
  61. var $root;
  62. /**
  63. * Big Block Allocation Table
  64. * @var array (blockId => nextBlockId)
  65. */
  66. var $bbat;
  67. /**
  68. * Short Block Allocation Table
  69. * @var array (blockId => nextBlockId)
  70. */
  71. var $sbat;
  72. /**
  73. * Size of big blocks. This is usually 512.
  74. * @var int number of octets per block.
  75. */
  76. var $bigBlockSize;
  77. /**
  78. * Size of small blocks. This is usually 64.
  79. * @var int number of octets per block
  80. */
  81. var $smallBlockSize;
  82. /**
  83. * Creates a new OLE object
  84. * @access public
  85. */
  86. function OLE()
  87. {
  88. $this->_list = array();
  89. }
  90. /**
  91. * Destructor (using PEAR)
  92. * Just closes the file handle on the OLE file.
  93. *
  94. * @access private
  95. */
  96. function _OLE()
  97. {
  98. fclose($this->_file_handle);
  99. }
  100. /**
  101. * Reads an OLE container from the contents of the file given.
  102. *
  103. * @access public
  104. * @param string $file
  105. * @return mixed true on success, PEAR_Error on failure
  106. */
  107. function read($file)
  108. {
  109. $fh = @fopen($file, "r");
  110. if (!$fh) {
  111. return $this->raiseError("Can't open file $file");
  112. }
  113. $this->_file_handle = $fh;
  114. $signature = fread($fh, 8);
  115. if ("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" != $signature) {
  116. return $this->raiseError("File doesn't seem to be an OLE container.");
  117. }
  118. fseek($fh, 28);
  119. if (fread($fh, 2) != "\xFE\xFF") {
  120. // This shouldn't be a problem in practice
  121. return $this->raiseError("Only Little-Endian encoding is supported.");
  122. }
  123. // Size of blocks and short blocks in bytes
  124. $this->bigBlockSize = pow(2, $this->_readInt2($fh));
  125. $this->smallBlockSize = pow(2, $this->_readInt2($fh));
  126. // Skip UID, revision number and version number
  127. fseek($fh, 44);
  128. // Number of blocks in Big Block Allocation Table
  129. $bbatBlockCount = $this->_readInt4($fh);
  130. // Root chain 1st block
  131. $directoryFirstBlockId = $this->_readInt4($fh);
  132. // Skip unused bytes
  133. fseek($fh, 56);
  134. // Streams shorter than this are stored using small blocks
  135. $this->bigBlockThreshold = $this->_readInt4($fh);
  136. // Block id of first sector in Short Block Allocation Table
  137. $sbatFirstBlockId = $this->_readInt4($fh);
  138. // Number of blocks in Short Block Allocation Table
  139. $sbbatBlockCount = $this->_readInt4($fh);
  140. // Block id of first sector in Master Block Allocation Table
  141. $mbatFirstBlockId = $this->_readInt4($fh);
  142. // Number of blocks in Master Block Allocation Table
  143. $mbbatBlockCount = $this->_readInt4($fh);
  144. $this->bbat = array();
  145. // Remaining 4 * 109 bytes of current block is beginning of Master
  146. // Block Allocation Table
  147. $mbatBlocks = array();
  148. for ($i = 0; $i < 109; $i++) {
  149. $mbatBlocks[] = $this->_readInt4($fh);
  150. }
  151. // Read rest of Master Block Allocation Table (if any is left)
  152. $pos = $this->_getBlockOffset($mbatFirstBlockId);
  153. for ($i = 0; $i < $mbbatBlockCount; $i++) {
  154. fseek($fh, $pos);
  155. for ($j = 0; $j < $this->bigBlockSize / 4 - 1; $j++) {
  156. $mbatBlocks[] = $this->_readInt4($fh);
  157. }
  158. // Last block id in each block points to next block
  159. $pos = $this->_getBlockOffset($this->_readInt4($fh));
  160. }
  161. // Read Big Block Allocation Table according to chain specified by
  162. // $mbatBlocks
  163. for ($i = 0; $i < $bbatBlockCount; $i++) {
  164. $pos = $this->_getBlockOffset($mbatBlocks[$i]);
  165. fseek($fh, $pos);
  166. for ($j = 0 ; $j < $this->bigBlockSize / 4; $j++) {
  167. $this->bbat[] = $this->_readInt4($fh);
  168. }
  169. }
  170. // Read short block allocation table (SBAT)
  171. $this->sbat = array();
  172. $shortBlockCount = $sbbatBlockCount * $this->bigBlockSize / 4;
  173. $sbatFh = $this->getStream($sbatFirstBlockId);
  174. for ($blockId = 0; $blockId < $shortBlockCount; $blockId++) {
  175. $this->sbat[$blockId] = $this->_readInt4($sbatFh);
  176. }
  177. fclose($sbatFh);
  178. $this->_readPpsWks($directoryFirstBlockId);
  179. return true;
  180. }
  181. /**
  182. * @param int $blockId block id
  183. * @return int byte offset from beginning of file
  184. * @access private
  185. */
  186. function _getBlockOffset($blockId)
  187. {
  188. return 512 + $blockId * $this->bigBlockSize;
  189. }
  190. /**
  191. * Returns a stream for use with fread() etc. External callers should
  192. * use OLE_PPS_File::getStream().
  193. * @param int|PPS $blockIdOrPps block id or PPS
  194. * @return resource read-only stream
  195. */
  196. function getStream($blockIdOrPps)
  197. {
  198. include_once 'OLE/ChainedBlockStream.php';
  199. static $isRegistered = false;
  200. if (!$isRegistered) {
  201. stream_wrapper_register('ole-chainedblockstream',
  202. 'OLE_ChainedBlockStream');
  203. $isRegistered = true;
  204. }
  205. // Store current instance in global array, so that it can be accessed
  206. // in OLE_ChainedBlockStream::stream_open().
  207. // Object is removed from self::$instances in OLE_Stream::close().
  208. $GLOBALS['_OLE_INSTANCES'][] = $this;
  209. $instanceId = end(array_keys($GLOBALS['_OLE_INSTANCES']));
  210. $path = 'ole-chainedblockstream://oleInstanceId=' . $instanceId;
  211. if (is_a($blockIdOrPps, 'OLE_PPS')) {
  212. $path .= '&blockId=' . $blockIdOrPps->_StartBlock;
  213. $path .= '&size=' . $blockIdOrPps->Size;
  214. } else {
  215. $path .= '&blockId=' . $blockIdOrPps;
  216. }
  217. return fopen($path, 'r');
  218. }
  219. /**
  220. * Reads a signed char.
  221. * @param resource $fh file handle
  222. * @return int
  223. * @access private
  224. */
  225. function _readInt1($fh)
  226. {
  227. list(, $tmp) = unpack("c", fread($fh, 1));
  228. return $tmp;
  229. }
  230. /**
  231. * Reads an unsigned short (2 octets).
  232. * @param resource $fh file handle
  233. * @return int
  234. * @access private
  235. */
  236. function _readInt2($fh)
  237. {
  238. list(, $tmp) = unpack("v", fread($fh, 2));
  239. return $tmp;
  240. }
  241. /**
  242. * Reads an unsigned long (4 octets).
  243. * @param resource file handle
  244. * @return int
  245. * @access private
  246. */
  247. function _readInt4($fh)
  248. {
  249. list(, $tmp) = unpack("V", fread($fh, 4));
  250. return $tmp;
  251. }
  252. /**
  253. * Gets information about all PPS's on the OLE container from the PPS WK's
  254. * creates an OLE_PPS object for each one.
  255. *
  256. * @access private
  257. * @param integer $blockId the block id of the first block
  258. * @return mixed true on success, PEAR_Error on failure
  259. */
  260. function _readPpsWks($blockId)
  261. {
  262. $fh = $this->getStream($blockId);
  263. for ($pos = 0; ; $pos += 128) {
  264. fseek($fh, $pos, SEEK_SET);
  265. $nameUtf16 = fread($fh, 64);
  266. $nameLength = $this->_readInt2($fh);
  267. $nameUtf16 = substr($nameUtf16, 0, $nameLength - 2);
  268. // Simple conversion from UTF-16LE to ISO-8859-1
  269. $name = str_replace("\x00", "", $nameUtf16);
  270. $type = $this->_readInt1($fh);
  271. switch ($type) {
  272. case OLE_PPS_TYPE_ROOT:
  273. require_once 'OLE/PPS/Root.php';
  274. $pps = new OLE_PPS_Root(null, null, array());
  275. $this->root = $pps;
  276. break;
  277. case OLE_PPS_TYPE_DIR:
  278. $pps = new OLE_PPS(null, null, null, null, null,
  279. null, null, null, null, array());
  280. break;
  281. case OLE_PPS_TYPE_FILE:
  282. require_once 'OLE/PPS/File.php';
  283. $pps = new OLE_PPS_File($name);
  284. break;
  285. default:
  286. continue;
  287. }
  288. fseek($fh, 1, SEEK_CUR);
  289. $pps->Type = $type;
  290. $pps->Name = $name;
  291. $pps->PrevPps = $this->_readInt4($fh);
  292. $pps->NextPps = $this->_readInt4($fh);
  293. $pps->DirPps = $this->_readInt4($fh);
  294. fseek($fh, 20, SEEK_CUR);
  295. $pps->Time1st = OLE::OLE2LocalDate(fread($fh, 8));
  296. $pps->Time2nd = OLE::OLE2LocalDate(fread($fh, 8));
  297. $pps->_StartBlock = $this->_readInt4($fh);
  298. $pps->Size = $this->_readInt4($fh);
  299. $pps->No = count($this->_list);
  300. $this->_list[] = $pps;
  301. // check if the PPS tree (starting from root) is complete
  302. if (isset($this->root) &&
  303. $this->_ppsTreeComplete($this->root->No)) {
  304. break;
  305. }
  306. }
  307. fclose($fh);
  308. // Initialize $pps->children on directories
  309. foreach ($this->_list as $pps) {
  310. if ($pps->Type == OLE_PPS_TYPE_DIR || $pps->Type == OLE_PPS_TYPE_ROOT) {
  311. $nos = array($pps->DirPps);
  312. $pps->children = array();
  313. while ($nos) {
  314. $no = array_pop($nos);
  315. if ($no != -1) {
  316. $childPps = $this->_list[$no];
  317. $nos[] = $childPps->PrevPps;
  318. $nos[] = $childPps->NextPps;
  319. $pps->children[] = $childPps;
  320. }
  321. }
  322. }
  323. }
  324. return true;
  325. }
  326. /**
  327. * It checks whether the PPS tree is complete (all PPS's read)
  328. * starting with the given PPS (not necessarily root)
  329. *
  330. * @access private
  331. * @param integer $index The index of the PPS from which we are checking
  332. * @return boolean Whether the PPS tree for the given PPS is complete
  333. */
  334. function _ppsTreeComplete($index)
  335. {
  336. return isset($this->_list[$index]) &&
  337. ($pps = $this->_list[$index]) &&
  338. ($pps->PrevPps == -1 ||
  339. $this->_ppsTreeComplete($pps->PrevPps)) &&
  340. ($pps->NextPps == -1 ||
  341. $this->_ppsTreeComplete($pps->NextPps)) &&
  342. ($pps->DirPps == -1 ||
  343. $this->_ppsTreeComplete($pps->DirPps));
  344. }
  345. /**
  346. * Checks whether a PPS is a File PPS or not.
  347. * If there is no PPS for the index given, it will return false.
  348. * @param integer $index The index for the PPS
  349. * @return bool true if it's a File PPS, false otherwise
  350. * @access public
  351. */
  352. function isFile($index)
  353. {
  354. if (isset($this->_list[$index])) {
  355. return ($this->_list[$index]->Type == OLE_PPS_TYPE_FILE);
  356. }
  357. return false;
  358. }
  359. /**
  360. * Checks whether a PPS is a Root PPS or not.
  361. * If there is no PPS for the index given, it will return false.
  362. * @param integer $index The index for the PPS.
  363. * @return bool true if it's a Root PPS, false otherwise
  364. * @access public
  365. */
  366. function isRoot($index)
  367. {
  368. if (isset($this->_list[$index])) {
  369. return ($this->_list[$index]->Type == OLE_PPS_TYPE_ROOT);
  370. }
  371. return false;
  372. }
  373. /**
  374. * Gives the total number of PPS's found in the OLE container.
  375. * @return integer The total number of PPS's found in the OLE container
  376. * @access public
  377. */
  378. function ppsTotal()
  379. {
  380. return count($this->_list);
  381. }
  382. /**
  383. * Gets data from a PPS
  384. * If there is no PPS for the index given, it will return an empty string.
  385. * @param integer $index The index for the PPS
  386. * @param integer $position The position from which to start reading
  387. * (relative to the PPS)
  388. * @param integer $length The amount of bytes to read (at most)
  389. * @return string The binary string containing the data requested
  390. * @access public
  391. * @see OLE_PPS_File::getStream()
  392. */
  393. function getData($index, $position, $length)
  394. {
  395. // if position is not valid return empty string
  396. if (!isset($this->_list[$index]) ||
  397. $position >= $this->_list[$index]->Size ||
  398. $position < 0) {
  399. return '';
  400. }
  401. $fh = $this->getStream($this->_list[$index]);
  402. $data = stream_get_contents($fh, $length, $position);
  403. fclose($fh);
  404. return $data;
  405. }
  406. /**
  407. * Gets the data length from a PPS
  408. * If there is no PPS for the index given, it will return 0.
  409. * @param integer $index The index for the PPS
  410. * @return integer The amount of bytes in data the PPS has
  411. * @access public
  412. */
  413. function getDataLength($index)
  414. {
  415. if (isset($this->_list[$index])) {
  416. return $this->_list[$index]->Size;
  417. }
  418. return 0;
  419. }
  420. /**
  421. * Utility function to transform ASCII text to Unicode
  422. *
  423. * @access public
  424. * @static
  425. * @param string $ascii The ASCII string to transform
  426. * @return string The string in Unicode
  427. */
  428. function Asc2Ucs($ascii)
  429. {
  430. $rawname = '';
  431. for ($i = 0; $i < strlen($ascii); $i++) {
  432. $rawname .= $ascii{$i} . "\x00";
  433. }
  434. return $rawname;
  435. }
  436. /**
  437. * Utility function
  438. * Returns a string for the OLE container with the date given
  439. *
  440. * @access public
  441. * @static
  442. * @param integer $date A timestamp
  443. * @return string The string for the OLE container
  444. */
  445. function LocalDate2OLE($date = null)
  446. {
  447. if (!isset($date)) {
  448. return "\x00\x00\x00\x00\x00\x00\x00\x00";
  449. }
  450. // factor used for separating numbers into 4 bytes parts
  451. $factor = pow(2, 32);
  452. // days from 1-1-1601 until the beggining of UNIX era
  453. $days = 134774;
  454. // calculate seconds
  455. $big_date = $days * 24 * 3600 +
  456. gmmktime(date("H",$date),date("i",$date),date("s",$date),
  457. date("m",$date),date("d",$date),date("Y",$date));
  458. // multiply just to make MS happy
  459. $big_date *= 10000000;
  460. $high_part = floor($big_date / $factor);
  461. // lower 4 bytes
  462. $low_part = floor((($big_date / $factor) - $high_part) * $factor);
  463. // Make HEX string
  464. $res = '';
  465. for ($i = 0; $i < 4; $i++) {
  466. $hex = $low_part % 0x100;
  467. $res .= pack('c', $hex);
  468. $low_part /= 0x100;
  469. }
  470. for ($i = 0; $i < 4; $i++) {
  471. $hex = $high_part % 0x100;
  472. $res .= pack('c', $hex);
  473. $high_part /= 0x100;
  474. }
  475. return $res;
  476. }
  477. /**
  478. * Returns a timestamp from an OLE container's date
  479. * @param integer $string A binary string with the encoded date
  480. * @return string The timestamp corresponding to the string
  481. * @access public
  482. * @static
  483. */
  484. function OLE2LocalDate($string)
  485. {
  486. if (strlen($string) != 8) {
  487. return new PEAR_Error("Expecting 8 byte string");
  488. }
  489. // factor used for separating numbers into 4 bytes parts
  490. $factor = pow(2,32);
  491. $high_part = 0;
  492. for ($i = 0; $i < 4; $i++) {
  493. list(, $high_part) = unpack('C', $string{(7 - $i)});
  494. if ($i < 3) {
  495. $high_part *= 0x100;
  496. }
  497. }
  498. $low_part = 0;
  499. for ($i = 4; $i < 8; $i++) {
  500. list(, $low_part) = unpack('C', $string{(7 - $i)});
  501. if ($i < 7) {
  502. $low_part *= 0x100;
  503. }
  504. }
  505. $big_date = ($high_part * $factor) + $low_part;
  506. // translate to seconds
  507. $big_date /= 10000000;
  508. // days from 1-1-1601 until the beggining of UNIX era
  509. $days = 134774;
  510. // translate to seconds from beggining of UNIX era
  511. $big_date -= $days * 24 * 3600;
  512. return floor($big_date);
  513. }
  514. }
  515. ?>