OLE.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. <?php
  2. /* vim: set expandtab tabstop=4 shiftwidth=4: */
  3. // +----------------------------------------------------------------------+
  4. // | PHP Version 4 |
  5. // +----------------------------------------------------------------------+
  6. // | Copyright (c) 1997-2002 The PHP Group |
  7. // +----------------------------------------------------------------------+
  8. // | This source file is subject to version 2.02 of the PHP license, |
  9. // | that is bundled with this package in the file LICENSE, and is |
  10. // | available at through the world-wide-web at |
  11. // | http://www.php.net/license/2_02.txt. |
  12. // | If you did not receive a copy of the PHP license and are unable to |
  13. // | obtain it through the world-wide-web, please send a note to |
  14. // | license@php.net so we can mail you a copy immediately. |
  15. // +----------------------------------------------------------------------+
  16. // | Author: Xavier Noguer <xnoguer@php.net> |
  17. // | Based on OLE::Storage_Lite by Kawai, Takanori |
  18. // +----------------------------------------------------------------------+
  19. //
  20. // $Id: OLE.php,v 1.15 2007/12/18 20:59:11 schmidt Exp $
  /**
   * Constants for OLE package
   */
  // PPS (directory entry) type codes. OLE::_readPpsWks() switches on these
  // values to choose which PPS class to instantiate, and OLE::isFile() /
  // OLE::isRoot() compare a PPS's Type against them.
  define('OLE_PPS_TYPE_ROOT', 5);
  define('OLE_PPS_TYPE_DIR', 1);
  define('OLE_PPS_TYPE_FILE', 2);
  // Not referenced by the OLE class itself.
  // NOTE(review): presumably the stream size (0x1000 = 4096 bytes) below which
  // data is stored in small blocks -- confirm against the code that uses it.
  define('OLE_DATA_SIZE_SMALL', 0x1000);
  // Not referenced by the OLE class itself.
  // NOTE(review): presumably the byte width of the 4-octet integers used
  // throughout the container format -- confirm against the code that uses it.
  define('OLE_LONG_INT_SIZE', 4);
  // Not referenced by the OLE class itself; 0x80 = 128, the same stride
  // OLE::_readPpsWks() uses when stepping from one directory entry to the next.
  define('OLE_PPS_SIZE', 0x80);
  /**
   * Array for storing OLE instances that are accessed from
   * OLE_ChainedBlockStream::stream_open().
   *
   * OLE::getStream() appends the current instance on every call and embeds
   * the resulting array key in the stream URL as "oleInstanceId".
   *
   * @var array
   */
  $GLOBALS['_OLE_INSTANCES'] = array();
  36. /**
  37. * OLE package base class.
  38. *
  39. * @category Structures
  40. * @package OLE
  41. * @author Xavier Noguer <xnoguer@php.net>
  42. * @author Christian Schmidt <schmidt@php.net>
  43. */
  44. class OLE extends PEAR
  45. {
  46. /**
  47. * The file handle for reading an OLE container
  48. * @var resource
  49. */
  50. var $_file_handle;
  51. /**
  52. * Array of PPS's found on the OLE container
  53. * @var array
  54. */
  55. var $_list;
  56. /**
  57. * Root directory of OLE container
  58. * @var OLE_PPS_Root
  59. */
  60. var $root;
  61. /**
  62. * Big Block Allocation Table
  63. * @var array (blockId => nextBlockId)
  64. */
  65. var $bbat;
  66. /**
  67. * Short Block Allocation Table
  68. * @var array (blockId => nextBlockId)
  69. */
  70. var $sbat;
  71. /**
  72. * Size of big blocks. This is usually 512.
  73. * @var int number of octets per block.
  74. */
  75. var $bigBlockSize;
  76. /**
  77. * Size of small blocks. This is usually 64.
  78. * @var int number of octets per block
  79. */
  80. var $smallBlockSize;
  /**
   * Creates a new OLE object with an empty PPS list.
   *
   * PHP 8 no longer treats a method named after its class as a constructor,
   * so the initialisation lives in __construct(). Without it, $_list would
   * stay uninitialised and ppsTotal() would call count() on null.
   *
   * The parent (PEAR) constructor is not invoked here, exactly as in the
   * PHP 4-style constructor this replaces.
   *
   * @access public
   */
  function __construct()
  {
      $this->_list = array();
  }

  /**
   * PHP 4-style constructor, kept so that code calling it explicitly
   * (for example a subclass doing $this->OLE()) keeps working.
   *
   * @access public
   * @see OLE::__construct()
   */
  function OLE()
  {
      $this->__construct();
  }
  /**
   * Destructor (using PEAR)
   * Just closes the file handle on the OLE file.
   *
   * The handle is only set once read() has opened a file, so it is checked
   * before closing: fclose() on a non-resource raises a warning on older PHP
   * versions and a TypeError on PHP 8. The property is cleared afterwards so
   * a second invocation is harmless.
   *
   * @access private
   */
  function _OLE()
  {
      if (is_resource($this->_file_handle)) {
          fclose($this->_file_handle);
      }
      $this->_file_handle = null;
  }
  /**
   * Reads an OLE container from the contents of the file given.
   *
   * Validates the 8-byte signature and the byte-order mark, then loads the
   * header fields, the Master / Big / Short Block Allocation Tables and
   * finally the directory (PPS) entries.
   *
   * The file is opened in binary mode. Its handle is stored on the object
   * only after the header has been validated; if validation fails the handle
   * is closed straight away and the object is left untouched. On success, any
   * handle, PPS list and root left over from an earlier read() are discarded
   * first, so entries of two containers are never mixed.
   *
   * @access public
   * @param string $file path of the OLE container to read
   * @return mixed true on success, PEAR_Error on failure
   */
  function read($file)
  {
      // "b" keeps the data untranslated on platforms that distinguish
      // between text and binary mode.
      $fh = @fopen($file, "rb");
      if (!$fh) {
          return $this->raiseError("Can't open file $file");
      }

      $signature = fread($fh, 8);
      if ("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" !== $signature) {
          fclose($fh);
          return $this->raiseError("File doesn't seem to be an OLE container.");
      }
      fseek($fh, 28);
      if (fread($fh, 2) !== "\xFE\xFF") {
          // This shouldn't be a problem in practice
          fclose($fh);
          return $this->raiseError("Only Little-Endian encoding is supported.");
      }

      // The header is valid: drop whatever a previous read() left behind and
      // adopt the new handle.
      if (is_resource($this->_file_handle)) {
          fclose($this->_file_handle);
      }
      $this->_file_handle = $fh;
      $this->_list = array();
      $this->root = null;

      // Size of blocks and short blocks in bytes (stored as powers of two)
      $this->bigBlockSize = pow(2, $this->_readInt2($fh));
      $this->smallBlockSize = pow(2, $this->_readInt2($fh));

      // Skip UID, revision number and version number
      fseek($fh, 44);
      // Number of blocks in Big Block Allocation Table
      $bbatBlockCount = $this->_readInt4($fh);

      // Root chain 1st block
      $directoryFirstBlockId = $this->_readInt4($fh);

      // Skip unused bytes
      fseek($fh, 56);
      // Streams shorter than this are stored using small blocks.
      // NOTE(review): bigBlockThreshold is not declared among the class
      // properties visible in this file; it is created dynamically here.
      $this->bigBlockThreshold = $this->_readInt4($fh);
      // Block id of first sector in Short Block Allocation Table
      $sbatFirstBlockId = $this->_readInt4($fh);
      // Number of blocks in Short Block Allocation Table
      $sbbatBlockCount = $this->_readInt4($fh);
      // Block id of first sector in Master Block Allocation Table
      $mbatFirstBlockId = $this->_readInt4($fh);
      // Number of blocks in Master Block Allocation Table
      $mbbatBlockCount = $this->_readInt4($fh);
      $this->bbat = array();

      // Remaining 4 * 109 bytes of current block is beginning of Master
      // Block Allocation Table
      $mbatBlocks = array();
      for ($i = 0; $i < 109; $i++) {
          $mbatBlocks[] = $this->_readInt4($fh);
      }

      // Read rest of Master Block Allocation Table (if any is left)
      $pos = $this->_getBlockOffset($mbatFirstBlockId);
      for ($i = 0; $i < $mbbatBlockCount; $i++) {
          fseek($fh, $pos);
          for ($j = 0; $j < $this->bigBlockSize / 4 - 1; $j++) {
              $mbatBlocks[] = $this->_readInt4($fh);
          }
          // Last block id in each block points to next block
          $pos = $this->_getBlockOffset($this->_readInt4($fh));
      }

      // Read Big Block Allocation Table according to chain specified by
      // $mbatBlocks
      for ($i = 0; $i < $bbatBlockCount; $i++) {
          $pos = $this->_getBlockOffset($mbatBlocks[$i]);
          fseek($fh, $pos);
          for ($j = 0; $j < $this->bigBlockSize / 4; $j++) {
              $this->bbat[] = $this->_readInt4($fh);
          }
      }

      // Read short block allocation table (SBAT). The stream is only opened
      // when the header announces at least one SBAT block, since nothing
      // would be read from it otherwise.
      $this->sbat = array();
      $shortBlockCount = $sbbatBlockCount * $this->bigBlockSize / 4;
      if ($shortBlockCount > 0) {
          $sbatFh = $this->getStream($sbatFirstBlockId);
          if (!$sbatFh) {
              return $this->raiseError("Can't read short block allocation table.");
          }
          for ($blockId = 0; $blockId < $shortBlockCount; $blockId++) {
              $this->sbat[$blockId] = $this->_readInt4($sbatFh);
          }
          fclose($sbatFh);
      }

      // Load the directory entries and pass on a failure instead of
      // reporting success unconditionally.
      $result = $this->_readPpsWks($directoryFirstBlockId);
      if (is_a($result, 'PEAR_Error')) {
          return $result;
      }

      return true;
  }
  /**
   * Converts a big-block id into a byte offset within the container file.
   *
   * The header occupies the space of one whole block in front of block 0,
   * so block n starts at (n + 1) * bigBlockSize. For the usual 512-byte
   * blocks this equals the former "512 + n * bigBlockSize"; for containers
   * with larger blocks (4096 bytes) the fixed 512-byte offset pointed into
   * the zero padding that follows the header.
   *
   * @param int $blockId block id
   * @return int byte offset from beginning of file
   * @access private
   */
  function _getBlockOffset($blockId)
  {
      return ($blockId + 1) * $this->bigBlockSize;
  }
  /**
   * Returns a stream for use with fread() etc. External callers should
   * use OLE_PPS_File::getStream().
   *
   * The stream is opened through the "ole-chainedblockstream" wrapper,
   * which is registered on first use. The wrapper is told which OLE
   * instance and which block chain to use through the URL:
   *   ole-chainedblockstream://oleInstanceId=<key>&blockId=<id>[&size=<n>]
   * "size" is only added when an OLE_PPS is passed.
   *
   * @param int|PPS $blockIdOrPps block id or PPS
   * @return resource read-only stream (whatever fopen() returns)
   */
  function getStream($blockIdOrPps)
  {
      include_once 'OLE/ChainedBlockStream.php';

      static $isRegistered = false;
      if (!$isRegistered) {
          stream_wrapper_register('ole-chainedblockstream',
                                  'OLE_ChainedBlockStream');
          $isRegistered = true;
      }

      // Store current instance in global array, so that it can be accessed
      // in OLE_ChainedBlockStream::stream_open().
      // NOTE(review): nothing in this file removes the entry again; that is
      // presumably done by the stream wrapper -- confirm.
      $GLOBALS['_OLE_INSTANCES'][] = $this;

      // end() takes its argument by reference, so the key list has to live
      // in a real variable rather than being passed as a temporary.
      $instanceKeys = array_keys($GLOBALS['_OLE_INSTANCES']);
      $instanceId = end($instanceKeys);

      $path = 'ole-chainedblockstream://oleInstanceId=' . $instanceId;
      if (is_a($blockIdOrPps, 'OLE_PPS')) {
          $path .= '&blockId=' . $blockIdOrPps->_StartBlock;
          $path .= '&size=' . $blockIdOrPps->Size;
      } else {
          $path .= '&blockId=' . $blockIdOrPps;
      }
      return fopen($path, 'r');
  }
  /**
   * Reads one octet from the stream and interprets it as a signed char.
   *
   * @param resource $fh file handle, positioned at the octet to read
   * @return int value in the range -128..127
   * @access private
   */
  function _readInt1($fh)
  {
      $octet = fread($fh, 1);
      list(, $value) = unpack("c", $octet);

      return $value;
  }
  /**
   * Reads two octets from the stream as an unsigned little-endian short.
   *
   * @param resource $fh file handle, positioned at the first octet
   * @return int value in the range 0..65535
   * @access private
   */
  function _readInt2($fh)
  {
      $octets = fread($fh, 2);
      list(, $value) = unpack("v", $octets);

      return $value;
  }
  /**
   * Reads a little-endian long (4 octets) and returns it as a signed
   * 32-bit value.
   *
   * unpack("V") yields 0..4294967295 on 64-bit builds of PHP but wraps to
   * negative numbers on 32-bit builds. The rest of this class compares the
   * "no entry" marker against -1, so values with the top bit set are folded
   * into the negative range to give the same result on every platform
   * (0xFFFFFFFF => -1, 0xFFFFFFFE => -2, ...).
   *
   * @param resource $fh file handle, positioned at the first octet
   * @return int value in the range -2147483648..2147483647
   * @access private
   */
  function _readInt4($fh)
  {
      list(, $tmp) = unpack("V", fread($fh, 4));
      // Only ever true on 64-bit builds; on 32-bit builds the value is
      // already negative.
      if ($tmp >= 0x80000000) {
          $tmp -= 0x100000000;
      }
      return $tmp;
  }
  /**
   * Gets information about all PPS's on the OLE container from the PPS WK's
   * and creates an OLE_PPS object for each one.
   *
   * Directory entries are 128 bytes each and are read one after another
   * until either the tree reachable from the root is complete or the
   * directory stream runs out of data.
   *
   * Each PPS is stored in $this->_list under its position in the directory,
   * and that position is also its "No". PrevPps / NextPps / DirPps refer to
   * entries by that position, so entries of an unknown type are skipped
   * without shifting the numbering of the ones that follow.
   *
   * @access private
   * @param integer $blockId the block id of the first block
   * @return mixed true on success, PEAR_Error on failure
   */
  function _readPpsWks($blockId)
  {
      $fh = $this->getStream($blockId);
      if (!$fh) {
          return $this->raiseError("Can't read OLE directory stream.");
      }
      if (!is_array($this->_list)) {
          $this->_list = array();
      }

      for ($index = 0; ; $index++) {
          fseek($fh, $index * 128, SEEK_SET);
          $nameUtf16 = fread($fh, 64);
          if ($nameUtf16 === false || strlen($nameUtf16) < 64) {
              // End of the directory stream: stop instead of looping forever
              // on a truncated or incomplete directory.
              break;
          }
          // The stored length includes the 2-byte terminator; never let the
          // length passed to substr() go negative.
          $nameLength = $this->_readInt2($fh);
          $nameUtf16 = substr($nameUtf16, 0, max(0, $nameLength - 2));
          // Simple conversion from UTF-16LE to ISO-8859-1
          $name = str_replace("\x00", "", $nameUtf16);
          $type = $this->_readInt1($fh);
          switch ($type) {
          case OLE_PPS_TYPE_ROOT:
              require_once 'OLE/PPS/Root.php';
              $pps = new OLE_PPS_Root(null, null, array());
              $this->root = $pps;
              break;
          case OLE_PPS_TYPE_DIR:
              $pps = new OLE_PPS(null, null, null, null, null,
                                 null, null, null, null, array());
              break;
          case OLE_PPS_TYPE_FILE:
              require_once 'OLE/PPS/File.php';
              $pps = new OLE_PPS_File($name);
              break;
          default:
              // Unknown entry type: move on to the next directory entry.
              // A bare "continue" would only leave the switch and re-use
              // the PPS object of the previous iteration.
              continue 2;
          }
          // Skip the one byte that follows the type field
          fseek($fh, 1, SEEK_CUR);
          $pps->Type    = $type;
          $pps->Name    = $name;
          $pps->PrevPps = $this->_readInt4($fh);
          $pps->NextPps = $this->_readInt4($fh);
          $pps->DirPps  = $this->_readInt4($fh);
          // Skip 20 bytes that are not used by this reader
          fseek($fh, 20, SEEK_CUR);
          $pps->Time1st = OLE::OLE2LocalDate(fread($fh, 8));
          $pps->Time2nd = OLE::OLE2LocalDate(fread($fh, 8));
          $pps->_StartBlock = $this->_readInt4($fh);
          $pps->Size = $this->_readInt4($fh);
          $pps->No = $index;
          $this->_list[$index] = $pps;

          // check if the PPS tree (starting from root) is complete
          if (isset($this->root) &&
              $this->_ppsTreeComplete($this->root->No)) {

              break;
          }
      }
      fclose($fh);

      // Initialize $pps->children on directories
      foreach ($this->_list as $pps) {
          if ($pps->Type == OLE_PPS_TYPE_DIR || $pps->Type == OLE_PPS_TYPE_ROOT) {
              $nos = array($pps->DirPps);
              $pps->children = array();
              // Entries already collected for this directory, so that
              // circular Prev/Next links in a damaged file cannot loop.
              $seen = array();
              while ($nos) {
                  $no = array_pop($nos);
                  // -1 marks "no entry"; ids that were never read (or were
                  // visited already) are ignored as well.
                  if ($no == -1 || isset($seen[$no]) || !isset($this->_list[$no])) {
                      continue;
                  }
                  $seen[$no] = true;
                  $childPps = $this->_list[$no];
                  $nos[] = $childPps->PrevPps;
                  $nos[] = $childPps->NextPps;
                  $pps->children[] = $childPps;
              }
          }
      }

      return true;
  }
  /**
   * Tells whether every PPS reachable from the given one has been read.
   *
   * The PPS itself must be present in the list, and each of its three links
   * (previous sibling, next sibling, first child) must either be -1 or lead
   * to a PPS for which the same holds. The starting PPS does not have to be
   * the root.
   *
   * @access private
   * @param integer $index The index of the PPS from which we are checking
   * @return boolean Whether the PPS tree for the given PPS is complete
   */
  function _ppsTreeComplete($index)
  {
      if (!isset($this->_list[$index])) {
          return false;
      }
      $node = $this->_list[$index];
      if (!$node) {
          return false;
      }

      // Checked in the same order as before: previous, next, directory.
      $links = array($node->PrevPps, $node->NextPps, $node->DirPps);
      foreach ($links as $linkedIndex) {
          if ($linkedIndex != -1 && !$this->_ppsTreeComplete($linkedIndex)) {
              return false;
          }
      }

      return true;
  }
  /**
   * Tells whether the PPS at the given index is a File PPS.
   * An index for which no PPS exists yields false.
   *
   * @param integer $index The index for the PPS
   * @return bool true if it's a File PPS, false otherwise
   * @access public
   */
  function isFile($index)
  {
      return isset($this->_list[$index])
          && $this->_list[$index]->Type == OLE_PPS_TYPE_FILE;
  }
  /**
   * Tells whether the PPS at the given index is a Root PPS.
   * An index for which no PPS exists yields false.
   *
   * @param integer $index The index for the PPS.
   * @return bool true if it's a Root PPS, false otherwise
   * @access public
   */
  function isRoot($index)
  {
      return isset($this->_list[$index])
          && $this->_list[$index]->Type == OLE_PPS_TYPE_ROOT;
  }
  /**
   * Reports how many PPS's are currently held in the PPS list.
   *
   * @return integer The total number of PPS's found in the OLE container
   * @access public
   */
  function ppsTotal()
  {
      $ppsList = $this->_list;

      return count($ppsList);
  }
  /**
   * Reads a slice of the data belonging to a PPS.
   *
   * An empty string is returned when there is no PPS at the given index or
   * when the start position is negative or not smaller than the PPS's Size.
   *
   * @param integer $index    The index for the PPS
   * @param integer $position The position from which to start reading
   *                          (relative to the PPS)
   * @param integer $length   The amount of bytes to read (at most)
   * @return string The binary string containing the data requested
   * @access public
   * @see OLE_PPS_File::getStream()
   */
  function getData($index, $position, $length)
  {
      if (!isset($this->_list[$index])) {
          return '';
      }

      $pps = $this->_list[$index];
      if ($position < 0 || $position >= $pps->Size) {
          // start position lies outside the stream
          return '';
      }

      $stream = $this->getStream($pps);
      $contents = stream_get_contents($stream, $length, $position);
      fclose($stream);

      return $contents;
  }
  /**
   * Returns the Size recorded for a PPS, or 0 when no PPS exists at the
   * given index.
   *
   * @param integer $index The index for the PPS
   * @return integer The amount of bytes in data the PPS has
   * @access public
   */
  function getDataLength($index)
  {
      return isset($this->_list[$index]) ? $this->_list[$index]->Size : 0;
  }
  /**
   * Utility function to transform ASCII text to Unicode
   *
   * Every byte of the input is followed by a zero byte, which turns
   * single-byte text into its 2-byte little-endian form. An empty string
   * stays empty.
   *
   * NOTE(review): documented as static but declared without the keyword;
   * a static call from outside an OLE instance fails on PHP 8.
   *
   * @access public
   * @static
   * @param string $ascii The ASCII string to transform
   * @return string The string in Unicode
   */
  function Asc2Ucs($ascii)
  {
      $rawname = '';
      $length = strlen($ascii);
      for ($i = 0; $i < $length; $i++) {
          // Square brackets: the curly-brace offset syntax no longer parses
          // on PHP 8.
          $rawname .= $ascii[$i] . "\x00";
      }
      return $rawname;
  }
  /**
   * Utility function
   * Returns a string for the OLE container with the date given
   *
   * The result is an 8-byte little-endian count of 100-nanosecond units
   * since 1601-01-01. The local wall-clock time of $date is stored as if it
   * were GMT (the local date parts are handed to gmmktime()). Without a date,
   * eight zero bytes are returned.
   *
   * The value is split with exact floating-point arithmetic rather than the
   * integer "%" operator, which is deprecated for fractional floats since
   * PHP 8.1 and cannot hold a full 32-bit half on 32-bit builds of PHP.
   *
   * NOTE(review): documented as static but declared without the keyword;
   * a static call from outside an OLE instance fails on PHP 8.
   *
   * @access public
   * @static
   * @param integer $date A timestamp
   * @return string The string for the OLE container
   */
  function LocalDate2OLE($date = null)
  {
      if (!isset($date)) {
          return "\x00\x00\x00\x00\x00\x00\x00\x00";
      }

      // factor used for separating numbers into 4 bytes parts (2^32)
      $factor = 4294967296.0;

      // days from 1-1-1601 until the beginning of UNIX era
      $days = 134774;

      // local date parts in the order hour, minute, second, month, day, year
      list($hour, $minute, $second, $month, $day, $year) =
          array_map('intval', explode(' ', date('H i s m d Y', $date)));

      // calculate seconds
      $seconds = $days * 24 * 3600 +
          gmmktime($hour, $minute, $second, $month, $day, $year);

      // multiply just to make MS happy (seconds -> 100 ns units); whole
      // seconds stay exactly representable as a float after this step
      $big_date = (float) $seconds * 10000000.0;

      $high_part = floor($big_date / $factor);
      // lower 4 bytes
      $low_part = $big_date - $high_part * $factor;

      // Emit both halves least significant byte first, low half before high
      $res = '';
      foreach (array($low_part, $high_part) as $part) {
          for ($i = 0; $i < 4; $i++) {
              $res .= chr((int) fmod($part, 256));
              $part = floor($part / 256);
          }
      }
      return $res;
  }
  /**
   * Returns a timestamp from an OLE container's date
   *
   * The input is an 8-byte little-endian count of 100-nanosecond units
   * since 1601-01-01; the result is the matching number of whole seconds
   * relative to the UNIX epoch.
   *
   * Both 4-byte halves are accumulated byte by byte, most significant byte
   * first. Previously each byte overwrote the running value, so only one
   * byte of each half took part in the result.
   *
   * NOTE(review): documented as static but declared without the keyword;
   * a static call from outside an OLE instance fails on PHP 8.
   *
   * @param string $string A binary string with the encoded date
   * @return mixed The timestamp (float, whole seconds) corresponding to the
   *               string, or a PEAR_Error if it is not exactly 8 bytes long
   * @access public
   * @static
   */
  function OLE2LocalDate($string)
  {
      if (strlen($string) != 8) {
          return new PEAR_Error("Expecting 8 byte string");
      }

      // factor used for separating numbers into 4 bytes parts (2^32)
      $factor = 4294967296.0;

      // Bytes 7..4 form the high half, bytes 3..0 the low half. Floats are
      // used so a full 32-bit half also fits on 32-bit builds of PHP.
      $high_part = 0.0;
      $low_part = 0.0;
      for ($i = 0; $i < 4; $i++) {
          $high_part = $high_part * 0x100 + ord($string[7 - $i]);
          $low_part = $low_part * 0x100 + ord($string[3 - $i]);
      }

      $big_date = ($high_part * $factor) + $low_part;
      // translate to seconds
      $big_date /= 10000000;

      // days from 1-1-1601 until the beginning of UNIX era
      $days = 134774;

      // translate to seconds from beginning of UNIX era
      $big_date -= $days * 24 * 3600;
      return floor($big_date);
  }
  513. }
  514. ?>