OLE.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. <?php
  2. namespace PhpOffice\PhpSpreadsheet\Shared;
  3. // vim: set expandtab tabstop=4 shiftwidth=4:
  4. // +----------------------------------------------------------------------+
  5. // | PHP Version 4 |
  6. // +----------------------------------------------------------------------+
  7. // | Copyright (c) 1997-2002 The PHP Group |
  8. // +----------------------------------------------------------------------+
  9. // | This source file is subject to version 2.02 of the PHP license, |
  10. // | that is bundled with this package in the file LICENSE, and is |
  11. // | available at through the world-wide-web at |
  12. // | http://www.php.net/license/2_02.txt. |
  13. // | If you did not receive a copy of the PHP license and are unable to |
  14. // | obtain it through the world-wide-web, please send a note to |
  15. // | license@php.net so we can mail you a copy immediately. |
  16. // +----------------------------------------------------------------------+
  17. // | Author: Xavier Noguer <xnoguer@php.net> |
  18. // | Based on OLE::Storage_Lite by Kawai, Takanori |
  19. // +----------------------------------------------------------------------+
  20. //
  21. use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
  22. use PhpOffice\PhpSpreadsheet\Shared\OLE\ChainedBlockStream;
  23. use PhpOffice\PhpSpreadsheet\Shared\OLE\PPS\Root;
  24. /*
  25. * Registry of OLE instances. OLE::getStream() appends the current instance
  26. * here and embeds its key in the stream path it opens (oleInstanceId=...).
  27. *
  28. * @var array
  29. */
  30. $GLOBALS['_OLE_INSTANCES'] = [];
  /**
   * OLE package base class.
   *
   * read() parses the header of an OLE container, loads the big and short
   * block allocation tables and builds the list of PPS entries found in the
   * directory stream. The remaining methods expose those entries, plus a few
   * static helpers for encoding names and timestamps.
   *
   * @author Xavier Noguer <xnoguer@php.net>
   * @author Christian Schmidt <schmidt@php.net>
   *
   * @category PhpSpreadsheet
   */
  class OLE
  {
      const OLE_PPS_TYPE_ROOT = 5;
      const OLE_PPS_TYPE_DIR = 1;
      const OLE_PPS_TYPE_FILE = 2;
      const OLE_DATA_SIZE_SMALL = 0x1000;
      const OLE_LONG_INT_SIZE = 4;
      const OLE_PPS_SIZE = 0x80;

      /**
       * The file handle for reading an OLE container.
       *
       * @var resource
       */
      public $_file_handle;

      /**
       * Array of PPS's found on the OLE container.
       *
       * @var array
       */
      public $_list = [];

      /**
       * Root directory of OLE container.
       *
       * @var Root
       */
      public $root;

      /**
       * Big Block Allocation Table.
       *
       * @var array (blockId => nextBlockId)
       */
      public $bbat;

      /**
       * Short Block Allocation Table.
       *
       * @var array (blockId => nextBlockId)
       */
      public $sbat;

      /**
       * Size of big blocks. This is usually 512.
       *
       * @var int number of octets per block
       */
      public $bigBlockSize;

      /**
       * Size of small blocks. This is usually 64.
       *
       * @var int number of octets per block
       */
      public $smallBlockSize;

      /**
       * Threshold for big blocks.
       *
       * @var int
       */
      public $bigBlockThreshold;

      /**
       * Reads an OLE container from the contents of the file given.
       *
       * On success the allocation tables, block sizes, root entry and PPS list
       * of this instance are populated and the file handle is kept open in
       * $_file_handle for later stream reads.
       *
       * @param string $file path of the file to read
       *
       * @throws ReaderException if the file cannot be opened, is not an OLE
       *                         container, or its directory cannot be parsed
       *
       * @return bool always true; failures are reported by exception
       */
      public function read($file)
      {
          // Binary mode: the container is raw bytes, never text.
          $fh = fopen($file, 'rb');
          if (!$fh) {
              throw new ReaderException("Can't open file $file");
          }

          $signature = fread($fh, 8);
          if ("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" != $signature) {
              // Do not leak the handle of a file we are rejecting.
              fclose($fh);

              throw new ReaderException("File doesn't seem to be an OLE container.");
          }
          fseek($fh, 28);
          if (fread($fh, 2) != "\xFE\xFF") {
              fclose($fh);

              // This shouldn't be a problem in practice
              throw new ReaderException('Only Little-Endian encoding is supported.');
          }
          $this->_file_handle = $fh;

          // Size of blocks and short blocks in bytes (stored as powers of two)
          $this->bigBlockSize = pow(2, self::_readInt2($fh));
          $this->smallBlockSize = pow(2, self::_readInt2($fh));

          // Continue with the header fields that start at offset 44
          fseek($fh, 44);
          // Number of blocks in Big Block Allocation Table
          $bbatBlockCount = self::_readInt4($fh);

          // Root chain 1st block
          $directoryFirstBlockId = self::_readInt4($fh);

          // Skip unused bytes
          fseek($fh, 56);
          // Streams shorter than this are stored using small blocks
          $this->bigBlockThreshold = self::_readInt4($fh);
          // Block id of first sector in Short Block Allocation Table
          $sbatFirstBlockId = self::_readInt4($fh);
          // Number of blocks in Short Block Allocation Table
          $sbbatBlockCount = self::_readInt4($fh);
          // Block id of first sector in Master Block Allocation Table
          $mbatFirstBlockId = self::_readInt4($fh);
          // Number of blocks in Master Block Allocation Table
          $mbbatBlockCount = self::_readInt4($fh);
          $this->bbat = [];

          // Remaining 4 * 109 bytes of current block is beginning of Master
          // Block Allocation Table
          $mbatBlocks = [];
          for ($i = 0; $i < 109; ++$i) {
              $mbatBlocks[] = self::_readInt4($fh);
          }

          // Read rest of Master Block Allocation Table (if any is left)
          $pos = $this->_getBlockOffset($mbatFirstBlockId);
          for ($i = 0; $i < $mbbatBlockCount; ++$i) {
              fseek($fh, $pos);
              for ($j = 0; $j < $this->bigBlockSize / 4 - 1; ++$j) {
                  $mbatBlocks[] = self::_readInt4($fh);
              }
              // Last block id in each block points to next block
              $pos = $this->_getBlockOffset(self::_readInt4($fh));
          }

          // Read Big Block Allocation Table according to chain specified by $mbatBlocks
          for ($i = 0; $i < $bbatBlockCount; ++$i) {
              $pos = $this->_getBlockOffset($mbatBlocks[$i]);
              fseek($fh, $pos);
              for ($j = 0; $j < $this->bigBlockSize / 4; ++$j) {
                  $this->bbat[] = self::_readInt4($fh);
              }
          }

          // Read short block allocation table (SBAT)
          $this->sbat = [];
          $shortBlockCount = $sbbatBlockCount * $this->bigBlockSize / 4;
          $sbatFh = $this->getStream($sbatFirstBlockId);
          for ($blockId = 0; $blockId < $shortBlockCount; ++$blockId) {
              $this->sbat[$blockId] = self::_readInt4($sbatFh);
          }
          fclose($sbatFh);

          $this->_readPpsWks($directoryFirstBlockId);

          return true;
      }

      /**
       * Converts a big block id into a byte offset from the beginning of the
       * file: 512 plus the block id times the big block size.
       *
       * @param int $blockId id of the big block
       *
       * @return int byte offset of the block
       */
      public function _getBlockOffset($blockId)
      {
          return 512 + $blockId * $this->bigBlockSize;
      }

      /**
       * Returns a stream for use with fread() etc. External callers should
       * use \PhpOffice\PhpSpreadsheet\Shared\OLE\PPS\File::getStream().
       *
       * The stream is opened through the 'ole-chainedblockstream' wrapper, which
       * is registered on first use. When a PPS is passed, its start block and
       * size are both encoded in the stream path; a plain block id is passed
       * without a size.
       *
       * @param int|OLE\PPS $blockIdOrPps block id or PPS
       *
       * @return resource read-only stream
       */
      public function getStream($blockIdOrPps)
      {
          static $isRegistered = false;
          if (!$isRegistered) {
              stream_wrapper_register('ole-chainedblockstream', ChainedBlockStream::class);
              $isRegistered = true;
          }

          // Store current instance in global array, so that the stream wrapper
          // can find it again through the id embedded in the path.
          // NOTE(review): nothing in this class removes the entry again.
          $GLOBALS['_OLE_INSTANCES'][] = $this;
          // end() takes its argument by reference, so it needs a real variable
          // rather than the return value of array_keys().
          $instanceKeys = array_keys($GLOBALS['_OLE_INSTANCES']);
          $instanceId = end($instanceKeys);

          $path = 'ole-chainedblockstream://oleInstanceId=' . $instanceId;
          if ($blockIdOrPps instanceof OLE\PPS) {
              $path .= '&blockId=' . $blockIdOrPps->startBlock;
              $path .= '&size=' . $blockIdOrPps->Size;
          } else {
              $path .= '&blockId=' . $blockIdOrPps;
          }

          return fopen($path, 'r');
      }

      /**
       * Reads a signed char.
       *
       * @param resource $fh file handle
       *
       * @return int
       */
      private static function _readInt1($fh)
      {
          list(, $tmp) = unpack('c', fread($fh, 1));

          return $tmp;
      }

      /**
       * Reads an unsigned short (2 octets, little-endian).
       *
       * @param resource $fh file handle
       *
       * @return int
       */
      private static function _readInt2($fh)
      {
          list(, $tmp) = unpack('v', fread($fh, 2));

          return $tmp;
      }

      /**
       * Reads a little-endian long (4 octets) as a signed 32-bit value.
       *
       * unpack('V') yields an unsigned value on 64-bit PHP, but this class
       * compares block and PPS ids against negative markers such as -1, so
       * values with the top bit set are mapped to their negative equivalent.
       * That matches what 32-bit PHP returns natively.
       *
       * @param resource $fh file handle
       *
       * @return int
       */
      private static function _readInt4($fh)
      {
          list(, $tmp) = unpack('V', fread($fh, 4));
          if ($tmp >= 0x80000000) {
              $tmp -= 0x100000000;
          }

          return $tmp;
      }

      /**
       * Gets information about all PPS's on the OLE container from the PPS WK's
       * and creates a PPS object for each one.
       *
       * Entries are read in steps of OLE_PPS_SIZE bytes until the tree reachable
       * from the root entry is complete. Afterwards the children of every
       * directory and root entry are collected into $pps->children.
       *
       * @param int $blockId the block id of the first block
       *
       * @throws ReaderException if the directory ends before the tree is
       *                         complete or contains an unsupported entry type
       *
       * @return bool always true; failures are reported by exception
       */
      public function _readPpsWks($blockId)
      {
          $fh = $this->getStream($blockId);
          for ($pos = 0; true; $pos += self::OLE_PPS_SIZE) {
              fseek($fh, $pos, SEEK_SET);
              $nameUtf16 = fread($fh, 64);
              if (strlen((string) $nameUtf16) < 64) {
                  // Ran off the end of the directory before the tree was complete.
                  fclose($fh);

                  throw new ReaderException('Unexpected end of OLE directory stream');
              }
              $nameLength = self::_readInt2($fh);
              $nameUtf16 = substr($nameUtf16, 0, $nameLength - 2);
              // Simple conversion from UTF-16LE to ISO-8859-1
              $name = str_replace("\x00", '', $nameUtf16);
              $type = self::_readInt1($fh);
              switch ($type) {
                  case self::OLE_PPS_TYPE_ROOT:
                      $pps = new OLE\PPS\Root(null, null, []);
                      $this->root = $pps;

                      break;
                  case self::OLE_PPS_TYPE_DIR:
                      $pps = new OLE\PPS(null, null, null, null, null, null, null, null, null, []);

                      break;
                  case self::OLE_PPS_TYPE_FILE:
                      $pps = new OLE\PPS\File($name);

                      break;
                  default:
                      // A bare "continue" inside switch only leaves the switch, so
                      // the previous entry would be overwritten and appended twice.
                      // Skipping the slot instead would shift the indices that
                      // PrevPps/NextPps/DirPps refer to, so reject the file.
                      fclose($fh);

                      throw new ReaderException('Unsupported PPS type');
              }
              // Skip the byte that follows the type
              fseek($fh, 1, SEEK_CUR);
              $pps->Type = $type;
              $pps->Name = $name;
              $pps->PrevPps = self::_readInt4($fh);
              $pps->NextPps = self::_readInt4($fh);
              $pps->DirPps = self::_readInt4($fh);
              fseek($fh, 20, SEEK_CUR);
              $pps->Time1st = self::OLE2LocalDate(fread($fh, 8));
              $pps->Time2nd = self::OLE2LocalDate(fread($fh, 8));
              $pps->startBlock = self::_readInt4($fh);
              $pps->Size = self::_readInt4($fh);
              $pps->No = count($this->_list);
              $this->_list[] = $pps;

              // check if the PPS tree (starting from root) is complete
              if (isset($this->root) && $this->_ppsTreeComplete($this->root->No)) {
                  break;
              }
          }
          fclose($fh);

          // Initialize $pps->children on directories
          foreach ($this->_list as $pps) {
              if ($pps->Type == self::OLE_PPS_TYPE_DIR || $pps->Type == self::OLE_PPS_TYPE_ROOT) {
                  $nos = [$pps->DirPps];
                  $pps->children = [];
                  while (!empty($nos)) {
                      $no = array_pop($nos);
                      // -1 marks "no entry"; also skip indices that were never
                      // read, which would otherwise push nulls here forever.
                      if ($no != -1 && isset($this->_list[$no])) {
                          $childPps = $this->_list[$no];
                          $nos[] = $childPps->PrevPps;
                          $nos[] = $childPps->NextPps;
                          $pps->children[] = $childPps;
                      }
                  }
              }
          }

          return true;
      }

      /**
       * It checks whether the PPS tree is complete (all PPS's read)
       * starting with the given PPS (not necessarily root).
       *
       * A PPS is complete when it has been read and each of its PrevPps,
       * NextPps and DirPps links is either -1 or itself complete.
       *
       * @param int $index The index of the PPS from which we are checking
       *
       * @return bool Whether the PPS tree for the given PPS is complete
       */
      public function _ppsTreeComplete($index)
      {
          if (!isset($this->_list[$index])) {
              return false;
          }
          $pps = $this->_list[$index];

          return ($pps->PrevPps == -1 || $this->_ppsTreeComplete($pps->PrevPps))
              && ($pps->NextPps == -1 || $this->_ppsTreeComplete($pps->NextPps))
              && ($pps->DirPps == -1 || $this->_ppsTreeComplete($pps->DirPps));
      }

      /**
       * Checks whether a PPS is a File PPS or not.
       * If there is no PPS for the index given, it will return false.
       *
       * @param int $index The index for the PPS
       *
       * @return bool true if it's a File PPS, false otherwise
       */
      public function isFile($index)
      {
          if (isset($this->_list[$index])) {
              return $this->_list[$index]->Type == self::OLE_PPS_TYPE_FILE;
          }

          return false;
      }

      /**
       * Checks whether a PPS is a Root PPS or not.
       * If there is no PPS for the index given, it will return false.
       *
       * @param int $index the index for the PPS
       *
       * @return bool true if it's a Root PPS, false otherwise
       */
      public function isRoot($index)
      {
          if (isset($this->_list[$index])) {
              return $this->_list[$index]->Type == self::OLE_PPS_TYPE_ROOT;
          }

          return false;
      }

      /**
       * Gives the total number of PPS's found in the OLE container.
       *
       * @return int The total number of PPS's found in the OLE container
       */
      public function ppsTotal()
      {
          return count($this->_list);
      }

      /**
       * Gets data from a PPS
       * If there is no PPS for the index given, or the position lies outside
       * the PPS, it will return an empty string.
       *
       * @param int $index The index for the PPS
       * @param int $position The position from which to start reading
       *                          (relative to the PPS)
       * @param int $length The amount of bytes to read (at most)
       *
       * @return string The binary string containing the data requested
       */
      public function getData($index, $position, $length)
      {
          // if position is not valid return empty string
          if (!isset($this->_list[$index]) || ($position >= $this->_list[$index]->Size) || ($position < 0)) {
              return '';
          }
          $fh = $this->getStream($this->_list[$index]);
          $data = stream_get_contents($fh, $length, $position);
          fclose($fh);

          return $data;
      }

      /**
       * Gets the data length from a PPS
       * If there is no PPS for the index given, it will return 0.
       *
       * @param int $index The index for the PPS
       *
       * @return int The amount of bytes in data the PPS has
       */
      public function getDataLength($index)
      {
          if (isset($this->_list[$index])) {
              return $this->_list[$index]->Size;
          }

          return 0;
      }

      /**
       * Utility function to transform ASCII text to 2-byte little-endian
       * characters by appending a NUL byte to every input byte.
       *
       * @param string $ascii The ASCII string to transform
       *
       * @return string The widened string (twice as long as the input)
       */
      public static function ascToUcs($ascii)
      {
          $rawname = '';
          $iMax = strlen($ascii);
          for ($i = 0; $i < $iMax; ++$i) {
              $rawname .= $ascii[$i] . "\x00";
          }

          return $rawname;
      }

      /**
       * Utility function
       * Returns a string for the OLE container with the date given.
       *
       * The result is the 8-byte little-endian count of 100-nanosecond units
       * since 1601-01-01. The timestamp's fields are taken with date(), i.e. in
       * the default timezone, and then encoded as if they were GMT.
       *
       * @param null|int $date A timestamp, or null for "no date"
       *
       * @return string The 8-byte string for the OLE container (all zero bytes
       *                when $date is null)
       */
      public static function localDateToOLE($date)
      {
          if (!isset($date)) {
              return "\x00\x00\x00\x00\x00\x00\x00\x00";
          }

          // factor used for separating numbers into 4 bytes parts
          $factor = pow(2, 32);

          // days from 1-1-1601 until the beginning of UNIX era
          $days = 134774;
          // calculate seconds
          $big_date = $days * 24 * 3600 + gmmktime(
              (int) date('H', $date),
              (int) date('i', $date),
              (int) date('s', $date),
              (int) date('m', $date),
              (int) date('d', $date),
              (int) date('Y', $date)
          );
          // convert seconds to 100-nanosecond units
          $big_date *= 10000000;

          // upper 4 bytes
          $high_part = floor($big_date / $factor);
          // lower 4 bytes
          $low_part = floor((($big_date / $factor) - $high_part) * $factor);

          // Emit each half least-significant byte first. fmod()/floor() keep the
          // arithmetic on whole floats, so no fractional float is ever converted
          // to int implicitly (deprecated since PHP 8.1).
          $res = '';
          foreach ([$low_part, $high_part] as $part) {
              for ($i = 0; $i < 4; ++$i) {
                  $res .= chr((int) fmod($part, 0x100));
                  $part = floor($part / 0x100);
              }
          }

          return $res;
      }

      /**
       * Returns a timestamp from an OLE container's date.
       *
       * @param string $string A binary string (8 bytes) with the encoded date
       *
       * @throws ReaderException if $string is not exactly 8 bytes long
       *
       * @return float The UNIX timestamp corresponding to the string, rounded
       *               down to whole seconds
       */
      public static function OLE2LocalDate($string)
      {
          if (strlen($string) != 8) {
              throw new ReaderException('Expecting 8 byte string');
          }

          // factor used for separating numbers into 4 bytes parts
          $factor = pow(2, 32);
          list(, $high_part) = unpack('V', substr($string, 4, 4));
          list(, $low_part) = unpack('V', substr($string, 0, 4));
          // On 32-bit PHP unpack('V') can return negative values; make both
          // halves unsigned before combining them.
          if ($high_part < 0) {
              $high_part += $factor;
          }
          if ($low_part < 0) {
              $low_part += $factor;
          }

          $big_date = ($high_part * $factor) + $low_part;
          // translate to seconds
          $big_date /= 10000000;

          // days from 1-1-1601 until the beginning of UNIX era
          $days = 134774;

          // translate to seconds from beginning of UNIX era
          $big_date -= $days * 24 * 3600;

          return floor($big_date);
      }
  }