OLERead.php 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. <?php
  2. namespace PhpOffice\PhpSpreadsheet\Shared;
  3. use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
  /**
   * Minimal reader for OLE2 compound documents (the container used by BIFF5/BIFF8 .xls files).
   *
   * After read() the directory entries are available internally and the indexes of the
   * well-known streams are exposed through $wrkbook, $summaryInformation and
   * $documentSummaryInformation; pass one of those indexes to getStream() to obtain the raw bytes.
   */
  class OLERead
  {
      /**
       * Complete contents of the file being read.
       *
       * @var string
       */
      private $data = '';

      // Size of a sector = 512 bytes
      const BIG_BLOCK_SIZE = 0x200;

      // Size of a short sector = 64 bytes
      const SMALL_BLOCK_SIZE = 0x40;

      // Size of a directory entry always = 128 bytes
      const PROPERTY_STORAGE_BLOCK_SIZE = 0x80;

      // Minimum size of a standard stream = 4096 bytes, streams smaller than this are stored as short streams
      const SMALL_BLOCK_THRESHOLD = 0x1000;

      // header offsets
      const NUM_BIG_BLOCK_DEPOT_BLOCKS_POS = 0x2c;
      const ROOT_START_BLOCK_POS = 0x30;
      const SMALL_BLOCK_DEPOT_BLOCK_POS = 0x3c;
      const EXTENSION_BLOCK_POS = 0x44;
      const NUM_EXTENSION_BLOCK_POS = 0x48;
      const BIG_BLOCK_DEPOT_BLOCKS_POS = 0x4c;

      // property storage offsets (directory offsets)
      const SIZE_OF_NAME_POS = 0x40;
      const TYPE_POS = 0x42;
      const START_BLOCK_POS = 0x74;
      const SIZE_POS = 0x78;

      /**
       * Directory index of the Workbook/Book stream, or null when the file has none.
       *
       * @var null|int
       */
      public $wrkbook;

      /**
       * Directory index of the "\x05SummaryInformation" stream, or null when absent.
       *
       * @var null|int
       */
      public $summaryInformation;

      /**
       * Directory index of the "\x05DocumentSummaryInformation" stream, or null when absent.
       *
       * @var null|int
       */
      public $documentSummaryInformation;

      /**
       * Total number of sectors used for the SAT (header field).
       *
       * @var int
       */
      private $numBigBlockDepotBlocks;

      /**
       * SecID of the first sector of the directory stream (header field).
       *
       * @var int
       */
      private $rootStartBlock;

      /**
       * SecID of the first sector of the SSAT, or -2 if there is none (header field).
       *
       * @var int
       */
      private $sbdStartBlock;

      /**
       * SecID of the MSAT extension sector currently being processed (header field, advanced by read()).
       *
       * @var int
       */
      private $extensionBlock;

      /**
       * Total number of sectors used by the MSAT (header field).
       *
       * @var int
       */
      private $numExtensionBlocks;

      /**
       * Concatenated SAT sectors: 4 bytes per sector giving the SecID of the next sector in its chain.
       *
       * @var string
       */
      private $bigBlockChain;

      /**
       * Concatenated SSAT sectors: 4 bytes per short sector giving the next short sector in its chain.
       *
       * @var string
       */
      private $smallBlockChain;

      /**
       * Raw bytes of the directory stream.
       *
       * @var string
       */
      private $entry;

      /**
       * Directory index of the root entry, or null when none was found.
       *
       * @var null|int
       */
      private $rootentry;

      /**
       * Parsed directory entries; each item has the keys name, type, startBlock and size.
       *
       * @var array
       */
      private $props = [];

      /**
       * Read the file.
       *
       * Loads the whole file into memory, rebuilds the SAT and SSAT from the header/MSAT,
       * reads the directory stream and indexes its entries.
       *
       * @param string $pFilename Filename
       *
       * @throws ReaderException if the file is not an OLE file, cannot be read, or its allocation tables are inconsistent
       */
      public function read($pFilename)
      {
          File::assertFile($pFilename);

          // Get the file identifier
          // Don't bother reading the whole file until we know it's a valid OLE file
          $identifier = file_get_contents($pFilename, false, null, 0, 8);

          // Check OLE identifier (strict: a failed read yields false, which must not match either)
          $identifierOle = pack('CCCCCCCC', 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1);
          if ($identifier !== $identifierOle) {
              throw new ReaderException('The filename ' . $pFilename . ' is not recognised as an OLE file');
          }

          // Get the file data
          $contents = file_get_contents($pFilename);
          if ($contents === false) {
              throw new ReaderException('Could not read file ' . $pFilename . '.');
          }
          $this->data = $contents;

          // Forget everything learned from a previously read file so no stale indexes survive
          $this->props = [];
          $this->wrkbook = null;
          $this->summaryInformation = null;
          $this->documentSummaryInformation = null;
          $this->rootentry = null;

          // Total number of sectors used for the SAT
          $this->numBigBlockDepotBlocks = self::getInt4d($this->data, self::NUM_BIG_BLOCK_DEPOT_BLOCKS_POS);

          // SecID of the first sector of the directory stream
          $this->rootStartBlock = self::getInt4d($this->data, self::ROOT_START_BLOCK_POS);

          // SecID of the first sector of the SSAT (or -2 if not extant)
          $this->sbdStartBlock = self::getInt4d($this->data, self::SMALL_BLOCK_DEPOT_BLOCK_POS);

          // SecID of the first sector of the MSAT (or -2 if no additional sectors are used)
          $this->extensionBlock = self::getInt4d($this->data, self::EXTENSION_BLOCK_POS);

          // Total number of sectors used by MSAT
          $this->numExtensionBlocks = self::getInt4d($this->data, self::NUM_EXTENSION_BLOCK_POS);

          // First part of the MSAT lives in the header; when extension sectors exist the header part is full
          $bigBlockDepotBlocks = [];
          $pos = self::BIG_BLOCK_DEPOT_BLOCKS_POS;
          $bbdBlocks = $this->numBigBlockDepotBlocks;
          if ($this->numExtensionBlocks !== 0) {
              $bbdBlocks = (self::BIG_BLOCK_SIZE - self::BIG_BLOCK_DEPOT_BLOCKS_POS) / 4;
          }

          for ($i = 0; $i < $bbdBlocks; ++$i) {
              $bigBlockDepotBlocks[$i] = self::getInt4d($this->data, $pos);
              $pos += 4;
          }

          // Remaining MSAT entries are stored in extension sectors; the last 4 bytes of each
          // extension sector hold the SecID of the next one
          for ($j = 0; $j < $this->numExtensionBlocks; ++$j) {
              $pos = ($this->extensionBlock + 1) * self::BIG_BLOCK_SIZE;
              $blocksToRead = min($this->numBigBlockDepotBlocks - $bbdBlocks, self::BIG_BLOCK_SIZE / 4 - 1);

              for ($i = $bbdBlocks; $i < $bbdBlocks + $blocksToRead; ++$i) {
                  $bigBlockDepotBlocks[$i] = self::getInt4d($this->data, $pos);
                  $pos += 4;
              }

              $bbdBlocks += $blocksToRead;
              if ($bbdBlocks < $this->numBigBlockDepotBlocks) {
                  $this->extensionBlock = self::getInt4d($this->data, $pos);
              }
          }

          // Build the SAT by concatenating every sector the MSAT points at
          $this->bigBlockChain = '';
          for ($i = 0; $i < $this->numBigBlockDepotBlocks; ++$i) {
              if (!isset($bigBlockDepotBlocks[$i])) {
                  // The header promises more SAT sectors than the MSAT actually lists
                  throw new ReaderException('Invalid OLE file: sector allocation table entry ' . $i . ' is missing.');
              }
              $pos = ($bigBlockDepotBlocks[$i] + 1) * self::BIG_BLOCK_SIZE;
              $this->bigBlockChain .= substr($this->data, $pos, self::BIG_BLOCK_SIZE);
          }

          // The SSAT is itself an ordinary stream of standard sectors
          $this->smallBlockChain = $this->_readData($this->sbdStartBlock);

          // read the directory stream
          $this->entry = $this->_readData($this->rootStartBlock);

          $this->readPropertySets();
      }

      /**
       * Extract binary stream data.
       *
       * Streams smaller than SMALL_BLOCK_THRESHOLD are read from the short-stream container
       * (the root entry's stream) via the SSAT; all others are read from standard sectors via
       * the SAT. The result is made of whole sectors, so it may be longer than the stream's
       * declared size.
       *
       * @param null|int $stream Directory index of the stream (e.g. $this->wrkbook)
       *
       * @throws ReaderException if a sector chain is invalid
       *
       * @return null|string Stream bytes, or null when $stream is null
       */
      public function getStream($stream)
      {
          if ($stream === null) {
              return null;
          }

          if ($this->props[$stream]['size'] < self::SMALL_BLOCK_THRESHOLD) {
              // Short stream: 64-byte sectors located inside the root entry's stream
              $rootdata = $this->_readData($this->props[$this->rootentry]['startBlock']);

              return self::readChain(
                  $rootdata,
                  $this->smallBlockChain,
                  $this->props[$stream]['startBlock'],
                  self::SMALL_BLOCK_SIZE,
                  0
              );
          }

          return $this->_readData($this->props[$stream]['startBlock']);
      }

      /**
       * Read a standard stream (by joining sectors using information from SAT).
       *
       * @param int $bl Sector ID where the stream starts
       *
       * @throws ReaderException if the sector chain is invalid
       *
       * @return string Data for standard stream
       */
      private function _readData($bl)
      {
          // Standard sectors start after the 512-byte header, hence the bias of one sector
          return self::readChain($this->data, $this->bigBlockChain, $bl, self::BIG_BLOCK_SIZE, 1);
      }

      /**
       * Concatenate the sectors of one chain, following an allocation table until the -2 end marker.
       *
       * @param string $source Bytes the sectors are cut from
       * @param string $chain Allocation table (4 bytes per sector: ID of the next sector)
       * @param int $startBlock ID of the first sector of the chain
       * @param int $sectorSize Size of one sector in bytes
       * @param int $sectorBias Number of sectors to skip at the start of $source (1 for the file header, else 0)
       *
       * @throws ReaderException if the chain revisits a sector or references an invalid position
       *
       * @return string
       */
      private static function readChain($source, $chain, $startBlock, $sectorSize, $sectorBias)
      {
          $result = '';
          $visited = [];
          $block = $startBlock;

          while ($block !== -2) {
              // A well-formed chain never revisits a sector; without this guard a corrupt
              // file would keep appending data until memory runs out
              if (isset($visited[$block])) {
                  throw new ReaderException('Invalid OLE file: sector chain loops at sector ' . $block . '.');
              }
              $visited[$block] = true;

              $result .= substr($source, ($block + $sectorBias) * $sectorSize, $sectorSize);
              $block = self::getInt4d($chain, $block * 4);
          }

          return $result;
      }

      /**
       * Read entries in the directory stream.
       *
       * Appends one item per 128-byte entry to $this->props and records the indexes of the
       * workbook, root and summary-information entries.
       */
      private function readPropertySets()
      {
          $entryLen = strlen($this->entry);

          // loop through entries, each entry is 128 bytes; an incomplete trailing entry is ignored
          for (
              $offset = 0;
              $offset + self::PROPERTY_STORAGE_BLOCK_SIZE <= $entryLen;
              $offset += self::PROPERTY_STORAGE_BLOCK_SIZE
          ) {
              // entry data (128 bytes)
              $d = substr($this->entry, $offset, self::PROPERTY_STORAGE_BLOCK_SIZE);

              // size in bytes of name
              $nameSize = ord($d[self::SIZE_OF_NAME_POS]) | (ord($d[self::SIZE_OF_NAME_POS + 1]) << 8);

              // type of entry
              $type = ord($d[self::TYPE_POS]);

              // sectorID of first sector or short sector, if this entry refers to a stream (the case with workbook)
              // sectorID of first sector of the short-stream container stream, if this entry is root entry
              $startBlock = self::getInt4d($d, self::START_BLOCK_POS);
              $size = self::getInt4d($d, self::SIZE_POS);

              // drop the NUL bytes of the stored name
              $name = str_replace("\x00", '', substr($d, 0, $nameSize));

              $this->props[] = [
                  'name' => $name,
                  'type' => $type,
                  'startBlock' => $startBlock,
                  'size' => $size,
              ];
              $index = count($this->props) - 1;

              // tmp helper to simplify checks
              $upName = strtoupper($name);

              // Workbook directory entry (BIFF5 uses Book, BIFF8 uses Workbook)
              if ($upName === 'WORKBOOK' || $upName === 'BOOK') {
                  $this->wrkbook = $index;
              } elseif ($upName === 'ROOT ENTRY' || $upName === 'R') {
                  // Root entry
                  $this->rootentry = $index;
              }

              // Summary information
              if ($name === chr(5) . 'SummaryInformation') {
                  $this->summaryInformation = $index;
              }

              // Additional Document Summary information
              if ($name === chr(5) . 'DocumentSummaryInformation') {
                  $this->documentSummaryInformation = $index;
              }
          }
      }

      /**
       * Read 4 bytes of data at specified position as a signed little-endian 32-bit integer.
       *
       * Data shorter than $pos + 4 bytes is treated as if it were padded with NUL bytes.
       *
       * @param string $data
       * @param int $pos
       *
       * @throws ReaderException if $data is empty or $pos is negative
       *
       * @return int
       */
      private static function getInt4d($data, $pos)
      {
          // Only a truly empty string counts as "no data": trim() would also strip NUL bytes
          // and whitespace, wrongly rejecting binary data that consists solely of such bytes
          if ((string) $data === '') {
              // No data provided
              throw new ReaderException('Parameter data is empty.');
          } elseif ($pos < 0) {
              // Invalid position
              throw new ReaderException('Parameter pos=' . $pos . ' is invalid.');
          }

          $len = strlen($data);
          if ($len < $pos + 4) {
              $data .= str_repeat("\0", $pos + 4 - $len);
          }

          // FIX: represent numbers correctly on 64-bit system
          // http://sourceforge.net/tracker/index.php?func=detail&aid=1487372&group_id=99160&atid=623334
          // Changed by Andreas Rehm 2006 to ensure correct result of the <<24 block on 32 and 64bit systems
          $_or_24 = ord($data[$pos + 3]);
          if ($_or_24 >= 128) {
              // negative number
              $_ord_24 = -abs((256 - $_or_24) << 24);
          } else {
              $_ord_24 = ($_or_24 & 127) << 24;
          }

          return ord($data[$pos]) | (ord($data[$pos + 1]) << 8) | (ord($data[$pos + 2]) << 16) | $_ord_24;
      }
  }