SimpleXLSX.class.php 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785
  1. <?php
  2. /**
  3. * SimpleXLSX php class v0.7.11
  4. * MS Excel 2007 workbooks reader
  5. *
  6. * Copyright (c) 2012 - 2018 SimpleXLSX
  7. *
  8. * This library is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * This library is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with this library; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. *
  22. * @category SimpleXLSX
  23. * @package SimpleXLSX
  24. * @copyright Copyright (c) 2012 - 2018 SimpleXLSX (https://github.com/shuchkin/simplexlsx/)
  25. * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt LGPL
  26. * @version 0.7.11, 2018-04-25
  27. */
  28. /** Examples & Changelog
  29. *
  30. * Example 1:
  31. * if ( $xlsx = SimpleXLSX::parse('book.xlsx') ) {
  32. * print_r( $xlsx->rows() );
  33. * } else {
  34. * echo SimpleXLSX::parse_error();
  35. * }
  36. *
  37. * Example 2: html table
  38. * if ( $xlsx = SimpleXLSX::parse('book.xlsx') ) {
  39. * echo '<table>';
  40. * foreach( $xlsx->rows() as $r ) {
  41. * echo '<tr><td>'.implode('</td><td>', $r ).'</td></tr>';
  42. * }
  43. * echo '</table>';
  44. * } else {
  45. * echo SimpleXLSX::parse_error();
  46. * }
  47. *
  48. * Example 3: rowsEx
  49. * $xlsx = SimpleXLSX::parse('book.xlsx');
  50. * print_r( $xlsx->rowsEx() );
  51. *
  52. * Example 4: select worksheet
  53. * $xlsx = SimpleXLSX::parse('book.xlsx');
  54. * print_r( $xlsx->rows(2) ); // second worksheet
  55. *
  56. * Example 5: IDs and worksheet names
  57. * $xlsx = SimpleXLSX::parse('book.xlsx');
  58. * print_r( $xlsx->sheetNames() ); // array( 1 => 'Sheet 1', 3 => 'Catalog' );
  59. *
  60. * Example 6: get sheet name by id
  61. * $xlsx = SimpleXLSX::parse('book.xlsx');
  62. * echo 'Sheet Name 2 = '.$xlsx->sheetName(2);
  63. *
  64. * Example 7: read data
  65. * if ( $xlsx = SimpleXLSX::parse( file_get_contents('http://www.example.com/example.xlsx'), true) ) {
  66. * list($num_cols, $num_rows) = $xlsx->dimension(2);
  67. * echo $xlsx->sheetName(2).':'.$num_cols.'x'.$num_rows;
  68. * } else {
  69. * echo SimpleXLSX::parse_error();
  70. * }
  71. *
  72. * Example 8: old style
  73. * $xlsx = new SimpleXLSX('book.xlsx');
  74. * if ( $xlsx->success() ) {
  75. * print_r( $xlsx->rows() );
  76. * } else {
  77. * echo 'xlsx error: '.$xlsx->error();
  78. * }
  79. *
  80. * v0.7.11 (2018-04-25) rowsEx(), added row index "r" to cell info
  81. * v0.7.10 (2018-04-21) fixed getCell, returns NULL if not exists
  82. * v0.7.9 (2018-01-15) fixed sheetNames() (namespaced or not namespaced attr)
  83. * v0.7.8 (2018-01-15) remove namespace prefixes (hardcoded)
  84. * v0.7.7 (2017-10-02) XML External Entity (XXE) Prevention (<!ENTITY xxe SYSTEM "file: ///etc/passwd" >]>)
  85. * v0.7.6 (2017-09-26) if worksheet_id === 0 (default) then detect first sheet (for LibreOffice capabilities)
  86. * v0.7.5 (2017-09-10) ->getCell() - fixed
  87. * v0.7.4 (2017-08-22) ::parse_error() - get last error in "static style"
  88. * v0.7.3 (2017-08-14) ->_parse fixed relations reader, added ->getCell( sheet_id, address, format ) for direct cell reading
  89. * v0.7.2 (2017-05-13) ::parse( $filename ) helper method
  90. * v0.7.1 (2017-03-29) License added
  91. * v0.6.11 (2016-07-27) fixed timestamp()
  92. * v0.6.10 (2016-06-10) fixed search entries (UPPERCASE)
  93. * v0.6.9 (2015-04-12) $xlsx->datetime_format to force dates out
  94. * v0.6.8 (2013-10-13) fixed dimension() where 1 row only, fixed rowsEx() empty cells indexes (Daniel Stastka)
  95. * v0.6.7 (2013-08-10) fixed unzip (mac), added $debug param to _constructor to display errors
  96. * v0.6.6 (2013-06-03) +entryExists(),
  97. * v0.6.5 (2013-03-18) fixed sheetName()
  98. * v0.6.4 (2013-03-13) rowsEx(), _parse(): fixed date column type & format detection
  99. * v0.6.3 (2013-03-13) rowsEx(): fixed formulas, added date type 'd', added format 'format'
  100. * dimension(): fixed empty sheet dimension
  101. * + sheetNames() - returns array( sheet_id => sheet_name, sheet_id2 => sheet_name2 ...)
  102. * v0.6.2 (2012-10-04) fixed empty cells, rowsEx() returns type and formulas now
  103. * v0.6.1 (2012-09-14) removed "raise exception" and fixed _unzip
  104. * v0.6 (2012-09-13) success(), error(), __constructor( $filename, $is_data = false )
  105. * v0.5.1 (2012-09-13) sheetName() fixed
  106. * v0.5 (2012-09-12) sheetName()
  107. * v0.4 sheets(), sheetsCount(), unixstamp( $excelDateTime )
  108. * v0.3 - fixed empty cells (Gonzo patch),
  109. */
  110. class SimpleXLSX {
  // Don't remove this string! Created by Sergey Shuchkin http://www.shuchkin.ru/simplexlsx/ 2010-2016

  // Relationship type URIs; _parse() compares them with the Type attribute of .rels entries.
  const SCHEMA_REL_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';
  const SCHEMA_REL_SHAREDSTRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
  const SCHEMA_REL_WORKSHEET = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet';
  const SCHEMA_REL_STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles';

  /* @var string[] $CF Number format id => format code; looked up by numFmtId in _parse() */
  public static $CF = array( // Cell formats
      0 => 'General',
      1 => '0',
      2 => '0.00',
      3 => '#,##0',
      4 => '#,##0.00',
      9 => '0%',
      10 => '0.00%',
      11 => '0.00E+00',
      12 => '# ?/?',
      13 => '# ??/??',
      14 => 'mm-dd-yy',
      15 => 'd-mmm-yy',
      16 => 'd-mmm',
      17 => 'mmm-yy',
      18 => 'h:mm AM/PM',
      19 => 'h:mm:ss AM/PM',
      20 => 'h:mm',
      21 => 'h:mm:ss',
      22 => 'm/d/yy h:mm',
      37 => '#,##0 ;(#,##0)',
      38 => '#,##0 ;[Red](#,##0)',
      39 => '#,##0.00;(#,##0.00)',
      40 => '#,##0.00;[Red](#,##0.00)',
      44 => '_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)',
      45 => 'mm:ss',
      46 => '[h]:mm:ss',
      47 => 'mmss.0',
      48 => '##0.0E+0',
      49 => '@',
      27 => '[$-404]e/m/d',
      30 => 'm/d/yy',
      36 => '[$-404]e/m/d',
      50 => '[$-404]e/m/d',
      57 => '[$-404]e/m/d',
      59 => 't0',
      60 => 't0.00',
      61 => 't#,##0',
      62 => 't#,##0.00',
      67 => 't0%',
      68 => 't0.00%',
      69 => 't# ?/?',
      70 => 't# ??/??',
  );

  /* @var array[] $workbook_cell_formats One entry per styles cellXfs/xf element, filled by _parse(); value() reads the 'format' key */
  public $workbook_cell_formats = array();

  /* @var string $datetime_format gmdate() format used by value() for date cells; a falsy value makes value() return the raw float */
  public $datetime_format = 'Y-m-d H:i:s';

  /* @var SimpleXMLElement $workbook */
  private $workbook;

  /* @var SimpleXMLElement[] $sheets Keyed by the relationship Id with the "rId" prefix removed */
  private $sheets = array();

  // Parsed styles part as returned by getEntryXML() (false when it could not be read)
  private $styles;

  // Cell reference => hyperlink "display" attribute, filled by worksheet()
  private $hyperlinks;

  /* @var array[] $package Archive info: filename, mtime, size, comment and the list of unpacked entries */
  private $package;

  // Reset to an empty array by _unzip(); not read anywhere else in this class
  private $datasec;

  // Shared string table collected by _parse(); value() indexes into it for cells of type "s"
  private $sharedstrings;

  // false, or the last message passed to error()
  private $error = false;

  // When truthy, error() also raises an E_USER_WARNING
  private $debug;
  /**
   * Open a workbook: unpack the archive and, when that succeeds, parse it.
   *
   * Failures are not thrown; they are recorded and can be read with error() / success().
   *
   * @param string $filename Path to the file, or the raw file contents when $is_data is true
   * @param bool   $is_data  Treat $filename as file contents instead of a path
   * @param bool   $debug    When true, error() additionally raises an E_USER_WARNING
   */
  public function __construct( $filename, $is_data = false, $debug = false ) {
      $this->package = array(
          'filename' => '',
          'mtime'    => 0,
          'size'     => 0,
          'comment'  => '',
          'entries'  => array()
      );
      $this->debug = $debug;

      if ( ! $this->_unzip( $filename, $is_data ) ) {
          return;
      }
      $this->_parse();
  }
  /**
   * Unpack the ZIP container into $this->package.
   *
   * Entries are located by splitting the raw bytes on the local-file-header
   * signature, so a payload that happens to contain that byte sequence would be
   * mis-split. Per-entry problems are recorded in the entry's 'error' /
   * 'error_msg' fields and do not abort the whole archive.
   *
   * @param string $filename Path to the file, or the raw archive bytes when $is_data is true
   * @param bool   $is_data  Treat $filename as archive bytes
   *
   * @return bool false when the input is unreadable or has no End Of Central Directory record
   */
  private function _unzip( $filename, $is_data = false ) {
      // Clear current file
      $this->datasec = array();

      if ( $is_data ) {
          $this->package['filename'] = 'default.xlsx';
          $this->package['mtime']    = time();
          $this->package['size']     = strlen( $filename );
          $vZ = $filename;
      } else {
          if ( ! is_readable( $filename ) ) {
              $this->error( 'File not found ' . $filename );
              return false;
          }
          // Package information
          $this->package['filename'] = $filename;
          $this->package['mtime']    = filemtime( $filename );
          $this->package['size']     = filesize( $filename );
          // Read file
          $vZ = file_get_contents( $filename );
          if ( $vZ === false ) {
              $this->error( 'File not found ' . $filename );
              return false;
          }
      }

      // Locate the End Of Central Directory record (the last signature wins).
      $pcd = strrpos( $vZ, "\x50\x4b\x05\x06" );
      if ( $pcd === false ) {
          $this->error( 'Unknown archive format' );
          return false;
      }

      // Skip the whole 4-byte signature: the comment length then sits 16 bytes in
      // and the comment itself starts at byte 18, which is what the offsets below assume.
      $eocd    = (string) substr( $vZ, $pcd + 4 );
      $comment = '';
      if ( strlen( $eocd ) >= 18 ) {
          $aP      = unpack( 'x16/v1CL', $eocd );
          $comment = (string) substr( $eocd, 18, $aP['CL'] );
      }
      // Translates end of line from other operating systems
      $this->package['comment'] = strtr( $comment, array( "\r\n" => "\n", "\r" => "\n" ) );

      // Keep only the bytes in front of the first central directory header ...
      $aE = explode( "\x50\x4b\x01\x02", $vZ );
      // ... and split them into one chunk per local file header.
      $aE = explode( "\x50\x4b\x03\x04", $aE[0] );
      // Shift out spanning signature or empty entry
      array_shift( $aE );

      foreach ( $aE as $chunk ) {
          // The fixed part of a local file header (after the signature) is 26 bytes.
          if ( strlen( $chunk ) < 26 ) {
              continue;
          }
          $aI       = array();
          $aI['E']  = 0;
          $aI['EM'] = '';

          // Retrieving local file header information
          $aP = unpack( 'v1VN/v1GPF/v1CM/v1FT/v1FD/V1CRC/V1CS/V1UCS/v1FNL/v1EFL', $chunk );

          // Encryption detection is switched off, so the error-5 branch below never fires.
          $bE = false;
          $nF = $aP['FNL'];
          $mF = $aP['EFL'];

          // Special case : value block after the compressed data
          if ( $aP['GPF'] & 0x0008 ) {
              $aP1       = unpack( 'V1CRC/V1CS/V1UCS', substr( $chunk, - 12 ) );
              $aP['CRC'] = $aP1['CRC'];
              $aP['CS']  = $aP1['CS'];
              $aP['UCS'] = $aP1['UCS'];
              // 2013-08-10: drop the trailing value block and its optional signature
              $chunk = substr( $chunk, 0, - 12 );
              if ( substr( $chunk, - 4 ) === "\x50\x4b\x07\x08" ) {
                  $chunk = substr( $chunk, 0, - 4 );
              }
          }

          // Getting stored filename
          $aI['N'] = substr( $chunk, 26, $nF );
          if ( substr( $aI['N'], - 1 ) === '/' ) {
              // is a directory entry - will be skipped
              continue;
          }

          // Split the stored name into path and file name; root entries get an empty path
          $aI['P'] = dirname( $aI['N'] );
          $aI['P'] = $aI['P'] === '.' ? '' : $aI['P'];
          $aI['N'] = basename( $aI['N'] );

          $chunk = (string) substr( $chunk, 26 + $nF + $mF );

          if ( strlen( $chunk ) !== (int) $aP['CS'] ) {
              $aI['E']  = 1;
              $aI['EM'] = 'Compressed size is not equal with the value in header information.';
          } elseif ( $bE ) {
              $aI['E']  = 5;
              $aI['EM'] = 'File is encrypted, which is not supported from this class.';
          } else {
              switch ( $aP['CM'] ) {
                  case 0: // Stored
                      // Nothing to do, the data is flat.
                      break;
                  case 8: // Deflated
                      $chunk = gzinflate( $chunk );
                      break;
                  case 12: // BZIP2
                      if ( extension_loaded( 'bz2' ) ) {
                          $chunk = bzdecompress( $chunk );
                      } else {
                          $aI['E']  = 7;
                          $aI['EM'] = 'PHP BZIP2 extension not available.';
                      }
                      break;
                  default:
                      $aI['E']  = 6;
                      $aI['EM'] = "De-/Compression method {$aP['CM']} is not supported.";
              }
              if ( ! $aI['E'] ) {
                  if ( $chunk === false ) {
                      $aI['E']  = 2;
                      $aI['EM'] = 'Decompression of data failed.';
                  } elseif ( strlen( $chunk ) !== (int) $aP['UCS'] ) {
                      $aI['E']  = 3;
                      $aI['EM'] = 'Uncompressed size is not equal with the value in header information.';
                  } elseif ( crc32( $chunk ) !== $aP['CRC'] ) {
                      $aI['E']  = 4;
                      $aI['EM'] = 'CRC32 checksum is not equal with the value in header information.';
                  }
              }
          }
          $aI['D'] = $chunk;

          // DOS to UNIX timestamp
          $aI['T'] = mktime(
              ( $aP['FT'] & 0xf800 ) >> 11,
              ( $aP['FT'] & 0x07e0 ) >> 5,
              ( $aP['FT'] & 0x001f ) << 1,
              ( $aP['FD'] & 0x01e0 ) >> 5,
              $aP['FD'] & 0x001f,
              ( ( $aP['FD'] & 0xfe00 ) >> 9 ) + 1980
          );

          $this->package['entries'][] = array(
              'data'      => $aI['D'],
              'error'     => $aI['E'],
              'error_msg' => $aI['EM'],
              'name'      => $aI['N'],
              'path'      => $aI['P'],
              'time'      => $aI['T']
          );
      } // end for each entries

      return true;
  }
  373. // sheets numeration: 1,2,3....
  /**
   * Get the last error, optionally recording a new one first.
   *
   * @param string|bool $set New error message; a falsy value leaves the stored error untouched
   *
   * @return string|bool The stored error message, or false when none was recorded
   */
  public function error( $set = false ) {
      if ( ! $set ) {
          return $this->error;
      }

      $this->error = $set;
      if ( $this->debug ) {
          trigger_error( __CLASS__ . ': ' . $set, E_USER_WARNING );
      }

      return $this->error;
  }
  /**
   * Walk the package relationships and load workbook, sheets, shared strings and styles.
   *
   * Sheets are stored under the numeric part of their relationship Id ("rId3" => 3)
   * and sorted by that key.
   *
   * @return bool true when at least one worksheet was loaded
   */
  private function _parse() {
      // Document data holders
      $this->sharedstrings = array();
      $this->sheets        = array();

      // Read relations and search for officeDocument
      $relations = $this->getEntryXML( '_rels/.rels' );
      if ( $relations ) {
          foreach ( $relations->Relationship as $rel ) {
              $rel_type = trim( (string) $rel['Type'] );
              if ( $rel_type !== self::SCHEMA_REL_OFFICEDOCUMENT ) {
                  continue;
              }
              // Targets may be written package-absolute ("/xl/workbook.xml") while
              // archive entries are stored without a leading slash.
              $rel_target = ltrim( trim( (string) $rel['Target'] ), '/' );

              // Workbook
              $this->workbook = $this->getEntryXML( $rel_target );
              if ( ! $this->workbook ) {
                  continue;
              }
              $workbook_dir = dirname( $rel_target );
              $prefix       = $workbook_dir === '.' ? '' : $workbook_dir . '/';

              $workbookRelations = $this->getEntryXML( $prefix . '_rels/workbook.xml.rels' );
              if ( ! $workbookRelations ) {
                  continue;
              }

              // Loop relations for workbook and extract sheets...
              foreach ( $workbookRelations->Relationship as $workbookRelation ) {
                  $wrel_type   = trim( (string) $workbookRelation['Type'] );
                  $wrel_target = trim( (string) $workbookRelation['Target'] );
                  // Absolute targets are resolved from the package root, others relative to the workbook.
                  if ( $wrel_target !== '' && $wrel_target[0] === '/' ) {
                      $wrel_path = substr( $wrel_target, 1 );
                  } else {
                      $wrel_path = $prefix . $wrel_target;
                  }
                  if ( ! $this->entryExists( $wrel_path ) ) {
                      continue;
                  }

                  if ( $wrel_type === self::SCHEMA_REL_WORKSHEET ) { // Sheets
                      if ( $sheet = $this->getEntryXML( $wrel_path ) ) {
                          $this->sheets[ str_replace( 'rId', '', (string) $workbookRelation['Id'] ) ] = $sheet;
                      }
                  } elseif ( $wrel_type === self::SCHEMA_REL_SHAREDSTRINGS ) {
                      if ( $sharedStrings = $this->getEntryXML( $wrel_path ) ) {
                          foreach ( $sharedStrings->si as $val ) {
                              if ( isset( $val->t ) ) {
                                  $this->sharedstrings[] = (string) $val->t;
                              } elseif ( isset( $val->r ) ) {
                                  $this->sharedstrings[] = $this->_parseRichText( $val );
                              }
                          }
                      }
                  } elseif ( $wrel_type === self::SCHEMA_REL_STYLES ) {
                      $this->styles = $this->getEntryXML( $wrel_path );
                      if ( ! $this->styles ) {
                          // Unreadable styles part: keep going without cell formats.
                          continue;
                      }

                      // Custom number formats declared by the workbook
                      $nf = array();
                      if ( isset( $this->styles->numFmts->numFmt ) ) {
                          foreach ( $this->styles->numFmts->numFmt as $v ) {
                              $nf[ (int) $v['numFmtId'] ] = (string) $v['formatCode'];
                          }
                      }

                      if ( isset( $this->styles->cellXfs->xf ) ) {
                          foreach ( $this->styles->cellXfs->xf as $xf ) {
                              // Flat attribute map; 'format' is always present so readers can rely on it.
                              $attrs = array();
                              foreach ( $xf->attributes() as $attr_name => $attr_value ) {
                                  $attrs[ $attr_name ] = (string) $attr_value;
                              }
                              $attrs['format'] = '';
                              if ( isset( $attrs['numFmtId'] ) ) {
                                  $fid = (int) $attrs['numFmtId'];
                                  if ( isset( self::$CF[ $fid ] ) ) {
                                      $attrs['format'] = self::$CF[ $fid ];
                                  } elseif ( isset( $nf[ $fid ] ) ) {
                                      $attrs['format'] = $nf[ $fid ];
                                  }
                              }
                              $this->workbook_cell_formats[] = $attrs;
                          }
                      }
                  }
              }
              // The office document was processed; ignore remaining package relations.
              break;
          }
      }

      if ( count( $this->sheets ) ) {
          // Sort sheets
          ksort( $this->sheets );
          return true;
      }

      return false;
  }
  /**
   * Load an archive entry as XML with all namespace declarations and prefixes stripped.
   *
   * A well-formed document whose root is empty is returned as an (empty, hence
   * falsy) SimpleXMLElement rather than being reported as a parser error.
   *
   * @param string $name Filename in archive
   *
   * @return SimpleXMLElement|bool false when the entry is missing/empty or cannot be parsed
   */
  public function getEntryXML( $name ) {
      $entry_xml = $this->getEntryData( $name );
      if ( ! $entry_xml ) {
          $this->error( 'XML-entry not found: ' . $name );
          return false;
      }

      // dirty remove namespace prefixes
      $entry_xml = preg_replace( '/xmlns[^=]*="[^"]*"/i', '', $entry_xml ); // remove namespaces
      $entry_xml = preg_replace( '/[a-zA-Z0-9]+:([a-zA-Z0-9]+="[^"]+")/', '$1', $entry_xml ); // remove namespaced attrs
      $entry_xml = preg_replace( '/<[a-zA-Z0-9]+:([^>]+)>/', '<$1>', $entry_xml ); // fix namespaced opening tags
      $entry_xml = preg_replace( '/<\/[a-zA-Z0-9]+:([^>]+)>/', '</$1>', $entry_xml ); // fix namespaced closing tags

      // XML External Entity (XXE) Prevention.
      // libxml >= 2.9.0 no longer loads external entities by default and
      // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so the
      // switch is only flipped on older libxml builds.
      $toggle = LIBXML_VERSION < 20900 && function_exists( 'libxml_disable_entity_loader' );
      $_old   = false;
      if ( $toggle ) {
          $_old = libxml_disable_entity_loader( true );
      }
      $entry_xmlobj = simplexml_load_string( $entry_xml );
      if ( $toggle ) {
          libxml_disable_entity_loader( $_old );
      }

      // Compare against false: an element without children and attributes is
      // falsy but still a successful parse.
      if ( $entry_xmlobj !== false ) {
          return $entry_xmlobj;
      }

      $message = 'XML-entry ' . $name . ' parser error';
      $e       = libxml_get_last_error();
      if ( $e ) {
          $message .= ' ' . $e->message . ' line ' . $e->line;
      }
      $this->error( $message );

      return false;
  }
  /**
   * Return the unpacked contents of an archive entry (case-insensitive lookup).
   *
   * @param string $name Entry path inside the archive, e.g. "xl/workbook.xml"
   *
   * @return string|bool Entry data, or false (and an error is recorded) when there is no such entry
   */
  public function getEntryData( $name ) {
      $dir = strtoupper( dirname( $name ) );
      if ( $dir === '.' ) {
          // _unzip() stores entries from the archive root with an empty path
          $dir = '';
      }
      $name = strtoupper( basename( $name ) );

      foreach ( $this->package['entries'] as $entry ) {
          if ( strtoupper( $entry['path'] ) === $dir && strtoupper( $entry['name'] ) === $name ) {
              return $entry['data'];
          }
      }
      $this->error( 'Entry not found: ' . $name );

      return false;
  }
  /**
   * Tell whether the archive contains the given entry (case-insensitive). Since 0.6.6.
   *
   * Unlike getEntryData() this never records an error.
   *
   * @param string $name Entry path inside the archive
   *
   * @return bool
   */
  public function entryExists( $name ) { // 0.6.6
      $dir = strtoupper( dirname( $name ) );
      if ( $dir === '.' ) {
          // _unzip() stores entries from the archive root with an empty path
          $dir = '';
      }
      $name = strtoupper( basename( $name ) );

      foreach ( $this->package['entries'] as $entry ) {
          if ( strtoupper( $entry['path'] ) === $dir && strtoupper( $entry['name'] ) === $name ) {
              return true;
          }
      }

      return false;
  }
  /**
   * Flatten a string item (<si> / <is>) to plain text.
   *
   * A direct <t> child is returned as is; otherwise the <t> of every rich-text
   * run <r> is concatenated. Runs are joined without a separator because they
   * are consecutive fragments of one string.
   *
   * @param SimpleXMLElement|null $is
   *
   * @return string
   */
  private function _parseRichText( $is = null ) {
      if ( $is === null ) {
          return '';
      }
      if ( isset( $is->t ) ) {
          return (string) $is->t;
      }
      if ( ! isset( $is->r ) ) {
          return '';
      }

      $text = '';
      foreach ( $is->r as $run ) {
          $text .= (string) $run->t;
      }

      return $text;
  }
  /**
   * Static helper: open a workbook and return it, or false on failure.
   *
   * On failure the message is kept for a later SimpleXLSX::parse_error() call.
   *
   * @param string $filename Path to the file, or the raw file contents when $is_data is true
   * @param bool   $is_data
   * @param bool   $debug
   *
   * @return SimpleXLSX|bool
   */
  public static function parse( $filename, $is_data = false, $debug = false ) {
      $workbook = new self( $filename, $is_data, $debug );

      if ( ! $workbook->success() ) {
          self::parse_error( $workbook->error() );
          return false;
      }

      return $workbook;
  }
  /**
   * Get the error kept by the last failed parse(), optionally storing a new one first.
   *
   * @param string|bool $set New message; a falsy value only reads the stored one
   *
   * @return string|bool The stored message, or false when nothing was stored yet
   */
  public static function parse_error( $set = false ) {
      static $error = false;

      if ( $set ) {
          $error = $set;
      }

      return $error;
  }
  /**
   * Tell whether no error has been recorded on this instance.
   *
   * @return bool
   */
  public function success() {
      return $this->error ? false : true;
  }
  /**
   * Return the sheet as a list of rows, each an array of cell values keyed by 0-based column.
   *
   * Rows are numbered in document order (gaps between row numbers are not
   * reproduced). Every row is padded with '' up to the width reported by
   * dimension(). A cell without an "r" attribute is placed right after the
   * previous cell of its row.
   *
   * @param int $worksheet_id 0 selects the first sheet
   *
   * @return array[]|bool false when the worksheet does not exist
   */
  public function rows( $worksheet_id = 0 ) {
      if ( ( $ws = $this->worksheet( $worksheet_id ) ) === false ) {
          return false;
      }
      $rows = array();
      list( $cols, ) = $this->dimension( $worksheet_id );

      /* @var SimpleXMLElement $ws */
      if ( ! isset( $ws->sheetData->row ) ) {
          return $rows;
      }
      foreach ( $ws->sheetData->row as $row ) {
          $cells = array();
          $nextC = 0; // column used when a cell carries no address
          foreach ( $row->c as $c ) {
              $ref   = (string) $c['r'];
              $idx   = $ref === '' ? false : $this->_columnIndex( $ref );
              $curC  = $idx === false ? $nextC : $idx[0];
              $nextC = $curC + 1;

              $cells[ $curC ] = $this->value( $c );
          }
          // Fill the holes so every row has the declared width
          for ( $i = 0; $i < $cols; $i ++ ) {
              if ( ! isset( $cells[ $i ] ) ) {
                  $cells[ $i ] = '';
              }
          }
          ksort( $cells );
          $rows[] = $cells;
      }

      return $rows;
  }
  /**
   * Fetch a worksheet and make it the source for href().
   *
   * The hyperlink map is rebuilt on every call, so links from a previously
   * requested sheet never show up for a sheet that has none.
   *
   * @param int $worksheet_id Key as used by sheets(); integer 0 selects the first sheet
   *
   * @return SimpleXMLElement|bool false (and an error is recorded) when the sheet does not exist
   */
  public function worksheet( $worksheet_id = 0 ) {
      if ( $worksheet_id === 0 ) {
          reset( $this->sheets );
          $worksheet_id = key( $this->sheets ); // null when there are no sheets
      }

      if ( $worksheet_id === null || ! isset( $this->sheets[ $worksheet_id ] ) ) {
          $this->error( 'Worksheet ' . $worksheet_id . ' not found.' );
          return false;
      }

      $ws = $this->sheets[ $worksheet_id ];

      $this->hyperlinks = array();
      if ( isset( $ws->hyperlinks ) ) {
          foreach ( $ws->hyperlinks->hyperlink as $hyperlink ) {
              $this->hyperlinks[ (string) $hyperlink['ref'] ] = (string) $hyperlink['display'];
          }
      }

      return $ws;
  }
  /**
   * Report the sheet size as declared by its <dimension ref="..."> element.
   *
   * Do not rely on it blindly: some xlsx generators omit the dimension
   * element, in which case array( 0, 0 ) is returned.
   *
   * @param int $worksheet_id 0 selects the first sheet
   *
   * @return int[]|bool array( columns, rows ), or false when the worksheet does not exist
   */
  public function dimension( $worksheet_id = 0 ) {
      $ws = $this->worksheet( $worksheet_id );
      if ( $ws === false ) {
          return false;
      }

      /* @var SimpleXMLElement $ws */
      $ref = (string) $ws->dimension['ref'];
      if ( $ref === '' ) {
          return array( 0, 0 );
      }

      // "A1:C5" -> use the bottom-right corner; a single cell ref (0.6.8) is used as is
      if ( strpos( $ref, ':' ) !== false ) {
          $corners = explode( ':', $ref );
          $ref     = $corners[1];
      }
      $index = $this->_columnIndex( $ref );

      return array( $index[0] + 1, $index[1] + 1 );
  }
  /**
   * Convert a cell address such as "AB12" into 0-based indexes.
   *
   * Only upper-case column letters followed by digits are recognised.
   *
   * @param string $cell
   *
   * @return array|bool array( column, row ), or false (and an error is recorded) for an unparsable address
   */
  private function _columnIndex( $cell = 'A1' ) {
      if ( ! preg_match( '/([A-Z]+)(\d+)/', $cell, $m ) ) {
          $this->error( 'Invalid cell index ' . $cell );
          return false;
      }

      // Column letters are a base-26 number with digits A=1 .. Z=26
      $column = 0;
      foreach ( str_split( $m[1] ) as $letter ) {
          $column = $column * 26 + ( ord( $letter ) - 64 );
      }

      return array( $column - 1, $m[2] - 1 );
  }
  /**
   * Convert a <c> element into a PHP value.
   *
   * Numeric cells (type attribute absent or "n") whose number format contains
   * the letter "m" are treated as dates. Text, boolean and error cells keep
   * their own type whatever their style is, and a date cell without a stored
   * value yields ''.
   *
   * @param SimpleXMLElement $cell
   * @param string|null      $format Format code; null looks it up from the cell's style index
   *
   * @return mixed string, int, float or bool depending on the cell type
   */
  public function value( $cell, $format = null ) {
      // Determine data type
      $dataType = (string) $cell['t'];

      if ( $format === null ) {
          $s = (int) $cell['s'];
          if ( $s > 0 && isset( $this->workbook_cell_formats[ $s ]['format'] ) ) {
              $format = $this->workbook_cell_formats[ $s ]['format'];
          }
      }
      // Only a number can be a date serial; never reinterpret strings, booleans or errors
      if ( ( $dataType === '' || $dataType === 'n' ) && strpos( (string) $format, 'm' ) !== false ) {
          $dataType = 'd';
      }

      $value = '';
      switch ( $dataType ) {
          case 's':
              // Value is an index into the shared string table
              if ( (string) $cell->v !== '' ) {
                  $idx = (int) $cell->v;
                  if ( isset( $this->sharedstrings[ $idx ] ) ) {
                      $value = $this->sharedstrings[ $idx ];
                  }
              }
              break;
          case 'b':
              // Value is boolean
              $value = (string) $cell->v;
              if ( $value === '0' ) {
                  $value = false;
              } elseif ( $value === '1' ) {
                  $value = true;
              } else {
                  $value = (bool) $cell->v;
              }
              break;
          case 'inlineStr':
              // Value is rich text inline
              $value = $this->_parseRichText( $cell->is );
              break;
          case 'e':
              // Value is an error message
              if ( (string) $cell->v !== '' ) {
                  $value = (string) $cell->v;
              }
              break;
          case 'd':
              // Value is a date; an empty cell stays '' instead of turning into the epoch
              $raw = (string) $cell->v;
              if ( $raw !== '' ) {
                  $value = $this->datetime_format
                      ? gmdate( $this->datetime_format, (int) $this->unixstamp( (float) $raw ) )
                      : (float) $raw;
              }
              break;
          default:
              // Value is a string
              $value = (string) $cell->v;
              // Numeric strings become int when that loses nothing, float otherwise
              if ( is_numeric( $value ) ) {
                  if ( $value == (int) $value ) {
                      $value = (int) $value;
                  } elseif ( $value == (float) $value ) {
                      $value = (float) $value;
                  }
              }
      }

      return $value;
  }
  /**
   * Convert an Excel date/time serial to a Unix timestamp.
   *
   * The integer part of the serial counts days and the fraction is the time
   * of day. 25569 is subtracted from the day count to move it onto the Unix
   * epoch; a serial with a zero day part is treated as a bare time of day.
   *
   * @param float $excelDateTime
   *
   * @return float Whole number of seconds
   */
  public function unixstamp( $excelDateTime ) {
      $days    = floor( $excelDateTime );
      $seconds = round( ( $excelDateTime - $days ) * 86400 );

      if ( abs( $days ) > 0 ) {
          /** @noinspection SummerTimeUnsafeTimeManipulationInspection */
          return ( $days - 25569 ) * 86400 + $seconds;
      }

      return $seconds;
  }
  /**
   * Like rows(), but every cell is an array with type, name, value, href, f, format and r.
   *
   * - 'name' is always a plain string cell address.
   * - 'r' is the row number from the sheet (0 when the row has no "r" attribute).
   * - Padding cells are named after the real row number when it is known.
   * - Only numeric cells are reported as type 'd' for date formats.
   *
   * @param int $worksheet_id 0 selects the first sheet
   *
   * @return array[]|bool false when the worksheet does not exist
   */
  public function rowsEx( $worksheet_id = 0 ) {
      if ( ( $ws = $this->worksheet( $worksheet_id ) ) === false ) {
          return false;
      }
      $rows = array();
      $curR = 0;
      list( $cols, ) = $this->dimension( $worksheet_id );

      /* @var SimpleXMLElement $ws */
      if ( ! isset( $ws->sheetData->row ) ) {
          return $rows;
      }
      foreach ( $ws->sheetData->row as $row ) {
          $r_idx      = (int) $row['r'];
          $row_number = $r_idx > 0 ? $r_idx : $curR + 1;

          $rows[ $curR ] = array();
          $nextC         = 0; // column used when a cell carries no address

          foreach ( $row->c as $c ) {
              $r = (string) $c['r'];
              $t = (string) $c['t'];
              $s = (int) $c['s'];

              $idx   = $r === '' ? false : $this->_columnIndex( $r );
              $curC  = $idx === false ? $nextC : $idx[0];
              $nextC = $curC + 1;

              $format = '';
              if ( $s > 0 && isset( $this->workbook_cell_formats[ $s ]['format'] ) ) {
                  $format = $this->workbook_cell_formats[ $s ]['format'];
                  if ( ( $t === '' || $t === 'n' ) && strpos( $format, 'm' ) !== false ) {
                      $t = 'd';
                  }
              }

              $rows[ $curR ][ $curC ] = array(
                  'type'   => $t,
                  'name'   => $r,
                  'value'  => $this->value( $c, $format ),
                  'href'   => $this->href( $c ),
                  'f'      => (string) $c->f,
                  'format' => $format,
                  'r'      => $r_idx
              );
          }

          for ( $i = 0; $i < $cols; $i ++ ) {
              if ( isset( $rows[ $curR ][ $i ] ) ) {
                  continue;
              }
              // 0.6.8: 0-based column index -> letters (0 => A, 26 => AA)
              for ( $letters = '', $j = $i; $j >= 0; $j = (int) ( $j / 26 ) - 1 ) {
                  $letters = chr( $j % 26 + 65 ) . $letters;
              }
              $rows[ $curR ][ $i ] = array(
                  'type'   => '',
                  'name'   => $letters . $row_number,
                  'value'  => '',
                  'href'   => '',
                  'f'      => '',
                  'format' => '',
                  'r'      => $r_idx
              );
          }
          ksort( $rows[ $curR ] );
          $curR ++;
      }

      return $rows;
  }
  /** Example: xlsx->getCell(2,'B87', 0);
   * Get cell B87 from 2nd worksheet, formatted by General (see $CF for all formats).
   * It's useful when we need to get a cell that has the wrong format,
   * Or just for direct cell reading. (thx EGO7000)
   *
   * The cell is looked up by its address, so sheets with skipped rows or
   * columns return the right cell. Cells stored without an "r" attribute are
   * still found by their position.
   *
   * @param int $worksheet_id
   * @param string|array $cell A1 or [0,0] (0-based column, row)
   * @param null|int|string $format $CF format id, a format code, or null to use the cell's own style
   *
   * @return mixed Cell value, null when the cell does not exist, false when the worksheet does not exist
   */
  public function getCell( $worksheet_id = 0, $cell = 'A1', $format = null ) {
      if ( ( $ws = $this->worksheet( $worksheet_id ) ) === false ) {
          return false;
      }

      $index = is_array( $cell ) ? $cell : $this->_columnIndex( (string) $cell );
      if ( ! is_array( $index ) || ! isset( $index[0], $index[1] ) ) {
          return null;
      }
      $curC = (int) $index[0];
      $curR = (int) $index[1];
      if ( $curC < 0 || $curR < 0 || ! isset( $ws->sheetData->row ) ) {
          return null;
      }

      // A numeric format is an id into the built-in format table
      if ( is_int( $format ) && isset( self::$CF[ $format ] ) ) {
          $format = self::$CF[ $format ];
      }

      // Rebuild the address from the numeric indexes, so only letters and digits reach the XPath query
      for ( $letters = '', $j = $curC; $j >= 0; $j = (int) ( $j / 26 ) - 1 ) {
          $letters = chr( $j % 26 + 65 ) . $letters;
      }
      $address = $letters . ( $curR + 1 );

      $found = $ws->sheetData->xpath( "row/c[@r='" . $address . "']" );
      if ( is_array( $found ) && count( $found ) > 0 ) {
          return $this->value( $found[0], $format );
      }

      // Positional fallback, only for cells that carry no address of their own
      if ( isset( $ws->sheetData->row[ $curR ], $ws->sheetData->row[ $curR ]->c[ $curC ] ) ) {
          $c = $ws->sheetData->row[ $curR ]->c[ $curC ];
          if ( (string) $c['r'] === '' ) {
              return $this->value( $c, $format );
          }
      }

      return null;
  }
  /**
   * Look up the hyperlink text recorded for a cell of the most recently requested worksheet.
   *
   * @param SimpleXMLElement $cell <c> element; its "r" attribute is the lookup key
   *
   * @return string The hyperlink's "display" attribute, or '' when the cell has no hyperlink
   */
  public function href( $cell ) {
      $ref = (string) $cell['r'];

      if ( isset( $this->hyperlinks[ $ref ] ) ) {
          return $this->hyperlinks[ $ref ];
      }

      return '';
  }
  /**
   * Return all loaded worksheets.
   *
   * @return SimpleXMLElement[] Keyed by the numeric part of each sheet's relationship Id
   */
  public function sheets() {
      $sheets = $this->sheets;

      return $sheets;
  }
  /**
   * Count the loaded worksheets.
   *
   * @return int
   */
  public function sheetsCount() {
      $sheets = $this->sheets;

      return count( $sheets );
  }
  /**
   * Find a worksheet name by the sheetId attribute of its workbook <sheet> element.
   *
   * @param int $worksheet_id Compared, as an integer, with each sheet's sheetId
   *
   * @return string|bool The sheet name, or false when the workbook lists no matching sheet
   */
  public function sheetName( $worksheet_id ) {
      if ( isset( $this->workbook->sheets->sheet ) ) {
          $wanted = (int) $worksheet_id;
          foreach ( $this->workbook->sheets->sheet as $s ) {
              /* @var SimpleXMLElement $s */
              $attrs = $s->attributes();
              if ( (int) $attrs->sheetId === $wanted ) {
                  return (string) $attrs->name;
              }
          }
      }

      return false;
  }
  /**
   * List all worksheet names declared by the workbook.
   *
   * @return string[] sheetId => name; empty when no workbook was loaded or it lists no sheets
   */
  public function sheetNames() {
      $result = array();
      // Same guard as sheetName(): after a failed parse there is no workbook to read
      if ( ! isset( $this->workbook->sheets->sheet ) ) {
          return $result;
      }
      foreach ( $this->workbook->sheets->sheet as $s ) {
          /* @var SimpleXMLElement $s */
          $attrs = $s->attributes();
          $result[ (int) $attrs->sheetId ] = (string) $attrs->name;
      }

      return $result;
  }
  /**
   * Return the styles part as loaded by the parser (thx Gonzo).
   *
   * @return SimpleXMLElement|bool|null Whatever was stored while parsing; null when no styles part was processed
   */
  public function getStyles() {
      $styles = $this->styles;

      return $styles;
  }
  /**
   * Return the archive information collected while unpacking.
   *
   * @return array Keys: filename, mtime, size, comment, entries
   */
  public function getPackage() {
      $package = $this->package;

      return $package;
  }
  785. }