123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623 |
- <?php
- namespace PhpOffice\PhpSpreadsheet\Calculation;
- /**
- * PARTLY BASED ON:
- * Copyright (c) 2007 E. W. Bachtal, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of this software
- * and associated documentation files (the "Software"), to deal in the Software without restriction,
- * including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
- * subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all copies or substantial
- * portions of the Software.
- *
- * The software is provided "as is", without warranty of any kind, express or implied, including but not
- * limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. In
- * no event shall the authors or copyright holders be liable for any claim, damages or other liability,
- * whether in an action of contract, tort or otherwise, arising from, out of or in connection with the
- * software or the use or other dealings in the software.
- *
- * http://ewbi.blogs.com/develops/2007/03/excel_formula_p.html
- * http://ewbi.blogs.com/develops/2004/12/excel_formula_p.html
- */
- class FormulaParser
- {
- // Character constants
- const QUOTE_DOUBLE = '"';
- const QUOTE_SINGLE = '\'';
- const BRACKET_CLOSE = ']';
- const BRACKET_OPEN = '[';
- const BRACE_OPEN = '{';
- const BRACE_CLOSE = '}';
- const PAREN_OPEN = '(';
- const PAREN_CLOSE = ')';
- const SEMICOLON = ';';
- const WHITESPACE = ' ';
- const COMMA = ',';
- const ERROR_START = '#';
- const OPERATORS_SN = '+-';
- const OPERATORS_INFIX = '+-*/^&=><';
- const OPERATORS_POSTFIX = '%';
- /**
- * Formula.
- *
- * @var string
- */
- private $formula;
- /**
- * Tokens.
- *
- * @var FormulaToken[]
- */
- private $tokens = [];
- /**
- * Create a new FormulaParser.
- *
- * @param string $pFormula Formula to parse
- *
- * @throws Exception
- */
- public function __construct($pFormula = '')
- {
- // Check parameters
- if ($pFormula === null) {
- throw new Exception('Invalid parameter passed: formula');
- }
- // Initialise values
- $this->formula = trim($pFormula);
- // Parse!
- $this->parseToTokens();
- }
- /**
- * Get Formula.
- *
- * @return string
- */
- public function getFormula()
- {
- return $this->formula;
- }
- /**
- * Get Token.
- *
- * @param int $pId Token id
- *
- * @throws Exception
- *
- * @return string
- */
- public function getToken($pId = 0)
- {
- if (isset($this->tokens[$pId])) {
- return $this->tokens[$pId];
- }
- throw new Exception("Token with id $pId does not exist.");
- }
- /**
- * Get Token count.
- *
- * @return int
- */
- public function getTokenCount()
- {
- return count($this->tokens);
- }
- /**
- * Get Tokens.
- *
- * @return FormulaToken[]
- */
- public function getTokens()
- {
- return $this->tokens;
- }
- /**
- * Parse to tokens.
- */
- private function parseToTokens()
- {
- // No attempt is made to verify formulas; assumes formulas are derived from Excel, where
- // they can only exist if valid; stack overflows/underflows sunk as nulls without exceptions.
- // Check if the formula has a valid starting =
- $formulaLength = strlen($this->formula);
- if ($formulaLength < 2 || $this->formula[0] != '=') {
- return;
- }
- // Helper variables
- $tokens1 = $tokens2 = $stack = [];
- $inString = $inPath = $inRange = $inError = false;
- $token = $previousToken = $nextToken = null;
- $index = 1;
- $value = '';
- $ERRORS = ['#NULL!', '#DIV/0!', '#VALUE!', '#REF!', '#NAME?', '#NUM!', '#N/A'];
- $COMPARATORS_MULTI = ['>=', '<=', '<>'];
- while ($index < $formulaLength) {
- // state-dependent character evaluation (order is important)
- // double-quoted strings
- // embeds are doubled
- // end marks token
- if ($inString) {
- if ($this->formula[$index] == self::QUOTE_DOUBLE) {
- if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_DOUBLE)) {
- $value .= self::QUOTE_DOUBLE;
- ++$index;
- } else {
- $inString = false;
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_TEXT);
- $value = '';
- }
- } else {
- $value .= $this->formula[$index];
- }
- ++$index;
- continue;
- }
- // single-quoted strings (links)
- // embeds are double
- // end does not mark a token
- if ($inPath) {
- if ($this->formula[$index] == self::QUOTE_SINGLE) {
- if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_SINGLE)) {
- $value .= self::QUOTE_SINGLE;
- ++$index;
- } else {
- $inPath = false;
- }
- } else {
- $value .= $this->formula[$index];
- }
- ++$index;
- continue;
- }
- // bracked strings (R1C1 range index or linked workbook name)
- // no embeds (changed to "()" by Excel)
- // end does not mark a token
- if ($inRange) {
- if ($this->formula[$index] == self::BRACKET_CLOSE) {
- $inRange = false;
- }
- $value .= $this->formula[$index];
- ++$index;
- continue;
- }
- // error values
- // end marks a token, determined from absolute list of values
- if ($inError) {
- $value .= $this->formula[$index];
- ++$index;
- if (in_array($value, $ERRORS)) {
- $inError = false;
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_ERROR);
- $value = '';
- }
- continue;
- }
- // scientific notation check
- if (strpos(self::OPERATORS_SN, $this->formula[$index]) !== false) {
- if (strlen($value) > 1) {
- if (preg_match('/^[1-9]{1}(\\.\\d+)?E{1}$/', $this->formula[$index]) != 0) {
- $value .= $this->formula[$index];
- ++$index;
- continue;
- }
- }
- }
- // independent character evaluation (order not important)
- // establish state-dependent character evaluations
- if ($this->formula[$index] == self::QUOTE_DOUBLE) {
- if (strlen($value) > 0) {
- // unexpected
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
- $value = '';
- }
- $inString = true;
- ++$index;
- continue;
- }
- if ($this->formula[$index] == self::QUOTE_SINGLE) {
- if (strlen($value) > 0) {
- // unexpected
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
- $value = '';
- }
- $inPath = true;
- ++$index;
- continue;
- }
- if ($this->formula[$index] == self::BRACKET_OPEN) {
- $inRange = true;
- $value .= self::BRACKET_OPEN;
- ++$index;
- continue;
- }
- if ($this->formula[$index] == self::ERROR_START) {
- if (strlen($value) > 0) {
- // unexpected
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
- $value = '';
- }
- $inError = true;
- $value .= self::ERROR_START;
- ++$index;
- continue;
- }
- // mark start and end of arrays and array rows
- if ($this->formula[$index] == self::BRACE_OPEN) {
- if (strlen($value) > 0) {
- // unexpected
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);
- $value = '';
- }
- $tmp = new FormulaToken('ARRAY', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START);
- $tokens1[] = $tmp;
- $stack[] = clone $tmp;
- $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START);
- $tokens1[] = $tmp;
- $stack[] = clone $tmp;
- ++$index;
- continue;
- }
- if ($this->formula[$index] == self::SEMICOLON) {
- if (strlen($value) > 0) {
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
- $value = '';
- }
- $tmp = array_pop($stack);
- $tmp->setValue('');
- $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
- $tokens1[] = $tmp;
- $tmp = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT);
- $tokens1[] = $tmp;
- $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START);
- $tokens1[] = $tmp;
- $stack[] = clone $tmp;
- ++$index;
- continue;
- }
- if ($this->formula[$index] == self::BRACE_CLOSE) {
- if (strlen($value) > 0) {
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
- $value = '';
- }
- $tmp = array_pop($stack);
- $tmp->setValue('');
- $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
- $tokens1[] = $tmp;
- $tmp = array_pop($stack);
- $tmp->setValue('');
- $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
- $tokens1[] = $tmp;
- ++$index;
- continue;
- }
- // trim white-space
- if ($this->formula[$index] == self::WHITESPACE) {
- if (strlen($value) > 0) {
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
- $value = '';
- }
- $tokens1[] = new FormulaToken('', FormulaToken::TOKEN_TYPE_WHITESPACE);
- ++$index;
- while (($this->formula[$index] == self::WHITESPACE) && ($index < $formulaLength)) {
- ++$index;
- }
- continue;
- }
- // multi-character comparators
- if (($index + 2) <= $formulaLength) {
- if (in_array(substr($this->formula, $index, 2), $COMPARATORS_MULTI)) {
- if (strlen($value) > 0) {
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
- $value = '';
- }
- $tokens1[] = new FormulaToken(substr($this->formula, $index, 2), FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_LOGICAL);
- $index += 2;
- continue;
- }
- }
- // standard infix operators
- if (strpos(self::OPERATORS_INFIX, $this->formula[$index]) !== false) {
- if (strlen($value) > 0) {
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
- $value = '';
- }
- $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORINFIX);
- ++$index;
- continue;
- }
- // standard postfix operators (only one)
- if (strpos(self::OPERATORS_POSTFIX, $this->formula[$index]) !== false) {
- if (strlen($value) > 0) {
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
- $value = '';
- }
- $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX);
- ++$index;
- continue;
- }
- // start subexpression or function
- if ($this->formula[$index] == self::PAREN_OPEN) {
- if (strlen($value) > 0) {
- $tmp = new FormulaToken($value, FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START);
- $tokens1[] = $tmp;
- $stack[] = clone $tmp;
- $value = '';
- } else {
- $tmp = new FormulaToken('', FormulaToken::TOKEN_TYPE_SUBEXPRESSION, FormulaToken::TOKEN_SUBTYPE_START);
- $tokens1[] = $tmp;
- $stack[] = clone $tmp;
- }
- ++$index;
- continue;
- }
- // function, subexpression, or array parameters, or operand unions
- if ($this->formula[$index] == self::COMMA) {
- if (strlen($value) > 0) {
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
- $value = '';
- }
- $tmp = array_pop($stack);
- $tmp->setValue('');
- $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
- $stack[] = $tmp;
- if ($tmp->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) {
- $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_UNION);
- } else {
- $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT);
- }
- ++$index;
- continue;
- }
- // stop subexpression
- if ($this->formula[$index] == self::PAREN_CLOSE) {
- if (strlen($value) > 0) {
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
- $value = '';
- }
- $tmp = array_pop($stack);
- $tmp->setValue('');
- $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);
- $tokens1[] = $tmp;
- ++$index;
- continue;
- }
- // token accumulation
- $value .= $this->formula[$index];
- ++$index;
- }
- // dump remaining accumulation
- if (strlen($value) > 0) {
- $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);
- }
- // move tokenList to new set, excluding unnecessary white-space tokens and converting necessary ones to intersections
- $tokenCount = count($tokens1);
- for ($i = 0; $i < $tokenCount; ++$i) {
- $token = $tokens1[$i];
- if (isset($tokens1[$i - 1])) {
- $previousToken = $tokens1[$i - 1];
- } else {
- $previousToken = null;
- }
- if (isset($tokens1[$i + 1])) {
- $nextToken = $tokens1[$i + 1];
- } else {
- $nextToken = null;
- }
- if ($token === null) {
- continue;
- }
- if ($token->getTokenType() != FormulaToken::TOKEN_TYPE_WHITESPACE) {
- $tokens2[] = $token;
- continue;
- }
- if ($previousToken === null) {
- continue;
- }
- if (!(
- (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
- (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
- ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)
- )) {
- continue;
- }
- if ($nextToken === null) {
- continue;
- }
- if (!(
- (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) ||
- (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) ||
- ($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)
- )) {
- continue;
- }
- $tokens2[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_INTERSECTION);
- }
- // move tokens to final list, switching infix "-" operators to prefix when appropriate, switching infix "+" operators
- // to noop when appropriate, identifying operand and infix-operator subtypes, and pulling "@" from function names
- $this->tokens = [];
- $tokenCount = count($tokens2);
- for ($i = 0; $i < $tokenCount; ++$i) {
- $token = $tokens2[$i];
- if (isset($tokens2[$i - 1])) {
- $previousToken = $tokens2[$i - 1];
- } else {
- $previousToken = null;
- }
- if (isset($tokens2[$i + 1])) {
- $nextToken = $tokens2[$i + 1];
- } else {
- $nextToken = null;
- }
- if ($token === null) {
- continue;
- }
- if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '-') {
- if ($i == 0) {
- $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX);
- } elseif ((($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) &&
- ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
- (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) &&
- ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
- ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) ||
- ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)) {
- $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH);
- } else {
- $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX);
- }
- $this->tokens[] = $token;
- continue;
- }
- if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '+') {
- if ($i == 0) {
- continue;
- } elseif ((($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) &&
- ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
- (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) &&
- ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||
- ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) ||
- ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)) {
- $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH);
- } else {
- continue;
- }
- $this->tokens[] = $token;
- continue;
- }
- if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX &&
- $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING) {
- if (strpos('<>=', substr($token->getValue(), 0, 1)) !== false) {
- $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL);
- } elseif ($token->getValue() == '&') {
- $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_CONCATENATION);
- } else {
- $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH);
- }
- $this->tokens[] = $token;
- continue;
- }
- if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND &&
- $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING) {
- if (!is_numeric($token->getValue())) {
- if (strtoupper($token->getValue()) == 'TRUE' || strtoupper($token->getValue()) == 'FALSE') {
- $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL);
- } else {
- $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_RANGE);
- }
- } else {
- $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_NUMBER);
- }
- $this->tokens[] = $token;
- continue;
- }
- if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) {
- if (strlen($token->getValue()) > 0) {
- if (substr($token->getValue(), 0, 1) == '@') {
- $token->setValue(substr($token->getValue(), 1));
- }
- }
- }
- $this->tokens[] = $token;
- }
- }
- }
|