math js get token

node v10.24.1

version: 3.0.0

// Namespace object; the character-classification helpers used by the
// tokenizer (parse.isAlpha, parse.isDigit, ...) are attached to it as properties.
const parse = {}

// token types enumeration
const TOKENTYPE = {
  NULL: 0,
  DELIMITER: 1,
  NUMBER: 2,
  SYMBOL: 3,
  UNKNOWN: 4
}

// map with all delimiters
const DELIMITERS = {
  ',': true,
  '(': true,
  ')': true,
  '[': true,
  ']': true,
  '{': true,
  '}': true,
  '"': true,
  '\'': true,
  ';': true,

  '+': true,
  '-': true,
  '*': true,
  '.*': true,
  '/': true,
  './': true,
  '%': true,
  '^': true,
  '.^': true,
  '~': true,
  '!': true,
  '&': true,
  '|': true,
  '^|': true,
  '=': true,
  ':': true,
  '?': true,

  '==': true,
  '!=': true,
  '<': true,
  '>': true,
  '<=': true,
  '>=': true,

  '<<': true,
  '>>': true,
  '>>>': true
}

// map with all named delimiters
const NAMED_DELIMITERS = {
  'mod': true,
  'to': true,
  'in': true,
  'and': true,
  'xor': true,
  'or': true,
  'not': true
}

// map from the textual name of a constant to its JavaScript value
const CONSTANTS = {
  'true': true,
  'false': false,
  'null': null,
  'undefined': undefined
}

// names of the numeric constants
const NUMERIC_CONSTANTS = [
  'NaN',
  'Infinity'
]

/**
 * Create a fresh tokenizer state. Every call returns a new object.
 *
 * @returns {State}
 */
function initialState () {
  return {
    extraNodes: {}, // current extra nodes, must be careful not to mutate
    expression: '', // current expression
    comment: '', // last parsed comment
    index: 0, // current index in expr
    token: '', // current token
    tokenType: TOKENTYPE.NULL, // type of the token
    nestingLevel: 0, // level of nesting inside parameters, used to ignore newline characters
    conditionalLevel: null // when a conditional is being parsed, the level of the conditional is stored here
  }
}

/**
 * View up to `length` characters of the expression starting at the current
 * character. Fewer characters are returned when the end of the expression
 * is reached.
 *
 * @param {State} state
 * @param {number} [length=1]  Number of characters to view
 * @returns {string}
 * @private
 */
function currentString (state, length = 1) {
  const start = state.index
  // slice() replaces the deprecated substr(); the end offset is start + length
  return state.expression.slice(start, start + length)
}

/**
 * View the current character. Returns '' if end of expression is reached.
 *
 * @param {State} state
 * @returns {string}
 * @private
 */
function currentCharacter (state) {
  return currentString(state, 1)
}

/**
 * Move the current position one character forward.
 * Nothing is returned: read the new current character with
 * currentCharacter(state), which yields '' past the end of the expression.
 *
 * @param {State} state
 * @private
 */
function next (state) {
  state.index++
}

/**
 * Preview the character before the current one without moving the position.
 * Returns '' when there is no such character.
 *
 * @param {State} state
 * @return {string} cPrev
 * @private
 */
function prevCharacter (state) {
  return state.expression.charAt(state.index - 1)
}

/**
 * Preview the character after the current one without moving the position.
 * Returns '' when there is no such character.
 *
 * @param {State} state
 * @return {string} cNext
 * @private
 */
function nextCharacter (state) {
  return state.expression.charAt(state.index + 1)
}

/**
 * Build a SyntaxError for the token currently held in the state.
 * The 1-based column where the token starts is appended to the message and
 * also stored on the error as the property `char`.
 *
 * @param {State} state
 * @param {string} message
 * @return {SyntaxError}
 * @private
 */
function createSyntaxError (state, message) {
  const column = state.index - state.token.length + 1
  const error = new SyntaxError(message + ' (char ' + column + ')')
  error['char'] = column
  return error
}

/**
 * Read the next token from `state.expression`, starting at `state.index`.
 *
 * The result is written onto the state: `state.token` receives the token
 * text and `state.tokenType` one of the TOKENTYPE values. `state.comment`
 * receives the text of any `#` comment skipped in front of the token.
 * When the end of the expression is reached, an empty DELIMITER token is
 * produced.
 *
 * @param {State} state
 * @throws {SyntaxError} when an exponent is malformed or when characters are
 *                       found that cannot start any token
 * @private
 */
function getToken (state) {
  state.tokenType = TOKENTYPE.NULL
  state.token = ''
  state.comment = ''

  // Skip over ignored characters. Comments and whitespace are skipped
  // alternately until neither applies, so that a newline which ends a comment
  // inside parameters (nestingLevel > 0) is ignored as well.
  let skippedWhitespace
  do {
    // comment: runs from '#' up to, but not including, the end of the line
    if (currentCharacter(state) === '#') {
      while (currentCharacter(state) !== '\n' && currentCharacter(state) !== '') {
        state.comment += currentCharacter(state)
        next(state)
      }
    }

    // whitespace: space, tab, and newline when inside parameters
    skippedWhitespace = parse.isWhitespace(currentCharacter(state), state.nestingLevel)
    if (skippedWhitespace) {
      next(state)
    }
  } while (skippedWhitespace)

  // check for end of expression
  if (currentCharacter(state) === '') {
    // token is still empty
    state.tokenType = TOKENTYPE.DELIMITER
    return
  }

  // check for new line character
  if (currentCharacter(state) === '\n' && !state.nestingLevel) {
    state.tokenType = TOKENTYPE.DELIMITER
    state.token = currentCharacter(state)
    next(state)
    return
  }

  const c1 = currentCharacter(state)
  const c2 = currentString(state, 2)
  const c3 = currentString(state, 3)

  // check for delimiters consisting of 3 characters
  // (the longest match is tried first, so '>>>' wins over '>>' and '>')
  if (c3.length === 3 && DELIMITERS[c3]) {
    state.tokenType = TOKENTYPE.DELIMITER
    state.token = c3
    next(state)
    next(state)
    next(state)
    return
  }

  // check for delimiters consisting of 2 characters
  if (c2.length === 2 && DELIMITERS[c2]) {
    state.tokenType = TOKENTYPE.DELIMITER
    state.token = c2
    next(state)
    next(state)
    return
  }

  // check for delimiters consisting of 1 character
  if (DELIMITERS[c1]) {
    state.tokenType = TOKENTYPE.DELIMITER
    state.token = c1
    next(state)
    return
  }

  // check for a number
  if (parse.isDigitDot(c1)) {
    state.tokenType = TOKENTYPE.NUMBER

    // get number, can have a single dot
    if (currentCharacter(state) === '.') {
      state.token += currentCharacter(state)
      next(state)

      if (!parse.isDigit(currentCharacter(state))) {
        // this is no number, it is just a dot (can be dot notation).
        // Return right away: falling through to the exponent check below
        // would glue a following "e2" onto the dot or raise a bogus error.
        state.tokenType = TOKENTYPE.DELIMITER
        return
      }
    } else {
      while (parse.isDigit(currentCharacter(state))) {
        state.token += currentCharacter(state)
        next(state)
      }
      if (parse.isDecimalMark(currentCharacter(state), nextCharacter(state))) {
        state.token += currentCharacter(state)
        next(state)
      }
    }

    // digits after the decimal mark
    while (parse.isDigit(currentCharacter(state))) {
      state.token += currentCharacter(state)
      next(state)
    }

    // check for exponential notation like "2.3e-4", "1.23e50" or "2e+4"
    if (currentCharacter(state) === 'E' || currentCharacter(state) === 'e') {
      if (parse.isDigit(nextCharacter(state)) || nextCharacter(state) === '-' || nextCharacter(state) === '+') {
        state.token += currentCharacter(state)
        next(state)

        if (currentCharacter(state) === '+' || currentCharacter(state) === '-') {
          state.token += currentCharacter(state)
          next(state)
        }

        // Scientific notation MUST be followed by an exponent
        if (!parse.isDigit(currentCharacter(state))) {
          throw createSyntaxError(state, 'Digit expected, got "' + currentCharacter(state) + '"')
        }

        while (parse.isDigit(currentCharacter(state))) {
          state.token += currentCharacter(state)
          next(state)
        }

        // the exponent itself must not contain a decimal mark
        if (parse.isDecimalMark(currentCharacter(state), nextCharacter(state))) {
          throw createSyntaxError(state, 'Digit expected, got "' + currentCharacter(state) + '"')
        }
      } else if (nextCharacter(state) === '.') {
        // step onto the dot so that the error message reports it
        next(state)
        throw createSyntaxError(state, 'Digit expected, got "' + currentCharacter(state) + '"')
      }
    }

    return
  }

  // check for variables, functions, named operators
  if (parse.isAlpha(currentCharacter(state), prevCharacter(state), nextCharacter(state))) {
    while (parse.isAlpha(currentCharacter(state), prevCharacter(state), nextCharacter(state)) || parse.isDigit(currentCharacter(state))) {
      state.token += currentCharacter(state)
      next(state)
    }

    if (Object.prototype.hasOwnProperty.call(NAMED_DELIMITERS, state.token)) {
      state.tokenType = TOKENTYPE.DELIMITER
    } else {
      state.tokenType = TOKENTYPE.SYMBOL
    }

    return
  }

  // something unknown is found, wrong characters -> a syntax error
  // (the remainder of the expression is collected for the error message)
  state.tokenType = TOKENTYPE.UNKNOWN
  while (currentCharacter(state) !== '') {
    state.token += currentCharacter(state)
    next(state)
  }
  throw createSyntaxError(state, 'Syntax error in part "' + state.token + '"')
}

/**
 * Test whether the character c can be part of a symbol name. This is the
 * case when c is a valid latin, greek, or letter-like character, or when c
 * is one half of a surrogate pair forming a unicode math symbol: either the
 * high half followed by cNext, or the low half preceded by cPrev.
 * @param {string} c      the character to test
 * @param {string} cPrev  the character before c
 * @param {string} cNext  the character after c
 * @return {boolean}
 */
parse.isAlpha = function isAlpha (c, cPrev, cNext) {
  return parse.isValidLatinOrGreek(c) ||
    parse.isValidMathSymbol(c, cNext) ||
    parse.isValidMathSymbol(cPrev, c)
}

/**
 * Test whether a character is a valid latin, greek, or letter-like character
 * @param {string} c
 * @return {boolean}
 */
parse.isValidLatinOrGreek = function isValidLatinOrGreek (c) {
  return /^[a-zA-Z_$\u00C0-\u02AF\u0370-\u03FF\u2100-\u214F]$/.test(c)
}

/**
 * Test whether two given 16 bit characters form a surrogate pair of a
 * unicode math symbol.
 *
 * https://unicode-table.com/en/
 * https://www.wikiwand.com/en/Mathematical_operators_and_symbols_in_Unicode
 *
 * Note: In ES6 will be unicode aware:
 * https://stackoverflow.com/questions/280712/javascript-unicode-regexes
 * https://mathiasbynens.be/notes/es6-unicode-regex
 *
 * @param {string} high
 * @param {string} low
 * @return {boolean}
 */
parse.isValidMathSymbol = function isValidMathSymbol (high, low) {
  // high must be \uD835, low must be a low surrogate,
  // and low must not be one of the excluded code units listed below
  return /^[\uD835]$/.test(high) &&
    /^[\uDC00-\uDFFF]$/.test(low) &&
    /^[^\uDC55\uDC9D\uDCA0\uDCA1\uDCA3\uDCA4\uDCA7\uDCA8\uDCAD\uDCBA\uDCBC\uDCC4\uDD06\uDD0B\uDD0C\uDD15\uDD1D\uDD3A\uDD3F\uDD45\uDD47-\uDD49\uDD51\uDEA6\uDEA7\uDFCC\uDFCD]$/.test(low)
}

/**
 * Check whether given character c is a white space character: space, tab, or enter
 * A newline only counts as white space when nestingLevel is larger than zero.
 * @param {string} c
 * @param {number} nestingLevel
 * @return {boolean}
 */
parse.isWhitespace = function isWhitespace (c, nestingLevel) {
  // TODO: also take '\r' carriage return as newline? Or does that give problems on mac?
  return c === ' ' || c === '\t' || (c === '\n' && nestingLevel > 0)
}

/**
 * Test whether the character c is a decimal mark (dot).
 * This is the case when it's not the start of a delimiter '.*', './', or '.^'
 * @param {string} c
 * @param {string} cNext
 * @return {boolean}
 */
parse.isDecimalMark = function isDecimalMark (c, cNext) {
  return c === '.' && cNext !== '/' && cNext !== '*' && cNext !== '^'
}

/**
 * checks if the given char c is a digit or dot
 * @param {string} c   a string with one character
 * @return {boolean}
 */
parse.isDigitDot = function isDigitDot (c) {
  return ((c >= '0' && c <= '9') || c === '.')
}

/**
 * checks if the given char c is a digit
 * @param {string} c   a string with one character
 * @return {boolean}
 */
parse.isDigit = function isDigit (c) {
  return (c >= '0' && c <= '9')
}

module.exports = { getToken, initialState }

math js get token

no comments