diff options
Diffstat (limited to 'webapp/non_npm_dependencies/katex/src/Parser.js')
-rw-r--r-- | webapp/non_npm_dependencies/katex/src/Parser.js | 737 |
1 files changed, 737 insertions, 0 deletions
diff --git a/webapp/non_npm_dependencies/katex/src/Parser.js b/webapp/non_npm_dependencies/katex/src/Parser.js new file mode 100644 index 000000000..15b808e17 --- /dev/null +++ b/webapp/non_npm_dependencies/katex/src/Parser.js @@ -0,0 +1,737 @@ +/* eslint no-constant-condition:0 */ +var functions = require("./functions"); +var environments = require("./environments"); +var Lexer = require("./Lexer"); +var symbols = require("./symbols"); +var utils = require("./utils"); + +var parseData = require("./parseData"); +var ParseError = require("./ParseError"); + +/** + * This file contains the parser used to parse out a TeX expression from the + * input. Since TeX isn't context-free, standard parsers don't work particularly + * well. + * + * The strategy of this parser is as such: + * + * The main functions (the `.parse...` ones) take a position in the current + * parse string to parse tokens from. The lexer (found in Lexer.js, stored at + * this.lexer) also supports pulling out tokens at arbitrary places. When + * individual tokens are needed at a position, the lexer is called to pull out a + * token, which is then used. + * + * The parser has a property called "mode" indicating the mode that + * the parser is currently in. Currently it has to be one of "math" or + * "text", which denotes whether the current environment is a math-y + * one or a text-y one (e.g. inside \text). Currently, this serves to + * limit the functions which can be used in text mode. + * + * The main functions then return an object which contains the useful data that + * was parsed at its given point, and a new position at the end of the parsed + * data. The main functions can call each other and continue the parsing by + * using the returned position as a new starting point. + * + * There are also extra `.handle...` functions, which pull out some reused + * functionality into self-contained functions. + * + * The earlier functions return ParseNodes. + * The later functions (which are called deeper in the parse) sometimes return + * ParseFuncOrArgument, which contain a ParseNode as well as some data about + * whether the parsed object is a function which is missing some arguments, or a + * standalone object which can be used as an argument to another function. + */ + +/** + * Main Parser class + */ +function Parser(input, settings) { + // Make a new lexer + this.lexer = new Lexer(input); + // Store the settings for use in parsing + this.settings = settings; +} + +var ParseNode = parseData.ParseNode; + +/** + * An initial function (without its arguments), or an argument to a function. + * The `result` argument should be a ParseNode. + */ +function ParseFuncOrArgument(result, isFunction) { + this.result = result; + // Is this a function (i.e. is it something defined in functions.js)? + this.isFunction = isFunction; +} + +/** + * Checks a result to make sure it has the right type, and throws an + * appropriate error otherwise. + * + * @param {boolean=} consume whether to consume the expected token, + * defaults to true + */ +Parser.prototype.expect = function(text, consume) { + if (this.nextToken.text !== text) { + throw new ParseError( + "Expected '" + text + "', got '" + this.nextToken.text + "'", + this.lexer, this.nextToken.position + ); + } + if (consume !== false) { + this.consume(); + } +}; + +/** + * Considers the current look ahead token as consumed, + * and fetches the one after that as the new look ahead. + */ +Parser.prototype.consume = function() { + this.pos = this.nextToken.position; + this.nextToken = this.lexer.lex(this.pos, this.mode); +}; + +/** + * Main parsing function, which parses an entire input. + * + * @return {?Array.<ParseNode>} + */ +Parser.prototype.parse = function() { + // Try to parse the input + this.mode = "math"; + this.pos = 0; + this.nextToken = this.lexer.lex(this.pos, this.mode); + var parse = this.parseInput(); + return parse; +}; + +/** + * Parses an entire input tree. + */ +Parser.prototype.parseInput = function() { + // Parse an expression + var expression = this.parseExpression(false); + // If we succeeded, make sure there's an EOF at the end + this.expect("EOF", false); + return expression; +}; + +var endOfExpression = ["}", "\\end", "\\right", "&", "\\\\", "\\cr"]; + +/** + * Parses an "expression", which is a list of atoms. + * + * @param {boolean} breakOnInfix Should the parsing stop when we hit infix + * nodes? This happens when functions have higher precendence + * than infix nodes in implicit parses. + * + * @param {?string} breakOnToken The token that the expression should end with, + * or `null` if something else should end the expression. + * + * @return {ParseNode} + */ +Parser.prototype.parseExpression = function(breakOnInfix, breakOnToken) { + var body = []; + // Keep adding atoms to the body until we can't parse any more atoms (either + // we reached the end, a }, or a \right) + while (true) { + var lex = this.nextToken; + var pos = this.pos; + if (endOfExpression.indexOf(lex.text) !== -1) { + break; + } + if (breakOnToken && lex.text === breakOnToken) { + break; + } + var atom = this.parseAtom(); + if (!atom) { + if (!this.settings.throwOnError && lex.text[0] === "\\") { + var errorNode = this.handleUnsupportedCmd(); + body.push(errorNode); + + pos = lex.position; + continue; + } + + break; + } + if (breakOnInfix && atom.type === "infix") { + // rewind so we can parse the infix atom again + this.pos = pos; + this.nextToken = lex; + break; + } + body.push(atom); + } + return this.handleInfixNodes(body); +}; + +/** + * Rewrites infix operators such as \over with corresponding commands such + * as \frac. + * + * There can only be one infix operator per group. If there's more than one + * then the expression is ambiguous. This can be resolved by adding {}. + * + * @returns {Array} + */ +Parser.prototype.handleInfixNodes = function(body) { + var overIndex = -1; + var funcName; + + for (var i = 0; i < body.length; i++) { + var node = body[i]; + if (node.type === "infix") { + if (overIndex !== -1) { + throw new ParseError("only one infix operator per group", + this.lexer, -1); + } + overIndex = i; + funcName = node.value.replaceWith; + } + } + + if (overIndex !== -1) { + var numerNode; + var denomNode; + + var numerBody = body.slice(0, overIndex); + var denomBody = body.slice(overIndex + 1); + + if (numerBody.length === 1 && numerBody[0].type === "ordgroup") { + numerNode = numerBody[0]; + } else { + numerNode = new ParseNode("ordgroup", numerBody, this.mode); + } + + if (denomBody.length === 1 && denomBody[0].type === "ordgroup") { + denomNode = denomBody[0]; + } else { + denomNode = new ParseNode("ordgroup", denomBody, this.mode); + } + + var value = this.callFunction( + funcName, [numerNode, denomNode], null); + return [new ParseNode(value.type, value, this.mode)]; + } else { + return body; + } +}; + +// The greediness of a superscript or subscript +var SUPSUB_GREEDINESS = 1; + +/** + * Handle a subscript or superscript with nice errors. + */ +Parser.prototype.handleSupSubscript = function(name) { + var symbol = this.nextToken.text; + var symPos = this.pos; + this.consume(); + var group = this.parseGroup(); + + if (!group) { + if (!this.settings.throwOnError && this.nextToken.text[0] === "\\") { + return this.handleUnsupportedCmd(); + } else { + throw new ParseError( + "Expected group after '" + symbol + "'", + this.lexer, + symPos + 1 + ); + } + } else if (group.isFunction) { + // ^ and _ have a greediness, so handle interactions with functions' + // greediness + var funcGreediness = functions[group.result].greediness; + if (funcGreediness > SUPSUB_GREEDINESS) { + return this.parseFunction(group); + } else { + throw new ParseError( + "Got function '" + group.result + "' with no arguments " + + "as " + name, + this.lexer, symPos + 1); + } + } else { + return group.result; + } +}; + +/** + * Converts the textual input of an unsupported command into a text node + * contained within a color node whose color is determined by errorColor + */ +Parser.prototype.handleUnsupportedCmd = function() { + var text = this.nextToken.text; + var textordArray = []; + + for (var i = 0; i < text.length; i++) { + textordArray.push(new ParseNode("textord", text[i], "text")); + } + + var textNode = new ParseNode( + "text", + { + body: textordArray, + type: "text", + }, + this.mode); + + var colorNode = new ParseNode( + "color", + { + color: this.settings.errorColor, + value: [textNode], + type: "color", + }, + this.mode); + + this.consume(); + return colorNode; +}; + +/** + * Parses a group with optional super/subscripts. + * + * @return {?ParseNode} + */ +Parser.prototype.parseAtom = function() { + // The body of an atom is an implicit group, so that things like + // \left(x\right)^2 work correctly. + var base = this.parseImplicitGroup(); + + // In text mode, we don't have superscripts or subscripts + if (this.mode === "text") { + return base; + } + + // Note that base may be empty (i.e. null) at this point. + + var superscript; + var subscript; + while (true) { + // Lex the first token + var lex = this.nextToken; + + if (lex.text === "\\limits" || lex.text === "\\nolimits") { + // We got a limit control + if (!base || base.type !== "op") { + throw new ParseError( + "Limit controls must follow a math operator", + this.lexer, this.pos); + } else { + var limits = lex.text === "\\limits"; + base.value.limits = limits; + base.value.alwaysHandleSupSub = true; + } + this.consume(); + } else if (lex.text === "^") { + // We got a superscript start + if (superscript) { + throw new ParseError( + "Double superscript", this.lexer, this.pos); + } + superscript = this.handleSupSubscript("superscript"); + } else if (lex.text === "_") { + // We got a subscript start + if (subscript) { + throw new ParseError( + "Double subscript", this.lexer, this.pos); + } + subscript = this.handleSupSubscript("subscript"); + } else if (lex.text === "'") { + // We got a prime + var prime = new ParseNode("textord", "\\prime", this.mode); + + // Many primes can be grouped together, so we handle this here + var primes = [prime]; + this.consume(); + // Keep lexing tokens until we get something that's not a prime + while (this.nextToken.text === "'") { + // For each one, add another prime to the list + primes.push(prime); + this.consume(); + } + // Put them into an ordgroup as the superscript + superscript = new ParseNode("ordgroup", primes, this.mode); + } else { + // If it wasn't ^, _, or ', stop parsing super/subscripts + break; + } + } + + if (superscript || subscript) { + // If we got either a superscript or subscript, create a supsub + return new ParseNode("supsub", { + base: base, + sup: superscript, + sub: subscript, + }, this.mode); + } else { + // Otherwise return the original body + return base; + } +}; + +// A list of the size-changing functions, for use in parseImplicitGroup +var sizeFuncs = [ + "\\tiny", "\\scriptsize", "\\footnotesize", "\\small", "\\normalsize", + "\\large", "\\Large", "\\LARGE", "\\huge", "\\Huge", +]; + +// A list of the style-changing functions, for use in parseImplicitGroup +var styleFuncs = [ + "\\displaystyle", "\\textstyle", "\\scriptstyle", "\\scriptscriptstyle", +]; + +/** + * Parses an implicit group, which is a group that starts at the end of a + * specified, and ends right before a higher explicit group ends, or at EOL. It + * is used for functions that appear to affect the current style, like \Large or + * \textrm, where instead of keeping a style we just pretend that there is an + * implicit grouping after it until the end of the group. E.g. + * small text {\Large large text} small text again + * It is also used for \left and \right to get the correct grouping. + * + * @return {?ParseNode} + */ +Parser.prototype.parseImplicitGroup = function() { + var start = this.parseSymbol(); + + if (start == null) { + // If we didn't get anything we handle, fall back to parseFunction + return this.parseFunction(); + } + + var func = start.result; + var body; + + if (func === "\\left") { + // If we see a left: + // Parse the entire left function (including the delimiter) + var left = this.parseFunction(start); + // Parse out the implicit body + body = this.parseExpression(false); + // Check the next token + this.expect("\\right", false); + var right = this.parseFunction(); + return new ParseNode("leftright", { + body: body, + left: left.value.value, + right: right.value.value, + }, this.mode); + } else if (func === "\\begin") { + // begin...end is similar to left...right + var begin = this.parseFunction(start); + var envName = begin.value.name; + if (!environments.hasOwnProperty(envName)) { + throw new ParseError( + "No such environment: " + envName, + this.lexer, begin.value.namepos); + } + // Build the environment object. Arguments and other information will + // be made available to the begin and end methods using properties. + var env = environments[envName]; + var args = this.parseArguments("\\begin{" + envName + "}", env); + var context = { + mode: this.mode, + envName: envName, + parser: this, + lexer: this.lexer, + positions: args.pop(), + }; + var result = env.handler(context, args); + this.expect("\\end", false); + var end = this.parseFunction(); + if (end.value.name !== envName) { + throw new ParseError( + "Mismatch: \\begin{" + envName + "} matched " + + "by \\end{" + end.value.name + "}", + this.lexer /* , end.value.namepos */); + // TODO: Add position to the above line and adjust test case, + // requires #385 to get merged first + } + result.position = end.position; + return result; + } else if (utils.contains(sizeFuncs, func)) { + // If we see a sizing function, parse out the implict body + body = this.parseExpression(false); + return new ParseNode("sizing", { + // Figure out what size to use based on the list of functions above + size: "size" + (utils.indexOf(sizeFuncs, func) + 1), + value: body, + }, this.mode); + } else if (utils.contains(styleFuncs, func)) { + // If we see a styling function, parse out the implict body + body = this.parseExpression(true); + return new ParseNode("styling", { + // Figure out what style to use by pulling out the style from + // the function name + style: func.slice(1, func.length - 5), + value: body, + }, this.mode); + } else { + // Defer to parseFunction if it's not a function we handle + return this.parseFunction(start); + } +}; + +/** + * Parses an entire function, including its base and all of its arguments. + * The base might either have been parsed already, in which case + * it is provided as an argument, or it's the next group in the input. + * + * @param {ParseFuncOrArgument=} baseGroup optional as described above + * @return {?ParseNode} + */ +Parser.prototype.parseFunction = function(baseGroup) { + if (!baseGroup) { + baseGroup = this.parseGroup(); + } + + if (baseGroup) { + if (baseGroup.isFunction) { + var func = baseGroup.result; + var funcData = functions[func]; + if (this.mode === "text" && !funcData.allowedInText) { + throw new ParseError( + "Can't use function '" + func + "' in text mode", + this.lexer, baseGroup.position); + } + + var args = this.parseArguments(func, funcData); + var result = this.callFunction(func, args, args.pop()); + return new ParseNode(result.type, result, this.mode); + } else { + return baseGroup.result; + } + } else { + return null; + } +}; + +/** + * Call a function handler with a suitable context and arguments. + */ +Parser.prototype.callFunction = function(name, args, positions) { + var context = { + funcName: name, + parser: this, + lexer: this.lexer, + positions: positions, + }; + return functions[name].handler(context, args); +}; + +/** + * Parses the arguments of a function or environment + * + * @param {string} func "\name" or "\begin{name}" + * @param {{numArgs:number,numOptionalArgs:number|undefined}} funcData + * @return the array of arguments, with the list of positions as last element + */ +Parser.prototype.parseArguments = function(func, funcData) { + var totalArgs = funcData.numArgs + funcData.numOptionalArgs; + if (totalArgs === 0) { + return [[this.pos]]; + } + + var baseGreediness = funcData.greediness; + var positions = [this.pos]; + var args = []; + + for (var i = 0; i < totalArgs; i++) { + var argType = funcData.argTypes && funcData.argTypes[i]; + var arg; + if (i < funcData.numOptionalArgs) { + if (argType) { + arg = this.parseSpecialGroup(argType, true); + } else { + arg = this.parseOptionalGroup(); + } + if (!arg) { + args.push(null); + positions.push(this.pos); + continue; + } + } else { + if (argType) { + arg = this.parseSpecialGroup(argType); + } else { + arg = this.parseGroup(); + } + if (!arg) { + if (!this.settings.throwOnError && + this.nextToken.text[0] === "\\") { + arg = new ParseFuncOrArgument( + this.handleUnsupportedCmd(this.nextToken.text), + false); + } else { + throw new ParseError( + "Expected group after '" + func + "'", + this.lexer, this.pos); + } + } + } + var argNode; + if (arg.isFunction) { + var argGreediness = + functions[arg.result].greediness; + if (argGreediness > baseGreediness) { + argNode = this.parseFunction(arg); + } else { + throw new ParseError( + "Got function '" + arg.result + "' as " + + "argument to '" + func + "'", + this.lexer, this.pos - 1); + } + } else { + argNode = arg.result; + } + args.push(argNode); + positions.push(this.pos); + } + + args.push(positions); + + return args; +}; + + +/** + * Parses a group when the mode is changing. Takes a position, a new mode, and + * an outer mode that is used to parse the outside. + * + * @return {?ParseFuncOrArgument} + */ +Parser.prototype.parseSpecialGroup = function(innerMode, optional) { + var outerMode = this.mode; + // Handle `original` argTypes + if (innerMode === "original") { + innerMode = outerMode; + } + + if (innerMode === "color" || innerMode === "size") { + // color and size modes are special because they should have braces and + // should only lex a single symbol inside + var openBrace = this.nextToken; + if (optional && openBrace.text !== "[") { + // optional arguments should return null if they don't exist + return null; + } + // The call to expect will lex the token after the '{' in inner mode + this.mode = innerMode; + this.expect(optional ? "[" : "{"); + var inner = this.nextToken; + this.mode = outerMode; + var data; + if (innerMode === "color") { + data = inner.text; + } else { + data = inner.data; + } + this.consume(); // consume the token stored in inner + this.expect(optional ? "]" : "}"); + return new ParseFuncOrArgument( + new ParseNode(innerMode, data, outerMode), + false); + } else if (innerMode === "text") { + // text mode is special because it should ignore the whitespace before + // it + var whitespace = this.lexer.lex(this.pos, "whitespace"); + this.pos = whitespace.position; + } + + // By the time we get here, innerMode is one of "text" or "math". + // We switch the mode of the parser, recurse, then restore the old mode. + this.mode = innerMode; + this.nextToken = this.lexer.lex(this.pos, innerMode); + var res; + if (optional) { + res = this.parseOptionalGroup(); + } else { + res = this.parseGroup(); + } + this.mode = outerMode; + this.nextToken = this.lexer.lex(this.pos, outerMode); + return res; +}; + +/** + * Parses a group, which is either a single nucleus (like "x") or an expression + * in braces (like "{x+y}") + * + * @return {?ParseFuncOrArgument} + */ +Parser.prototype.parseGroup = function() { + // Try to parse an open brace + if (this.nextToken.text === "{") { + // If we get a brace, parse an expression + this.consume(); + var expression = this.parseExpression(false); + // Make sure we get a close brace + this.expect("}"); + return new ParseFuncOrArgument( + new ParseNode("ordgroup", expression, this.mode), + false); + } else { + // Otherwise, just return a nucleus + return this.parseSymbol(); + } +}; + +/** + * Parses a group, which is an expression in brackets (like "[x+y]") + * + * @return {?ParseFuncOrArgument} + */ +Parser.prototype.parseOptionalGroup = function() { + // Try to parse an open bracket + if (this.nextToken.text === "[") { + // If we get a brace, parse an expression + this.consume(); + var expression = this.parseExpression(false, "]"); + // Make sure we get a close bracket + this.expect("]"); + return new ParseFuncOrArgument( + new ParseNode("ordgroup", expression, this.mode), + false); + } else { + // Otherwise, return null, + return null; + } +}; + +/** + * Parse a single symbol out of the string. Here, we handle both the functions + * we have defined, as well as the single character symbols + * + * @return {?ParseFuncOrArgument} + */ +Parser.prototype.parseSymbol = function() { + var nucleus = this.nextToken; + + if (functions[nucleus.text]) { + this.consume(); + // If there exists a function with this name, we return the function and + // say that it is a function. + return new ParseFuncOrArgument( + nucleus.text, + true); + } else if (symbols[this.mode][nucleus.text]) { + this.consume(); + // Otherwise if this is a no-argument function, find the type it + // corresponds to in the symbols map + return new ParseFuncOrArgument( + new ParseNode(symbols[this.mode][nucleus.text].group, + nucleus.text, this.mode), + false); + } else { + return null; + } +}; + +Parser.prototype.ParseNode = ParseNode; + +module.exports = Parser; |