summaryrefslogtreecommitdiffstats
path: root/webapp/non_npm_dependencies/katex/src/Parser.js
diff options
context:
space:
mode:
Diffstat (limited to 'webapp/non_npm_dependencies/katex/src/Parser.js')
-rw-r--r--webapp/non_npm_dependencies/katex/src/Parser.js737
1 files changed, 737 insertions, 0 deletions
diff --git a/webapp/non_npm_dependencies/katex/src/Parser.js b/webapp/non_npm_dependencies/katex/src/Parser.js
new file mode 100644
index 000000000..15b808e17
--- /dev/null
+++ b/webapp/non_npm_dependencies/katex/src/Parser.js
@@ -0,0 +1,737 @@
+/* eslint no-constant-condition:0 */
+var functions = require("./functions");
+var environments = require("./environments");
+var Lexer = require("./Lexer");
+var symbols = require("./symbols");
+var utils = require("./utils");
+
+var parseData = require("./parseData");
+var ParseError = require("./ParseError");
+
+/**
+ * This file contains the parser used to parse out a TeX expression from the
+ * input. Since TeX isn't context-free, standard parsers don't work particularly
+ * well.
+ *
+ * The strategy of this parser is as such:
+ *
+ * The main functions (the `.parse...` ones) take a position in the current
+ * parse string to parse tokens from. The lexer (found in Lexer.js, stored at
+ * this.lexer) also supports pulling out tokens at arbitrary places. When
+ * individual tokens are needed at a position, the lexer is called to pull out a
+ * token, which is then used.
+ *
+ * The parser has a property called "mode" indicating the mode that
+ * the parser is currently in. Currently it has to be one of "math" or
+ * "text", which denotes whether the current environment is a math-y
+ * one or a text-y one (e.g. inside \text). Currently, this serves to
+ * limit the functions which can be used in text mode.
+ *
+ * The main functions then return an object which contains the useful data that
+ * was parsed at its given point, and a new position at the end of the parsed
+ * data. The main functions can call each other and continue the parsing by
+ * using the returned position as a new starting point.
+ *
+ * There are also extra `.handle...` functions, which pull out some reused
+ * functionality into self-contained functions.
+ *
+ * The earlier functions return ParseNodes.
+ * The later functions (which are called deeper in the parse) sometimes return
+ * ParseFuncOrArgument, which contain a ParseNode as well as some data about
+ * whether the parsed object is a function which is missing some arguments, or a
+ * standalone object which can be used as an argument to another function.
+ */
+
+/**
+ * Main Parser class
+ */
+function Parser(input, settings) {
+ // Make a new lexer
+ this.lexer = new Lexer(input);
+ // Store the settings for use in parsing
+ this.settings = settings;
+}
+
+var ParseNode = parseData.ParseNode;
+
+/**
+ * An initial function (without its arguments), or an argument to a function.
+ * The `result` argument should be a ParseNode.
+ */
+function ParseFuncOrArgument(result, isFunction) {
+ this.result = result;
+ // Is this a function (i.e. is it something defined in functions.js)?
+ this.isFunction = isFunction;
+}
+
+/**
+ * Checks a result to make sure it has the right type, and throws an
+ * appropriate error otherwise.
+ *
+ * @param {boolean=} consume whether to consume the expected token,
+ * defaults to true
+ */
+Parser.prototype.expect = function(text, consume) {
+ if (this.nextToken.text !== text) {
+ throw new ParseError(
+ "Expected '" + text + "', got '" + this.nextToken.text + "'",
+ this.lexer, this.nextToken.position
+ );
+ }
+ if (consume !== false) {
+ this.consume();
+ }
+};
+
+/**
+ * Considers the current look ahead token as consumed,
+ * and fetches the one after that as the new look ahead.
+ */
+Parser.prototype.consume = function() {
+ this.pos = this.nextToken.position;
+ this.nextToken = this.lexer.lex(this.pos, this.mode);
+};
+
+/**
+ * Main parsing function, which parses an entire input.
+ *
+ * @return {?Array.<ParseNode>}
+ */
+Parser.prototype.parse = function() {
+ // Try to parse the input
+ this.mode = "math";
+ this.pos = 0;
+ this.nextToken = this.lexer.lex(this.pos, this.mode);
+ var parse = this.parseInput();
+ return parse;
+};
+
+/**
+ * Parses an entire input tree.
+ */
+Parser.prototype.parseInput = function() {
+ // Parse an expression
+ var expression = this.parseExpression(false);
+ // If we succeeded, make sure there's an EOF at the end
+ this.expect("EOF", false);
+ return expression;
+};
+
+var endOfExpression = ["}", "\\end", "\\right", "&", "\\\\", "\\cr"];
+
+/**
+ * Parses an "expression", which is a list of atoms.
+ *
+ * @param {boolean} breakOnInfix Should the parsing stop when we hit infix
+ * nodes? This happens when functions have higher precendence
+ * than infix nodes in implicit parses.
+ *
+ * @param {?string} breakOnToken The token that the expression should end with,
+ * or `null` if something else should end the expression.
+ *
+ * @return {ParseNode}
+ */
+Parser.prototype.parseExpression = function(breakOnInfix, breakOnToken) {
+ var body = [];
+ // Keep adding atoms to the body until we can't parse any more atoms (either
+ // we reached the end, a }, or a \right)
+ while (true) {
+ var lex = this.nextToken;
+ var pos = this.pos;
+ if (endOfExpression.indexOf(lex.text) !== -1) {
+ break;
+ }
+ if (breakOnToken && lex.text === breakOnToken) {
+ break;
+ }
+ var atom = this.parseAtom();
+ if (!atom) {
+ if (!this.settings.throwOnError && lex.text[0] === "\\") {
+ var errorNode = this.handleUnsupportedCmd();
+ body.push(errorNode);
+
+ pos = lex.position;
+ continue;
+ }
+
+ break;
+ }
+ if (breakOnInfix && atom.type === "infix") {
+ // rewind so we can parse the infix atom again
+ this.pos = pos;
+ this.nextToken = lex;
+ break;
+ }
+ body.push(atom);
+ }
+ return this.handleInfixNodes(body);
+};
+
+/**
+ * Rewrites infix operators such as \over with corresponding commands such
+ * as \frac.
+ *
+ * There can only be one infix operator per group. If there's more than one
+ * then the expression is ambiguous. This can be resolved by adding {}.
+ *
+ * @returns {Array}
+ */
+Parser.prototype.handleInfixNodes = function(body) {
+ var overIndex = -1;
+ var funcName;
+
+ for (var i = 0; i < body.length; i++) {
+ var node = body[i];
+ if (node.type === "infix") {
+ if (overIndex !== -1) {
+ throw new ParseError("only one infix operator per group",
+ this.lexer, -1);
+ }
+ overIndex = i;
+ funcName = node.value.replaceWith;
+ }
+ }
+
+ if (overIndex !== -1) {
+ var numerNode;
+ var denomNode;
+
+ var numerBody = body.slice(0, overIndex);
+ var denomBody = body.slice(overIndex + 1);
+
+ if (numerBody.length === 1 && numerBody[0].type === "ordgroup") {
+ numerNode = numerBody[0];
+ } else {
+ numerNode = new ParseNode("ordgroup", numerBody, this.mode);
+ }
+
+ if (denomBody.length === 1 && denomBody[0].type === "ordgroup") {
+ denomNode = denomBody[0];
+ } else {
+ denomNode = new ParseNode("ordgroup", denomBody, this.mode);
+ }
+
+ var value = this.callFunction(
+ funcName, [numerNode, denomNode], null);
+ return [new ParseNode(value.type, value, this.mode)];
+ } else {
+ return body;
+ }
+};
+
+// The greediness of a superscript or subscript
+var SUPSUB_GREEDINESS = 1;
+
+/**
+ * Handle a subscript or superscript with nice errors.
+ */
+Parser.prototype.handleSupSubscript = function(name) {
+ var symbol = this.nextToken.text;
+ var symPos = this.pos;
+ this.consume();
+ var group = this.parseGroup();
+
+ if (!group) {
+ if (!this.settings.throwOnError && this.nextToken.text[0] === "\\") {
+ return this.handleUnsupportedCmd();
+ } else {
+ throw new ParseError(
+ "Expected group after '" + symbol + "'",
+ this.lexer,
+ symPos + 1
+ );
+ }
+ } else if (group.isFunction) {
+ // ^ and _ have a greediness, so handle interactions with functions'
+ // greediness
+ var funcGreediness = functions[group.result].greediness;
+ if (funcGreediness > SUPSUB_GREEDINESS) {
+ return this.parseFunction(group);
+ } else {
+ throw new ParseError(
+ "Got function '" + group.result + "' with no arguments " +
+ "as " + name,
+ this.lexer, symPos + 1);
+ }
+ } else {
+ return group.result;
+ }
+};
+
+/**
+ * Converts the textual input of an unsupported command into a text node
+ * contained within a color node whose color is determined by errorColor
+ */
+Parser.prototype.handleUnsupportedCmd = function() {
+ var text = this.nextToken.text;
+ var textordArray = [];
+
+ for (var i = 0; i < text.length; i++) {
+ textordArray.push(new ParseNode("textord", text[i], "text"));
+ }
+
+ var textNode = new ParseNode(
+ "text",
+ {
+ body: textordArray,
+ type: "text",
+ },
+ this.mode);
+
+ var colorNode = new ParseNode(
+ "color",
+ {
+ color: this.settings.errorColor,
+ value: [textNode],
+ type: "color",
+ },
+ this.mode);
+
+ this.consume();
+ return colorNode;
+};
+
+/**
+ * Parses a group with optional super/subscripts.
+ *
+ * @return {?ParseNode}
+ */
+Parser.prototype.parseAtom = function() {
+ // The body of an atom is an implicit group, so that things like
+ // \left(x\right)^2 work correctly.
+ var base = this.parseImplicitGroup();
+
+ // In text mode, we don't have superscripts or subscripts
+ if (this.mode === "text") {
+ return base;
+ }
+
+ // Note that base may be empty (i.e. null) at this point.
+
+ var superscript;
+ var subscript;
+ while (true) {
+ // Lex the first token
+ var lex = this.nextToken;
+
+ if (lex.text === "\\limits" || lex.text === "\\nolimits") {
+ // We got a limit control
+ if (!base || base.type !== "op") {
+ throw new ParseError(
+ "Limit controls must follow a math operator",
+ this.lexer, this.pos);
+ } else {
+ var limits = lex.text === "\\limits";
+ base.value.limits = limits;
+ base.value.alwaysHandleSupSub = true;
+ }
+ this.consume();
+ } else if (lex.text === "^") {
+ // We got a superscript start
+ if (superscript) {
+ throw new ParseError(
+ "Double superscript", this.lexer, this.pos);
+ }
+ superscript = this.handleSupSubscript("superscript");
+ } else if (lex.text === "_") {
+ // We got a subscript start
+ if (subscript) {
+ throw new ParseError(
+ "Double subscript", this.lexer, this.pos);
+ }
+ subscript = this.handleSupSubscript("subscript");
+ } else if (lex.text === "'") {
+ // We got a prime
+ var prime = new ParseNode("textord", "\\prime", this.mode);
+
+ // Many primes can be grouped together, so we handle this here
+ var primes = [prime];
+ this.consume();
+ // Keep lexing tokens until we get something that's not a prime
+ while (this.nextToken.text === "'") {
+ // For each one, add another prime to the list
+ primes.push(prime);
+ this.consume();
+ }
+ // Put them into an ordgroup as the superscript
+ superscript = new ParseNode("ordgroup", primes, this.mode);
+ } else {
+ // If it wasn't ^, _, or ', stop parsing super/subscripts
+ break;
+ }
+ }
+
+ if (superscript || subscript) {
+ // If we got either a superscript or subscript, create a supsub
+ return new ParseNode("supsub", {
+ base: base,
+ sup: superscript,
+ sub: subscript,
+ }, this.mode);
+ } else {
+ // Otherwise return the original body
+ return base;
+ }
+};
+
+// A list of the size-changing functions, for use in parseImplicitGroup
+var sizeFuncs = [
+ "\\tiny", "\\scriptsize", "\\footnotesize", "\\small", "\\normalsize",
+ "\\large", "\\Large", "\\LARGE", "\\huge", "\\Huge",
+];
+
+// A list of the style-changing functions, for use in parseImplicitGroup
+var styleFuncs = [
+ "\\displaystyle", "\\textstyle", "\\scriptstyle", "\\scriptscriptstyle",
+];
+
+/**
+ * Parses an implicit group, which is a group that starts at the end of a
+ * specified, and ends right before a higher explicit group ends, or at EOL. It
+ * is used for functions that appear to affect the current style, like \Large or
+ * \textrm, where instead of keeping a style we just pretend that there is an
+ * implicit grouping after it until the end of the group. E.g.
+ * small text {\Large large text} small text again
+ * It is also used for \left and \right to get the correct grouping.
+ *
+ * @return {?ParseNode}
+ */
+Parser.prototype.parseImplicitGroup = function() {
+ var start = this.parseSymbol();
+
+ if (start == null) {
+ // If we didn't get anything we handle, fall back to parseFunction
+ return this.parseFunction();
+ }
+
+ var func = start.result;
+ var body;
+
+ if (func === "\\left") {
+ // If we see a left:
+ // Parse the entire left function (including the delimiter)
+ var left = this.parseFunction(start);
+ // Parse out the implicit body
+ body = this.parseExpression(false);
+ // Check the next token
+ this.expect("\\right", false);
+ var right = this.parseFunction();
+ return new ParseNode("leftright", {
+ body: body,
+ left: left.value.value,
+ right: right.value.value,
+ }, this.mode);
+ } else if (func === "\\begin") {
+ // begin...end is similar to left...right
+ var begin = this.parseFunction(start);
+ var envName = begin.value.name;
+ if (!environments.hasOwnProperty(envName)) {
+ throw new ParseError(
+ "No such environment: " + envName,
+ this.lexer, begin.value.namepos);
+ }
+ // Build the environment object. Arguments and other information will
+ // be made available to the begin and end methods using properties.
+ var env = environments[envName];
+ var args = this.parseArguments("\\begin{" + envName + "}", env);
+ var context = {
+ mode: this.mode,
+ envName: envName,
+ parser: this,
+ lexer: this.lexer,
+ positions: args.pop(),
+ };
+ var result = env.handler(context, args);
+ this.expect("\\end", false);
+ var end = this.parseFunction();
+ if (end.value.name !== envName) {
+ throw new ParseError(
+ "Mismatch: \\begin{" + envName + "} matched " +
+ "by \\end{" + end.value.name + "}",
+ this.lexer /* , end.value.namepos */);
+ // TODO: Add position to the above line and adjust test case,
+ // requires #385 to get merged first
+ }
+ result.position = end.position;
+ return result;
+ } else if (utils.contains(sizeFuncs, func)) {
+ // If we see a sizing function, parse out the implict body
+ body = this.parseExpression(false);
+ return new ParseNode("sizing", {
+ // Figure out what size to use based on the list of functions above
+ size: "size" + (utils.indexOf(sizeFuncs, func) + 1),
+ value: body,
+ }, this.mode);
+ } else if (utils.contains(styleFuncs, func)) {
+ // If we see a styling function, parse out the implict body
+ body = this.parseExpression(true);
+ return new ParseNode("styling", {
+ // Figure out what style to use by pulling out the style from
+ // the function name
+ style: func.slice(1, func.length - 5),
+ value: body,
+ }, this.mode);
+ } else {
+ // Defer to parseFunction if it's not a function we handle
+ return this.parseFunction(start);
+ }
+};
+
+/**
+ * Parses an entire function, including its base and all of its arguments.
+ * The base might either have been parsed already, in which case
+ * it is provided as an argument, or it's the next group in the input.
+ *
+ * @param {ParseFuncOrArgument=} baseGroup optional as described above
+ * @return {?ParseNode}
+ */
+Parser.prototype.parseFunction = function(baseGroup) {
+ if (!baseGroup) {
+ baseGroup = this.parseGroup();
+ }
+
+ if (baseGroup) {
+ if (baseGroup.isFunction) {
+ var func = baseGroup.result;
+ var funcData = functions[func];
+ if (this.mode === "text" && !funcData.allowedInText) {
+ throw new ParseError(
+ "Can't use function '" + func + "' in text mode",
+ this.lexer, baseGroup.position);
+ }
+
+ var args = this.parseArguments(func, funcData);
+ var result = this.callFunction(func, args, args.pop());
+ return new ParseNode(result.type, result, this.mode);
+ } else {
+ return baseGroup.result;
+ }
+ } else {
+ return null;
+ }
+};
+
+/**
+ * Call a function handler with a suitable context and arguments.
+ */
+Parser.prototype.callFunction = function(name, args, positions) {
+ var context = {
+ funcName: name,
+ parser: this,
+ lexer: this.lexer,
+ positions: positions,
+ };
+ return functions[name].handler(context, args);
+};
+
+/**
+ * Parses the arguments of a function or environment
+ *
+ * @param {string} func "\name" or "\begin{name}"
+ * @param {{numArgs:number,numOptionalArgs:number|undefined}} funcData
+ * @return the array of arguments, with the list of positions as last element
+ */
+Parser.prototype.parseArguments = function(func, funcData) {
+ var totalArgs = funcData.numArgs + funcData.numOptionalArgs;
+ if (totalArgs === 0) {
+ return [[this.pos]];
+ }
+
+ var baseGreediness = funcData.greediness;
+ var positions = [this.pos];
+ var args = [];
+
+ for (var i = 0; i < totalArgs; i++) {
+ var argType = funcData.argTypes && funcData.argTypes[i];
+ var arg;
+ if (i < funcData.numOptionalArgs) {
+ if (argType) {
+ arg = this.parseSpecialGroup(argType, true);
+ } else {
+ arg = this.parseOptionalGroup();
+ }
+ if (!arg) {
+ args.push(null);
+ positions.push(this.pos);
+ continue;
+ }
+ } else {
+ if (argType) {
+ arg = this.parseSpecialGroup(argType);
+ } else {
+ arg = this.parseGroup();
+ }
+ if (!arg) {
+ if (!this.settings.throwOnError &&
+ this.nextToken.text[0] === "\\") {
+ arg = new ParseFuncOrArgument(
+ this.handleUnsupportedCmd(this.nextToken.text),
+ false);
+ } else {
+ throw new ParseError(
+ "Expected group after '" + func + "'",
+ this.lexer, this.pos);
+ }
+ }
+ }
+ var argNode;
+ if (arg.isFunction) {
+ var argGreediness =
+ functions[arg.result].greediness;
+ if (argGreediness > baseGreediness) {
+ argNode = this.parseFunction(arg);
+ } else {
+ throw new ParseError(
+ "Got function '" + arg.result + "' as " +
+ "argument to '" + func + "'",
+ this.lexer, this.pos - 1);
+ }
+ } else {
+ argNode = arg.result;
+ }
+ args.push(argNode);
+ positions.push(this.pos);
+ }
+
+ args.push(positions);
+
+ return args;
+};
+
+
+/**
+ * Parses a group when the mode is changing. Takes a position, a new mode, and
+ * an outer mode that is used to parse the outside.
+ *
+ * @return {?ParseFuncOrArgument}
+ */
+Parser.prototype.parseSpecialGroup = function(innerMode, optional) {
+ var outerMode = this.mode;
+ // Handle `original` argTypes
+ if (innerMode === "original") {
+ innerMode = outerMode;
+ }
+
+ if (innerMode === "color" || innerMode === "size") {
+ // color and size modes are special because they should have braces and
+ // should only lex a single symbol inside
+ var openBrace = this.nextToken;
+ if (optional && openBrace.text !== "[") {
+ // optional arguments should return null if they don't exist
+ return null;
+ }
+ // The call to expect will lex the token after the '{' in inner mode
+ this.mode = innerMode;
+ this.expect(optional ? "[" : "{");
+ var inner = this.nextToken;
+ this.mode = outerMode;
+ var data;
+ if (innerMode === "color") {
+ data = inner.text;
+ } else {
+ data = inner.data;
+ }
+ this.consume(); // consume the token stored in inner
+ this.expect(optional ? "]" : "}");
+ return new ParseFuncOrArgument(
+ new ParseNode(innerMode, data, outerMode),
+ false);
+ } else if (innerMode === "text") {
+ // text mode is special because it should ignore the whitespace before
+ // it
+ var whitespace = this.lexer.lex(this.pos, "whitespace");
+ this.pos = whitespace.position;
+ }
+
+ // By the time we get here, innerMode is one of "text" or "math".
+ // We switch the mode of the parser, recurse, then restore the old mode.
+ this.mode = innerMode;
+ this.nextToken = this.lexer.lex(this.pos, innerMode);
+ var res;
+ if (optional) {
+ res = this.parseOptionalGroup();
+ } else {
+ res = this.parseGroup();
+ }
+ this.mode = outerMode;
+ this.nextToken = this.lexer.lex(this.pos, outerMode);
+ return res;
+};
+
+/**
+ * Parses a group, which is either a single nucleus (like "x") or an expression
+ * in braces (like "{x+y}")
+ *
+ * @return {?ParseFuncOrArgument}
+ */
+Parser.prototype.parseGroup = function() {
+ // Try to parse an open brace
+ if (this.nextToken.text === "{") {
+ // If we get a brace, parse an expression
+ this.consume();
+ var expression = this.parseExpression(false);
+ // Make sure we get a close brace
+ this.expect("}");
+ return new ParseFuncOrArgument(
+ new ParseNode("ordgroup", expression, this.mode),
+ false);
+ } else {
+ // Otherwise, just return a nucleus
+ return this.parseSymbol();
+ }
+};
+
+/**
+ * Parses a group, which is an expression in brackets (like "[x+y]")
+ *
+ * @return {?ParseFuncOrArgument}
+ */
+Parser.prototype.parseOptionalGroup = function() {
+ // Try to parse an open bracket
+ if (this.nextToken.text === "[") {
+ // If we get a brace, parse an expression
+ this.consume();
+ var expression = this.parseExpression(false, "]");
+ // Make sure we get a close bracket
+ this.expect("]");
+ return new ParseFuncOrArgument(
+ new ParseNode("ordgroup", expression, this.mode),
+ false);
+ } else {
+ // Otherwise, return null,
+ return null;
+ }
+};
+
+/**
+ * Parse a single symbol out of the string. Here, we handle both the functions
+ * we have defined, as well as the single character symbols
+ *
+ * @return {?ParseFuncOrArgument}
+ */
+Parser.prototype.parseSymbol = function() {
+ var nucleus = this.nextToken;
+
+ if (functions[nucleus.text]) {
+ this.consume();
+ // If there exists a function with this name, we return the function and
+ // say that it is a function.
+ return new ParseFuncOrArgument(
+ nucleus.text,
+ true);
+ } else if (symbols[this.mode][nucleus.text]) {
+ this.consume();
+ // Otherwise if this is a no-argument function, find the type it
+ // corresponds to in the symbols map
+ return new ParseFuncOrArgument(
+ new ParseNode(symbols[this.mode][nucleus.text].group,
+ nucleus.text, this.mode),
+ false);
+ } else {
+ return null;
+ }
+};
+
+Parser.prototype.ParseNode = ParseNode;
+
+module.exports = Parser;