1 files changed, 162 insertions, 0 deletions
diff --git a/webapp/non_npm_dependencies/katex/src/Lexer.js b/webapp/non_npm_dependencies/katex/src/Lexer.js
new file mode 100644
index 000000000..4d6697c6a
--- /dev/null
+++ b/webapp/non_npm_dependencies/katex/src/Lexer.js
@@ -0,0 +1,162 @@
+/**
+ * The Lexer class handles tokenizing the input in various ways. Since our
+ * parser expects us to be able to backtrack, the lexer allows lexing from any
+ * given starting point.
+ *
+ * Its main exposed function is the `lex` function, which takes a position to
+ * lex from and a type of token to lex. It defers to the appropriate `_innerLex`
+ * function.
+ *
+ * The various `_innerLex` functions perform the actual lexing of different
+ * kinds.
+ */
+
+var matchAt = require("match-at");
+
+var ParseError = require("./ParseError");
+
+// The main lexer class
+function Lexer(input) {
+    this._input = input;
+}
+
+// The resulting token returned from `lex`.
+function Token(text, data, position) {
+    this.text = text;
+    this.data = data;
+    this.position = position;
+}
+
+/* The following tokenRegex
+ * - matches typical whitespace (but not NBSP etc.) using its first group
+ * - matches symbol combinations which result in a single output character
+ * - does not match any control character \x00-\x1f except whitespace
+ * - does not match a bare backslash
+ * - matches any ASCII character except those just mentioned
+ * - does not match the BMP private use area \uE000-\uF8FF
+ * - does not match bare surrogate code units
+ * - matches any BMP character except for those just described
+ * - matches any valid Unicode surrogate pair
+ * - matches a backslash followed by one or more letters
+ * - matches a backslash followed by any BMP character, including newline
+ * Just because the Lexer matches something doesn't mean it's valid input:
+ * If there is no matching function or symbol definition, the Parser will
+ * still reject the input.
+ */
+var tokenRegex = new RegExp(
+    "([ \r\n\t]+)|(" +                                // whitespace
+    "---?" +                                          // special combinations
+    "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +  // single codepoint
+    "|[\uD800-\uDBFF][\uDC00-\uDFFF]" +               // surrogate pair
+    "|\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])" +           // function name
+    ")"
+);
+
+var whitespaceRegex = /\s*/;
+
+/**
+ * This function lexes a single normal token. It takes a position and
+ * whether it should completely ignore whitespace or not.
+ */
+Lexer.prototype._innerLex = function(pos, ignoreWhitespace) {
+    var input = this._input;
+    if (pos === input.length) {
+        return new Token("EOF", null, pos);
+    }
+    var match = matchAt(tokenRegex, input, pos);
+    if (match === null) {
+        throw new ParseError(
+            "Unexpected character: '" + input[pos] + "'",
+            this, pos);
+    } else if (match[2]) { // matched non-whitespace
+        return new Token(match[2], null, pos + match[2].length);
+    } else if (ignoreWhitespace) {
+        return this._innerLex(pos + match[1].length, true);
+    } else { // concatenate whitespace to a single space
+        return new Token(" ", null, pos + match[1].length);
+    }
+};
+
+// A regex to match a CSS color (like #ffffff or BlueViolet)
+var cssColor = /#[a-z0-9]+|[a-z]+/i;
+
+/**
+ * This function lexes a CSS color.
+ */
+Lexer.prototype._innerLexColor = function(pos) {
+    var input = this._input;
+
+    // Ignore whitespace
+    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
+    pos += whitespace.length;
+
+    var match;
+    if ((match = matchAt(cssColor, input, pos))) {
+        // If we look like a color, return a color
+        return new Token(match[0], null, pos + match[0].length);
+    } else {
+        throw new ParseError("Invalid color", this, pos);
+    }
+};
+
+// A regex to match a dimension. Dimensions look like
+// "1.2em" or ".4pt" or "1 ex"
+var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;
+
+/**
+ * This function lexes a dimension.
+ */
+Lexer.prototype._innerLexSize = function(pos) {
+    var input = this._input;
+
+    // Ignore whitespace
+    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
+    pos += whitespace.length;
+
+    var match;
+    if ((match = matchAt(sizeRegex, input, pos))) {
+        var unit = match[3];
+        // We only currently handle "em" and "ex" units
+        if (unit !== "em" && unit !== "ex") {
+            throw new ParseError("Invalid unit: '" + unit + "'", this, pos);
+        }
+        return new Token(match[0], {
+            number: +(match[1] + match[2]),
+            unit: unit,
+        }, pos + match[0].length);
+    }
+
+    throw new ParseError("Invalid size", this, pos);
+};
+
+/**
+ * This function lexes a string of whitespace.
+ */
+Lexer.prototype._innerLexWhitespace = function(pos) {
+    var input = this._input;
+
+    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
+    pos += whitespace.length;
+
+    return new Token(whitespace[0], null, pos);
+};
+
+/**
+ * This function lexes a single token starting at `pos` and of the given mode.
+ * Based on the mode, we defer to one of the `_innerLex` functions.
+ */
+Lexer.prototype.lex = function(pos, mode) {
+    if (mode === "math") {
+        return this._innerLex(pos, true);
+    } else if (mode === "text") {
+        return this._innerLex(pos, false);
+    } else if (mode === "color") {
+        return this._innerLexColor(pos);
+    } else if (mode === "size") {
+        return this._innerLexSize(pos);
+    } else if (mode === "whitespace") {
+        return this._innerLexWhitespace(pos);
+    }
+};
+
+module.exports = Lexer;