summary | refs | log | tree | commit | diff | stats
path: root/webapp/non_npm_dependencies/katex/src/Lexer.js
diff options
context:
space:
mode:
Diffstat (limited to 'webapp/non_npm_dependencies/katex/src/Lexer.js')
-rw-r--r-- webapp/non_npm_dependencies/katex/src/Lexer.js 162
1 files changed, 162 insertions, 0 deletions
diff --git a/webapp/non_npm_dependencies/katex/src/Lexer.js b/webapp/non_npm_dependencies/katex/src/Lexer.js
new file mode 100644
index 000000000..4d6697c6a
--- /dev/null
+++ b/webapp/non_npm_dependencies/katex/src/Lexer.js
@@ -0,0 +1,162 @@
+/**
+ * The Lexer class handles tokenizing the input in various ways. Since our
+ * parser expects us to be able to backtrack, the lexer allows lexing from any
+ * given starting point.
+ *
+ * Its main exposed function is the `lex` function, which takes a position to
+ * lex from and a type of token to lex. It defers to the appropriate `_innerLex`
+ * function.
+ *
+ * The various `_innerLex` functions perform the actual lexing of different
+ * kinds.
+ */
+
+var matchAt = require("match-at");
+
+var ParseError = require("./ParseError");
+
+/**
+ * The main lexer class. It only records the input; the lexing methods on the
+ * prototype read `this._input` at whatever position they are handed.
+ *
+ * @param {string} input The text to be tokenized.
+ */
+function Lexer(input) {
+ this._input = input;
+}
+
+/**
+ * The resulting token returned from `lex`.
+ *
+ * @param text The token's text: "EOF" at the end of the input, " " for
+ *     collapsed whitespace, otherwise text taken from the input.
+ * @param data Extra parsed data. The size lexer passes `{number, unit}`;
+ *     the other lexing functions in this file pass `null`.
+ * @param position Index into the input just past the consumed text, i.e.
+ *     where lexing of the next token should start.
+ */
+function Token(text, data, position) {
+ this.text = text;
+ this.data = data;
+ this.position = position;
+}
+
+/* The following tokenRegex
+ * - matches typical whitespace (but not NBSP etc.) using its first group
+ * - matches symbol combinations which result in a single output character
+ * - does not match any control character \x00-\x1f except whitespace
+ * - does not match a bare backslash
+ * - matches any ASCII character except those just mentioned
+ * - does not match the line/paragraph separators U+2028 and U+2029
+ * - does not match the BMP private use area \uE000-\uF8FF
+ * - does not match bare surrogate code units
+ * - matches any BMP character except for those just described
+ * - matches any valid Unicode surrogate pair
+ * - matches a backslash followed by one or more letters
+ * - matches a backslash followed by any BMP character, including newline
+ * Just because the Lexer matches something doesn't mean it's valid input:
+ * If there is no matching function or symbol definition, the Parser will
+ * still reject the input.
+ *
+ * Capture group 1 holds a run of whitespace and capture group 2 holds any
+ * other token; `_innerLex` depends on this numbering.
+ */
+var tokenRegex = new RegExp(
+ "([ \r\n\t]+)|(" + // whitespace
+ "---?" + // special combinations: "--" and "---"
+ "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" + // single codepoint
+ "|[\uD800-\uDBFF][\uDC00-\uDFFF]" + // surrogate pair
+ "|\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])" + // function name
+ ")"
+);
+
+// Matches a (possibly empty) run of whitespace. Unlike group 1 of tokenRegex,
+// `\s` also covers non-ASCII whitespace such as NBSP.
+var whitespaceRegex = /\s*/;
+
+/**
+ * This function lexes a single normal token. It takes a position and
+ * whether it should completely ignore whitespace or not.
+ */
+Lexer.prototype._innerLex = function(pos, ignoreWhitespace) {
+ var input = this._input;
+ if (pos === input.length) {
+ return new Token("EOF", null, pos);
+ }
+ var match = matchAt(tokenRegex, input, pos);
+ if (match === null) {
+ throw new ParseError(
+ "Unexpected character: '" + input[pos] + "'",
+ this, pos);
+ } else if (match[2]) { // matched non-whitespace
+ return new Token(match[2], null, pos + match[2].length);
+ } else if (ignoreWhitespace) {
+ return this._innerLex(pos + match[1].length, true);
+ } else { // concatenate whitespace to a single space
+ return new Token(" ", null, pos + match[1].length);
+ }
+};
+
+// A regex to match a CSS color (like #ffffff or BlueViolet)
+var cssColor = /#[a-z0-9]+|[a-z]+/i;
+
+/**
+ * This function lexes a CSS color.
+ */
+Lexer.prototype._innerLexColor = function(pos) {
+ var input = this._input;
+
+ // Ignore whitespace
+ var whitespace = matchAt(whitespaceRegex, input, pos)[0];
+ pos += whitespace.length;
+
+ var match;
+ if ((match = matchAt(cssColor, input, pos))) {
+ // If we look like a color, return a color
+ return new Token(match[0], null, pos + match[0].length);
+ } else {
+ throw new ParseError("Invalid color", this, pos);
+ }
+};
+
+// A regex to match a dimension. Dimensions look like
+// "1.2em" or ".4pt" or "1 ex"
+var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;
+
+/**
+ * This function lexes a dimension.
+ */
+Lexer.prototype._innerLexSize = function(pos) {
+ var input = this._input;
+
+ // Ignore whitespace
+ var whitespace = matchAt(whitespaceRegex, input, pos)[0];
+ pos += whitespace.length;
+
+ var match;
+ if ((match = matchAt(sizeRegex, input, pos))) {
+ var unit = match[3];
+ // We only currently handle "em" and "ex" units
+ if (unit !== "em" && unit !== "ex") {
+ throw new ParseError("Invalid unit: '" + unit + "'", this, pos);
+ }
+ return new Token(match[0], {
+ number: +(match[1] + match[2]),
+ unit: unit,
+ }, pos + match[0].length);
+ }
+
+ throw new ParseError("Invalid size", this, pos);
+};
+
+/**
+ * This function lexes a string of whitespace.
+ */
+Lexer.prototype._innerLexWhitespace = function(pos) {
+ var input = this._input;
+
+ var whitespace = matchAt(whitespaceRegex, input, pos)[0];
+ pos += whitespace.length;
+
+ return new Token(whitespace[0], null, pos);
+};
+
+/**
+ * This function lexes a single token starting at `pos` and of the given mode.
+ * Based on the mode, we defer to one of the `_innerLex` functions.
+ */
+Lexer.prototype.lex = function(pos, mode) {
+ if (mode === "math") {
+ return this._innerLex(pos, true);
+ } else if (mode === "text") {
+ return this._innerLex(pos, false);
+ } else if (mode === "color") {
+ return this._innerLexColor(pos);
+ } else if (mode === "size") {
+ return this._innerLexSize(pos);
+ } else if (mode === "whitespace") {
+ return this._innerLexWhitespace(pos);
+ }
+};
+
+module.exports = Lexer;