diff options
Diffstat (limited to 'webapp/non_npm_dependencies/katex/src/Lexer.js')
-rw-r--r-- | webapp/non_npm_dependencies/katex/src/Lexer.js | 162 |
1 files changed, 162 insertions, 0 deletions
// Extracted from diff: webapp/non_npm_dependencies/katex/src/Lexer.js
// (new file, index 000000000..4d6697c6a)

/**
 * The Lexer class handles tokenizing the input in various ways. Since our
 * parser expects us to be able to backtrack, the lexer allows lexing from any
 * given starting point.
 *
 * Its main exposed function is the `lex` function, which takes a position to
 * lex from and a type of token to lex. It defers to the appropriate `_innerLex`
 * function.
 *
 * The various `_innerLex` functions perform the actual lexing of different
 * kinds.
 */

// NOTE(review): `matchAt(regex, input, pos)` is used below as "match `regex`
// against `input` starting exactly at index `pos`, returning the match array
// or null" -- confirm against the match-at package documentation.
var matchAt = require("match-at");

var ParseError = require("./ParseError");

/**
 * The main lexer class. It only stores the input string; every lexing call
 * receives its starting position explicitly, so no cursor state is kept.
 *
 * @param {string} input The complete source text to tokenize.
 */
function Lexer(input) {
    this._input = input;
}

/**
 * The resulting token returned from `lex`.
 *
 * @param {string} text The token text (see the individual `_innerLex*`
 *     functions for what each of them stores here).
 * @param {?Object} data Extra parsed data; only size tokens set this
 *     (`{number, unit}`), every other token passes null.
 * @param {number} position Index in the input just past the token, i.e. where
 *     lexing of the following token should start.
 */
function Token(text, data, position) {
    this.text = text;
    this.data = data;
    this.position = position;
}

/* The following tokenRegex
 * - matches typical whitespace (but not NBSP etc.) using its first group
 * - matches symbol combinations which result in a single output character
 * - does not match any control character \x00-\x1f except whitespace
 * - does not match a bare backslash
 * - matches any ASCII character except those just mentioned
 * - does not match the BMP private use area \uE000-\uF8FF
 * - does not match bare surrogate code units
 * - matches any BMP character except for those just described
 * - matches any valid Unicode surrogate pair
 * - matches a backslash followed by one or more letters
 * - matches a backslash followed by any BMP character, including newline
 * Just because the Lexer matches something doesn't mean it's valid input:
 * If there is no matching function or symbol definition, the Parser will
 * still reject the input.
 *
 * Group 1 captures a run of whitespace; group 2 captures any other token.
 */
var tokenRegex = new RegExp(
    "([ \r\n\t]+)|(" +                                // whitespace
    "---?" +                                          // special combinations
    "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +  // single codepoint
    "|[\uD800-\uDBFF][\uDC00-\uDFFF]" +               // surrogate pair
    "|\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])" +           // function name
    ")"
);

// Matches a (possibly empty) run of whitespace. Because it can match the
// empty string, callers index the match result without a null check.
var whitespaceRegex = /\s*/;

/**
 * This function lexes a single normal token. It takes a position and
 * whether it should completely ignore whitespace or not.
 *
 * @param {number} pos Index in the input to start lexing at.
 * @param {boolean} ignoreWhitespace When true, a whitespace run at `pos` is
 *     skipped and the token after it is returned; when false, the run is
 *     returned as a single " " token.
 * @return {Token} An "EOF" token if `pos` is at the end of the input,
 *     otherwise the matched token.
 * @throws {ParseError} If nothing in `tokenRegex` matches at `pos`.
 */
Lexer.prototype._innerLex = function(pos, ignoreWhitespace) {
    var input = this._input;
    if (pos === input.length) {
        return new Token("EOF", null, pos);
    }
    var match = matchAt(tokenRegex, input, pos);
    if (match === null) {
        throw new ParseError(
            "Unexpected character: '" + input[pos] + "'",
            this, pos);
    } else if (match[2]) { // matched non-whitespace
        return new Token(match[2], null, pos + match[2].length);
    } else if (ignoreWhitespace) {
        // Skip the whitespace run and lex whatever follows it (possibly EOF).
        return this._innerLex(pos + match[1].length, true);
    } else { // concatenate whitespace to a single space
        return new Token(" ", null, pos + match[1].length);
    }
};

// A regex to match a CSS color (like #ffffff or BlueViolet)
var cssColor = /#[a-z0-9]+|[a-z]+/i;

/**
 * This function lexes a CSS color.
 *
 * Leading whitespace is skipped first. The color text is not validated
 * beyond matching `cssColor`.
 *
 * @param {number} pos Index in the input to start lexing at.
 * @return {Token} A token whose text is the matched color string.
 * @throws {ParseError} "Invalid color" if no color follows the whitespace;
 *     the reported position is the one after the skipped whitespace.
 */
Lexer.prototype._innerLexColor = function(pos) {
    var input = this._input;

    // Ignore whitespace
    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
    pos += whitespace.length;

    var match;
    if ((match = matchAt(cssColor, input, pos))) {
        // If we look like a color, return a color
        return new Token(match[0], null, pos + match[0].length);
    } else {
        throw new ParseError("Invalid color", this, pos);
    }
};

// A regex to match a dimension. Dimensions look like
// "1.2em" or ".4pt" or "1 ex"
// Group 1 is the optional minus sign, group 2 the magnitude and group 3 the
// two-letter unit.
var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;

/**
 * This function lexes a dimension.
 *
 * Leading whitespace is skipped first.
 *
 * @param {number} pos Index in the input to start lexing at.
 * @return {Token} A token whose text is the whole matched dimension
 *     (including any inner whitespace) and whose data is
 *     `{number: <signed numeric value>, unit: "em" | "ex"}`.
 * @throws {ParseError} "Invalid unit: '..'" for a well-formed dimension with
 *     an unsupported unit, or "Invalid size" if no dimension matches.
 */
Lexer.prototype._innerLexSize = function(pos) {
    var input = this._input;

    // Ignore whitespace
    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
    pos += whitespace.length;

    var match;
    if ((match = matchAt(sizeRegex, input, pos))) {
        var unit = match[3];
        // We only currently handle "em" and "ex" units
        if (unit !== "em" && unit !== "ex") {
            throw new ParseError("Invalid unit: '" + unit + "'", this, pos);
        }
        return new Token(match[0], {
            // Re-attach the sign to the magnitude before converting, so that
            // "- 1.2em" yields -1.2.
            number: +(match[1] + match[2]),
            unit: unit,
        }, pos + match[0].length);
    }

    throw new ParseError("Invalid size", this, pos);
};

/**
 * This function lexes a string of whitespace.
 *
 * @param {number} pos Index in the input to start lexing at.
 * @return {Token} A token positioned just past the whitespace run. Its text
 *     is the first whitespace character of the run, or `undefined` when there
 *     is no whitespace at `pos` (indexing an empty string).
 */
Lexer.prototype._innerLexWhitespace = function(pos) {
    var input = this._input;

    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
    pos += whitespace.length;

    return new Token(whitespace[0], null, pos);
};

/**
 * This function lexes a single token starting at `pos` and of the given mode.
 * Based on the mode, we defer to one of the `_innerLex` functions.
 *
 * @param {number} pos Index in the input to start lexing at.
 * @param {string} mode One of "math", "text", "color", "size" or
 *     "whitespace". "math" ignores whitespace; "text" reports it as a single
 *     " " token.
 * @return {Token} The token produced by the selected `_innerLex` function.
 * @throws {ParseError} If the selected function rejects the input, or if
 *     `mode` is not one of the supported modes.
 */
Lexer.prototype.lex = function(pos, mode) {
    if (mode === "math") {
        return this._innerLex(pos, true);
    } else if (mode === "text") {
        return this._innerLex(pos, false);
    } else if (mode === "color") {
        return this._innerLexColor(pos);
    } else if (mode === "size") {
        return this._innerLexSize(pos);
    } else if (mode === "whitespace") {
        return this._innerLexWhitespace(pos);
    }

    // Previously an unrecognised mode fell through and returned undefined,
    // which surfaced later as an unrelated TypeError in the caller. Fail here
    // instead, where the cause is known.
    throw new ParseError("Unknown lexer mode: '" + mode + "'", this, pos);
};

module.exports = Lexer;