From 98e2821b38a775737e42a2479a6bc65107210859 Mon Sep 17 00:00:00 2001 From: Elliot Kroo Date: Thu, 11 Mar 2010 15:21:30 -0800 Subject: reorganizing the first level of folders (trunk/branch folders are not the git way :) --- .../mozilla/javascript/regexp/NativeRegExp.java | 2782 -------------------- 1 file changed, 2782 deletions(-) delete mode 100644 trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java (limited to 'trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java') diff --git a/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java b/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java deleted file mode 100644 index a893841..0000000 --- a/trunk/infrastructure/rhino1_7R1/src/org/mozilla/javascript/regexp/NativeRegExp.java +++ /dev/null @@ -1,2782 +0,0 @@ -/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- - * - * ***** BEGIN LICENSE BLOCK ***** - * Version: MPL 1.1/GPL 2.0 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * The Original Code is Rhino code, released - * May 6, 1998. - * - * The Initial Developer of the Original Code is - * Netscape Communications Corporation. - * Portions created by the Initial Developer are Copyright (C) 1997-1999 - * the Initial Developer. All Rights Reserved. 
- * - * Contributor(s): - * Norris Boyd - * Igor Bukanov - * Brendan Eich - * Matthias Radestock - * - * Alternatively, the contents of this file may be used under the terms of - * the GNU General Public License Version 2 or later (the "GPL"), in which - * case the provisions of the GPL are applicable instead of those above. If - * you wish to allow use of your version of this file only under the terms of - * the GPL and not to allow others to use your version of this file under the - * MPL, indicate your decision by deleting the provisions above and replacing - * them with the notice and other provisions required by the GPL. If you do - * not delete the provisions above, a recipient may use your version of this - * file under either the MPL or the GPL. - * - * ***** END LICENSE BLOCK ***** */ - -package org.mozilla.javascript.regexp; - -import java.io.Serializable; - -import org.mozilla.javascript.Context; -import org.mozilla.javascript.Function; -import org.mozilla.javascript.IdFunctionObject; -import org.mozilla.javascript.IdScriptableObject; -import org.mozilla.javascript.Kit; -import org.mozilla.javascript.ScriptRuntime; -import org.mozilla.javascript.Scriptable; -import org.mozilla.javascript.ScriptableObject; -import org.mozilla.javascript.Undefined; - -/** - * This class implements the RegExp native object. - * - * Revision History: - * Implementation in C by Brendan Eich - * Initial port to Java by Norris Boyd from jsregexp.c version 1.36 - * Merged up to version 1.38, which included Unicode support. - * Merged bug fixes in version 1.39. 
- * Merged JSFUN13_BRANCH changes up to 1.32.2.13 - * - * @author Brendan Eich - * @author Norris Boyd - */ - - - -public class NativeRegExp extends IdScriptableObject implements Function -{ - static final long serialVersionUID = 4965263491464903264L; - - private static final Object REGEXP_TAG = new Object(); - - public static final int JSREG_GLOB = 0x1; // 'g' flag: global - public static final int JSREG_FOLD = 0x2; // 'i' flag: fold - public static final int JSREG_MULTILINE = 0x4; // 'm' flag: multiline - - //type of match to perform - public static final int TEST = 0; - public static final int MATCH = 1; - public static final int PREFIX = 2; - - private static final boolean debug = false; - - private static final byte REOP_EMPTY = 0; /* match rest of input against rest of r.e. */ - private static final byte REOP_ALT = 1; /* alternative subexpressions in kid and next */ - private static final byte REOP_BOL = 2; /* beginning of input (or line if multiline) */ - private static final byte REOP_EOL = 3; /* end of input (or line if multiline) */ - private static final byte REOP_WBDRY = 4; /* match "" at word boundary */ - private static final byte REOP_WNONBDRY = 5; /* match "" at word non-boundary */ - private static final byte REOP_QUANT = 6; /* quantified atom: atom{1,2} */ - private static final byte REOP_STAR = 7; /* zero or more occurrences of kid */ - private static final byte REOP_PLUS = 8; /* one or more occurrences of kid */ - private static final byte REOP_OPT = 9; /* optional subexpression in kid */ - private static final byte REOP_LPAREN = 10; /* left paren bytecode: kid is u.num'th sub-regexp */ - private static final byte REOP_RPAREN = 11; /* right paren bytecode */ - private static final byte REOP_DOT = 12; /* stands for any character */ -// private static final byte REOP_CCLASS = 13; /* character class: [a-f] */ - private static final byte REOP_DIGIT = 14; /* match a digit char: [0-9] */ - private static final byte REOP_NONDIGIT = 15; /* match a 
non-digit char: [^0-9] */ - private static final byte REOP_ALNUM = 16; /* match an alphanumeric char: [0-9a-z_A-Z] */ - private static final byte REOP_NONALNUM = 17; /* match a non-alphanumeric char: [^0-9a-z_A-Z] */ - private static final byte REOP_SPACE = 18; /* match a whitespace char */ - private static final byte REOP_NONSPACE = 19; /* match a non-whitespace char */ - private static final byte REOP_BACKREF = 20; /* back-reference (e.g., \1) to a parenthetical */ - private static final byte REOP_FLAT = 21; /* match a flat string */ - private static final byte REOP_FLAT1 = 22; /* match a single char */ - private static final byte REOP_JUMP = 23; /* for deoptimized closure loops */ -// private static final byte REOP_DOTSTAR = 24; /* optimize .* to use a single opcode */ -// private static final byte REOP_ANCHOR = 25; /* like .* but skips left context to unanchored r.e. */ -// private static final byte REOP_EOLONLY = 26; /* $ not preceded by any pattern */ -// private static final byte REOP_UCFLAT = 27; /* flat Unicode string; len immediate counts chars */ - private static final byte REOP_UCFLAT1 = 28; /* single Unicode char */ -// private static final byte REOP_UCCLASS = 29; /* Unicode character class, vector of chars to match */ -// private static final byte REOP_NUCCLASS = 30; /* negated Unicode character class */ -// private static final byte REOP_BACKREFi = 31; /* case-independent REOP_BACKREF */ - private static final byte REOP_FLATi = 32; /* case-independent REOP_FLAT */ - private static final byte REOP_FLAT1i = 33; /* case-independent REOP_FLAT1 */ -// private static final byte REOP_UCFLATi = 34; /* case-independent REOP_UCFLAT */ - private static final byte REOP_UCFLAT1i = 35; /* case-independent REOP_UCFLAT1 */ -// private static final byte REOP_ANCHOR1 = 36; /* first-char discriminating REOP_ANCHOR */ -// private static final byte REOP_NCCLASS = 37; /* negated 8-bit character class */ -// private static final byte REOP_DOTSTARMIN = 38; /* ungreedy 
version of REOP_DOTSTAR */ -// private static final byte REOP_LPARENNON = 39; /* non-capturing version of REOP_LPAREN */ -// private static final byte REOP_RPARENNON = 40; /* non-capturing version of REOP_RPAREN */ - private static final byte REOP_ASSERT = 41; /* zero width positive lookahead assertion */ - private static final byte REOP_ASSERT_NOT = 42; /* zero width negative lookahead assertion */ - private static final byte REOP_ASSERTTEST = 43; /* sentinel at end of assertion child */ - private static final byte REOP_ASSERTNOTTEST = 44; /* sentinel at end of !assertion child */ - private static final byte REOP_MINIMALSTAR = 45; /* non-greedy version of * */ - private static final byte REOP_MINIMALPLUS = 46; /* non-greedy version of + */ - private static final byte REOP_MINIMALOPT = 47; /* non-greedy version of ? */ - private static final byte REOP_MINIMALQUANT = 48; /* non-greedy version of {} */ - private static final byte REOP_ENDCHILD = 49; /* sentinel at end of quantifier child */ - private static final byte REOP_CLASS = 50; /* character class with index */ - private static final byte REOP_REPEAT = 51; /* directs execution of greedy quantifier */ - private static final byte REOP_MINIMALREPEAT = 52; /* directs execution of non-greedy quantifier */ - private static final byte REOP_END = 53; - - - - public static void init(Context cx, Scriptable scope, boolean sealed) - { - - NativeRegExp proto = new NativeRegExp(); - proto.re = (RECompiled)compileRE(cx, "", null, false); - proto.activatePrototypeMap(MAX_PROTOTYPE_ID); - proto.setParentScope(scope); - proto.setPrototype(getObjectPrototype(scope)); - - NativeRegExpCtor ctor = new NativeRegExpCtor(); - // Bug #324006: ECMA-262 15.10.6.1 says "The initial value of - // RegExp.prototype.constructor is the builtin RegExp constructor." 
- proto.put("constructor", proto, ctor); - - ScriptRuntime.setFunctionProtoAndParent(ctor, scope); - - ctor.setImmunePrototypeProperty(proto); - - if (sealed) { - proto.sealObject(); - ctor.sealObject(); - } - - defineProperty(scope, "RegExp", ctor, ScriptableObject.DONTENUM); - } - - NativeRegExp(Scriptable scope, Object regexpCompiled) - { - this.re = (RECompiled)regexpCompiled; - this.lastIndex = 0; - ScriptRuntime.setObjectProtoAndParent(this, scope); - } - - public String getClassName() - { - return "RegExp"; - } - - public Object call(Context cx, Scriptable scope, Scriptable thisObj, - Object[] args) - { - return execSub(cx, scope, args, MATCH); - } - - public Scriptable construct(Context cx, Scriptable scope, Object[] args) - { - return (Scriptable)execSub(cx, scope, args, MATCH); - } - - Scriptable compile(Context cx, Scriptable scope, Object[] args) - { - if (args.length > 0 && args[0] instanceof NativeRegExp) { - if (args.length > 1 && args[1] != Undefined.instance) { - // report error - throw ScriptRuntime.typeError0("msg.bad.regexp.compile"); - } - NativeRegExp thatObj = (NativeRegExp) args[0]; - this.re = thatObj.re; - this.lastIndex = thatObj.lastIndex; - return this; - } - String s = args.length == 0 ? "" : ScriptRuntime.toString(args[0]); - String global = args.length > 1 && args[1] != Undefined.instance - ? 
ScriptRuntime.toString(args[1]) - : null; - this.re = (RECompiled)compileRE(cx, s, global, false); - this.lastIndex = 0; - return this; - } - - public String toString() - { - StringBuffer buf = new StringBuffer(); - buf.append('/'); - if (re.source.length != 0) { - buf.append(re.source); - } else { - // See bugzilla 226045 - buf.append("(?:)"); - } - buf.append('/'); - if ((re.flags & JSREG_GLOB) != 0) - buf.append('g'); - if ((re.flags & JSREG_FOLD) != 0) - buf.append('i'); - if ((re.flags & JSREG_MULTILINE) != 0) - buf.append('m'); - return buf.toString(); - } - - NativeRegExp() { } - - private static RegExpImpl getImpl(Context cx) - { - return (RegExpImpl) ScriptRuntime.getRegExpProxy(cx); - } - - private Object execSub(Context cx, Scriptable scopeObj, - Object[] args, int matchType) - { - RegExpImpl reImpl = getImpl(cx); - String str; - if (args.length == 0) { - str = reImpl.input; - if (str == null) { - reportError("msg.no.re.input.for", toString()); - } - } else { - str = ScriptRuntime.toString(args[0]); - } - double d = ((re.flags & JSREG_GLOB) != 0) ? lastIndex : 0; - - Object rval; - if (d < 0 || str.length() < d) { - lastIndex = 0; - rval = null; - } - else { - int indexp[] = { (int)d }; - rval = executeRegExp(cx, scopeObj, reImpl, str, indexp, matchType); - if ((re.flags & JSREG_GLOB) != 0) { - lastIndex = (rval == null || rval == Undefined.instance) - ? 
0 : indexp[0]; - } - } - return rval; - } - - static Object compileRE(Context cx, String str, String global, boolean flat) - { - RECompiled regexp = new RECompiled(); - regexp.source = str.toCharArray(); - int length = str.length(); - - int flags = 0; - if (global != null) { - for (int i = 0; i < global.length(); i++) { - char c = global.charAt(i); - if (c == 'g') { - flags |= JSREG_GLOB; - } else if (c == 'i') { - flags |= JSREG_FOLD; - } else if (c == 'm') { - flags |= JSREG_MULTILINE; - } else { - reportError("msg.invalid.re.flag", String.valueOf(c)); - } - } - } - regexp.flags = flags; - - CompilerState state = new CompilerState(cx, regexp.source, length, flags); - if (flat && length > 0) { -if (debug) { -System.out.println("flat = \"" + str + "\""); -} - state.result = new RENode(REOP_FLAT); - state.result.chr = state.cpbegin[0]; - state.result.length = length; - state.result.flatIndex = 0; - state.progLength += 5; - } - else - if (!parseDisjunction(state)) - return null; - - regexp.program = new byte[state.progLength + 1]; - if (state.classCount != 0) { - regexp.classList = new RECharSet[state.classCount]; - regexp.classCount = state.classCount; - } - int endPC = emitREBytecode(state, regexp, 0, state.result); - regexp.program[endPC++] = REOP_END; - -if (debug) { -System.out.println("Prog. 
length = " + endPC); -for (int i = 0; i < endPC; i++) { - System.out.print(regexp.program[i]); - if (i < (endPC - 1)) System.out.print(", "); -} -System.out.println(); -} - regexp.parenCount = state.parenCount; - - // If re starts with literal, init anchorCh accordingly - switch (regexp.program[0]) { - case REOP_UCFLAT1: - case REOP_UCFLAT1i: - regexp.anchorCh = (char)getIndex(regexp.program, 1); - break; - case REOP_FLAT1: - case REOP_FLAT1i: - regexp.anchorCh = (char)(regexp.program[1] & 0xFF); - break; - case REOP_FLAT: - case REOP_FLATi: - int k = getIndex(regexp.program, 1); - regexp.anchorCh = regexp.source[k]; - break; - } - -if (debug) { -if (regexp.anchorCh >= 0) { - System.out.println("Anchor ch = '" + (char)regexp.anchorCh + "'"); -} -} - return regexp; - } - - static boolean isDigit(char c) - { - return '0' <= c && c <= '9'; - } - - private static boolean isWord(char c) - { - return Character.isLetter(c) || isDigit(c) || c == '_'; - } - - private static boolean isLineTerm(char c) - { - return ScriptRuntime.isJSLineTerminator(c); - } - - private static boolean isREWhiteSpace(int c) - { - return (c == '\u0020' || c == '\u0009' - || c == '\n' || c == '\r' - || c == 0x2028 || c == 0x2029 - || c == '\u000C' || c == '\u000B' - || c == '\u00A0' - || Character.getType((char)c) == Character.SPACE_SEPARATOR); - } - - /* - * - * 1. If IgnoreCase is false, return ch. - * 2. Let u be ch converted to upper case as if by calling - * String.prototype.toUpperCase on the one-character string ch. - * 3. If u does not consist of a single character, return ch. - * 4. Let cu be u's character. - * 5. If ch's code point value is greater than or equal to decimal 128 and cu's - * code point value is less than decimal 128, then return ch. - * 6. Return cu. 
- */ - private static char upcase(char ch) - { - if (ch < 128) { - if ('a' <= ch && ch <= 'z') { - return (char)(ch + ('A' - 'a')); - } - return ch; - } - char cu = Character.toUpperCase(ch); - if ((ch >= 128) && (cu < 128)) return ch; - return cu; - } - - private static char downcase(char ch) - { - if (ch < 128) { - if ('A' <= ch && ch <= 'Z') { - return (char)(ch + ('a' - 'A')); - } - return ch; - } - char cl = Character.toLowerCase(ch); - if ((ch >= 128) && (cl < 128)) return ch; - return cl; - } - -/* - * Validates and converts hex ascii value. - */ - private static int toASCIIHexDigit(int c) - { - if (c < '0') - return -1; - if (c <= '9') { - return c - '0'; - } - c |= 0x20; - if ('a' <= c && c <= 'f') { - return c - 'a' + 10; - } - return -1; - } - -/* - * Top-down regular expression grammar, based closely on Perl4. - * - * regexp: altern A regular expression is one or more - * altern '|' regexp alternatives separated by vertical bar. - */ - private static boolean parseDisjunction(CompilerState state) - { - if (!parseAlternative(state)) - return false; - char[] source = state.cpbegin; - int index = state.cp; - if (index != source.length && source[index] == '|') { - RENode altResult; - ++state.cp; - altResult = new RENode(REOP_ALT); - altResult.kid = state.result; - if (!parseDisjunction(state)) - return false; - altResult.kid2 = state.result; - state.result = altResult; - /* ALT, , ..., JUMP, ... JUMP */ - state.progLength += 9; - } - return true; - } - -/* - * altern: item An alternative is one or more items, - * item altern concatenated together. 
- */ - private static boolean parseAlternative(CompilerState state) - { - RENode headTerm = null; - RENode tailTerm = null; - char[] source = state.cpbegin; - while (true) { - if (state.cp == state.cpend || source[state.cp] == '|' - || (state.parenNesting != 0 && source[state.cp] == ')')) - { - if (headTerm == null) { - state.result = new RENode(REOP_EMPTY); - } - else - state.result = headTerm; - return true; - } - if (!parseTerm(state)) - return false; - if (headTerm == null) - headTerm = state.result; - else { - if (tailTerm == null) { - headTerm.next = state.result; - tailTerm = state.result; - while (tailTerm.next != null) tailTerm = tailTerm.next; - } - else { - tailTerm.next = state.result; - tailTerm = tailTerm.next; - while (tailTerm.next != null) tailTerm = tailTerm.next; - } - } - } - } - - /* calculate the total size of the bitmap required for a class expression */ - private static boolean - calculateBitmapSize(CompilerState state, RENode target, char[] src, - int index, int end) - { - char rangeStart = 0; - char c; - int n; - int nDigits; - int i; - int max = 0; - boolean inRange = false; - - target.bmsize = 0; - - if (index == end) - return true; - - if (src[index] == '^') - ++index; - - while (index != end) { - int localMax = 0; - nDigits = 2; - switch (src[index]) { - case '\\': - ++index; - c = src[index++]; - switch (c) { - case 'b': - localMax = 0x8; - break; - case 'f': - localMax = 0xC; - break; - case 'n': - localMax = 0xA; - break; - case 'r': - localMax = 0xD; - break; - case 't': - localMax = 0x9; - break; - case 'v': - localMax = 0xB; - break; - case 'c': - if (((index + 1) < end) && Character.isLetter(src[index + 1])) - localMax = (char)(src[index++] & 0x1F); - else - localMax = '\\'; - break; - case 'u': - nDigits += 2; - // fall thru... 
- case 'x': - n = 0; - for (i = 0; (i < nDigits) && (index < end); i++) { - c = src[index++]; - n = Kit.xDigitToInt(c, n); - if (n < 0) { - // Back off to accepting the original - // '\' as a literal - index -= (i + 1); - n = '\\'; - break; - } - } - localMax = n; - break; - case 'd': - if (inRange) { - reportError("msg.bad.range", ""); - return false; - } - localMax = '9'; - break; - case 'D': - case 's': - case 'S': - case 'w': - case 'W': - if (inRange) { - reportError("msg.bad.range", ""); - return false; - } - target.bmsize = 65535; - return true; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - /* - * This is a non-ECMA extension - decimal escapes (in this - * case, octal!) are supposed to be an error inside class - * ranges, but supported here for backwards compatibility. - * - */ - n = (c - '0'); - c = src[index]; - if ('0' <= c && c <= '7') { - index++; - n = 8 * n + (c - '0'); - c = src[index]; - if ('0' <= c && c <= '7') { - index++; - i = 8 * n + (c - '0'); - if (i <= 0377) - n = i; - else - index--; - } - } - localMax = n; - break; - - default: - localMax = c; - break; - } - break; - default: - localMax = src[index++]; - break; - } - if (inRange) { - if (rangeStart > localMax) { - reportError("msg.bad.range", ""); - return false; - } - inRange = false; - } - else { - if (index < (end - 1)) { - if (src[index] == '-') { - ++index; - inRange = true; - rangeStart = (char)localMax; - continue; - } - } - } - if ((state.flags & JSREG_FOLD) != 0){ - char cu = upcase((char)localMax); - char cd = downcase((char)localMax); - localMax = (cu >= cd) ? cu : cd; - } - if (localMax > max) - max = localMax; - } - target.bmsize = max; - return true; - } - - /* - * item: assertion An item is either an assertion or - * quantatom a quantified atom. - * - * assertion: '^' Assertions match beginning of string - * (or line if the class static property - * RegExp.multiline is true). 
- * '$' End of string (or line if the class - * static property RegExp.multiline is - * true). - * '\b' Word boundary (between \w and \W). - * '\B' Word non-boundary. - * - * quantatom: atom An unquantified atom. - * quantatom '{' n ',' m '}' - * Atom must occur between n and m times. - * quantatom '{' n ',' '}' Atom must occur at least n times. - * quantatom '{' n '}' Atom must occur exactly n times. - * quantatom '*' Zero or more times (same as {0,}). - * quantatom '+' One or more times (same as {1,}). - * quantatom '?' Zero or one time (same as {0,1}). - * - * any of which can be optionally followed by '?' for ungreedy - * - * atom: '(' regexp ')' A parenthesized regexp (what matched - * can be addressed using a backreference, - * see '\' n below). - * '.' Matches any char except '\n'. - * '[' classlist ']' A character class. - * '[' '^' classlist ']' A negated character class. - * '\f' Form Feed. - * '\n' Newline (Line Feed). - * '\r' Carriage Return. - * '\t' Horizontal Tab. - * '\v' Vertical Tab. - * '\d' A digit (same as [0-9]). - * '\D' A non-digit. - * '\w' A word character, [0-9a-z_A-Z]. - * '\W' A non-word character. - * '\s' A whitespace character, [ \f\n\r\t\v\u00A0\u2028\u2029] or any Unicode space separator. - * '\S' A non-whitespace character. - * '\' n A backreference to the nth (n decimal - * and positive) parenthesized expression. - * '\' octal An octal escape sequence (octal must be - * two or three digits long, unless it is - * 0 for the null character). - * '\x' hex A hex escape (hex must be two digits). - * '\c' ctrl A control character, ctrl is a letter. - * '\' literalatomchar Any character except one of the above - * that follows '\' in an atom. - * otheratomchar Any character not first among the other - * atom right-hand sides. 
- */ - - private static void doFlat(CompilerState state, char c) - { - state.result = new RENode(REOP_FLAT); - state.result.chr = c; - state.result.length = 1; - state.result.flatIndex = -1; - state.progLength += 3; - } - - private static int - getDecimalValue(char c, CompilerState state, int maxValue, - String overflowMessageId) - { - boolean overflow = false; - int start = state.cp; - char[] src = state.cpbegin; - int value = c - '0'; - for (; state.cp != state.cpend; ++state.cp) { - c = src[state.cp]; - if (!isDigit(c)) { - break; - } - if (!overflow) { - int digit = c - '0'; - if (value < (maxValue - digit) / 10) { - value = value * 10 + digit; - } else { - overflow = true; - value = maxValue; - } - } - } - if (overflow) { - reportError(overflowMessageId, - String.valueOf(src, start, state.cp - start)); - } - return value; - } - - private static boolean - parseTerm(CompilerState state) - { - char[] src = state.cpbegin; - char c = src[state.cp++]; - int nDigits = 2; - int parenBaseCount = state.parenCount; - int num, tmp; - RENode term; - int termStart; - - switch (c) { - /* assertions and atoms */ - case '^': - state.result = new RENode(REOP_BOL); - state.progLength++; - return true; - case '$': - state.result = new RENode(REOP_EOL); - state.progLength++; - return true; - case '\\': - if (state.cp < state.cpend) { - c = src[state.cp++]; - switch (c) { - /* assertion escapes */ - case 'b' : - state.result = new RENode(REOP_WBDRY); - state.progLength++; - return true; - case 'B': - state.result = new RENode(REOP_WNONBDRY); - state.progLength++; - return true; - /* Decimal escape */ - case '0': -/* - * Under 'strict' ECMA 3, we interpret \0 as NUL and don't accept octal. - * However, (XXX and since Rhino doesn't have a 'strict' mode) we'll just - * behave the old way for compatibility reasons. 
- * (see http://bugzilla.mozilla.org/show_bug.cgi?id=141078) - * - */ - reportWarning(state.cx, "msg.bad.backref", ""); - /* octal escape */ - num = 0; - while (state.cp < state.cpend) { - c = src[state.cp]; - if ((c >= '0') && (c <= '7')) { - state.cp++; - tmp = 8 * num + (c - '0'); - if (tmp > 0377) - break; - num = tmp; - } - else - break; - } - c = (char)(num); - doFlat(state, c); - break; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - termStart = state.cp - 1; - num = getDecimalValue(c, state, 0xFFFF, - "msg.overlarge.backref"); - if (num > state.parenCount) - reportWarning(state.cx, "msg.bad.backref", ""); - /* - * n > 9 or > count of parentheses, - * then treat as octal instead. - */ - if ((num > 9) && (num > state.parenCount)) { - state.cp = termStart; - num = 0; - while (state.cp < state.cpend) { - c = src[state.cp]; - if ((c >= '0') && (c <= '7')) { - state.cp++; - tmp = 8 * num + (c - '0'); - if (tmp > 0377) - break; - num = tmp; - } - else - break; - } - c = (char)(num); - doFlat(state, c); - break; - } - /* otherwise, it's a back-reference */ - state.result = new RENode(REOP_BACKREF); - state.result.parenIndex = num - 1; - state.progLength += 3; - break; - /* Control escape */ - case 'f': - c = 0xC; - doFlat(state, c); - break; - case 'n': - c = 0xA; - doFlat(state, c); - break; - case 'r': - c = 0xD; - doFlat(state, c); - break; - case 't': - c = 0x9; - doFlat(state, c); - break; - case 'v': - c = 0xB; - doFlat(state, c); - break; - /* Control letter */ - case 'c': - if (((state.cp + 1) < state.cpend) && - Character.isLetter(src[state.cp + 1])) - c = (char)(src[state.cp++] & 0x1F); - else { - /* back off to accepting the original '\' as a literal */ - --state.cp; - c = '\\'; - } - doFlat(state, c); - break; - /* UnicodeEscapeSequence */ - case 'u': - nDigits += 2; - // fall thru... 
- /* HexEscapeSequence */ - case 'x': - { - int n = 0; - int i; - for (i = 0; (i < nDigits) - && (state.cp < state.cpend); i++) { - c = src[state.cp++]; - n = Kit.xDigitToInt(c, n); - if (n < 0) { - // Back off to accepting the original - // 'u' or 'x' as a literal - state.cp -= (i + 2); - n = src[state.cp++]; - break; - } - } - c = (char)(n); - } - doFlat(state, c); - break; - /* Character class escapes */ - case 'd': - state.result = new RENode(REOP_DIGIT); - state.progLength++; - break; - case 'D': - state.result = new RENode(REOP_NONDIGIT); - state.progLength++; - break; - case 's': - state.result = new RENode(REOP_SPACE); - state.progLength++; - break; - case 'S': - state.result = new RENode(REOP_NONSPACE); - state.progLength++; - break; - case 'w': - state.result = new RENode(REOP_ALNUM); - state.progLength++; - break; - case 'W': - state.result = new RENode(REOP_NONALNUM); - state.progLength++; - break; - /* IdentityEscape */ - default: - state.result = new RENode(REOP_FLAT); - state.result.chr = c; - state.result.length = 1; - state.result.flatIndex = state.cp - 1; - state.progLength += 3; - break; - } - break; - } - else { - /* a trailing '\' is an error */ - reportError("msg.trail.backslash", ""); - return false; - } - case '(': { - RENode result = null; - termStart = state.cp; - if (state.cp + 1 < state.cpend && src[state.cp] == '?' - && ((c = src[state.cp + 1]) == '=' || c == '!' || c == ':')) - { - state.cp += 2; - if (c == '=') { - result = new RENode(REOP_ASSERT); - /* ASSERT, , ... ASSERTTEST */ - state.progLength += 4; - } else if (c == '!') { - result = new RENode(REOP_ASSERT_NOT); - /* ASSERTNOT, , ... ASSERTNOTTEST */ - state.progLength += 4; - } - } else { - result = new RENode(REOP_LPAREN); - /* LPAREN, , ... 
RPAREN, */ - state.progLength += 6; - result.parenIndex = state.parenCount++; - } - ++state.parenNesting; - if (!parseDisjunction(state)) - return false; - if (state.cp == state.cpend || src[state.cp] != ')') { - reportError("msg.unterm.paren", "in regular expression"/*APPJET*/); - return false; - } - ++state.cp; - --state.parenNesting; - if (result != null) { - result.kid = state.result; - state.result = result; - } - break; - } - case ')': - reportError("msg.re.unmatched.right.paren", ""); - return false; - case '[': - state.result = new RENode(REOP_CLASS); - termStart = state.cp; - state.result.startIndex = termStart; - while (true) { - if (state.cp == state.cpend) { - reportError("msg.unterm.class", ""); - return false; - } - if (src[state.cp] == '\\') - state.cp++; - else { - if (src[state.cp] == ']') { - state.result.kidlen = state.cp - termStart; - break; - } - } - state.cp++; - } - state.result.index = state.classCount++; - /* - * Call calculateBitmapSize now as we want any errors it finds - * to be reported during the parse phase, not at execution. - */ - if (!calculateBitmapSize(state, state.result, src, termStart, state.cp++)) - return false; - state.progLength += 3; /* CLASS, */ - break; - - case '.': - state.result = new RENode(REOP_DOT); - state.progLength++; - break; - case '*': - case '+': - case '?': - reportError("msg.bad.quant", String.valueOf(src[state.cp - 1])); - return false; - default: - state.result = new RENode(REOP_FLAT); - state.result.chr = c; - state.result.length = 1; - state.result.flatIndex = state.cp - 1; - state.progLength += 3; - break; - } - - term = state.result; - if (state.cp == state.cpend) { - return true; - } - boolean hasQ = false; - switch (src[state.cp]) { - case '+': - state.result = new RENode(REOP_QUANT); - state.result.min = 1; - state.result.max = -1; - /* , , , ... 
*/ - state.progLength += 8; - hasQ = true; - break; - case '*': - state.result = new RENode(REOP_QUANT); - state.result.min = 0; - state.result.max = -1; - /* , , , ... */ - state.progLength += 8; - hasQ = true; - break; - case '?': - state.result = new RENode(REOP_QUANT); - state.result.min = 0; - state.result.max = 1; - /* , , , ... */ - state.progLength += 8; - hasQ = true; - break; - case '{': /* balance '}' */ - { - int min = 0; - int max = -1; - int leftCurl = state.cp; - - /* For Perl etc. compatibility, if quntifier does not match - * \{\d+(,\d*)?\} exactly back off from it - * being a quantifier, and chew it up as a literal - * atom next time instead. - */ - - c = src[++state.cp]; - if (isDigit(c)) { - ++state.cp; - min = getDecimalValue(c, state, 0xFFFF, - "msg.overlarge.min"); - c = src[state.cp]; - if (c == ',') { - c = src[++state.cp]; - if (isDigit(c)) { - ++state.cp; - max = getDecimalValue(c, state, 0xFFFF, - "msg.overlarge.max"); - c = src[state.cp]; - if (min > max) { - reportError("msg.max.lt.min", - String.valueOf(src[state.cp])); - return false; - } - } - } else { - max = min; - } - /* balance '{' */ - if (c == '}') { - state.result = new RENode(REOP_QUANT); - state.result.min = min; - state.result.max = max; - // QUANT, , , , - // , ... 
- state.progLength += 12; - hasQ = true; - } - } - if (!hasQ) { - state.cp = leftCurl; - } - break; - } - } - if (!hasQ) - return true; - - ++state.cp; - state.result.kid = term; - state.result.parenIndex = parenBaseCount; - state.result.parenCount = state.parenCount - parenBaseCount; - if ((state.cp < state.cpend) && (src[state.cp] == '?')) { - ++state.cp; - state.result.greedy = false; - } - else - state.result.greedy = true; - return true; - } - - private static void resolveForwardJump(byte[] array, int from, int pc) - { - if (from > pc) throw Kit.codeBug(); - addIndex(array, from, pc - from); - } - - private static int getOffset(byte[] array, int pc) - { - return getIndex(array, pc); - } - - private static int addIndex(byte[] array, int pc, int index) - { - if (index < 0) throw Kit.codeBug(); - if (index > 0xFFFF) - throw Context.reportRuntimeError("Too complex regexp"); - array[pc] = (byte)(index >> 8); - array[pc + 1] = (byte)(index); - return pc + 2; - } - - private static int getIndex(byte[] array, int pc) - { - return ((array[pc] & 0xFF) << 8) | (array[pc + 1] & 0xFF); - } - - private static final int OFFSET_LEN = 2; - private static final int INDEX_LEN = 2; - - private static int - emitREBytecode(CompilerState state, RECompiled re, int pc, RENode t) - { - RENode nextAlt; - int nextAltFixup, nextTermFixup; - byte[] program = re.program; - - while (t != null) { - program[pc++] = t.op; - switch (t.op) { - case REOP_EMPTY: - --pc; - break; - case REOP_ALT: - nextAlt = t.kid2; - nextAltFixup = pc; /* address of next alternate */ - pc += OFFSET_LEN; - pc = emitREBytecode(state, re, pc, t.kid); - program[pc++] = REOP_JUMP; - nextTermFixup = pc; /* address of following term */ - pc += OFFSET_LEN; - resolveForwardJump(program, nextAltFixup, pc); - pc = emitREBytecode(state, re, pc, nextAlt); - - program[pc++] = REOP_JUMP; - nextAltFixup = pc; - pc += OFFSET_LEN; - - resolveForwardJump(program, nextTermFixup, pc); - resolveForwardJump(program, nextAltFixup, pc); - 
break; - case REOP_FLAT: - /* - * Consecutize FLAT's if possible. - */ - if (t.flatIndex != -1) { - while ((t.next != null) && (t.next.op == REOP_FLAT) - && ((t.flatIndex + t.length) - == t.next.flatIndex)) { - t.length += t.next.length; - t.next = t.next.next; - } - } - if ((t.flatIndex != -1) && (t.length > 1)) { - if ((state.flags & JSREG_FOLD) != 0) - program[pc - 1] = REOP_FLATi; - else - program[pc - 1] = REOP_FLAT; - pc = addIndex(program, pc, t.flatIndex); - pc = addIndex(program, pc, t.length); - } - else { - if (t.chr < 256) { - if ((state.flags & JSREG_FOLD) != 0) - program[pc - 1] = REOP_FLAT1i; - else - program[pc - 1] = REOP_FLAT1; - program[pc++] = (byte)(t.chr); - } - else { - if ((state.flags & JSREG_FOLD) != 0) - program[pc - 1] = REOP_UCFLAT1i; - else - program[pc - 1] = REOP_UCFLAT1; - pc = addIndex(program, pc, t.chr); - } - } - break; - case REOP_LPAREN: - pc = addIndex(program, pc, t.parenIndex); - pc = emitREBytecode(state, re, pc, t.kid); - program[pc++] = REOP_RPAREN; - pc = addIndex(program, pc, t.parenIndex); - break; - case REOP_BACKREF: - pc = addIndex(program, pc, t.parenIndex); - break; - case REOP_ASSERT: - nextTermFixup = pc; - pc += OFFSET_LEN; - pc = emitREBytecode(state, re, pc, t.kid); - program[pc++] = REOP_ASSERTTEST; - resolveForwardJump(program, nextTermFixup, pc); - break; - case REOP_ASSERT_NOT: - nextTermFixup = pc; - pc += OFFSET_LEN; - pc = emitREBytecode(state, re, pc, t.kid); - program[pc++] = REOP_ASSERTNOTTEST; - resolveForwardJump(program, nextTermFixup, pc); - break; - case REOP_QUANT: - if ((t.min == 0) && (t.max == -1)) - program[pc - 1] = (t.greedy) ? REOP_STAR : REOP_MINIMALSTAR; - else - if ((t.min == 0) && (t.max == 1)) - program[pc - 1] = (t.greedy) ? REOP_OPT : REOP_MINIMALOPT; - else - if ((t.min == 1) && (t.max == -1)) - program[pc - 1] = (t.greedy) ? 
REOP_PLUS : REOP_MINIMALPLUS; - else { - if (!t.greedy) program[pc - 1] = REOP_MINIMALQUANT; - pc = addIndex(program, pc, t.min); - // max can be -1 which addIndex does not accept - pc = addIndex(program, pc, t.max + 1); - } - pc = addIndex(program, pc, t.parenCount); - pc = addIndex(program, pc, t.parenIndex); - nextTermFixup = pc; - pc += OFFSET_LEN; - pc = emitREBytecode(state, re, pc, t.kid); - program[pc++] = REOP_ENDCHILD; - resolveForwardJump(program, nextTermFixup, pc); - break; - case REOP_CLASS: - pc = addIndex(program, pc, t.index); - re.classList[t.index] = new RECharSet(t.bmsize, t.startIndex, - t.kidlen); - break; - default: - break; - } - t = t.next; - } - return pc; - } - - private static void - pushProgState(REGlobalData gData, int min, int max, - REBackTrackData backTrackLastToSave, - int continuation_pc, int continuation_op) - { - gData.stateStackTop = new REProgState(gData.stateStackTop, min, max, - gData.cp, backTrackLastToSave, - continuation_pc, - continuation_op); - } - - private static REProgState - popProgState(REGlobalData gData) - { - REProgState state = gData.stateStackTop; - gData.stateStackTop = state.previous; - return state; - } - - private static void - pushBackTrackState(REGlobalData gData, byte op, int target) - { - gData.backTrackStackTop = new REBackTrackData(gData, op, target); - } - - /* - * Consecutive literal characters. 
- */ - private static boolean - flatNMatcher(REGlobalData gData, int matchChars, - int length, char[] chars, int end) - { - if ((gData.cp + length) > end) - return false; - for (int i = 0; i < length; i++) { - if (gData.regexp.source[matchChars + i] != chars[gData.cp + i]) { - return false; - } - } - gData.cp += length; - return true; - } - - private static boolean - flatNIMatcher(REGlobalData gData, int matchChars, - int length, char[] chars, int end) - { - if ((gData.cp + length) > end) - return false; - for (int i = 0; i < length; i++) { - if (upcase(gData.regexp.source[matchChars + i]) - != upcase(chars[gData.cp + i])) - { - return false; - } - } - gData.cp += length; - return true; - } - - /* - 1. Evaluate DecimalEscape to obtain an EscapeValue E. - 2. If E is not a character then go to step 6. - 3. Let ch be E's character. - 4. Let A be a one-element RECharSet containing the character ch. - 5. Call CharacterSetMatcher(A, false) and return its Matcher result. - 6. E must be an integer. Let n be that integer. - 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception. - 8. Return an internal Matcher closure that takes two arguments, a State x - and a Continuation c, and performs the following: - 1. Let cap be x's captures internal array. - 2. Let s be cap[n]. - 3. If s is undefined, then call c(x) and return its result. - 4. Let e be x's endIndex. - 5. Let len be s's length. - 6. Let f be e+len. - 7. If f>InputLength, return failure. - 8. If there exists an integer i between 0 (inclusive) and len (exclusive) - such that Canonicalize(s[i]) is not the same character as - Canonicalize(Input [e+i]), then return failure. - 9. Let y be the State (f, cap). - 10. Call c(y) and return its result. 
- */ - private static boolean - backrefMatcher(REGlobalData gData, int parenIndex, - char[] chars, int end) - { - int len; - int i; - int parenContent = gData.parens_index(parenIndex); - if (parenContent == -1) - return true; - - len = gData.parens_length(parenIndex); - if ((gData.cp + len) > end) - return false; - - if ((gData.regexp.flags & JSREG_FOLD) != 0) { - for (i = 0; i < len; i++) { - if (upcase(chars[parenContent + i]) != upcase(chars[gData.cp + i])) - return false; - } - } - else { - for (i = 0; i < len; i++) { - if (chars[parenContent + i] != chars[gData.cp + i]) - return false; - } - } - gData.cp += len; - return true; - } - - - /* Add a single character to the RECharSet */ - private static void - addCharacterToCharSet(RECharSet cs, char c) - { - int byteIndex = (c / 8); - if (c > cs.length) - throw new RuntimeException(); - cs.bits[byteIndex] |= 1 << (c & 0x7); - } - - - /* Add a character range, c1 to c2 (inclusive) to the RECharSet */ - private static void - addCharacterRangeToCharSet(RECharSet cs, char c1, char c2) - { - int i; - - int byteIndex1 = (c1 / 8); - int byteIndex2 = (c2 / 8); - - if ((c2 > cs.length) || (c1 > c2)) - throw new RuntimeException(); - - c1 &= 0x7; - c2 &= 0x7; - - if (byteIndex1 == byteIndex2) { - cs.bits[byteIndex1] |= ((0xFF) >> (7 - (c2 - c1))) << c1; - } - else { - cs.bits[byteIndex1] |= 0xFF << c1; - for (i = byteIndex1 + 1; i < byteIndex2; i++) - cs.bits[i] = (byte)0xFF; - cs.bits[byteIndex2] |= (0xFF) >> (7 - c2); - } - } - - /* Compile the source of the class into a RECharSet */ - private static void - processCharSet(REGlobalData gData, RECharSet charSet) - { - synchronized (charSet) { - if (!charSet.converted) { - processCharSetImpl(gData, charSet); - charSet.converted = true; - } - } - } - - - private static void - processCharSetImpl(REGlobalData gData, RECharSet charSet) - { - int src = charSet.startIndex; - int end = src + charSet.strlength; - - char rangeStart = 0, thisCh; - int byteLength; - char c; - int n; - 
int nDigits; - int i; - boolean inRange = false; - - charSet.sense = true; - byteLength = (charSet.length / 8) + 1; - charSet.bits = new byte[byteLength]; - - if (src == end) - return; - - if (gData.regexp.source[src] == '^') { - charSet.sense = false; - ++src; - } - - while (src != end) { - nDigits = 2; - switch (gData.regexp.source[src]) { - case '\\': - ++src; - c = gData.regexp.source[src++]; - switch (c) { - case 'b': - thisCh = 0x8; - break; - case 'f': - thisCh = 0xC; - break; - case 'n': - thisCh = 0xA; - break; - case 'r': - thisCh = 0xD; - break; - case 't': - thisCh = 0x9; - break; - case 'v': - thisCh = 0xB; - break; - case 'c': - if (((src + 1) < end) && isWord(gData.regexp.source[src + 1])) - thisCh = (char)(gData.regexp.source[src++] & 0x1F); - else { - --src; - thisCh = '\\'; - } - break; - case 'u': - nDigits += 2; - // fall thru - case 'x': - n = 0; - for (i = 0; (i < nDigits) && (src < end); i++) { - c = gData.regexp.source[src++]; - int digit = toASCIIHexDigit(c); - if (digit < 0) { - /* back off to accepting the original '\' - * as a literal - */ - src -= (i + 1); - n = '\\'; - break; - } - n = (n << 4) | digit; - } - thisCh = (char)(n); - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - /* - * This is a non-ECMA extension - decimal escapes (in this - * case, octal!) are supposed to be an error inside class - * ranges, but supported here for backwards compatibility. 
- * - */ - n = (c - '0'); - c = gData.regexp.source[src]; - if ('0' <= c && c <= '7') { - src++; - n = 8 * n + (c - '0'); - c = gData.regexp.source[src]; - if ('0' <= c && c <= '7') { - src++; - i = 8 * n + (c - '0'); - if (i <= 0377) - n = i; - else - src--; - } - } - thisCh = (char)(n); - break; - - case 'd': - addCharacterRangeToCharSet(charSet, '0', '9'); - continue; /* don't need range processing */ - case 'D': - addCharacterRangeToCharSet(charSet, (char)0, (char)('0' - 1)); - addCharacterRangeToCharSet(charSet, (char)('9' + 1), - (char)(charSet.length)); - continue; - case 's': - for (i = charSet.length; i >= 0; i--) - if (isREWhiteSpace(i)) - addCharacterToCharSet(charSet, (char)(i)); - continue; - case 'S': - for (i = charSet.length; i >= 0; i--) - if (!isREWhiteSpace(i)) - addCharacterToCharSet(charSet, (char)(i)); - continue; - case 'w': - for (i = charSet.length; i >= 0; i--) - if (isWord((char)i)) - addCharacterToCharSet(charSet, (char)(i)); - continue; - case 'W': - for (i = charSet.length; i >= 0; i--) - if (!isWord((char)i)) - addCharacterToCharSet(charSet, (char)(i)); - continue; - default: - thisCh = c; - break; - - } - break; - - default: - thisCh = gData.regexp.source[src++]; - break; - - } - if (inRange) { - if ((gData.regexp.flags & JSREG_FOLD) != 0) { - addCharacterRangeToCharSet(charSet, - upcase(rangeStart), - upcase(thisCh)); - addCharacterRangeToCharSet(charSet, - downcase(rangeStart), - downcase(thisCh)); - } else { - addCharacterRangeToCharSet(charSet, rangeStart, thisCh); - } - inRange = false; - } - else { - if ((gData.regexp.flags & JSREG_FOLD) != 0) { - addCharacterToCharSet(charSet, upcase(thisCh)); - addCharacterToCharSet(charSet, downcase(thisCh)); - } else { - addCharacterToCharSet(charSet, thisCh); - } - if (src < (end - 1)) { - if (gData.regexp.source[src] == '-') { - ++src; - inRange = true; - rangeStart = thisCh; - } - } - } - } - } - - - /* - * Initialize the character set if it this is the first call. 
- * Test the bit - if the ^ flag was specified, non-inclusion is a success - */ - private static boolean - classMatcher(REGlobalData gData, RECharSet charSet, char ch) - { - if (!charSet.converted) { - processCharSet(gData, charSet); - } - - int byteIndex = ch / 8; - if (charSet.sense) { - if ((charSet.length == 0) || - ( (ch > charSet.length) - || ((charSet.bits[byteIndex] & (1 << (ch & 0x7))) == 0) )) - return false; - } else { - if (! ((charSet.length == 0) || - ( (ch > charSet.length) - || ((charSet.bits[byteIndex] & (1 << (ch & 0x7))) == 0) ))) - return false; - } - return true; - } - - private static boolean - executeREBytecode(REGlobalData gData, char[] chars, int end) - { - int pc = 0; - byte program[] = gData.regexp.program; - int currentContinuation_op; - int currentContinuation_pc; - boolean result = false; - - currentContinuation_pc = 0; - currentContinuation_op = REOP_END; -if (debug) { -System.out.println("Input = \"" + new String(chars) + "\", start at " + gData.cp); -} - int op = program[pc++]; - for (;;) { -if (debug) { -System.out.println("Testing at " + gData.cp + ", op = " + op); -} - switch (op) { - case REOP_EMPTY: - result = true; - break; - case REOP_BOL: - if (gData.cp != 0) { - if (gData.multiline || - ((gData.regexp.flags & JSREG_MULTILINE) != 0)) { - if (!isLineTerm(chars[gData.cp - 1])) { - result = false; - break; - } - } - else { - result = false; - break; - } - } - result = true; - break; - case REOP_EOL: - if (gData.cp != end) { - if (gData.multiline || - ((gData.regexp.flags & JSREG_MULTILINE) != 0)) { - if (!isLineTerm(chars[gData.cp])) { - result = false; - break; - } - } - else { - result = false; - break; - } - } - result = true; - break; - case REOP_WBDRY: - result = ((gData.cp == 0 || !isWord(chars[gData.cp - 1])) - ^ !((gData.cp < end) && isWord(chars[gData.cp]))); - break; - case REOP_WNONBDRY: - result = ((gData.cp == 0 || !isWord(chars[gData.cp - 1])) - ^ ((gData.cp < end) && isWord(chars[gData.cp]))); - break; - case 
REOP_DOT: - result = (gData.cp != end && !isLineTerm(chars[gData.cp])); - if (result) { - gData.cp++; - } - break; - case REOP_DIGIT: - result = (gData.cp != end && isDigit(chars[gData.cp])); - if (result) { - gData.cp++; - } - break; - case REOP_NONDIGIT: - result = (gData.cp != end && !isDigit(chars[gData.cp])); - if (result) { - gData.cp++; - } - break; - case REOP_SPACE: - result = (gData.cp != end && isREWhiteSpace(chars[gData.cp])); - if (result) { - gData.cp++; - } - break; - case REOP_NONSPACE: - result = (gData.cp != end && !isREWhiteSpace(chars[gData.cp])); - if (result) { - gData.cp++; - } - break; - case REOP_ALNUM: - result = (gData.cp != end && isWord(chars[gData.cp])); - if (result) { - gData.cp++; - } - break; - case REOP_NONALNUM: - result = (gData.cp != end && !isWord(chars[gData.cp])); - if (result) { - gData.cp++; - } - break; - case REOP_FLAT: - { - int offset = getIndex(program, pc); - pc += INDEX_LEN; - int length = getIndex(program, pc); - pc += INDEX_LEN; - result = flatNMatcher(gData, offset, length, chars, end); - } - break; - case REOP_FLATi: - { - int offset = getIndex(program, pc); - pc += INDEX_LEN; - int length = getIndex(program, pc); - pc += INDEX_LEN; - result = flatNIMatcher(gData, offset, length, chars, end); - } - break; - case REOP_FLAT1: - { - char matchCh = (char)(program[pc++] & 0xFF); - result = (gData.cp != end && chars[gData.cp] == matchCh); - if (result) { - gData.cp++; - } - } - break; - case REOP_FLAT1i: - { - char matchCh = (char)(program[pc++] & 0xFF); - result = (gData.cp != end - && upcase(chars[gData.cp]) == upcase(matchCh)); - if (result) { - gData.cp++; - } - } - break; - case REOP_UCFLAT1: - { - char matchCh = (char)getIndex(program, pc); - pc += INDEX_LEN; - result = (gData.cp != end && chars[gData.cp] == matchCh); - if (result) { - gData.cp++; - } - } - break; - case REOP_UCFLAT1i: - { - char matchCh = (char)getIndex(program, pc); - pc += INDEX_LEN; - result = (gData.cp != end - && upcase(chars[gData.cp]) == 
upcase(matchCh)); - if (result) { - gData.cp++; - } - } - break; - case REOP_ALT: - { - int nextpc; - byte nextop; - pushProgState(gData, 0, 0, null, - currentContinuation_pc, - currentContinuation_op); - nextpc = pc + getOffset(program, pc); - nextop = program[nextpc++]; - pushBackTrackState(gData, nextop, nextpc); - pc += INDEX_LEN; - op = program[pc++]; - } - continue; - - case REOP_JUMP: - { - int offset; - REProgState state = popProgState(gData); - currentContinuation_pc = state.continuation_pc; - currentContinuation_op = state.continuation_op; - offset = getOffset(program, pc); - pc += offset; - op = program[pc++]; - } - continue; - - - case REOP_LPAREN: - { - int parenIndex = getIndex(program, pc); - pc += INDEX_LEN; - gData.set_parens(parenIndex, gData.cp, 0); - op = program[pc++]; - } - continue; - case REOP_RPAREN: - { - int cap_index; - int parenIndex = getIndex(program, pc); - pc += INDEX_LEN; - cap_index = gData.parens_index(parenIndex); - gData.set_parens(parenIndex, cap_index, - gData.cp - cap_index); - if (parenIndex > gData.lastParen) - gData.lastParen = parenIndex; - op = program[pc++]; - } - continue; - case REOP_BACKREF: - { - int parenIndex = getIndex(program, pc); - pc += INDEX_LEN; - result = backrefMatcher(gData, parenIndex, chars, end); - } - break; - - case REOP_CLASS: - { - int index = getIndex(program, pc); - pc += INDEX_LEN; - if (gData.cp != end) { - if (classMatcher(gData, gData.regexp.classList[index], - chars[gData.cp])) - { - gData.cp++; - result = true; - break; - } - } - result = false; - } - break; - - case REOP_ASSERT: - case REOP_ASSERT_NOT: - { - byte testOp; - pushProgState(gData, 0, 0, gData.backTrackStackTop, - currentContinuation_pc, - currentContinuation_op); - if (op == REOP_ASSERT) { - testOp = REOP_ASSERTTEST; - } else { - testOp = REOP_ASSERTNOTTEST; - } - pushBackTrackState(gData, testOp, - pc + getOffset(program, pc)); - pc += INDEX_LEN; - op = program[pc++]; - } - continue; - - case REOP_ASSERTTEST: - case 
REOP_ASSERTNOTTEST: - { - REProgState state = popProgState(gData); - gData.cp = state.index; - gData.backTrackStackTop = state.backTrack; - currentContinuation_pc = state.continuation_pc; - currentContinuation_op = state.continuation_op; - if (result) { - if (op == REOP_ASSERTTEST) { - result = true; - } else { - result = false; - } - } else { - if (op == REOP_ASSERTTEST) { - // Do nothing - } else { - result = true; - } - } - } - break; - - case REOP_STAR: - case REOP_PLUS: - case REOP_OPT: - case REOP_QUANT: - case REOP_MINIMALSTAR: - case REOP_MINIMALPLUS: - case REOP_MINIMALOPT: - case REOP_MINIMALQUANT: - { - int min, max; - boolean greedy = false; - switch (op) { - case REOP_STAR: - greedy = true; - // fallthrough - case REOP_MINIMALSTAR: - min = 0; - max = -1; - break; - case REOP_PLUS: - greedy = true; - // fallthrough - case REOP_MINIMALPLUS: - min = 1; - max = -1; - break; - case REOP_OPT: - greedy = true; - // fallthrough - case REOP_MINIMALOPT: - min = 0; - max = 1; - break; - case REOP_QUANT: - greedy = true; - // fallthrough - case REOP_MINIMALQUANT: - min = getOffset(program, pc); - pc += INDEX_LEN; - // See comments in emitREBytecode for " - 1" reason - max = getOffset(program, pc) - 1; - pc += INDEX_LEN; - break; - default: - throw Kit.codeBug(); - } - pushProgState(gData, min, max, null, - currentContinuation_pc, - currentContinuation_op); - if (greedy) { - currentContinuation_op = REOP_REPEAT; - currentContinuation_pc = pc; - pushBackTrackState(gData, REOP_REPEAT, pc); - /* Step over , & */ - pc += 3 * INDEX_LEN; - op = program[pc++]; - } else { - if (min != 0) { - currentContinuation_op = REOP_MINIMALREPEAT; - currentContinuation_pc = pc; - /* & */ - pc += 3 * INDEX_LEN; - op = program[pc++]; - } else { - pushBackTrackState(gData, REOP_MINIMALREPEAT, pc); - popProgState(gData); - pc += 2 * INDEX_LEN; // & - pc = pc + getOffset(program, pc); - op = program[pc++]; - } - } - } - continue; - - case REOP_ENDCHILD: - // Use the current continuation. 
- pc = currentContinuation_pc; - op = currentContinuation_op; - continue; - - case REOP_REPEAT: - { - REProgState state = popProgState(gData); - if (!result) { - // - // There's been a failure, see if we have enough - // children. - // - if (state.min == 0) - result = true; - currentContinuation_pc = state.continuation_pc; - currentContinuation_op = state.continuation_op; - pc += 2 * INDEX_LEN; /* & */ - pc = pc + getOffset(program, pc); - break; - } - else { - if (state.min == 0 && gData.cp == state.index) { - // matched an empty string, that'll get us nowhere - result = false; - currentContinuation_pc = state.continuation_pc; - currentContinuation_op = state.continuation_op; - pc += 2 * INDEX_LEN; - pc = pc + getOffset(program, pc); - break; - } - int new_min = state.min, new_max = state.max; - if (new_min != 0) new_min--; - if (new_max != -1) new_max--; - if (new_max == 0) { - result = true; - currentContinuation_pc = state.continuation_pc; - currentContinuation_op = state.continuation_op; - pc += 2 * INDEX_LEN; - pc = pc + getOffset(program, pc); - break; - } - pushProgState(gData, new_min, new_max, null, - state.continuation_pc, - state.continuation_op); - currentContinuation_op = REOP_REPEAT; - currentContinuation_pc = pc; - pushBackTrackState(gData, REOP_REPEAT, pc); - int parenCount = getIndex(program, pc); - pc += INDEX_LEN; - int parenIndex = getIndex(program, pc); - pc += 2 * INDEX_LEN; - op = program[pc++]; - for (int k = 0; k < parenCount; k++) { - gData.set_parens(parenIndex + k, -1, 0); - } - } - } - continue; - - case REOP_MINIMALREPEAT: - { - REProgState state = popProgState(gData); - if (!result) { - // - // Non-greedy failure - try to consume another child. 
- // - if (state.max == -1 || state.max > 0) { - pushProgState(gData, state.min, state.max, null, - state.continuation_pc, - state.continuation_op); - currentContinuation_op = REOP_MINIMALREPEAT; - currentContinuation_pc = pc; - int parenCount = getIndex(program, pc); - pc += INDEX_LEN; - int parenIndex = getIndex(program, pc); - pc += 2 * INDEX_LEN; - for (int k = 0; k < parenCount; k++) { - gData.set_parens(parenIndex + k, -1, 0); - } - op = program[pc++]; - continue; - } else { - // Don't need to adjust pc since we're going to pop. - currentContinuation_pc = state.continuation_pc; - currentContinuation_op = state.continuation_op; - break; - } - } else { - if (state.min == 0 && gData.cp == state.index) { - // Matched an empty string, that'll get us nowhere. - result = false; - currentContinuation_pc = state.continuation_pc; - currentContinuation_op = state.continuation_op; - break; - } - int new_min = state.min, new_max = state.max; - if (new_min != 0) new_min--; - if (new_max != -1) new_max--; - pushProgState(gData, new_min, new_max, null, - state.continuation_pc, - state.continuation_op); - if (new_min != 0) { - currentContinuation_op = REOP_MINIMALREPEAT; - currentContinuation_pc = pc; - int parenCount = getIndex(program, pc); - pc += INDEX_LEN; - int parenIndex = getIndex(program, pc); - pc += 2 * INDEX_LEN; - for (int k = 0; k < parenCount; k++) { - gData.set_parens(parenIndex + k, -1, 0); - } - op = program[pc++]; - } else { - currentContinuation_pc = state.continuation_pc; - currentContinuation_op = state.continuation_op; - pushBackTrackState(gData, REOP_MINIMALREPEAT, pc); - popProgState(gData); - pc += 2 * INDEX_LEN; - pc = pc + getOffset(program, pc); - op = program[pc++]; - } - continue; - } - } - - case REOP_END: - return true; - - default: - throw Kit.codeBug(); - - } - /* - * If the match failed and there's a backtrack option, take it. - * Otherwise this is a complete and utter failure. 
- */ - if (!result) { - REBackTrackData backTrackData = gData.backTrackStackTop; - if (backTrackData != null) { - gData.backTrackStackTop = backTrackData.previous; - - gData.lastParen = backTrackData.lastParen; - - // XXX: If backTrackData will no longer be used, then - // there is no need to clone backTrackData.parens - if (backTrackData.parens != null) { - gData.parens = backTrackData.parens.clone(); - } - - gData.cp = backTrackData.cp; - - gData.stateStackTop = backTrackData.stateStackTop; - - currentContinuation_op - = gData.stateStackTop.continuation_op; - currentContinuation_pc - = gData.stateStackTop.continuation_pc; - pc = backTrackData.continuation_pc; - op = backTrackData.continuation_op; - continue; - } - else - return false; - } - - op = program[pc++]; - } - - } - - private static boolean - matchRegExp(REGlobalData gData, RECompiled re, - char[] chars, int start, int end, boolean multiline) - { - if (re.parenCount != 0) { - gData.parens = new long[re.parenCount]; - } else { - gData.parens = null; - } - - gData.backTrackStackTop = null; - - gData.stateStackTop = null; - - gData.multiline = multiline; - gData.regexp = re; - gData.lastParen = 0; - - int anchorCh = gData.regexp.anchorCh; - // - // have to include the position beyond the last character - // in order to detect end-of-input/line condition - // - for (int i = start; i <= end; ++i) { - // - // If the first node is a literal match, step the index into - // the string until that match is made, or fail if it can't be - // found at all. 
- // - if (anchorCh >= 0) { - for (;;) { - if (i == end) { - return false; - } - char matchCh = chars[i]; - if (matchCh == anchorCh || - ((gData.regexp.flags & JSREG_FOLD) != 0 - && upcase(matchCh) == upcase((char)anchorCh))) - { - break; - } - ++i; - } - } - gData.cp = i; - for (int j = 0; j < re.parenCount; j++) { - gData.set_parens(j, -1, 0); - } - boolean result = executeREBytecode(gData, chars, end); - - gData.backTrackStackTop = null; - gData.stateStackTop = null; - if (result) { - gData.skipped = i - start; - return true; - } - } - return false; - } - - /* - * indexp is assumed to be an array of length 1 - */ - Object executeRegExp(Context cx, Scriptable scopeObj, RegExpImpl res, - String str, int indexp[], int matchType) - { - REGlobalData gData = new REGlobalData(); - - int start = indexp[0]; - char[] charArray = str.toCharArray(); - int end = charArray.length; - if (start > end) - start = end; - // - // Call the recursive matcher to do the real work. - // - boolean matches = matchRegExp(gData, re, charArray, start, end, - res.multiline); - if (!matches) { - if (matchType != PREFIX) return null; - return Undefined.instance; - } - int index = gData.cp; - int i = index; - indexp[0] = i; - int matchlen = i - (start + gData.skipped); - int ep = index; - index -= matchlen; - Object result; - Scriptable obj; - - if (matchType == TEST) { - /* - * Testing for a match and updating cx.regExpImpl: don't allocate - * an array object, do return true. - */ - result = Boolean.TRUE; - obj = null; - } - else { - /* - * The array returned on match has element 0 bound to the matched - * string, elements 1 through re.parenCount bound to the paren - * matches, an index property telling the length of the left context, - * and an input property referring to the input string. 
- */ - Scriptable scope = getTopLevelScope(scopeObj); - result = ScriptRuntime.newObject(cx, scope, "Array", null); - obj = (Scriptable) result; - - String matchstr = new String(charArray, index, matchlen); - obj.put(0, obj, matchstr); - } - - if (re.parenCount == 0) { - res.parens = null; - res.lastParen = SubString.emptySubString; - } else { - SubString parsub = null; - int num; - res.parens = new SubString[re.parenCount]; - for (num = 0; num < re.parenCount; num++) { - int cap_index = gData.parens_index(num); - String parstr; - if (cap_index != -1) { - int cap_length = gData.parens_length(num); - parsub = new SubString(charArray, cap_index, cap_length); - res.parens[num] = parsub; - if (matchType == TEST) continue; - parstr = parsub.toString(); - obj.put(num+1, obj, parstr); - } - else { - if (matchType != TEST) - obj.put(num+1, obj, Undefined.instance); - } - } - res.lastParen = parsub; - } - - if (! (matchType == TEST)) { - /* - * Define the index and input properties last for better for/in loop - * order (so they come after the elements). - */ - obj.put("index", obj, new Integer(start + gData.skipped)); - obj.put("input", obj, str); - } - - if (res.lastMatch == null) { - res.lastMatch = new SubString(); - res.leftContext = new SubString(); - res.rightContext = new SubString(); - } - res.lastMatch.charArray = charArray; - res.lastMatch.index = index; - res.lastMatch.length = matchlen; - - res.leftContext.charArray = charArray; - if (cx.getLanguageVersion() == Context.VERSION_1_2) { - /* - * JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used - * in scalar contexts, and unintentionally for the string.match "list" - * psuedo-context. 
On "hi there bye", the following would result: - * - * Language while(/ /g){print("$`");} s/ /$`/g - * perl4.036 "hi", "there" "hihitherehi therebye" - * perl5 "hi", "hi there" "hihitherehi therebye" - * js1.2 "hi", "there" "hihitheretherebye" - * - * Insofar as JS1.2 always defined $` as "left context from the last - * match" for global regexps, it was more consistent than perl4. - */ - res.leftContext.index = start; - res.leftContext.length = gData.skipped; - } else { - /* - * For JS1.3 and ECMAv2, emulate Perl5 exactly: - * - * js1.3 "hi", "hi there" "hihitherehi therebye" - */ - res.leftContext.index = 0; - res.leftContext.length = start + gData.skipped; - } - - res.rightContext.charArray = charArray; - res.rightContext.index = ep; - res.rightContext.length = end - ep; - - return result; - } - - int getFlags() - { - return re.flags; - } - - private static void reportWarning(Context cx, String messageId, String arg) - { - if (cx.hasFeature(Context.FEATURE_STRICT_MODE)) { - String msg = ScriptRuntime.getMessage1(messageId, arg); - Context.reportWarning(msg); - } - } - - private static void reportError(String messageId, String arg) - { - String msg = ScriptRuntime.getMessage1(messageId, arg); - throw ScriptRuntime.constructError("SyntaxError", msg); - } - -// #string_id_map# - - private static final int - Id_lastIndex = 1, - Id_source = 2, - Id_global = 3, - Id_ignoreCase = 4, - Id_multiline = 5, - - MAX_INSTANCE_ID = 5; - - protected int getMaxInstanceId() - { - return MAX_INSTANCE_ID; - } - - protected int findInstanceIdInfo(String s) - { - int id; -// #generated# Last update: 2007-05-09 08:16:24 EDT - L0: { id = 0; String X = null; int c; - int s_length = s.length(); - if (s_length==6) { - c=s.charAt(0); - if (c=='g') { X="global";id=Id_global; } - else if (c=='s') { X="source";id=Id_source; } - } - else if (s_length==9) { - c=s.charAt(0); - if (c=='l') { X="lastIndex";id=Id_lastIndex; } - else if (c=='m') { X="multiline";id=Id_multiline; } - } - else if 
(s_length==10) { X="ignoreCase";id=Id_ignoreCase; } - if (X!=null && X!=s && !X.equals(s)) id = 0; - break L0; - } -// #/generated# -// #/string_id_map# - - if (id == 0) return super.findInstanceIdInfo(s); - - int attr; - switch (id) { - case Id_lastIndex: - attr = PERMANENT | DONTENUM; - break; - case Id_source: - case Id_global: - case Id_ignoreCase: - case Id_multiline: - attr = PERMANENT | READONLY | DONTENUM; - break; - default: - throw new IllegalStateException(); - } - return instanceIdInfo(attr, id); - } - - protected String getInstanceIdName(int id) - { - switch (id) { - case Id_lastIndex: return "lastIndex"; - case Id_source: return "source"; - case Id_global: return "global"; - case Id_ignoreCase: return "ignoreCase"; - case Id_multiline: return "multiline"; - } - return super.getInstanceIdName(id); - } - - protected Object getInstanceIdValue(int id) - { - switch (id) { - case Id_lastIndex: - return ScriptRuntime.wrapNumber(lastIndex); - case Id_source: - return new String(re.source); - case Id_global: - return ScriptRuntime.wrapBoolean((re.flags & JSREG_GLOB) != 0); - case Id_ignoreCase: - return ScriptRuntime.wrapBoolean((re.flags & JSREG_FOLD) != 0); - case Id_multiline: - return ScriptRuntime.wrapBoolean((re.flags & JSREG_MULTILINE) != 0); - } - return super.getInstanceIdValue(id); - } - - protected void setInstanceIdValue(int id, Object value) - { - if (id == Id_lastIndex) { - lastIndex = ScriptRuntime.toNumber(value); - return; - } - super.setInstanceIdValue(id, value); - } - - protected void initPrototypeId(int id) - { - String s; - int arity; - switch (id) { - case Id_compile: arity=1; s="compile"; break; - case Id_toString: arity=0; s="toString"; break; - case Id_toSource: arity=0; s="toSource"; break; - case Id_exec: arity=1; s="exec"; break; - case Id_test: arity=1; s="test"; break; - case Id_prefix: arity=1; s="prefix"; break; - default: throw new IllegalArgumentException(String.valueOf(id)); - } - initPrototypeMethod(REGEXP_TAG, id, s, 
arity);
    }

    /**
     * Dispatches a call made through one of the RegExp prototype functions.
     *
     * If <code>f</code> does not carry REGEXP_TAG the call is forwarded to the
     * superclass. Otherwise the method id of <code>f</code> selects the action:
     * compile, toString/toSource (both return toString()), exec, test or
     * prefix. exec, test and prefix all go through execSub and differ only in
     * the mode constant passed (MATCH, TEST, PREFIX).
     *
     * Every case resolves the receiver through realThis, so a
     * <code>thisObj</code> that is not a NativeRegExp fails with the error
     * produced by incompatibleCallError(f).
     *
     * @param f the function object being invoked; supplies the tag and id
     * @param cx passed through to compile / execSub
     * @param scope passed through to compile / execSub
     * @param thisObj the receiver of the call
     * @param args passed through to compile / execSub
     * @return the result of the selected operation; for test, always
     *         Boolean.TRUE or Boolean.FALSE
     * @throws IllegalArgumentException if the method id matches none of the
     *         cases; the message is the id
     */
    public Object execIdCall(IdFunctionObject f, Context cx, Scriptable scope,
                             Scriptable thisObj, Object[] args)
    {
        if (!f.hasTag(REGEXP_TAG)) {
            return super.execIdCall(f, cx, scope, thisObj, args);
        }
        int id = f.methodId();
        switch (id) {
          case Id_compile:
            return realThis(thisObj, f).compile(cx, scope, args);

          case Id_toString:
          case Id_toSource:
            return realThis(thisObj, f).toString();

          case Id_exec:
            return realThis(thisObj, f).execSub(cx, scope, args, MATCH);

          case Id_test: {
            Object x = realThis(thisObj, f).execSub(cx, scope, args, TEST);
            /* anything other than Boolean.TRUE (including null) maps to FALSE */
            return Boolean.TRUE.equals(x) ? Boolean.TRUE : Boolean.FALSE;
          }

          case Id_prefix:
            return realThis(thisObj, f).execSub(cx, scope, args, PREFIX);
        }
        throw new IllegalArgumentException(String.valueOf(id));
    }

    /**
     * Casts <code>thisObj</code> to NativeRegExp.
     *
     * @param thisObj the receiver to check
     * @param f the function being called; only used to build the error
     * @return <code>thisObj</code> as a NativeRegExp
     * @throws RuntimeException whatever incompatibleCallError(f) returns, when
     *         <code>thisObj</code> is not a NativeRegExp (this includes null)
     */
    private static NativeRegExp realThis(Scriptable thisObj, IdFunctionObject f)
    {
        if (!(thisObj instanceof NativeRegExp))
            throw incompatibleCallError(f);
        return (NativeRegExp)thisObj;
    }

// #string_id_map#
    /**
     * Maps a prototype method name to its Id_ constant.
     *
     * Recognized names are "exec", "test", "prefix", "compile", "toSource"
     * and "toString". The lookup first switches on the length of
     * <code>s</code>, then (for lengths 4 and 8) on one distinguishing
     * character, and finally confirms the candidate with a full comparison;
     * the reference test <code>X!=s</code> only skips the equals() call when
     * both are the same object.
     *
     * The body between the #generated# markers carries a "Last update"
     * timestamp; NOTE(review): it appears to be tool-generated, so prefer
     * regenerating it over editing by hand - confirm with the build setup.
     *
     * @param s the property name to look up
     * @return the matching Id_ constant, or 0 if <code>s</code> is not one of
     *         the names above
     */
    protected int findPrototypeId(String s)
    {
        int id;
// #generated# Last update: 2007-05-09 08:16:24 EDT
        L0: { id = 0; String X = null; int c;
            L: switch (s.length()) {
            case 4: c=s.charAt(0);
                if (c=='e') { X="exec";id=Id_exec; }
                else if (c=='t') { X="test";id=Id_test; }
                break L;
            case 6: X="prefix";id=Id_prefix; break L;
            case 7: X="compile";id=Id_compile; break L;
            case 8: c=s.charAt(3);
                if (c=='o') { X="toSource";id=Id_toSource; }
                else if (c=='t') { X="toString";id=Id_toString; }
                break L;
            }
            if (X!=null && X!=s && !X.equals(s)) id = 0;
            break L0;
        }
// #/generated#
        return id;
    }

    /*
     * Method ids returned by findPrototypeId and switched on in execIdCall.
     * 0 is not used as an id: findPrototypeId returns it for "not found".
     * MAX_PROTOTYPE_ID equals the largest id declared here.
     */
    private static final int
        Id_compile       = 1,
        Id_toString      = 2,
        Id_toSource      = 3,
        Id_exec          = 4,
        Id_test          = 5,
        Id_prefix        = 6,

        MAX_PROTOTYPE_ID = 6;

// #/string_id_map#

    private RECompiled re;     /* compiled form of this regexp */
    double lastIndex;          /* index after last match, for //g iterator */

}       // class NativeRegExp

/**
 * Compiled representation of a regular expression: its source characters,
 * flags, bytecode program and the table of character classes it uses.
 * Serializable; every field declared here is non-transient.
 */
class RECompiled implements Serializable
{
    static final long serialVersionUID = -6144956577595844213L;

    char []source;          /* locked source string, sans // */
    int parenCount;         /* number of parenthesized submatches */
    int flags;              /* flags  */
    byte[] program;         /* regular expression bytecode */
    int classCount;         /* count [...] bitmaps */
    RECharSet[] classList;  /* list of [...] bitmaps */
    int anchorCh = -1;      /* if >= 0, then re starts with this literal char */
}

/**
 * A node carrying one regexp opcode together with its operands.
 *
 * Nodes are linked through <code>next</code> (concatenation order) and
 * <code>kid</code> / <code>kid2</code> (operands). The remaining fields are
 * grouped by the kind of node they describe, as the inline comments indicate;
 * which group is meaningful depends on <code>op</code>.
 */
class RENode {

    /**
     * Creates a node for the given opcode; all other fields keep their
     * Java default values.
     */
    RENode(byte op)
    {
        this.op = op;
    }

    byte            op;         /* r.e. op bytecode */
    RENode          next;       /* next in concatenation order */
    RENode          kid;        /* first operand */

    RENode          kid2;       /* second operand */
    int             num;        /* could be a number */
    int             parenIndex; /* or a parenthesis index */

                                /* or a range */
    int             min;
    int             max;
    int             parenCount;
    boolean         greedy;

                                /* or a character class */
    int             startIndex;
    int             kidlen;     /* length of string at kid, in chars */
    int             bmsize;     /* bitmap size, based on max char code */
    int             index;      /* index into class list */

                                /* or a literal sequence */
    char            chr;        /* of one character */
    int             length;     /* or many (via the index) */
    int             flatIndex;  /* which is -1 if not sourced */

}

/**
 * Bookkeeping for compiling one regexp source: the source characters, a
 * position within them, the flags, running counters and the resulting node.
 */
class CompilerState {

    /**
     * @param cx stored in <code>cx</code>
     * @param source stored by reference in <code>cpbegin</code> (not copied)
     * @param length stored in <code>cpend</code>
     * @param flags stored in <code>flags</code>
     *
     * <code>cp</code> and the three counters are set to 0 explicitly;
     * <code>parenNesting</code> and <code>result</code> are not assigned here
     * and therefore start at 0 and null.
     */
    CompilerState(Context cx, char[] source, int length, int flags)
    {
        this.cx = cx;
        this.cpbegin = source;
        this.cp = 0;
        this.cpend = length;
        this.flags = flags;
        this.parenCount = 0;
        this.classCount = 0;
        this.progLength = 0;
    }

    Context     cx;
    char        cpbegin[];    /* the source characters */
    int         cpend;        /* the length passed to the constructor */
    int         cp;           /* starts at 0; presumably the current position in cpbegin - confirm */
    int         flags;
    int         parenCount;
    int         parenNesting;
    int         classCount;   /* number of [] encountered */
    int         progLength;   /* estimated bytecode length */
    RENode      result;
}

/**
 * One entry of a singly linked stack of program states; <code>previous</code>
 * points at the entry below this one.
 */
class REProgState
{
    /**
     * Stores every argument in the field of the same name.
     *
     * Note the argument order: <code>continuation_pc</code> comes before
     * <code>continuation_op</code> here, the reverse of the (op, pc) order
     * taken by the REBackTrackData constructor.
     */
    REProgState(REProgState previous, int min, int max, int index,
                REBackTrackData backTrack,
                int continuation_pc, int continuation_op)
    {
        this.previous = previous;
        this.min = min;
        this.max = max;
        this.index = index;
        this.continuation_op = continuation_op;
        this.continuation_pc = continuation_pc;
        this.backTrack = backTrack;
    }

    REProgState previous; // previous state in stack

    int min;                      /* current quantifier min */
    int max;                      /* current quantifier max */
    int index;                    /* progress in text */
    int continuation_op;          /* presumably the opcode to resume with - confirm */
    int continuation_pc;          /* presumably the program offset to resume at - confirm */
    REBackTrackData backTrack; // used by ASSERT_  to recover state
}

/**
 * Snapshot of the REGlobalData fields needed to backtrack, linked to the
 * previous snapshot through <code>previous</code>.
 */
class REBackTrackData {

    /**
     * Captures the current state of <code>gData</code>.
     *
     * The constructor only reads <code>gData</code>: it links the new entry
     * to <code>gData.backTrackStackTop</code> but does not update that field.
     * The capture array is cloned (when non-null) so later changes to
     * <code>gData.parens</code> do not affect this snapshot.
     *
     * @param gData the state to snapshot
     * @param op stored in <code>continuation_op</code>
     * @param pc stored in <code>continuation_pc</code>
     */
    REBackTrackData(REGlobalData gData, int op, int pc)
    {
        previous = gData.backTrackStackTop;
        continuation_op = op;
        continuation_pc = pc;
        lastParen = gData.lastParen;
        if (gData.parens != null) {
            parens = gData.parens.clone();
        }
        cp = gData.cp;
        stateStackTop = gData.stateStackTop;
    }

    REBackTrackData previous;

    int continuation_op;                /* where to backtrack to */
    int continuation_pc;
    int lastParen;
    long[] parens;                      /* parenthesis captures */
    int cp;                             /* char buffer index */
    REProgState stateStackTop;          /* state of op that backtracked */
}

/**
 * State referenced while a regexp executes: the compiled regexp, the current
 * position, the captures and the tops of the state and backtrack stacks.
 *
 * Each capture is packed into one long of <code>parens</code>: the low 32
 * bits hold the start index and the high 32 bits hold the length.
 */
class REGlobalData {
    boolean multiline;
    RECompiled regexp;              /* the RE in execution */
    int lastParen;                  /* highest paren set so far */
    int skipped;                    /* chars skipped anchoring this r.e. */

    int cp;                         /* char buffer index */
    long[] parens;                  /* parens captures */

    REProgState stateStackTop;       /* stack of state of current ancestors */

    REBackTrackData backTrackStackTop;  /* last matched-so-far position */


    /**
     * Get start of parenthesis capture contents, -1 for empty.
     * Reads the low 32 bits of <code>parens[i]</code>.
     */
    int parens_index(int i)
    {
        return (int)(parens[i]);
    }

    /**
     * Get length of parenthesis capture contents.
     * Reads the high 32 bits of <code>parens[i]</code>.
     */
    int parens_length(int i)
    {
        return (int)(parens[i] >>> 32);
    }

    /**
     * Stores a capture as (length << 32) | index.
     * The index is masked to 32 bits so that a negative index such as -1
     * does not sign-extend into the half that holds the length.
     */
    void set_parens(int i, int index, int length)
    {
        parens[i] = (index & 0xffffffffL) | ((long)length << 32);
    }

}

/*
 * This class holds a bitmap representation of a character class from a
 * regexp. A list of these is referenced by the classList field of RECompiled.
 * The initial state has startIndex set to the offset in the original regexp
 * source of the beginning of the class contents. The first use of the class
 * converts the source representation into a bitmap.
 *
 * The converted, sense and bits fields are transient, so they are left out of
 * default serialization.
 */
final class RECharSet implements Serializable
{
    static final long serialVersionUID = 7931787979395898394L;

    /** Stores the three arguments in the fields of the same name. */
    RECharSet(int length, int startIndex, int strlength)
    {
        this.length = length;
        this.startIndex = startIndex;
        this.strlength = strlength;
    }

    int length;
    int startIndex;     /* offset of the class contents in the regexp source */
    int strlength;

    volatile transient boolean converted;
    volatile transient boolean sense;
    volatile transient byte[] bits;
}


-- cgit v1.2.3-1-g7c22