summaryrefslogtreecommitdiffstats
path: root/infrastructure/yuicompressor/src/com/yahoo/platform/yui/compressor/JavaScriptCompressor.java
diff options
context:
space:
mode:
Diffstat (limited to 'infrastructure/yuicompressor/src/com/yahoo/platform/yui/compressor/JavaScriptCompressor.java')
-rw-r--r--infrastructure/yuicompressor/src/com/yahoo/platform/yui/compressor/JavaScriptCompressor.java1307
1 files changed, 0 insertions, 1307 deletions
diff --git a/infrastructure/yuicompressor/src/com/yahoo/platform/yui/compressor/JavaScriptCompressor.java b/infrastructure/yuicompressor/src/com/yahoo/platform/yui/compressor/JavaScriptCompressor.java
deleted file mode 100644
index e69ae1a..0000000
--- a/infrastructure/yuicompressor/src/com/yahoo/platform/yui/compressor/JavaScriptCompressor.java
+++ /dev/null
@@ -1,1307 +0,0 @@
-/*
- * YUI Compressor
- * Author: Julien Lecomte <jlecomte@yahoo-inc.com>
- * Copyright (c) 2007, Yahoo! Inc. All rights reserved.
- * Code licensed under the BSD License:
- * http://developer.yahoo.net/yui/license.txt
- */
-
-package com.yahoo.platform.yui.compressor;
-
-import yuicompressor.org.mozilla.javascript.*;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.Writer;
-import java.util.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-public class JavaScriptCompressor {
-
- static final ArrayList ones;
- static final ArrayList twos;
- static final ArrayList threes;
-
- static final Set builtin = new HashSet();
- static final Map literals = new Hashtable();
- static final Set reserved = new HashSet();
-
- static {
-
- // This list contains all the 3 characters or less built-in global
- // symbols available in a browser. Please add to this list if you
- // see anything missing.
- builtin.add("NaN");
- builtin.add("top");
-
- ones = new ArrayList();
- for (char c = 'A'; c <= 'Z'; c++)
- ones.add(Character.toString(c));
- for (char c = 'a'; c <= 'z'; c++)
- ones.add(Character.toString(c));
-
- twos = new ArrayList();
- for (int i = 0; i < ones.size(); i++) {
- String one = (String) ones.get(i);
- for (char c = 'A'; c <= 'Z'; c++)
- twos.add(one + Character.toString(c));
- for (char c = 'a'; c <= 'z'; c++)
- twos.add(one + Character.toString(c));
- for (char c = '0'; c <= '9'; c++)
- twos.add(one + Character.toString(c));
- }
-
- // Remove two-letter JavaScript reserved words and built-in globals...
- twos.remove("as");
- twos.remove("is");
- twos.remove("do");
- twos.remove("if");
- twos.remove("in");
- twos.removeAll(builtin);
-
- threes = new ArrayList();
- for (int i = 0; i < twos.size(); i++) {
- String two = (String) twos.get(i);
- for (char c = 'A'; c <= 'Z'; c++)
- threes.add(two + Character.toString(c));
- for (char c = 'a'; c <= 'z'; c++)
- threes.add(two + Character.toString(c));
- for (char c = '0'; c <= '9'; c++)
- threes.add(two + Character.toString(c));
- }
-
- // Remove three-letter JavaScript reserved words and built-in globals...
- threes.remove("for");
- threes.remove("int");
- threes.remove("new");
- threes.remove("try");
- threes.remove("use");
- threes.remove("var");
- threes.removeAll(builtin);
-
- // That's up to ((26+26)*(1+(26+26+10)))*(1+(26+26+10))-8
- // (206,380 symbols per scope)
-
- // The following list comes from org/mozilla/javascript/Decompiler.java...
- literals.put(new Integer(Token.GET), "get ");
- literals.put(new Integer(Token.SET), "set ");
- literals.put(new Integer(Token.TRUE), "true");
- literals.put(new Integer(Token.FALSE), "false");
- literals.put(new Integer(Token.NULL), "null");
- literals.put(new Integer(Token.THIS), "this");
- literals.put(new Integer(Token.FUNCTION), "function");
- literals.put(new Integer(Token.COMMA), ",");
- literals.put(new Integer(Token.LC), "{");
- literals.put(new Integer(Token.RC), "}");
- literals.put(new Integer(Token.LP), "(");
- literals.put(new Integer(Token.RP), ")");
- literals.put(new Integer(Token.LB), "[");
- literals.put(new Integer(Token.RB), "]");
- literals.put(new Integer(Token.DOT), ".");
- literals.put(new Integer(Token.NEW), "new ");
- literals.put(new Integer(Token.DELPROP), "delete ");
- literals.put(new Integer(Token.IF), "if");
- literals.put(new Integer(Token.ELSE), "else");
- literals.put(new Integer(Token.FOR), "for");
- literals.put(new Integer(Token.IN), " in ");
- literals.put(new Integer(Token.WITH), "with");
- literals.put(new Integer(Token.WHILE), "while");
- literals.put(new Integer(Token.DO), "do");
- literals.put(new Integer(Token.TRY), "try");
- literals.put(new Integer(Token.CATCH), "catch");
- literals.put(new Integer(Token.FINALLY), "finally");
- literals.put(new Integer(Token.THROW), "throw");
- literals.put(new Integer(Token.SWITCH), "switch");
- literals.put(new Integer(Token.BREAK), "break");
- literals.put(new Integer(Token.CONTINUE), "continue");
- literals.put(new Integer(Token.CASE), "case");
- literals.put(new Integer(Token.DEFAULT), "default");
- literals.put(new Integer(Token.RETURN), "return");
- literals.put(new Integer(Token.VAR), "var ");
- literals.put(new Integer(Token.SEMI), ";");
- literals.put(new Integer(Token.ASSIGN), "=");
- literals.put(new Integer(Token.ASSIGN_ADD), "+=");
- literals.put(new Integer(Token.ASSIGN_SUB), "-=");
- literals.put(new Integer(Token.ASSIGN_MUL), "*=");
- literals.put(new Integer(Token.ASSIGN_DIV), "/=");
- literals.put(new Integer(Token.ASSIGN_MOD), "%=");
- literals.put(new Integer(Token.ASSIGN_BITOR), "|=");
- literals.put(new Integer(Token.ASSIGN_BITXOR), "^=");
- literals.put(new Integer(Token.ASSIGN_BITAND), "&=");
- literals.put(new Integer(Token.ASSIGN_LSH), "<<=");
- literals.put(new Integer(Token.ASSIGN_RSH), ">>=");
- literals.put(new Integer(Token.ASSIGN_URSH), ">>>=");
- literals.put(new Integer(Token.HOOK), "?");
- literals.put(new Integer(Token.OBJECTLIT), ":");
- literals.put(new Integer(Token.COLON), ":");
- literals.put(new Integer(Token.OR), "||");
- literals.put(new Integer(Token.AND), "&&");
- literals.put(new Integer(Token.BITOR), "|");
- literals.put(new Integer(Token.BITXOR), "^");
- literals.put(new Integer(Token.BITAND), "&");
- literals.put(new Integer(Token.SHEQ), "===");
- literals.put(new Integer(Token.SHNE), "!==");
- literals.put(new Integer(Token.EQ), "==");
- literals.put(new Integer(Token.NE), "!=");
- literals.put(new Integer(Token.LE), "<=");
- literals.put(new Integer(Token.LT), "<");
- literals.put(new Integer(Token.GE), ">=");
- literals.put(new Integer(Token.GT), ">");
- literals.put(new Integer(Token.INSTANCEOF), " instanceof ");
- literals.put(new Integer(Token.LSH), "<<");
- literals.put(new Integer(Token.RSH), ">>");
- literals.put(new Integer(Token.URSH), ">>>");
- literals.put(new Integer(Token.TYPEOF), "typeof");
- literals.put(new Integer(Token.VOID), "void ");
- literals.put(new Integer(Token.CONST), "const ");
- literals.put(new Integer(Token.NOT), "!");
- literals.put(new Integer(Token.BITNOT), "~");
- literals.put(new Integer(Token.POS), "+");
- literals.put(new Integer(Token.NEG), "-");
- literals.put(new Integer(Token.INC), "++");
- literals.put(new Integer(Token.DEC), "--");
- literals.put(new Integer(Token.ADD), "+");
- literals.put(new Integer(Token.SUB), "-");
- literals.put(new Integer(Token.MUL), "*");
- literals.put(new Integer(Token.DIV), "/");
- literals.put(new Integer(Token.MOD), "%");
- literals.put(new Integer(Token.COLONCOLON), "::");
- literals.put(new Integer(Token.DOTDOT), "..");
- literals.put(new Integer(Token.DOTQUERY), ".(");
- literals.put(new Integer(Token.XMLATTR), "@");
-
- // See http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Reference:Reserved_Words
-
- // JavaScript 1.5 reserved words
- reserved.add("break");
- reserved.add("case");
- reserved.add("catch");
- reserved.add("continue");
- reserved.add("default");
- reserved.add("delete");
- reserved.add("do");
- reserved.add("else");
- reserved.add("finally");
- reserved.add("for");
- reserved.add("function");
- reserved.add("if");
- reserved.add("in");
- reserved.add("instanceof");
- reserved.add("new");
- reserved.add("return");
- reserved.add("switch");
- reserved.add("this");
- reserved.add("throw");
- reserved.add("try");
- reserved.add("typeof");
- reserved.add("var");
- reserved.add("void");
- reserved.add("while");
- reserved.add("with");
- // Words reserved for future use
- reserved.add("abstract");
- reserved.add("boolean");
- reserved.add("byte");
- reserved.add("char");
- reserved.add("class");
- reserved.add("const");
- reserved.add("debugger");
- reserved.add("double");
- reserved.add("enum");
- reserved.add("export");
- reserved.add("extends");
- reserved.add("final");
- reserved.add("float");
- reserved.add("goto");
- reserved.add("implements");
- reserved.add("import");
- reserved.add("int");
- reserved.add("interface");
- reserved.add("long");
- reserved.add("native");
- reserved.add("package");
- reserved.add("private");
- reserved.add("protected");
- reserved.add("public");
- reserved.add("short");
- reserved.add("static");
- reserved.add("super");
- reserved.add("synchronized");
- reserved.add("throws");
- reserved.add("transient");
- reserved.add("volatile");
- // These are not reserved, but should be taken into account
- // in isValidIdentifier (See jslint source code)
- reserved.add("arguments");
- reserved.add("eval");
- reserved.add("true");
- reserved.add("false");
- reserved.add("Infinity");
- reserved.add("NaN");
- reserved.add("null");
- reserved.add("undefined");
- }
-
- private static int countChar(String haystack, char needle) {
- int idx = 0;
- int count = 0;
- int length = haystack.length();
- while (idx < length) {
- char c = haystack.charAt(idx++);
- if (c == needle) {
- count++;
- }
- }
- return count;
- }
-
- private static int printSourceString(String source, int offset, StringBuffer sb) {
- int length = source.charAt(offset);
- ++offset;
- if ((0x8000 & length) != 0) {
- length = ((0x7FFF & length) << 16) | source.charAt(offset);
- ++offset;
- }
- if (sb != null) {
- String str = source.substring(offset, offset + length);
- sb.append(str);
- }
- return offset + length;
- }
-
- private static int printSourceNumber(String source,
- int offset, StringBuffer sb) {
- double number = 0.0;
- char type = source.charAt(offset);
- ++offset;
- if (type == 'S') {
- if (sb != null) {
- number = source.charAt(offset);
- }
- ++offset;
- } else if (type == 'J' || type == 'D') {
- if (sb != null) {
- long lbits;
- lbits = (long) source.charAt(offset) << 48;
- lbits |= (long) source.charAt(offset + 1) << 32;
- lbits |= (long) source.charAt(offset + 2) << 16;
- lbits |= (long) source.charAt(offset + 3);
- if (type == 'J') {
- number = lbits;
- } else {
- number = Double.longBitsToDouble(lbits);
- }
- }
- offset += 4;
- } else {
- // Bad source
- throw new RuntimeException();
- }
- if (sb != null) {
- sb.append(ScriptRuntime.numberToString(number, 10));
- }
- return offset;
- }
-
- private static ArrayList parse(Reader in, ErrorReporter reporter)
- throws IOException, EvaluatorException {
-
- CompilerEnvirons env = new CompilerEnvirons();
- Parser parser = new Parser(env, reporter);
- parser.parse(in, null, 1);
- String source = parser.getEncodedSource();
-
- int offset = 0;
- int length = source.length();
- ArrayList tokens = new ArrayList();
- StringBuffer sb = new StringBuffer();
-
- while (offset < length) {
- int tt = source.charAt(offset++);
- switch (tt) {
-
- case Token.SPECIALCOMMENT:
- case Token.NAME:
- case Token.REGEXP:
- case Token.STRING:
- sb.setLength(0);
- offset = printSourceString(source, offset, sb);
- tokens.add(new JavaScriptToken(tt, sb.toString()));
- break;
-
- case Token.NUMBER:
- sb.setLength(0);
- offset = printSourceNumber(source, offset, sb);
- tokens.add(new JavaScriptToken(tt, sb.toString()));
- break;
-
- default:
- String literal = (String) literals.get(new Integer(tt));
- if (literal != null) {
- tokens.add(new JavaScriptToken(tt, literal));
- }
- break;
- }
- }
-
- return tokens;
- }
-
- private static void processStringLiterals(ArrayList tokens, boolean merge) {
-
- String tv;
- int i, length = tokens.size();
- JavaScriptToken token, prevToken, nextToken;
-
- if (merge) {
-
- // Concatenate string literals that are being appended wherever
- // it is safe to do so. Note that we take care of the case:
- // "a" + "b".toUpperCase()
-
- for (i = 0; i < length; i++) {
- token = (JavaScriptToken) tokens.get(i);
- switch (token.getType()) {
-
- case Token.ADD:
- if (i > 0 && i < length) {
- prevToken = (JavaScriptToken) tokens.get(i - 1);
- nextToken = (JavaScriptToken) tokens.get(i + 1);
- if (prevToken.getType() == Token.STRING && nextToken.getType() == Token.STRING &&
- (i == length - 1 || ((JavaScriptToken) tokens.get(i + 2)).getType() != Token.DOT)) {
- tokens.set(i - 1, new JavaScriptToken(Token.STRING,
- prevToken.getValue() + nextToken.getValue()));
- tokens.remove(i + 1);
- tokens.remove(i);
- i = i - 1;
- length = length - 2;
- break;
- }
- }
- }
- }
-
- }
-
- // Second pass...
-
- for (i = 0; i < length; i++) {
- // APPJET modifications in this loop
- token = (JavaScriptToken) tokens.get(i);
- if (token.getType() == Token.STRING || token.getType() == Token.REGEXP) {
- tv = token.getValue();
- if (token.getType() == Token.STRING) {
-
- // Finally, add the quoting characters and escape the string. We use
- // the quoting character that minimizes the amount of escaping to save
- // a few additional bytes.
-
- char quotechar;
- int singleQuoteCount = countChar(tv, '\'');
- int doubleQuoteCount = countChar(tv, '"');
- if (doubleQuoteCount <= singleQuoteCount) {
- quotechar = '"';
- } else {
- quotechar = '\'';
- }
-
- tv = quotechar + escapeString(tv, quotechar) + quotechar;
- }
-
- // String concatenation transforms the old script scheme:
- // '<scr'+'ipt ...><'+'/script>'
- // into the following:
- // '<script ...></script>'
- // which breaks if this code is embedded inside an HTML document.
- // Since this is not the right way to do this, let's fix the code by
- // transforming all "</script" into "<\/script"
-
- // (Anti-malware software can be sensitive to open tags)
- tv = tv.replace("<script", "\\x3cscript");
- tv = tv.replace("</script", "\\x3c/script");
-
- tokens.set(i, new JavaScriptToken(token.getType(), tv));
- }
- }
- }
-
- // Add necessary escaping that was removed in Rhino's tokenizer.
- private static String escapeString(String s, char quotechar) {
-
- assert quotechar == '"' || quotechar == '\'';
-
- if (s == null) {
- return null;
- }
-
- StringBuffer sb = new StringBuffer();
- for (int i = 0, L = s.length(); i < L; i++) {
- int c = s.charAt(i);
- if (c == quotechar) {
- sb.append("\\");
- }
- sb.append((char) c);
- }
-
- return sb.toString();
- }
-
- /*
- * Simple check to see whether a string is a valid identifier name.
- * If a string matches this pattern, it means it IS a valid
- * identifier name. If a string doesn't match it, it does not
- * necessarily mean it is not a valid identifier name.
- */
- private static final Pattern SIMPLE_IDENTIFIER_NAME_PATTERN = Pattern.compile("^[a-zA-Z_][a-zA-Z0-9_]*$");
-
- private static boolean isValidIdentifier(String s) {
- Matcher m = SIMPLE_IDENTIFIER_NAME_PATTERN.matcher(s);
- return (m.matches() && !reserved.contains(s));
- }
-
- /*
- * Transforms obj["foo"] into obj.foo whenever possible, saving 3 bytes.
- */
- private static void optimizeObjectMemberAccess(ArrayList tokens) {
-
- String tv;
- int i, length;
- JavaScriptToken token;
-
- for (i = 0, length = tokens.size(); i < length; i++) {
-
- if (((JavaScriptToken) tokens.get(i)).getType() == Token.LB &&
- i > 0 && i < length - 2 &&
- ((JavaScriptToken) tokens.get(i - 1)).getType() == Token.NAME &&
- ((JavaScriptToken) tokens.get(i + 1)).getType() == Token.STRING &&
- ((JavaScriptToken) tokens.get(i + 2)).getType() == Token.RB) {
- token = (JavaScriptToken) tokens.get(i + 1);
- tv = token.getValue();
- tv = tv.substring(1, tv.length() - 1);
- if (isValidIdentifier(tv)) {
- tokens.set(i, new JavaScriptToken(Token.DOT, "."));
- tokens.set(i + 1, new JavaScriptToken(Token.NAME, tv));
- tokens.remove(i + 2);
- i = i + 2;
- length = length - 1;
- }
- }
- }
- }
-
- /*
- * Transforms 'foo': ... into foo: ... whenever possible, saving 2 bytes.
- */
- private static void optimizeObjLitMemberDecl(ArrayList tokens) {
-
- String tv;
- int i, length;
- JavaScriptToken token;
-
- for (i = 0, length = tokens.size(); i < length; i++) {
- if (((JavaScriptToken) tokens.get(i)).getType() == Token.OBJECTLIT &&
- i > 0 && ((JavaScriptToken) tokens.get(i - 1)).getType() == Token.STRING) {
- token = (JavaScriptToken) tokens.get(i - 1);
- tv = token.getValue();
- tv = tv.substring(1, tv.length() - 1);
- if (isValidIdentifier(tv)) {
- tokens.set(i - 1, new JavaScriptToken(Token.NAME, tv));
- }
- }
- }
- }
-
- private ErrorReporter logger;
-
- private boolean munge;
- private boolean verbose;
-
- private static final int BUILDING_SYMBOL_TREE = 1;
- private static final int CHECKING_SYMBOL_TREE = 2;
-
- private int mode;
- private int offset;
- private int braceNesting;
- private ArrayList tokens;
- private Stack scopes = new Stack();
- private ScriptOrFnScope globalScope = new ScriptOrFnScope(-1, null);
- private Hashtable indexedScopes = new Hashtable();
-
- public JavaScriptCompressor(Reader in, ErrorReporter reporter)
- throws IOException, EvaluatorException {
-
- this.logger = reporter;
- this.tokens = parse(in, reporter);
- }
-
- public void compress(Writer out, int linebreak, boolean munge, boolean verbose,
- boolean preserveAllSemiColons, boolean disableOptimizations)
- throws IOException {
-
- this.munge = munge;
- this.verbose = verbose;
-
- processStringLiterals(this.tokens, !disableOptimizations);
-
- if (!disableOptimizations) {
- optimizeObjectMemberAccess(this.tokens);
- optimizeObjLitMemberDecl(this.tokens);
- }
-
- buildSymbolTree();
- // DO NOT TOUCH this.tokens BETWEEN THESE TWO PHASES (BECAUSE OF this.indexedScopes)
- mungeSymboltree();
- StringBuffer sb = printSymbolTree(linebreak, preserveAllSemiColons);
-
- out.write(sb.toString());
- }
-
- private ScriptOrFnScope getCurrentScope() {
- return (ScriptOrFnScope) scopes.peek();
- }
-
- private void enterScope(ScriptOrFnScope scope) {
- scopes.push(scope);
- }
-
- private void leaveCurrentScope() {
- scopes.pop();
- }
-
- private JavaScriptToken consumeToken() {
- return (JavaScriptToken) tokens.get(offset++);
- }
-
- private JavaScriptToken getToken(int delta) {
- return (JavaScriptToken) tokens.get(offset + delta);
- }
-
- /*
- * Returns the identifier for the specified symbol defined in
- * the specified scope or in any scope above it. Returns null
- * if this symbol does not have a corresponding identifier.
- */
- private JavaScriptIdentifier getIdentifier(String symbol, ScriptOrFnScope scope) {
- JavaScriptIdentifier identifier;
- while (scope != null) {
- identifier = scope.getIdentifier(symbol);
- if (identifier != null) {
- return identifier;
- }
- scope = scope.getParentScope();
- }
- return null;
- }
-
- /*
- * If either 'eval' or 'with' is used in a local scope, we must make
- * sure that all containing local scopes don't get munged. Otherwise,
- * the obfuscation would potentially introduce bugs.
- */
- private void protectScopeFromObfuscation(ScriptOrFnScope scope) {
- assert scope != null;
-
- if (scope == globalScope) {
- // The global scope does not get obfuscated,
- // so we don't need to worry about it...
- return;
- }
-
- // Find the highest local scope containing the specified scope.
- while (scope.getParentScope() != globalScope) {
- scope = scope.getParentScope();
- }
-
- assert scope.getParentScope() == globalScope;
- scope.preventMunging();
- }
-
- private String getDebugString(int max) {
- assert max > 0;
- StringBuffer result = new StringBuffer();
- int start = Math.max(offset - max, 0);
- int end = Math.min(offset + max, tokens.size());
- for (int i = start; i < end; i++) {
- JavaScriptToken token = (JavaScriptToken) tokens.get(i);
- if (i == offset - 1) {
- result.append(" ---> ");
- }
- result.append(token.getValue());
- if (i == offset - 1) {
- result.append(" <--- ");
- }
- }
- return result.toString();
- }
-
- private void warn(String message, boolean showDebugString) {
- if (verbose) {
- if (showDebugString) {
- message = message + "\n" + getDebugString(10);
- }
- logger.warning(message, null, -1, null, -1);
- }
- }
-
- private void parseFunctionDeclaration() {
-
- String symbol;
- JavaScriptToken token;
- ScriptOrFnScope currentScope, fnScope;
- JavaScriptIdentifier identifier;
-
- currentScope = getCurrentScope();
-
- token = consumeToken();
- if (token.getType() == Token.NAME) {
- if (mode == BUILDING_SYMBOL_TREE) {
- // Get the name of the function and declare it in the current scope.
- symbol = token.getValue();
- if (currentScope.getIdentifier(symbol) != null) {
- warn("The function " + symbol + " has already been declared in the same scope...", true);
- }
- currentScope.declareIdentifier(symbol);
- }
- token = consumeToken();
- }
-
- assert token.getType() == Token.LP;
- if (mode == BUILDING_SYMBOL_TREE) {
- fnScope = new ScriptOrFnScope(braceNesting, currentScope);
- indexedScopes.put(new Integer(offset), fnScope);
- } else {
- fnScope = (ScriptOrFnScope) indexedScopes.get(new Integer(offset));
- }
-
- // Parse function arguments.
- int argpos = 0;
- while ((token = consumeToken()).getType() != Token.RP) {
- assert token.getType() == Token.NAME ||
- token.getType() == Token.COMMA;
- if (token.getType() == Token.NAME && mode == BUILDING_SYMBOL_TREE) {
- symbol = token.getValue();
- identifier = fnScope.declareIdentifier(symbol);
- if (symbol.equals("$super") && argpos == 0) {
- // Exception for Prototype 1.6...
- identifier.preventMunging();
- }
- argpos++;
- }
- }
-
- token = consumeToken();
- assert token.getType() == Token.LC;
- braceNesting++;
-
- token = getToken(0);
- if (token.getType() == Token.STRING &&
- getToken(1).getType() == Token.SEMI) {
- // This is a hint. Hints are empty statements that look like
- // "localvar1:nomunge, localvar2:nomunge"; They allow developers
- // to prevent specific symbols from getting obfuscated (some heretic
- // implementations, such as Prototype 1.6, require specific variable
- // names, such as $super for example, in order to work appropriately.
- // Note: right now, only "nomunge" is supported in the right hand side
- // of a hint. However, in the future, the right hand side may contain
- // other values.
- consumeToken();
- String hints = token.getValue();
- // Remove the leading and trailing quotes...
- hints = hints.substring(1, hints.length() - 1).trim();
- StringTokenizer st1 = new StringTokenizer(hints, ",");
- while (st1.hasMoreTokens()) {
- String hint = st1.nextToken();
- int idx = hint.indexOf(':');
- if (idx <= 0 || idx >= hint.length() - 1) {
- if (mode == BUILDING_SYMBOL_TREE) {
- // No need to report the error twice, hence the test...
- warn("Invalid hint syntax: " + hint, true);
- }
- break;
- }
- String variableName = hint.substring(0, idx).trim();
- String variableType = hint.substring(idx + 1).trim();
- if (mode == BUILDING_SYMBOL_TREE) {
- fnScope.addHint(variableName, variableType);
- } else if (mode == CHECKING_SYMBOL_TREE) {
- identifier = fnScope.getIdentifier(variableName);
- if (identifier != null) {
- if (variableType.equals("nomunge")) {
- identifier.preventMunging();
- } else {
- warn("Unsupported hint value: " + hint, true);
- }
- } else {
- warn("Hint refers to an unknown identifier: " + hint, true);
- }
- }
- }
- }
-
- parseScope(fnScope);
- }
-
- private void parseCatch() {
-
- String symbol;
- JavaScriptToken token;
- ScriptOrFnScope currentScope;
- JavaScriptIdentifier identifier;
-
- token = getToken(-1);
- assert token.getType() == Token.CATCH;
- token = consumeToken();
- assert token.getType() == Token.LP;
- token = consumeToken();
- assert token.getType() == Token.NAME;
-
- symbol = token.getValue();
- currentScope = getCurrentScope();
-
- if (mode == BUILDING_SYMBOL_TREE) {
- // We must declare the exception identifier in the containing function
- // scope to avoid errors related to the obfuscation process. No need to
- // display a warning if the symbol was already declared here...
- currentScope.declareIdentifier(symbol);
- } else {
- identifier = getIdentifier(symbol, currentScope);
- identifier.incrementRefcount();
- }
-
- token = consumeToken();
- assert token.getType() == Token.RP;
- }
-
- private void parseExpression() {
-
- // Parse the expression until we encounter a comma or a semi-colon
- // in the same brace nesting, bracket nesting and paren nesting.
- // Parse functions if any...
-
- String symbol;
- JavaScriptToken token;
- ScriptOrFnScope currentScope;
- JavaScriptIdentifier identifier;
-
- int expressionBraceNesting = braceNesting;
- int bracketNesting = 0;
- int parensNesting = 0;
-
- int length = tokens.size();
-
- while (offset < length) {
-
- token = consumeToken();
- currentScope = getCurrentScope();
-
- switch (token.getType()) {
-
- case Token.SEMI:
- case Token.COMMA:
- if (braceNesting == expressionBraceNesting &&
- bracketNesting == 0 &&
- parensNesting == 0) {
- return;
- }
- break;
-
- case Token.FUNCTION:
- parseFunctionDeclaration();
- break;
-
- case Token.LC:
- braceNesting++;
- break;
-
- case Token.RC:
- braceNesting--;
- assert braceNesting >= expressionBraceNesting;
- break;
-
- case Token.LB:
- bracketNesting++;
- break;
-
- case Token.RB:
- bracketNesting--;
- break;
-
- case Token.LP:
- parensNesting++;
- break;
-
- case Token.RP:
- parensNesting--;
- break;
-
- case Token.SPECIALCOMMENT:
- if (mode == BUILDING_SYMBOL_TREE) {
- protectScopeFromObfuscation(currentScope);
- warn("Using JScript conditional comments is not recommended." + (munge ? " Moreover, using JScript conditional comments reduces the level of compression!" : ""), true);
- }
- break;
-
- case Token.NAME:
- symbol = token.getValue();
-
- if (mode == BUILDING_SYMBOL_TREE) {
-
- if (symbol.equals("eval")) {
-
- protectScopeFromObfuscation(currentScope);
- warn("Using 'eval' is not recommended." + (munge ? " Moreover, using 'eval' reduces the level of compression!" : ""), true);
-
- }
-
- } else if (mode == CHECKING_SYMBOL_TREE) {
-
- if ((offset < 2 ||
- (getToken(-2).getType() != Token.DOT &&
- getToken(-2).getType() != Token.GET &&
- getToken(-2).getType() != Token.SET)) &&
- getToken(0).getType() != Token.OBJECTLIT) {
-
- identifier = getIdentifier(symbol, currentScope);
-
- if (identifier == null) {
-
- if (symbol.length() <= 3 && !builtin.contains(symbol)) {
- // Here, we found an undeclared and un-namespaced symbol that is
- // 3 characters or less in length. Declare it in the global scope.
- // We don't need to declare longer symbols since they won't cause
- // any conflict with other munged symbols.
- globalScope.declareIdentifier(symbol);
- warn("Found an undeclared symbol: " + symbol, true);
- }
-
- } else {
-
- identifier.incrementRefcount();
- }
- }
- }
- break;
- }
- }
- }
-
- private void parseScope(ScriptOrFnScope scope) {
-
- String symbol;
- JavaScriptToken token;
- JavaScriptIdentifier identifier;
-
- int length = tokens.size();
-
- enterScope(scope);
-
- while (offset < length) {
-
- token = consumeToken();
-
- switch (token.getType()) {
-
- case Token.VAR:
-
- if (mode == BUILDING_SYMBOL_TREE && scope.incrementVarCount() > 1) {
- warn("Try to use a single 'var' statement per scope.", true);
- }
-
- /* FALLSTHROUGH */
-
- case Token.CONST:
-
- // The var keyword is followed by at least one symbol name.
- // If several symbols follow, they are comma separated.
- for (; ;) {
- token = consumeToken();
-
- assert token.getType() == Token.NAME;
-
- if (mode == BUILDING_SYMBOL_TREE) {
- symbol = token.getValue();
- if (scope.getIdentifier(symbol) == null) {
- scope.declareIdentifier(symbol);
- } else {
- warn("The variable " + symbol + " has already been declared in the same scope...", true);
- }
- }
-
- token = getToken(0);
-
- assert token.getType() == Token.SEMI ||
- token.getType() == Token.ASSIGN ||
- token.getType() == Token.COMMA ||
- token.getType() == Token.IN;
-
- if (token.getType() == Token.IN) {
- break;
- } else {
- parseExpression();
- token = getToken(-1);
- if (token.getType() == Token.SEMI) {
- break;
- }
- }
- }
- break;
-
- case Token.FUNCTION:
- parseFunctionDeclaration();
- break;
-
- case Token.LC:
- braceNesting++;
- break;
-
- case Token.RC:
- braceNesting--;
- assert braceNesting >= scope.getBraceNesting();
- if (braceNesting == scope.getBraceNesting()) {
- leaveCurrentScope();
- return;
- }
- break;
-
- case Token.WITH:
- if (mode == BUILDING_SYMBOL_TREE) {
- // Inside a 'with' block, it is impossible to figure out
- // statically whether a symbol is a local variable or an
- // object member. As a consequence, the only thing we can
- // do is turn the obfuscation off for the highest scope
- // containing the 'with' block.
- protectScopeFromObfuscation(scope);
- warn("Using 'with' is not recommended." + (munge ? " Moreover, using 'with' reduces the level of compression!" : ""), true);
- }
- break;
-
- case Token.CATCH:
- parseCatch();
- break;
-
- case Token.SPECIALCOMMENT:
- if (mode == BUILDING_SYMBOL_TREE) {
- protectScopeFromObfuscation(scope);
- warn("Using JScript conditional comments is not recommended." + (munge ? " Moreover, using JScript conditional comments reduces the level of compression." : ""), true);
- }
- break;
-
- case Token.NAME:
- symbol = token.getValue();
-
- if (mode == BUILDING_SYMBOL_TREE) {
-
- if (symbol.equals("eval")) {
-
- protectScopeFromObfuscation(scope);
- warn("Using 'eval' is not recommended." + (munge ? " Moreover, using 'eval' reduces the level of compression!" : ""), true);
-
- }
-
- } else if (mode == CHECKING_SYMBOL_TREE) {
-
- if ((offset < 2 || getToken(-2).getType() != Token.DOT) &&
- getToken(0).getType() != Token.OBJECTLIT) {
-
- identifier = getIdentifier(symbol, scope);
-
- if (identifier == null) {
-
- if (symbol.length() <= 3 && !builtin.contains(symbol)) {
- // Here, we found an undeclared and un-namespaced symbol that is
- // 3 characters or less in length. Declare it in the global scope.
- // We don't need to declare longer symbols since they won't cause
- // any conflict with other munged symbols.
- globalScope.declareIdentifier(symbol);
- warn("Found an undeclared symbol: " + symbol, true);
- }
-
- } else {
-
- identifier.incrementRefcount();
- }
- }
- }
- break;
- }
- }
- }
-
- private void buildSymbolTree() {
- offset = 0;
- braceNesting = 0;
- scopes.clear();
- indexedScopes.clear();
- indexedScopes.put(new Integer(0), globalScope);
- mode = BUILDING_SYMBOL_TREE;
- parseScope(globalScope);
- }
-
- private void mungeSymboltree() {
-
- if (!munge) {
- return;
- }
-
- // One problem with obfuscation resides in the use of undeclared
- // and un-namespaced global symbols that are 3 characters or less
- // in length. Here is an example:
- //
- // var declaredGlobalVar;
- //
- // function declaredGlobalFn() {
- // var localvar;
- // localvar = abc; // abc is an undeclared global symbol
- // }
- //
- // In the example above, there is a slim chance that localvar may be
- // munged to 'abc', conflicting with the undeclared global symbol
- // abc, creating a potential bug. The following code detects such
- // global symbols. This must be done AFTER the entire file has been
- // parsed, and BEFORE munging the symbol tree. Note that declaring
- // extra symbols in the global scope won't hurt.
- //
- // Note: Since we go through all the tokens to do this, we also use
- // the opportunity to count how many times each identifier is used.
-
- offset = 0;
- braceNesting = 0;
- scopes.clear();
- mode = CHECKING_SYMBOL_TREE;
- parseScope(globalScope);
- globalScope.munge();
- }
-
- private StringBuffer printSymbolTree(int linebreakpos, boolean preserveAllSemiColons)
- throws IOException {
-
- offset = 0;
- braceNesting = 0;
- scopes.clear();
-
- String symbol;
- JavaScriptToken token;
- ScriptOrFnScope currentScope;
- JavaScriptIdentifier identifier;
-
- int length = tokens.size();
- StringBuffer result = new StringBuffer();
-
- int linestartpos = 0;
-
- enterScope(globalScope);
-
- while (offset < length) {
-
- token = consumeToken();
- symbol = token.getValue();
- currentScope = getCurrentScope();
-
- switch (token.getType()) {
-
- case Token.NAME:
-
- if (offset >= 2 && getToken(-2).getType() == Token.DOT ||
- getToken(0).getType() == Token.OBJECTLIT) {
-
- result.append(symbol);
-
- } else {
-
- identifier = getIdentifier(symbol, currentScope);
- if (identifier != null) {
- if (identifier.getMungedValue() != null) {
- result.append(identifier.getMungedValue());
- } else {
- result.append(symbol);
- }
- if (currentScope != globalScope && identifier.getRefcount() == 0) {
- warn("The symbol " + symbol + " is declared but is apparently never used.\nThis code can probably be written in a more compact way.", true);
- }
- } else {
- result.append(symbol);
- }
- }
- break;
-
- case Token.REGEXP:
- case Token.NUMBER:
- case Token.STRING:
- result.append(symbol);
- break;
-
- case Token.ADD:
- case Token.SUB:
- result.append((String) literals.get(new Integer(token.getType())));
- if (offset < length) {
- token = getToken(0);
- if (token.getType() == Token.INC ||
- token.getType() == Token.DEC ||
- token.getType() == Token.ADD ||
- token.getType() == Token.DEC) {
- // Handle the case x +/- ++/-- y
- // We must keep a white space here. Otherwise, x +++ y would be
- // interpreted as x ++ + y by the compiler, which is a bug (due
- // to the implicit assignment being done on the wrong variable)
- result.append(' ');
- } else if (token.getType() == Token.POS && getToken(-1).getType() == Token.ADD ||
- token.getType() == Token.NEG && getToken(-1).getType() == Token.SUB) {
- // Handle the case x + + y and x - - y
- result.append(' ');
- }
- }
- break;
-
- case Token.FUNCTION:
- result.append("function");
- token = consumeToken();
- if (token.getType() == Token.NAME) {
- result.append(' ');
- symbol = token.getValue();
- identifier = getIdentifier(symbol, currentScope);
- assert identifier != null;
- if (identifier.getMungedValue() != null) {
- result.append(identifier.getMungedValue());
- } else {
- result.append(symbol);
- }
- if (currentScope != globalScope && identifier.getRefcount() == 0) {
- warn("The symbol " + symbol + " is declared but is apparently never used.\nThis code can probably be written in a more compact way.", true);
- }
- token = consumeToken();
- }
- assert token.getType() == Token.LP;
- result.append('(');
- currentScope = (ScriptOrFnScope) indexedScopes.get(new Integer(offset));
- enterScope(currentScope);
- while ((token = consumeToken()).getType() != Token.RP) {
- assert token.getType() == Token.NAME || token.getType() == Token.COMMA;
- if (token.getType() == Token.NAME) {
- symbol = token.getValue();
- identifier = getIdentifier(symbol, currentScope);
- assert identifier != null;
- if (identifier.getMungedValue() != null) {
- result.append(identifier.getMungedValue());
- } else {
- result.append(symbol);
- }
- } else if (token.getType() == Token.COMMA) {
- result.append(',');
- }
- }
- result.append(')');
- token = consumeToken();
- assert token.getType() == Token.LC;
- result.append('{');
- braceNesting++;
- token = getToken(0);
- if (token.getType() == Token.STRING &&
- getToken(1).getType() == Token.SEMI) {
- // This is a hint. Skip it!
- consumeToken();
- consumeToken();
- }
- break;
-
- case Token.RETURN:
- case Token.TYPEOF:
- result.append(literals.get(new Integer(token.getType())));
- // No space needed after 'return' and 'typeof' when followed
- // by '(', '[', '{', a string or a regexp.
- if (offset < length) {
- token = getToken(0);
- if (token.getType() != Token.LP &&
- token.getType() != Token.LB &&
- token.getType() != Token.LC &&
- token.getType() != Token.STRING &&
- token.getType() != Token.REGEXP &&
- token.getType() != Token.SEMI) {
- result.append(' ');
- }
- }
- break;
-
- case Token.CASE:
- case Token.THROW:
- result.append(literals.get(new Integer(token.getType())));
- // White-space needed after 'case' and 'throw' when not followed by a string.
- if (offset < length && getToken(0).getType() != Token.STRING) {
- result.append(' ');
- }
- break;
-
- case Token.BREAK:
- case Token.CONTINUE:
- result.append(literals.get(new Integer(token.getType())));
- if (offset < length && getToken(0).getType() != Token.SEMI) {
- // If 'break' or 'continue' is not followed by a semi-colon, it must
- // be followed by a label, hence the need for a white space.
- result.append(' ');
- }
- break;
-
- case Token.LC:
- result.append('{');
- braceNesting++;
- break;
-
- case Token.RC:
- result.append('}');
- braceNesting--;
- assert braceNesting >= currentScope.getBraceNesting();
- if (braceNesting == currentScope.getBraceNesting()) {
- leaveCurrentScope();
- }
- break;
-
- case Token.SEMI:
- // No need to output a semi-colon if the next character is a right-curly...
- if (preserveAllSemiColons || offset < length && getToken(0).getType() != Token.RC) {
- result.append(';');
- }
-
- if (linebreakpos >= 0 && result.length() - linestartpos > linebreakpos) {
- // Some source control tools don't like it when files containing lines longer
- // than, say 8000 characters, are checked in. The linebreak option is used in
- // that case to split long lines after a specific column.
- result.append('\n');
- linestartpos = result.length();
- }
- break;
-
- case Token.SPECIALCOMMENT:
- if (result.length() > 0 && result.charAt(result.length() - 1) != '\n') {
- result.append("\n");
- }
- result.append("/*");
- result.append(symbol);
- result.append("*/\n");
- break;
-
- default:
- String literal = (String) literals.get(new Integer(token.getType()));
- if (literal != null) {
- result.append(literal);
- } else {
- warn("This symbol cannot be printed: " + symbol, true);
- }
- break;
- }
- }
-
- // Append a semi-colon at the end, even if unnecessary semi-colons are
- // supposed to be removed. This is especially useful when concatenating
- // several minified files (the absence of an ending semi-colon at the
- // end of one file may very likely cause a syntax error)
- if (!preserveAllSemiColons && result.length() > 0) {
- if (result.charAt(result.length() - 1) == '\n') {
- result.setCharAt(result.length() - 1, ';');
- } else {
- result.append(';');
- }
- }
-
- return result;
- }
-}