From: Alan Knowles Date: Fri, 6 Nov 2015 08:18:09 +0000 (+0800) Subject: JSDOC/ScopeParser.vala X-Git-Url: http://git.roojs.org/?p=gnome.introspection-doc-generator;a=commitdiff_plain;h=98941c66c580822070f1363480ed1cf4f93c2e80 JSDOC/ScopeParser.vala --- diff --git a/JSDOC/ScopeParser.vala b/JSDOC/ScopeParser.vala new file mode 100644 index 0000000..cbe46cd --- /dev/null +++ b/JSDOC/ScopeParser.vala @@ -0,0 +1,951 @@ + + + +namespace JSDOC { + + public class ScopeParser : Object { + + + Gee.ArrayList warnings; + + bool debug = false; + string[] idents; + + public ScopeParser(TokenStream ts) { + this.ts = ts; // {TokenStream} + this.warnings = new Gee.ArrayList(); + //this.indexedg = {}; + //this.timer = new Date() * 1; + this.idents = { + + "break", + "case", + "continue", + "default", + "delete", + "do", + "else", + "export", + "false", + "for", + "function", + "if", + "import", + "in", + "new", + "null", + "return", + "switch", + "this", + "true", + "typeof", + "var", + "void", + "while", + "with", + + "catch", + "class", + "const", + "debugger", + "enum", + "extends", + "finally", + "super", + "throw", + "try", + + "abstract", + "boolean", + "byte", + "char", + "double", + "final", + "float", + "goto", + "implements", + "instanceof", + "int", + "interface", + "long", + "native", + "package", + "private", + "protected", + "public", + "short", + "static", + "synchronized", + "throws", + "transient", + "include", + "undefined" + }; + } + + + + + void warn(string s) + { + //print('****************' + s); + this.warnings.add(s); + //println("WARNING:" + htmlescape(s) + "
"); + } + + + // defaults should not be initialized here =- otherwise they get duped on new, rather than initalized.. + + ts : false, + global : false, + mode : "", //"BUILDING_SYMBOL_TREE", + braceNesting : 0, + indexedScopes : false, + munge: true, + + + + + + buildSymbolTree : function() + { + //println("
");
+        
+        this.ts.rewind();
+        this.braceNesting = 0;
+        
+       // print(JSON.stringify(this.ts.tokens, null,4));
+        
+        
+        this.globalScope = new  Scope(-1, false, -1, '');
+        this.indexedScopes = { 0 : this.globalScope };
+        
+        this.mode = 'BUILDING_SYMBOL_TREE';
+        this.parseScope(this.globalScope);
+        
+        //print("---------------END PASS 1 ---------------- ");
+        
+    },
+    mungeSymboltree : function()
+    {
+
+        if (!this.munge) {
+            return;
+        }
+
+        // One problem with obfuscation resides in the use of undeclared
+        // and un-namespaced global symbols that are 3 characters or less
+        // in length. Here is an example:
+        //
+        //     var declaredGlobalVar;
+        //
+        //     function declaredGlobalFn() {
+        //         var localvar;
+        //         localvar = abc; // abc is an undeclared global symbol
+        //     }
+        //
+        // In the example above, there is a slim chance that localvar may be
+        // munged to 'abc', conflicting with the undeclared global symbol
+        // abc, creating a potential bug. The following code detects such
+        // global symbols. This must be done AFTER the entire file has been
+        // parsed, and BEFORE munging the symbol tree. Note that declaring
+        // extra symbols in the global scope won't hurt.
+        //
+        // Note: Since we go through all the tokens to do this, we also use
+        // the opportunity to count how many times each identifier is used.
+
+        this.ts.rewind();
+        this.braceNesting = 0;
+        this.mode = 'PASS2_SYMBOL_TREE';
+        
+        //println("MUNGING?");
+        
+        this.parseScope(this.globalScope);
+        
+        //this.globalScope.dump();
+        
+        
+        this.globalScope.munge();
+    },
+
+
+    log : function(str)
+    {
+        print ("                    ".substring(0, this.braceNesting*2) + str);
+        
+        //println("LOG:" + htmlescape(str) + "
\n"); + }, + logR : function(str) + { + //println("LOG:" + str + "
"); + }, + + + + + + parseScope : function(scope) // parse a token stream.. + { + //this.timerPrint("parseScope EnterScope"); + //this.log(">>> ENTER SCOPE" + this.scopes.length); + var symbol; + var token; + + var identifier; + + var expressionBraceNesting = this.braceNesting + 0; + + var parensNesting = 0; + + var isObjectLitAr = [ false ]; + var isInObjectLitAr; + + + //var scopeIndent = ''; + //this.scopes.forEach(function() { + // scopeIndent += ' '; + //}); + //print(">> ENTER SCOPE"); + + + + + token = this.ts.lookTok(1); + while (token) { + // this.timerPrint("parseScope AFTER lookT: " + token.toString()); + //this.dumpToken(token , this.scopes, this.braceNesting); + //print('SCOPE:' + token.toString()); + //this.log(token.data); + //if (token.type == 'NAME') { + // print('*' + token.data); + //} + switch(token.type + '.' + token.name) { + case "KEYW.VAR": + case "KEYW.CONST": // not really relivant as it's only mozzy that does this. + //print('SCOPE-VAR:' + token.toString()); + var vstart = this.ts.cursor +1; + + //this.log("parseScope GOT VAR/CONST : " + token.toString()); + while (true) { + token = this.ts.nextTok(); + //!this.debug|| print( token.toString()); + // print('SCOPE-VAR-VAL:' + JSON.stringify(token, null, 4)); + if (!token) { // can return false at EOF! + break; + } + if (token.name == "VAR" || token.data == ',') { // kludge.. + continue; + } + //this.logR("parseScope GOT VAR : " + token.toString() + ""); + if (token.type != "NAME") { + for(var i = Math.max(this.ts.cursor-10,0); i < this.ts.cursor+1; i++) { + print(this.ts.tokens[i].toString()); + } + + print( "var without ident"); + Seed.quit() + } + + + if (this.mode == "BUILDING_SYMBOL_TREE") { + identifier = scope.getIdentifier(token.data,token) ; + + if (identifier == false) { + scope.declareIdentifier(token.data, token); + } else { + token.identifier = identifier; + this.warn("(SCOPE) The variable " + token.data + ' (line:' + token.line + ") has already been declared in the same scope..."); + } + } + + token = this.ts.nextTok(); + !this.debug|| print(token.toString()); + /* + assert token.getType() == Token.SEMI || + token.getType() == Token.ASSIGN || + token.getType() == Token.COMMA || + token.getType() == Token.IN; + */ + if (token.name == "IN") { + break; + } else { + //var bn = this.braceNesting; + var bn = this.braceNesting; + var nts = []; + while (true) { + if (!token || token.type == 'VOID' || token.data == ',') { + break; + } + nts.push(token); + token = this.ts.nextTok(); + } + if (nts.length) { + var TS = this.ts; + this.ts = new TokenStream(nts); + this.parseExpression(scope); + this.ts = TS; + } + + this.braceNesting = bn; + //this.braceNesting = bn; + //this.logR("parseScope DONE : ParseExpression - tok is:" + this.ts.lookT(0).toString()); + + token = this.ts.lookTok(1); + //!this.debug|| + // print("AFTER EXP: " + token.toString()); + if (token.data == ';') { + break; + } + } + } + + //print("VAR:") + //this.ts.dump(vstart , this.ts.cursor); + + break; + + + case "KEYW.FUNCTION": + //if (this.mode == 'BUILDING_SYMBOL_TREE') + // print('SCOPE-FUNC:' + JSON.stringify(token,null,4)); + //println(""+token.data+""); + var bn = this.braceNesting; + this.parseFunctionDeclaration(scope); + this.braceNesting = bn; + break; + + case "PUNC.LEFT_CURLY": // { + case "PUNC.LEFT_PAREN": // ( + case "PUNC.LEFT_BRACE": // [ + //print('SCOPE-CURLY/PAREN:' + token.toString()); + //println(""+token.data+""); + var curTS = this.ts; + if (token.props) { + + // { a : ... , c : .... } + + for (var prop in token.props) { + + + // print('SCOPE-PROPS:' + JSON.stringify(token.props[prop],null,4)); + if (token.props[prop].val[0].data == 'function') { + // parse a function.. + this.ts = new TokenStream(token.props[prop].val); + this.ts.nextTok(); + this.parseFunctionDeclaration(scope); + + continue; + } + // key value.. + + this.ts = new TokenStream(token.props[prop].val); + this.parseExpression(scope); + + } + this.ts = curTS; + + // it's an object literal.. + // the values could be replaced.. + break; + } + + // ( ... ) or { .... } not object literals.. + + var _this = this; + for (var xx =0; xx < token.items.length; xx++) { + expr = token.items[xx]; + //token.items.forEach(function(expr) { + //print(expr.toString()); + _this.ts = new TokenStream(expr); + //if (curTS.data == '(') { + _this.parseScope(scope) + //} else { + // _this.parseExpression(scope) + //} + + } + this.ts = curTS; + //print("NOT PROPS"); Seed.quit(); + + //isObjectLitAr.push(false); + //this.braceNesting++; + + //print(">>>>>> OBJLIT PUSH(false)" + this.braceNesting); + break; + + case "PUNC.RIGHT_CURLY": // } + //print("<< EXIT SCOPE"); + return; + + case "KEYW.WITH": + //print('SCOPE-WITH:' + token.toString()); + //println(""+token.data+""); + if (this.mode == "BUILDING_SYMBOL_TREE") { + // Inside a 'with' block, it is impossible to figure out + // statically whether a symbol is a local variable or an + // object member. As a consequence, the only thing we can + // do is turn the obfuscation off for the highest scope + // containing the 'with' block. + this.protectScopeFromObfuscation(scope); + this.warn("Using 'with' is not recommended." + (this.munge ? " Moreover, using 'with' reduces the level of compression!" : ""), true); + } + break; + + case "KEYW.CATCH": + //print('SCOPE-CATCH:' + token.toString()); + //println(""+token.data+""); + this.parseCatch(scope); + break; + + case "STRN.DOUBLE_QUOTE": // used for object lit detection.. + case "STRN.SINGLE_QUOTE": + // print('SCOPE-STRING:' + token.toString()); + //println(""+token.data+""); + + if (this.ts.lookTok(-1).data == '{' && this.ts.lookTok(1).data == ':') { + // then we are in an object lit.. -> we need to flag the brace as such... + isObjectLitAr.pop(); + isObjectLitAr.push(true); + //print(">>>>>> OBJLIT REPUSH(true)"); + } + isInObjectLitAr = isObjectLitAr[isObjectLitAr.length-1]; + + if (isInObjectLitAr && this.ts.lookTok(1).data == ':' && + ( this.ts.lookTok(-1).data == '{' || this.ts.lookTok(-1).data == ':' )) { + // see if we can replace.. + // remove the quotes.. + // should do a bit more checking!!!! (what about wierd char's in the string.. + var str = token.data.substring(1,token.data.length-1); + if (/^[a-z_]+$/i.test(str) && ScopeParser.idents.indexOf(str) < 0) { + token.outData = str; + } + + + + } + + break; + + case "NAME.NAME": + //print('SCOPE-NAME:' + token.toString()); + //print("DEAL WITH NAME:"); + // got identifier.. + // look for { ** : <- indicates obj literal.. ** this could occur with numbers .. + // skip anyting with "." before it..!! + + if (this.ts.lookTok(-1).data == ".") { + // skip, it's an object prop. + //println(""+token.data+""); + break; + } + //print("SYMBOL: " + token.toString()); + + symbol = token.data; + if (symbol == 'this') { + break; + } + if (this.mode == 'PASS2_SYMBOL_TREE') { + + //println("GOT IDENT: -2 : " + this.ts.lookT(-2).toString() + "
..... -1 : " + this.ts.lookT(-1).toString() + "
"); + + //print ("MUNGE?" + symbol); + + //println("GOT IDENT: " + symbol + "
"); + + //println("GOT IDENT (2): " + symbol + "
"); + identifier = this.getIdentifier(symbol, scope, token); + + if (identifier == false) { +// BUG!find out where builtin is defined... + if (symbol.length <= 3 && Scope.builtin.indexOf(symbol) < 0) { + // Here, we found an undeclared and un-namespaced symbol that is + // 3 characters or less in length. Declare it in the global scope. + // We don't need to declare longer symbols since they won't cause + // any conflict with other munged symbols. + this.globalScope.declareIdentifier(symbol, token); + this.warn("Found an undeclared symbol: " + symbol + ' (line:' + token.line + ')', true); + } + + //println("GOT IDENT IGNORE(3): " + symbol + "
"); + } else { + token.identifier = identifier; + identifier.refcount++; + } + } + + break; + //println("SID"); + default: + if (token.type != 'KEYW') { + break; + } + //print('SCOPE-KEYW:' + token.toString()); + // print("Check eval:"); + + symbol = token.data; + + if (this.mode == 'BUILDING_SYMBOL_TREE') { + + if (token.name == "EVAL") { + + //print(JSON.stringify(token, null,4)); + // look back one and see if we can find a comment!!! + //if (this.ts.look(-1).type == "COMM") { + if (token.prefix && token.prefix.match(/eval/)) { + // look for eval:var:noreplace\n + //print("MATCH!?"); + var _t = this; + token.prefix.replace(/eval:var:([a-z_]+)/ig, function(m, a) { + //print("GOT: " + a); + var hi = _t.getIdentifier(a, scope, token); + // println("PROTECT "+a+" from munge" + (hi ? "FOUND" : "MISSING")); + if (hi) { + // print("PROTECT "+a+" from munge"); + //print(JSON.stringify(hi,null,4)); + hi.toMunge = false; + } + + }); + + + } else { + + + this.protectScopeFromObfuscation(scope); + this.warn("Using 'eval' is not recommended. (use eval:var:noreplace in comments to optimize) " + (this.munge ? " Moreover, using 'eval' reduces the level of compression!" : ""), true); + } + + } + + } + break; + + + } // end switch + + + //print("parseScope TOK : " + token.toString()); + token = this.ts.nextTok(); + //if (this.ts.nextT()) break; + + } + //print("<<< EXIT SCOPE"); + //print("<<<<<<EXP
"); + !this.debug || print("PARSE EXPR"); + this.expN++; + + // for printing stuff.. + + + + var symbol; + var token; + + var identifier; + + var expressionBraceNesting = this.braceNesting + 0; + var bracketNesting = 0; + var parensNesting = 0; + var isInObjectLitAr; + var isObjectLitAr = [ false ]; + + + + + //print(scopeIndent + ">> ENTER EXPRESSION" + this.expN); + while ((token = this.ts.nextTok())) { + + + + /* + // moved out of loop? + currentScope = this.scopes[this.scopes.length-1]; + + var scopeIndent = ''; + this.scopes.forEach(function() { + scopeIndent += ' '; + }); + */ + + //this.dumpToken(token, this.scopes, this.braceNesting ); + //print('EXPR' + token.toString()); + + + //println(""+token.data+""); + //this.log("EXP:" + token.data); + switch (token.type) { + case 'PUNC': + //print("EXPR-PUNC:" + token.toString()); + + switch(token.data) { + + case ';': + //print("<< EXIT EXPRESSION"); + break; + + case ',': + + break; + + + case '(': //Token.LP: + case '{': //Token.LC: + case '[': //Token.LB: + //print('SCOPE-CURLY/PAREN/BRACE:' + token.toString()); + // print('SCOPE-CURLY/PAREN/BRACE:' + JSON.stringify(token, null,4)); + //println(""+token.data+""); + var curTS = this.ts; + if (token.props) { + + for (var prop in token.props) { + if (!token.props[prop].val.length) { + print(JSON.stringify(token.props, null,4)); + } + + if (token.props[prop].val[0].data == 'function') { + // parse a function.. + this.ts = new TokenStream(token.props[prop].val); + this.ts.nextTok(); + this.parseFunctionDeclaration(scope); + continue; + } + // key value.. + + this.ts = new TokenStream(token.props[prop].val); + this.parseExpression(scope); + + } + this.ts = curTS; + + // it's an object literal.. + // the values could be replaced.. + break; + } + + + var _this = this; + token.items.forEach(function(expr) { + _this.ts = new TokenStream(expr); + _this.parseExpression(scope) + }); + this.ts = curTS; + + + + ///print(">>>>> EXP PUSH(false)"+this.braceNesting); + break; + + + + + + case ')': //Token.RP: + case ']': //Token.RB: + case '}': //Token.RB: + //print("<< EXIT EXPRESSION"); + return; + + + + parensNesting++; + break; + + + + } + break; + + case 'STRN': // used for object lit detection.. + //if (this.mode == 'BUILDING_SYMBOL_TREE') + //print("EXPR-STR:" + JSON.stringify(token, null, 4)); + + + break; + + + + case 'NAME': + if (this.mode == 'BUILDING_SYMBOL_TREE') { + + //print("EXPR-NAME:" + JSON.stringify(token, null, 4)); + } else { + //print("EXPR-NAME:" + token.toString()); + } + symbol = token.data; + //print("in NAME = " + token.toString()); + //print("in NAME 0: " + this.ts.look(0).toString()); + //print("in NAME 2: " + this.ts.lookTok(2).toString()); + + //print(this.ts.lookTok(-1).data); + // prefixed with '.' + if (this.ts.lookTok(-1).data == ".") { + //skip '.' + break; + } + if (symbol == 'this') { + break; + } + + if (this.mode == 'PASS2_SYMBOL_TREE') { + + identifier = this.getIdentifier(symbol, scope, token); + //println("??"); + if (identifier == false) { + + if (symbol.length <= 3 && Scope.builtin.indexOf(symbol) < 0) { + // Here, we found an undeclared and un-namespaced symbol that is + // 3 characters or less in length. Declare it in the global scope. + // We don't need to declare longer symbols since they won't cause + // any conflict with other munged symbols. + this.globalScope.declareIdentifier(symbol, token); + this.warn("Found an undeclared symbol: " + symbol + ' (line:' + token.line + ')', true); + //print("Found an undeclared symbol: " + symbol + ' (line:' + token.line + ')'); + //throw "OOPS"; + } else { + //print("undeclared:" + token.toString()) + } + + + } else { + //println("++"); + token.identifier = identifier; + identifier.refcount++; + } + + } + break; + + + + + //println("EID"); + case 'KEYW': + //if (this.mode == 'BUILDING_SYMBOL_TREE') + // print("EXPR-KEYW:" + JSON.stringify(token, null, 4)); + + //print('EXPR-KEYW:' + token.toString()); + if (token.name == "FUNCTION") { + + this.parseFunctionDeclaration(scope); + break; + } + + + symbol = token.data; + if (this.mode == 'BUILDING_SYMBOL_TREE') { + + if (token.name == "EVAL") { + //print(JSON.stringify(token,null,4)); + if (token.prefix && token.prefix.match(/eval:var:/g)) { + // look for eval:var:noreplace\n + // print("GOT MATCH?"); + var _t = this; + token.prefix.replace(/eval:var:([a-z]+)/ig, function(m, a) { + + //print("PROTECT: " + a); + + + var hi = _t.getIdentifier(a, scope, token); + //println("PROTECT "+a+" from munge" + (hi ? "FOUND" : "MISSING")); + if (hi) { + // println("PROTECT "+a+" from munge"); + hi.toMunge = false; + } + + + }); + + } else { + this.protectScopeFromObfuscation(scope); + this.warn("Using 'eval' is not recommended." + (this.munge ? " Moreover, using 'eval' reduces the level of compression!" : ""), true); + } + + + } + break; + } + default: + //if (this.mode == 'BUILDING_SYMBOL_TREE') + // print("EXPR-SKIP:" + JSON.stringify(token, null, 4)); + break; + } + + } + //print("<< EXIT EXPRESSION"); + this.expN--; + }, + + + parseCatch : function(scope) { + + var symbol; + var token; + + var identifier; + + //token = getToken(-1); + //assert token.getType() == Token.CATCH; + token = this.ts.nextTok(1); + token = this.ts.nextTok(1); + + + //print(JSON.stringify(this.ts,null,4)); + //assert token.getType() == Token.LP; ( + //token = this.ts.nextTok(); + //assert token.getType() == Token.NAME; + + symbol = token.items[0][0].data; + + + if (this.mode == 'BUILDING_SYMBOL_TREE') { + // We must declare the exception identifier in the containing function + // scope to avoid errors related to the obfuscation process. No need to + // display a warning if the symbol was already declared here... + scope.declareIdentifier(symbol, token.items[0][0]); + } else { + //?? why inc the refcount?? - that should be set when building the tree??? + identifier = this.getIdentifier(symbol, scope, token.items[0][0]); + identifier.refcount++; + } + + //token = this.ts.nextTok(); + //assert token.getType() == Token.RP; // ) + }, + + parseFunctionDeclaration : function(scope) + { + //print("PARSE FUNCTION"); + var symbol; + var token; + + var fnScope = false; + var identifier; + var b4braceNesting = this.braceNesting + 0; + + //this.logR("PARSING FUNCTION"); + + + token = this.ts.nextTok(); + if (token.type == "NAME") { + if (this.mode == 'BUILDING_SYMBOL_TREE') { + // Get the name of the function and declare it in the current scope. + symbol = token.data; + if (scope.getIdentifier(symbol,token) != false) { + this.warn("The function " + symbol + " has already been declared in the same scope...", true); + } + scope.declareIdentifier(symbol,token); + } + token = this.ts.nextTok(); + } + + + // return function() {.... + while (token.data != "(") { + //print(token.toString()); + token = this.ts.nextTok(); + + } + + + //assert token.getType() == Token.LP; + if (this.mode == 'BUILDING_SYMBOL_TREE') { + fnScope = new Scope(1, scope, token.n, '', token); + + //println("STORING SCOPE" + this.ts.cursor); + + this.indexedScopes[token.id] = fnScope; + + } else { + //qln("FETCHING SCOPE" + this.ts.cursor); + fnScope = this.indexedScopes[token.id]; + } + //if (this.mode == 'BUILDING_SYMBOL_TREE') + // print('FUNC-PARSE:' + JSON.stringify(token,null,4)); + // Parse function arguments. + var args = token.items; + for (var argpos =0; argpos < args.length; argpos++) { + + token = args[argpos][0]; + //print ("FUNC ARGS: " + token.toString()) + //assert token.getType() == Token.NAME || + // token.getType() == Token.COMMA; + if (token.type == 'NAME' && this.mode == 'BUILDING_SYMBOL_TREE') { + symbol = token.data; + identifier = fnScope.declareIdentifier(symbol,token); + if (symbol == "$super" && argpos == 0) { + // Exception for Prototype 1.6... + identifier.preventMunging(); + } + //argpos++; + } + } + + token = this.ts.nextTok(); + //print('FUNC-BODY:' + JSON.stringify(token.items,null,4)); + //Seed.quit(); + //print(token.toString()); + // assert token.getType() == Token.LC; + //this.braceNesting++; + + //token = this.ts.nextTok(); + //print(token.toString()); + var outTS = this.ts; + var _this = this; + token.items.forEach(function(tar) { + _this.ts = new TokenStream(tar); + _this.parseScope(fnScope); + + + }); + + //print(JSON.stringify(this.ts,null,4)); + //this.parseScope(fnScope); + this.ts = outTS; + // now pop it off the stack!!! + + //this.braceNesting = b4braceNesting; + //print("ENDFN -1: " + this.ts.lookTok(-1).toString()); + //print("ENDFN 0: " + this.ts.lookTok(0).toString()); + //print("ENDFN 1: " + this.ts.lookTok(1).toString()); + }, + + protectScopeFromObfuscation : function(scope) { + //assert scope != null; + + if (scope == this.globalScope) { + // The global scope does not get obfuscated, + // so we don't need to worry about it... + return; + } + + // Find the highest local scope containing the specified scope. + while (scope && scope.parent != this.globalScope) { + scope = scope.parent; + } + + //assert scope.getParentScope() == globalScope; + scope.preventMunging(); + }, + + getIdentifier: function(symbol, scope, token) { + var identifier; + while (scope != false) { + identifier = scope.getIdentifier(symbol, token); + //println("ScopeParser.getIdentgetUsedSymbols("+symbol+")=" + scope.getUsedSymbols().join(',')); + if (identifier) { + return identifier; + } + scope = scope.parent; + } + return false; + } +};