X-Git-Url: http://git.roojs.org/?a=blobdiff_plain;f=JSDOC%2FTokenReader.vala;h=c87f4ac674547751b2d84d13a2fc07ed6ccf3fb8;hb=0f7dc348b9b4c03aff3fe808623af69b1116af6a;hp=9c5054da5a2432496947e44153675f7c7317ebed;hpb=cb7720e401b7b64d2138a5b89b3aee864a4d139c;p=gnome.introspection-doc-generator diff --git a/JSDOC/TokenReader.vala b/JSDOC/TokenReader.vala index 9c5054d..c87f4ac 100644 --- a/JSDOC/TokenReader.vala +++ b/JSDOC/TokenReader.vala @@ -16,7 +16,7 @@ namespace JSDOC { public Gee.ArrayList tokens; public int length { - get { return this.tokens.size } + get { return this.tokens.size; } } public TokenArray() @@ -41,11 +41,23 @@ namespace JSDOC { public void push (Token t) { this.tokens.add(t); } + public Token? pop () + { + if (this.size > 0) { + return this.tokens.remove_at(this.size-1); + } + return null; + } + public Token get(int i) { return this.tokens.get(i); } } + errordomain TokenReader_Error { + ArgumentError + } + public class TokenReader : Object { @@ -58,19 +70,19 @@ namespace JSDOC { */ /** @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token **/ - public bool collapseWhite = false, // only reduces white space... + public bool collapseWhite = false; // only reduces white space... /** @cfg {Boolean} keepDocs keep JSDOC comments **/ - public bool keepDocs = true, + public bool keepDocs = true; /** @cfg {Boolean} keepWhite keep White space **/ - public bool keepWhite = false, + public bool keepWhite = false; /** @cfg {Boolean} keepComments keep all comments **/ - public bool keepComments = false, + public bool keepComments = false; /** @cfg {Boolean} sepIdents seperate identifiers (eg. a.b.c into ['a', '.', 'b', '.', 'c'] ) **/ - public bool sepIdents = false, + public bool sepIdents = false; /** @cfg {String} filename name of file being parsed. **/ public string filename = ""; /** @config {Boolean} ignoreBadGrammer do not throw errors if we find stuff that might break compression **/ - public bool ignoreBadGrammer = false, + public bool ignoreBadGrammer = false; int line = 0; @@ -90,12 +102,9 @@ namespace JSDOC { var tokens = new TokenArray(); bool eof; - while (true) { + while (!stream.lookEOF()) { + - stream.look(0, out eof) - if (eof) { - break; - } if (this.read_mlcomment(stream, tokens)) continue; if (this.read_slcomment(stream, tokens)) continue; if (this.read_dbquote(stream, tokens)) continue; @@ -116,7 +125,7 @@ namespace JSDOC { return tokens; - }, + } /** * findPuncToken - find the id of a token (previous to current) @@ -127,21 +136,22 @@ namespace JSDOC { * @arg {Number} offset where to start reading from * @return {Number} position of token */ - public int findPuncToken(TokenArray tokens, string data, int n) { + public int findPuncToken(TokenArray tokens, string data, int n) + { n = n || tokens.length -1; var stack = 0; while (n > -1) { - if (!stack && tokens[n].data == data) { + if (!stack && tokens.get(n).data == data) { return n; } - if (tokens[n].data == ')' || tokens[n].data == '}') { + if (tokens.get(n).data == ')' || tokens.get(n).data == '}') { stack++; n--; continue; } - if (stack && (tokens[n].data == '{' || tokens[n].data == '(')) { + if (stack && (tokens.get(n).data == '{' || tokens.get(n).data == '(')) { stack--; n--; continue; @@ -151,7 +161,7 @@ namespace JSDOC { n--; } return -1; - }, + } /** * lastSym - find the last token symbol * need to back check syntax.. @@ -160,152 +170,176 @@ namespace JSDOC { * @arg {Number} offset where to start.. * @return {Token} the token */ - lastSym : function(tokens, n) { + public Token lastSym(TokenArray tokens, int n) + { for (var i = n-1; i >= 0; i--) { - if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i]; + if (!(tokens.get(i).is("WHIT") || tokens.get(i).is("COMM"))) { + return tokens.get(i); + } } return null; - }, + } /** @returns {Boolean} Was the token found? */ - read_word : function(/**JSDOC.TokenStream*/stream, tokens) { - var found = ""; - while (!stream.look().eof && Lang.isWordChar(stream.look())) { + public bool read_word (TokenStream stream, TokenArray tokens) + { + string found = ""; + while (!stream.lookEOF() && Lang.isWordChar(stream.look())) { found += stream.next(); } - if (found === "") { + if (found == "") { return false; } - var name; - if ((name = Lang.keyword(found))) { - if (found == 'return' && tokens.lastSym().data == ')') { + var name = Lang.keyword(found); + if (name != null) { + + // look for "()return" ?? why ??? + var ls = tokens.lastSym(); + if (found == "return" && ls != null && ls.data == ")") { //Seed.print('@' + tokens.length); - var n = this.findPuncToken(tokens, ')'); + var n = this.findPuncToken(tokens, ")"); //Seed.print(')@' + n); - n = this.findPuncToken(tokens, '(', n-1); + n = this.findPuncToken(tokens, "(", n-1); //Seed.print('(@' + n); var lt = this.lastSym(tokens, n); - print(JSON.stringify(lt)); - if (lt.type != 'KEYW' || ['IF', 'WHILE'].indexOf(lt.name) < -1) { + /* + //print(JSON.stringify(lt)); + if (lt.type != "KEYW" || ["IF", 'WHILE'].indexOf(lt.name) < -1) { if (!this.ignoreBadGrammer) { - throw { - name : "ArgumentError", - message: "\n" + this.filename + ':' + this.line + " Error - return found after )" - } + throw new TokenReader_Error.ArgumentError( + this.filename + ":" + this.line + " Error - return found after )" + ); } } - + */ } tokens.push(new Token(found, "KEYW", name, this.line)); return true; } + if (!this.sepIdents || found.indexOf('.') < 0 ) { tokens.push(new Token(found, "NAME", "NAME", this.line)); return true; } var n = found.split('.'); var p = false; - var _this = this; - n.forEach(function(nm) { + foreach (unowned string nm in n) { if (p) { - tokens.push(new Token('.', "PUNC", "DOT", _this.line)); + tokens.push(new Token('.', "PUNC", "DOT", this.line)); } p=true; - tokens.push(new Token(nm, "NAME", "NAME", _this.line)); - }); + tokens.push(new Token(nm, "NAME", "NAME", this.line)); + } return true; - }, + } /** @returns {Boolean} Was the token found? */ - read_punc : function(/**JSDOC.TokenStream*/stream, tokens) { - var found = ""; + public bool read_punc (TokenStream stream, TokenArray tokens) + { + string found = ""; var name; - while (!stream.look().eof && Lang.punc(found+stream.look())) { + while (!stream.lookEOF() && Lang.punc(found + stream.look()).length > 0) { found += stream.next(); } - if (found === "") { + if (found == "") { return false; } - if ((found == '}' || found == ']') && tokens.lastSym().data == ',') { + var ls = tokens.lastSym(); + + if ((found == "}" || found == "]") && ls != null && ls.data == ",") { //print("Error - comma found before " + found); //print(JSON.stringify(tokens.lastSym(), null,4)); if (this.ignoreBadGrammer) { print("\n" + this.filename + ':' + this.line + " Error - comma found before " + found); } else { - - throw { - name : "ArgumentError", - message: "\n" + this.filename + ':' + this.line + " Error - comma found before " + found - } + throw new TokenReader_Error.ArgumentError( + this.filename + ":" + this.line + " comma found before " + found + + ); + } } tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line)); return true; - }, + } /** @returns {Boolean} Was the token found? */ - read_space : function(/**JSDOC.TokenStream*/stream, tokens) { + public bool read_space (TokenStream stream, TokenArray tokens) + { var found = ""; - while (!stream.look().eof && Lang.isSpace(stream.look()) && !Lang.isNewline(stream.look())) { + while (!stream.lookEOF() && Lang.isSpace(stream.look()) && !Lang.isNewline(stream.look())) { found += stream.next(); } - if (found === "") { + if (found == "") { return false; } - //print("WHITE = " + JSON.stringify(found)); - if (this.collapseWhite) found = " "; - if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE", this.line)); + //print("WHITE = " + JSON.stringify(found)); + + + if (this.collapseWhite) { + found = " "; // this might work better if it was a '\n' ??? + } + if (this.keepWhite) { + tokens.push(new Token(found, "WHIT", "SPACE", this.line)); + } return true; - }, + } /** @returns {Boolean} Was the token found? */ - read_newline : function(/**JSDOC.TokenStream*/stream, tokens) { + public bool read_newline (TokenStream stream, TokenArray tokens) + { var found = ""; var line = this.line; - while (!stream.look().eof && Lang.isNewline(stream.look())) { + while (!stream.lookEOF() && Lang.isNewline(stream.look())) { this.line++; found += stream.next(); } - if (found === "") { + if (found == "") { return false; } + + // if we found a new line, then we could check if previous character was a ';' - if so we can drop it. + // otherwise generally keep it.. in which case it should reduce our issue with stripping new lines.. + + //this.line++; if (this.collapseWhite) { - found = "\n"; + found = "\n"; // reduces multiple line breaks into a single one... } - if (this.keepWhite) { - var last = tokens ? tokens.pop() : false; - if (last && last.name != "WHIT") { + + if (this.keepWhite) { + var last = tokens.pop(); + if (last != null && last.name != "WHIT") { tokens.push(last); } - + // replaces last new line... tokens.push(new Token(found, "WHIT", "NEWLINE", line)); } return true; @@ -314,42 +348,54 @@ namespace JSDOC { /** @returns {Boolean} Was the token found? */ - read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) { - if (stream.look() == "/" && stream.look(1) == "*") { - var found = stream.next(2); - var c = ''; - var line = this.line; - while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) { - c = stream.next(); - if (c == "\n") this.line++; - found += c; + public bool read_mlcomment (TokenStream stream, TokenArray tokens) + { + if (stream.look() != "/") { + return false; + } + if (stream.look(1) != "*") { + return false; + } + var found = stream.next(2); + var c = ''; + var line = this.line; + while (!stream.lookEOF() && !(stream.look(-1) == "/" && stream.look(-2) == "*")) { + c = stream.next(); + if (c == "\n") { + this.line++; } - - // to start doclet we allow /** or /*** but not /**/ or /**** - if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC", this.line)); - else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line)); - return true; + found += c; } - return false; - }, + + // to start doclet we allow /** or /*** but not /**/ or /**** + //if (found.length /^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) { + if ((this.keepDocs && found.length > 4 && found.index_of("/**") == 0 && found[3] != "/") { + tokens.push(new Token(found, "COMM", "JSDOC", this.line)); + } else if (this.keepComments) { + tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line)); + } + return true; + + } /** @returns {Boolean} Was the token found? */ - read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) { - var found; + public bool read_slcomment (TokenStream stream, TokenArray tokens) + { + var found = ""; if ( (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2))) || (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4))) ) { var line = this.line; - while (!stream.look().eof && !Lang.isNewline(stream.look())) { + while (!stream.lookEOF() && !Lang.isNewline(stream.look())) { found += stream.next(); } - if (!stream.look().eof) { + //if (!stream.lookEOF()) { // what? << eat the EOL? found += stream.next(); - } + //} if (this.keepComments) { tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line)); } @@ -357,128 +403,114 @@ namespace JSDOC { return true; } return false; - }, + } /** @returns {Boolean} Was the token found? */ - read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) { - if (stream.look() == "\"") { + public bool read_dbquote (TokenStream stream, TokenArray tokens) + { + if (stream.look() != "\"") { + return false; + } // find terminator - var string = stream.next(); - - while (!stream.look().eof) { - if (stream.look() == "\\") { - if (Lang.isNewline(stream.look(1))) { - do { - stream.next(); - } while (!stream.look().eof && Lang.isNewline(stream.look())); - string += "\\\n"; - } - else { - string += stream.next(2); - } - } - else if (stream.look() == "\"") { - string += stream.next(); - tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE", this.line)); - return true; + var str = stream.next(); + + while (!stream.lookEOF()) { + if (stream.look() == "\\") { + if (Lang.isNewline(stream.look(1))) { + do { + stream.next(); + } while (!stream.lookEOF() && Lang.isNewline(stream.look())); + str += "\\\n"; } else { - string += stream.next(); + str += stream.next(2); } + continue; } + if (stream.look() == "\"") { + str += stream.next(); + tokens.push(new Token(str, "STRN", "DOUBLE_QUOTE", this.line)); + return true; + } + + str += stream.next(); + } - return false; // error! unterminated string + return false; }, /** @returns {Boolean} Was the token found? */ - read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) { - if (stream.look() == "'") { - // find terminator - var string = stream.next(); - - while (!stream.look().eof) { - if (stream.look() == "\\") { // escape sequence - string += stream.next(2); - } - else if (stream.look() == "'") { - string += stream.next(); - tokens.push(new Token(string, "STRN", "SINGLE_QUOTE", this.line)); - return true; - } - else { - string += stream.next(); - } + public bool read_snquote (TokenStream stream, TokenArray tokens) + { + if (stream.look() != "'") { + return false; + } + // find terminator + var str = stream.next(); + + while (!stream.look().eof) { + if (stream.look() == "\\") { // escape sequence + str += stream.next(2); + continue; + } + if (stream.look() == "'") { + str += stream.next(); + tokens.push(new Token(str, "STRN", "SINGLE_QUOTE", this.line)); + return true; } + str += stream.next(); + } - return false; // error! unterminated string - }, + return false; + } + /** @returns {Boolean} Was the token found? */ - read_numb : function(/**JSDOC.TokenStream*/stream, tokens) { + public bool read_numb (TokenStream stream, TokenArray tokens) + { if (stream.look() === "0" && stream.look(1) == "x") { return this.read_hex(stream, tokens); } var found = ""; - while (!stream.look().eof && Lang.isNumber(found+stream.look())){ + while (!stream.lookEOF() && Lang.isNumber(found+stream.look())){ found += stream.next(); } if (found === "") { return false; } - else { - if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL", this.line)); - else tokens.push(new Token(found, "NUMB", "DECIMAL", this.line)); + if (GLib.Regex.match_simple("^0[0-7]", found)) { + tokens.push(new Token(found, "NUMB", "OCTAL", this.line)); return true; } - }, - /*t: - requires("../lib/JSDOC/TextStream.js"); - requires("../lib/JSDOC/Token.js"); - requires("../lib/JSDOC/Lang.js"); - - plan(3, "testing read_numb"); - - //// setup - var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}"; - var tr = new TokenReader(); - var tokens = tr.tokenize(new TextStream(src)); - - var hexToken, octToken, decToken; - for (var i = 0; i < tokens.length; i++) { - if (tokens[i].name == "HEX_DEC") hexToken = tokens[i]; - if (tokens[i].name == "OCTAL") octToken = tokens[i]; - if (tokens[i].name == "DECIMAL") decToken = tokens[i]; - } - //// - - is(decToken.data, "8.0", "decimal number is found in source."); - is(hexToken.data, "0x20", "hexdec number is found in source (issue #99)."); - is(octToken.data, "0777", "octal number is found in source."); - */ - + tokens.push(new Token(found, "NUMB", "DECIMAL", this.line)); + return true; + + } + /** @returns {Boolean} Was the token found? */ - read_hex : function(/**JSDOC.TokenStream*/stream, tokens) { + public bool read_hex (TokenStream stream, TokenArray tokens) + { var found = stream.next(2); - while (!stream.look().eof) { + while (!stream.lookEOF()) { if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line)); return true; } - else { - found += stream.next(); - } + + found += stream.next(); + } return false; }, @@ -486,47 +518,47 @@ namespace JSDOC { /** @returns {Boolean} Was the token found? */ - read_regx : function(/**JSDOC.TokenStream*/stream, tokens) { - var last; + public bool read_regx (TokenStream stream, TokenArray tokens) + { + Token last; + if (stream.look() != "/") { + return false; + } + var last = tokens.lastSym(); if ( - stream.look() == "/" - && + (last == null) + || ( - - ( - !(last = tokens.lastSym()) // there is no last, the regex is the first symbol - || - ( - !last.is("NUMB") - && !last.is("NAME") - && !last.is("RIGHT_PAREN") - && !last.is("RIGHT_BRACKET") - ) - ) + !last.is("NUMB") // stuff that can not appear before a regex.. + && !last.is("NAME") + && !last.is("RIGHT_PAREN") + && !last.is("RIGHT_BRACKET") ) - ) { + ) { var regex = stream.next(); - while (!stream.look().eof) { + while (!stream.lookEOF()) { if (stream.look() == "\\") { // escape sequence regex += stream.next(2); + continue; } - else if (stream.look() == "/") { + if (stream.look() == "/") { regex += stream.next(); - while (/[gmi]/.test(stream.look())) { + while (GLib.Regex.match_simple("[gmi]", stream.look()) { regex += stream.next(); } tokens.push(new Token(regex, "REGX", "REGX", this.line)); return true; } - else { - regex += stream.next(); - } + + regex += stream.next(); + } // error: unterminated regex } return false; } -}); \ No newline at end of file + } +} \ No newline at end of file