1 //<script type="text/javascript">
// Sample driver: configures a TokenReader to keep comments without collapsing
// whitespace, then tokenizes the contents of a single file.
// NOTE(review): the input path is a hard-coded absolute location, and no
// declaration of `str` is visible before it is used as an out argument - confirm.
7 var lc = new JSDOC.Lang_Class ();
8 var tr = new JSDOC.TokenReader();
11 tr.keepComments = true;
13 tr.collapseWhite = false;
16 FileUtils.get_contents("/home/alan/gitlive/gnome.introspection-doc-generator/JSDOC/Walker2.js", out str);
18 var toks = tr.tokenize(new JSDOC.TextStream(str)); // dont merge xxx + . + yyyy etc.
22 //const Token = imports.Token.Token;
23 //const Lang = imports.Lang.Lang;
26 @class Search a {@link JSDOC.TextStream} for language tokens.
// Ordered collection of Token objects backed by a Gee.ArrayList, with
// stack-style helpers (push, pop, last) used while tokenizing.
34 public class TokenArray: Object {
// Backing storage; it is public, so callers can also reach the list directly.
36 public Gee.ArrayList<Token> tokens;
// Getter: reports the number of tokens currently held.
38 get { return this.tokens.size; }
// Start with an empty token list.
43 this.tokens = new Gee.ArrayList<Token>();
// Returns the most recently added token when the list is non-empty.
46 public Token? last() {
47 if (this.tokens.size > 0) {
48 return this.tokens.get(this.tokens.size-1);
// Scans from the end towards the start and returns the first token that is
// neither "WHIT" nor "COMM".
52 public Token? lastSym () {
53 for (var i = this.tokens.size-1; i >= 0; i--) {
54 if (!(this.tokens.get(i).is("WHIT") || this.tokens.get(i).is("COMM"))) {
55 return this.tokens.get(i);
// Presumably appends t to the end of the list (the counterpart of the removal below).
60 public void push (Token t) {
// Removes the final token from the list and returns it, when the list is non-empty.
65 if (this.tokens.size > 0) {
66 return this.tokens.remove_at(this.tokens.size-1);
// Index-based lookup that forwards to the backing list; declared `new`
// because it hides an inherited member named get.
71 public new Token get(int i) {
72 return this.tokens.get(i);
// Prints the string form of each token, in list order.
// NOTE(review): print is printf-style, so a '%' inside the token text would be
// read as a format specifier - consider print("%s", ...).
76 foreach(var token in this.tokens) {
77 print(token.asString());
// Error domain for tokenizer failures; read_word and read_punc throw its
// ArgumentError code when they detect grammar they refuse to accept.
83 public errordomain TokenReader_Error {
88 public class TokenReader : Object
95 * I wonder if this will accept the prop: value, prop2 :value constructor if we do not define one...
98 /** @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token **/
99 public bool collapseWhite = false; // only reduces white space...
100 /** @cfg {Boolean} keepDocs keep JSDOC comments **/
101 public bool keepDocs = true;
102 /** @cfg {Boolean} keepWhite keep White space **/
103 public bool keepWhite = false;
104 /** @cfg {Boolean} keepComments keep all comments **/
105 public bool keepComments = false;
106 /** @cfg {Boolean} sepIdents separate identifiers (eg. a.b.c into ['a', '.', 'b', '.', 'c'] ) **/
107 public bool sepIdents = false;
108 /** @cfg {String} filename name of file being parsed. **/
109 public string filename = "";
110 /** @cfg {Boolean} ignoreBadGrammer do not throw errors if we find stuff that might break compression **/
111 public bool ignoreBadGrammer = false;
118 * @return {Array} of tokens
120 * ts = new TextStream(File.read(str));
121 * tr = TokenReader({ keepComments : true, keepWhite : true });
// Splits the stream into tokens by offering the current position to each
// read_* recogniser in turn until the stream reports end of input.
// NOTE(review): read_punc is declared `throws TokenReader_Error`, but no throws
// clause is visible on this signature - confirm how that error is propagated.
125 public TokenArray tokenize(TextStream stream)
128 var tokens = new TokenArray();
131 while (!stream.lookEOF()) {
// Order matters: the first recogniser that reports a match wins, and the loop
// restarts from the new stream position. Comments and strings are tried before
// regexes, numbers and punctuation.
134 if (this.read_mlcomment(stream, tokens)) continue;
135 if (this.read_slcomment(stream, tokens)) continue;
136 if (this.read_dbquote(stream, tokens)) continue;
137 if (this.read_snquote(stream, tokens)) continue;
138 if (this.read_regx(stream, tokens)) continue;
139 if (this.read_numb(stream, tokens)) continue;
140 if (this.read_punc(stream, tokens)) continue;
141 if (this.read_newline(stream, tokens)) continue;
142 if (this.read_space(stream, tokens)) continue;
143 if (this.read_word(stream, tokens)) continue;
145 // if execution reaches here then an error has happened
// Fallback: consume one character and wrap it in a token named UNKNOWN_TOKEN
// (presumably pushed onto tokens), so the loop always makes progress.
147 new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line)
157 * findPuncToken - find the id of a token (previous to current)
158 * need to back check syntax..
160 * @arg {Array} tokens the array of tokens.
161 * @arg {String} token data (eg. '(')
162 * @arg {Number} offset where to start reading from
163 * @return {Number} position of token
// Looks for a token whose data equals `data`, using `stack` to track bracket
// nesting; presumably walks backwards from position n.
165 public int findPuncToken(TokenArray tokens, string data, int n)
// A non-positive n means "start from the last token".
// NOTE(review): an explicit n of 0 is therefore also redirected to the end.
167 n = n > 0 ? n : tokens.length -1;
// A match only counts when we are not inside a nested bracket pair.
171 if (stack < 1 && tokens.get(n).data == data) {
// Closing brackets - presumably increase the nesting depth.
175 if (tokens.get(n).data == ")" || tokens.get(n).data == "}") {
// Opening brackets while nested - presumably decrease the nesting depth.
180 if (stack > 0 && (tokens.get(n).data == "{" || tokens.get(n).data == "(")) {
192 * lastSym - find the last token symbol
193 * need to back check syntax..
195 * @arg {Array} tokens the array of tokens.
196 * @arg {Number} offset where to start..
197 * @return {Token} the token
// Walks backwards from index n-1 and returns the first token that is neither
// "WHIT" nor "COMM"; the token at index n itself is not examined.
199 public Token? lastSym(TokenArray tokens, int n)
201 for (var i = n-1; i >= 0; i--) {
202 if (!(tokens.get(i).is("WHIT") || tokens.get(i).is("COMM"))) {
203 return tokens.get(i);
212 @returns {Boolean} Was the token found?
// Reads a run of word characters and emits it either as a keyword token or as
// one or more NAME tokens.
// NOTE(review): the body throws TokenReader_Error, but no throws clause is
// visible on this signature (read_punc declares one) - confirm.
214 public bool read_word (TextStream stream, TokenArray tokens)
// Accumulate characters for as long as Lang.isWordChar accepts them.
217 while (!stream.lookEOF() && Lang.isWordChar(stream.look().to_string())) {
218 found += stream.next();
// Look the text up as a keyword; the result is used as the KEYW token name below.
225 var name = Lang.keyword(found);
228 // look for "()return" ?? why ???
229 var ls = tokens.lastSym();
// Grammar check: `return` directly after a closing parenthesis is only
// tolerated when the parenthesised group belongs to a preceding keyword.
230 if (found == "return" && ls != null && ls.data == ")") {
231 //Seed.print('@' + tokens.length);
// Locate the closing parenthesis, then the opening one that pairs with it.
232 var n = this.findPuncToken(tokens, ")", 0);
233 //Seed.print(')@' + n);
234 n = this.findPuncToken(tokens, "(", n-1);
235 //Seed.print('(@' + n);
237 //var lt = this.lastSym(tokens, n);
239 //print(JSON.stringify(lt));
// NOTE(review): `indexOf(...) < -1` can never be true, and the array literal with
// mixed quoting is JavaScript rather than Vala; the only visible assignment of
// `lt` is commented out above - confirm whether this branch is live code.
240 if (lt.type != "KEYW" || ["IF", 'WHILE'].indexOf(lt.name) < -1) {
241 if (!this.ignoreBadGrammer) {
242 throw new TokenReader_Error.ArgumentError(
// NOTE(review): this.line is concatenated directly here, while read_punc uses
// this.line.to_string() for the same kind of message - make consistent.
243 this.filename + ":" + this.line + " Error - return found after )"
252 tokens.push(new Token(found, "KEYW", name, this.line));
// Not splitting identifiers, or nothing to split: emit the whole word as one NAME.
256 if (!this.sepIdents || found.index_of(".") < 0 ) {
257 tokens.push(new Token(found, "NAME", "NAME", this.line));
// Otherwise emit each dot-separated part as its own NAME token with DOT
// punctuation tokens in between (presumably skipped before the first part).
260 var n = found.split(".");
262 foreach (unowned string nm in n) {
264 tokens.push(new Token(".", "PUNC", "DOT", this.line));
267 tokens.push(new Token(nm, "NAME", "NAME", this.line));
275 @returns {Boolean} Was the token found?
// Reads the longest run of characters that Lang.punc still recognises and
// emits it as a PUNC token. Throws TokenReader_Error.ArgumentError for a comma
// directly before a closing brace or bracket unless ignoreBadGrammer is set.
277 public bool read_punc (TextStream stream, TokenArray tokens) throws TokenReader_Error
281 while (!stream.lookEOF()) {
// NOTE(review): ns holds the next character as a string, yet the visible check
// below re-reads the stream instead of using it - possibly an unused local.
282 var ns = stream.look().to_string();
// Extend the candidate only while the extended text is still known punctuation.
// NOTE(review): Lang.punc is used as a condition here and as the token name
// further down - confirm its return type supports both uses.
284 if (Lang.punc(found + stream.look().to_string()) ) {
285 found += stream.next();
293 var ls = tokens.lastSym();
// A trailing comma before a closing brace or bracket is treated as bad grammar.
295 if ((found == "}" || found == "]") && ls != null && ls.data == ",") {
296 //print("Error - comma found before " + found);
297 //print(JSON.stringify(tokens.lastSym(), null,4));
// Lenient mode reports the problem on stdout; otherwise an error is thrown.
// NOTE(review): print is printf-style, so a '%' in the filename would be read
// as a format specifier - consider print("%s", ...).
298 if (this.ignoreBadGrammer) {
299 print("\n" + this.filename + ":" + this.line.to_string() + " Error - comma found before " + found);
301 throw new TokenReader_Error.ArgumentError(
302 this.filename + ":" + this.line.to_string() + " comma found before " + found
309 tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
315 @returns {Boolean} Was the token found?
// Reads a run of whitespace that contains no newline characters and, when
// keepWhite is set, emits it as a WHIT token named SPACE.
317 public bool read_space (TextStream stream, TokenArray tokens)
// Newlines are excluded here; read_newline tokenizes them separately.
321 while (!stream.lookEOF() && Lang.isSpaceC( stream.look()) && !Lang.isNewlineC(stream.look())) {
322 found += stream.next();
328 //print("WHITE = " + JSON.stringify(found));
// With collapseWhite the whole run is replaced by a single space character.
331 if (this.collapseWhite) {
332 found = " "; // this might work better if it was a '\n' ???
// The token is only recorded when whitespace is being kept.
334 if (this.keepWhite) {
335 tokens.push(new Token(found, "WHIT", "SPACE", this.line));
342 @returns {Boolean} Was the token found?
// Reads a run of newline characters and, when keepWhite is set, emits it as a
// WHIT token named NEWLINE.
344 public bool read_newline (TextStream stream, TokenArray tokens)
// Captured before the loop, so the token reports the line where the run started
// (presumably this.line is advanced while the run is consumed).
347 var line = this.line;
348 while (!stream.lookEOF() && Lang.isNewlineC(stream.look())) {
350 found += stream.next();
357 // if we found a new line, then we could check if previous character was a ';' - if so we can drop it.
358 // otherwise generally keep it.. in which case it should reduce our issue with stripping new lines..
362 if (this.collapseWhite) {
363 found = "\n"; // reduces multiple line breaks into a single one...
366 if (this.keepWhite) {
// Take the previous token off the list so it can be inspected.
367 var last = tokens.pop();
// NOTE(review): every token built in this file passes "WHIT" in the second
// constructor argument and "SPACE"/"NEWLINE" in the third; if `name` maps to
// the third argument, this comparison with "WHIT" is always true and the
// intended replacement of a preceding whitespace token never happens -
// confirm against Token.
368 if (last != null && last.name != "WHIT") {
371 // replaces last new line...
372 tokens.push(new Token(found, "WHIT", "NEWLINE", line));
378 @returns {Boolean} Was the token found?
// Reads a slash-star block comment and emits it as a COMM token: JSDOC when it
// qualifies as a doc comment and keepDocs is set, MULTI_LINE_COMM when
// keepComments is set, otherwise no token is pushed by the visible code.
380 public bool read_mlcomment (TextStream stream, TokenArray tokens)
// Bail out unless the next two characters open a block comment.
382 if (stream.look() != '/') {
385 if (stream.look(1) != '*') {
// Consume the two-character opener.
388 var found = stream.next(2);
// Remember the line on which the comment starts.
390 var line = this.line;
// Keep consuming until the two characters just read form the closing delimiter.
391 while (!stream.lookEOF() && !(stream.look(-1) == '/' && stream.look(-2) == '*')) {
399 // to start doclet we allow /** or /*** but not /**/ or /****
400 //if (found.length /^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) {
// Doc comment test: longer than four characters, begins with slash-star-star,
// and the fourth character is not a slash.
// NOTE(review): the note above also rules out a four-star opener, but this
// condition does not check for it - confirm which is intended.
401 if (this.keepDocs && found.length > 4 && found.index_of("/**") == 0 && found[3] != '/') {
// NOTE(review): this token uses this.line while the branch below uses the
// captured start line - confirm which line number is wanted.
402 tokens.push(new Token(found, "COMM", "JSDOC", this.line));
403 } else if (this.keepComments) {
404 tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
411 @returns {Boolean} Was the token found?
// Reads a single-line comment and, when keepComments is set, emits it as a
// COMM token named SINGLE_LINE_COMM.
413 public bool read_slcomment (TextStream stream, TokenArray tokens)
// Two openers are recognised: a double slash, or the four-character HTML-style
// comment opener. The assignment inside each condition consumes the opener.
417 (stream.look() == '/' && stream.look(1) == '/' && (""!=(found=stream.next(2))))
419 (stream.look() == '<' && stream.look(1) == '!' && stream.look(2) == '-' && stream.look(3) == '-' && (""!=(found=stream.next(4))))
// Remember the line on which the comment starts.
421 var line = this.line;
422 while (!stream.lookEOF()) {
// NOTE(review): this writes every comment character to stdout and looks like
// leftover debugging output; print is also printf-style, so a '%' character
// would be read as a format specifier.
423 print(stream.look().to_string());
// Stop accumulating at the first newline character.
424 if ( Lang.isNewline(stream.look().to_string())) {
427 found += stream.next();
// NOTE(review): the trailing remark says the end-of-line character should not
// be eaten, yet the next statement consumes one more character - confirm.
429 if (!stream.lookEOF()) { // looking for end of line... if we got it, then do not eat the character..
430 found += stream.next();
432 if (this.keepComments) {
433 tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
442 @returns {Boolean} Was the token found?
// Reads a double-quoted string literal, including both quote characters, and
// emits it as a STRN token named DOUBLE_QUOTE.
444 public bool read_dbquote (TextStream stream, TokenArray tokens)
// Only applies when the stream is positioned on a double quote.
446 if (stream.look() != '"') {
// Consume the opening quote.
450 var str = stream.next();
452 while (!stream.lookEOF()) {
// A backslash starts an escape sequence.
453 if (stream.look() == '\\') {
// Backslash followed by a newline: handled by a loop that runs while the stream
// is positioned on newline characters (presumably a line continuation).
454 if (Lang.isNewline(stream.look(1).to_string())) {
457 } while (!stream.lookEOF() && Lang.isNewline(stream.look().to_string()));
// Any other escape: keep the backslash together with the character after it.
461 str += stream.next(2);
// An unescaped double quote terminates the literal.
465 if (stream.look() == '"') {
466 str += stream.next();
467 tokens.push(new Token(str, "STRN", "DOUBLE_QUOTE", this.line));
// Ordinary character: append and carry on.
471 str += stream.next();
478 @returns {Boolean} Was the token found?
// Reads a single-quoted string literal, including both quote characters, and
// emits it as a STRN token named SINGLE_QUOTE.
480 public bool read_snquote (TextStream stream, TokenArray tokens)
// Only applies when the stream is positioned on a single quote.
482 if (stream.look() != '\'') {
// Consume the opening quote.
486 var str = stream.next();
488 while (!stream.lookEOF()) {
// Keep the backslash together with the character it escapes.
489 if (stream.look() == '\\') { // escape sequence
490 str += stream.next(2);
// An unescaped single quote terminates the literal.
493 if (stream.look() == '\'') {
494 str += stream.next();
495 tokens.push(new Token(str, "STRN", "SINGLE_QUOTE", this.line));
// Ordinary character: append and carry on.
498 str += stream.next();
506 @returns {Boolean} Was the token found?
// Reads a numeric literal and emits it as a NUMB token named OCTAL or DECIMAL;
// hexadecimal literals are handed over to read_hex.
508 public bool read_numb (TextStream stream, TokenArray tokens)
// NOTE(review): only a lowercase x after the zero is routed to read_hex.
510 if (stream.look() == '0' && stream.look(1) == 'x') {
511 return this.read_hex(stream, tokens);
// Grow the candidate while the extended text still passes Lang.isNumber.
516 while (!stream.lookEOF() && Lang.isNumber(found+stream.look().to_string())){
517 found += stream.next();
// A leading zero followed by a digit from 0 to 7 is classified as octal.
523 if (GLib.Regex.match_simple("^0[0-7]", found)) {
524 tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
527 tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
533 @returns {Boolean} Was the token found?
// Reads a hexadecimal literal and emits it as a NUMB token named HEX_DEC.
// read_numb calls this only after seeing a zero followed by a lowercase x.
535 public bool read_hex (TextStream stream, TokenArray tokens)
// Consume the two-character prefix.
537 var found = stream.next(2);
// NOTE(review): when the stream ends right after the last hex digit, this loop
// exits without reaching the push below - confirm the code after it covers that.
539 while (!stream.lookEOF()) {
// Finished once the text so far is valid hex and one more character would break it.
540 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look().to_string())) { // done
541 tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
545 found += stream.next();
552 @returns {Boolean} Was the token found?
554 public bool read_regx (TextStream stream, TokenArray tokens)
557 if (stream.look() != '/') {
560 var last = tokens.lastSym();
565 !last.is("NUMB") // stuff that can not appear before a regex..
567 && !last.is("RIGHT_PAREN")
568 && !last.is("RIGHT_BRACKET")
571 var regex = stream.next();
573 while (!stream.lookEOF()) {
574 if (stream.look() == '\\') { // escape sequence
575 regex += stream.next(2);
578 if (stream.look() == '/') {
579 regex += stream.next();
581 while (GLib.Regex.match_simple("[gmi]", stream.look().to_string())) {
582 regex += stream.next();
585 tokens.push(new Token(regex, "REGX", "REGX", this.line));
589 regex += stream.next();
592 // error: unterminated regex