// JSDOC/TokenReader.vala
// [gnome.introspection-doc-generator] / JSDOC / TokenReader.vala
1 //<script type="text/javascript">
2
3  
4
5
6 //const Token   = imports.Token.Token;
7 //const Lang    = imports.Lang.Lang;
8
9 /**
10         @class Search a {@link JSDOC.TextStream} for language tokens.
11 */
12
13 namespace JSDOC {
14
15     public class TokenArray: Object {
16         
17         Gee.ArrayList<Token> tokens;
18         
19         public TokenArray()
20         {
21             this.items = new Gee.ArrayList<Token>();
22         }
23         
24         public Token? last() {
25             if (this.tokens > 0) {
26                 return this.tokens[this.tokens.length-1];
27             }
28             return null;
29         }
30         public Token? lastSym = function() {
31             for (var i = this.tokens.length-1; i >= 0; i--) {
32                 if (!(this.tokens.get(i).is("WHIT") || this.tokens.get(i).is("COMM")))  {
33                     return this.tokens.get(i);
34                 }
35             }
36             return null;
37         }
38     }
39
40
41     public class TokenReader : Object
42     {
43         
44         
45         
46         /*
47          *
48          * I wonder if this will accept the prop: value, prop2 :value construxtor if we do not define one...
49          */
50         
51         /** @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token **/
52         public bool collapseWhite = false, // only reduces white space...
53         /** @cfg {Boolean} keepDocs keep JSDOC comments **/
54         public bool keepDocs = true,
55         /** @cfg {Boolean} keepWhite keep White space **/
56         public bool keepWhite = false,
57         /** @cfg {Boolean} keepComments  keep all comments **/
58         public bool keepComments = false,
59         /** @cfg {Boolean} sepIdents seperate identifiers (eg. a.b.c into ['a', '.', 'b', '.', 'c'] ) **/
60         public bool sepIdents = false,
61         /** @cfg {String} filename name of file being parsed. **/
62         public string filename = "";
63         /** @config {Boolean} ignoreBadGrammer do not throw errors if we find stuff that might break compression **/
64         public bool ignoreBadGrammer = false,
65         
66         
67         int line = 0;
68         
69         /**
70          * tokenize a stream
71          * @return {Array} of tokens
72          * 
73          * ts = new TextStream(File.read(str));
74          * tr = TokenReader({ keepComments : true, keepWhite : true });
75          * tr.tokenize(ts)
76          * 
77          */
78         public TokenArray tokenize(TextStream stream)
79         {
80             this.line =1;
81             var tokens = new TokenArray();
82            
83             bool eof;
84             while (true) {
85                 
86                 stream.look(0, out eof) 
87                 if (eof) {
88                     break;
89                 }
90                 if (this.read_mlcomment(stream, tokens)) continue;
91                 if (this.read_slcomment(stream, tokens)) continue;
92                 if (this.read_dbquote(stream, tokens))   continue;
93                 if (this.read_snquote(stream, tokens))   continue;
94                 if (this.read_regx(stream, tokens))      continue;
95                 if (this.read_numb(stream, tokens))      continue;
96                 if (this.read_punc(stream, tokens))      continue;
97                 if (this.read_newline(stream, tokens))   continue;
98                 if (this.read_space(stream, tokens))     continue;
99                 if (this.read_word(stream, tokens))      continue;
100                 
101                 // if execution reaches here then an error has happened
102                 tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line));
103             }
104             
105             
106             
107             return tokens;
108         },
109
110         /**
111          * findPuncToken - find the id of a token (previous to current)
112          * need to back check syntax..
113          * 
114          * @arg {Array} tokens the array of tokens.
115          * @arg {String} token data (eg. '(')
116          * @arg {Number} offset where to start reading from
117          * @return {Number} position of token
118          */
119         findPuncToken : function(tokens, data, n) {
120             n = n || tokens.length -1;
121             var stack = 0;
122             while (n > -1) {
123                 
124                 if (!stack && tokens[n].data == data) {
125                     return n;
126                 }
127                 
128                 if (tokens[n].data  == ')' || tokens[n].data  == '}') {
129                     stack++;
130                     n--;
131                     continue;
132                 }
133                 if (stack && (tokens[n].data  == '{' || tokens[n].data  == '(')) {
134                     stack--;
135                     n--;
136                     continue;
137                 }
138                 
139                 
140                 n--;
141             }
142             return -1;
143         },
144         /**
145          * lastSym - find the last token symbol
146          * need to back check syntax..
147          * 
148          * @arg {Array} tokens the array of tokens.
149          * @arg {Number} offset where to start..
150          * @return {Token} the token
151          */
152         lastSym : function(tokens, n) {
153             for (var i = n-1; i >= 0; i--) {
154                 if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
155             }
156             return null;
157         },
158         
159          
160         
161         /**
162             @returns {Boolean} Was the token found?
163          */
164         read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
165             var found = "";
166             while (!stream.look().eof && Lang.isWordChar(stream.look())) {
167                 found += stream.next();
168             }
169             
170             if (found === "") {
171                 return false;
172             }
173             
174             var name;
175             if ((name = Lang.keyword(found))) {
176                 if (found == 'return' && tokens.lastSym().data == ')') {
177                     //Seed.print('@' + tokens.length);
178                     var n = this.findPuncToken(tokens, ')');
179                     //Seed.print(')@' + n);
180                     n = this.findPuncToken(tokens, '(', n-1);
181                     //Seed.print('(@' + n);
182                     
183                     var lt = this.lastSym(tokens, n);
184                     print(JSON.stringify(lt));
185                     if (lt.type != 'KEYW' || ['IF', 'WHILE'].indexOf(lt.name) < -1) {
186                         if (!this.ignoreBadGrammer) {
187                             throw {
188                                 name : "ArgumentError", 
189                                 message: "\n" + this.filename + ':' + this.line + " Error - return found after )"
190                             }
191                         }
192                     }
193                     
194                     
195                     
196                 }
197                 
198                 tokens.push(new Token(found, "KEYW", name, this.line));
199                 return true;
200             }
201             if (!this.sepIdents || found.indexOf('.') < 0 ) {
202                 tokens.push(new Token(found, "NAME", "NAME", this.line));
203                 return true;
204             }
205             var n = found.split('.');
206             var p = false;
207             var _this = this;
208             n.forEach(function(nm) {
209                 if (p) {
210                     tokens.push(new Token('.', "PUNC", "DOT", _this.line));
211                 }
212                 p=true;
213                 tokens.push(new Token(nm, "NAME", "NAME", _this.line));
214             });
215             return true;
216                 
217
218         },
219
220         /**
221             @returns {Boolean} Was the token found?
222          */
223         read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
224             var found = "";
225             var name;
226             while (!stream.look().eof && Lang.punc(found+stream.look())) {
227                 found += stream.next();
228             }
229             
230             
231             if (found === "") {
232                 return false;
233             }
234             
235             if ((found == '}' || found == ']') && tokens.lastSym().data == ',') {
236                 //print("Error - comma found before " + found);
237                 //print(JSON.stringify(tokens.lastSym(), null,4));
238                 if (this.ignoreBadGrammer) {
239                     print("\n" + this.filename + ':' + this.line + " Error - comma found before " + found);
240                 } else {
241                     
242                     throw {
243                         name : "ArgumentError", 
244                         message: "\n" + this.filename + ':' + this.line + " Error - comma found before " + found
245                     }
246                 }
247             }
248             
249             tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
250             return true;
251             
252         },
253
254         /**
255             @returns {Boolean} Was the token found?
256          */
257         read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
258             var found = "";
259             
260             while (!stream.look().eof && Lang.isSpace(stream.look()) && !Lang.isNewline(stream.look())) {
261                 found += stream.next();
262             }
263             
264             if (found === "") {
265                 return false;
266             }
267             //print("WHITE = " + JSON.stringify(found)); 
268             if (this.collapseWhite) found = " ";
269             if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE", this.line));
270             return true;
271         
272         },
273
        /**
            Read a run of newline characters, advancing the line counter
            once per newline consumed.
            @returns {Boolean} Was the token found?
         */
        read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";
            var line = this.line; // tag the token with the line it started on
            while (!stream.look().eof && Lang.isNewline(stream.look())) {
                this.line++;
                found += stream.next();
            }

            if (found === "") {
                return false;
            }
            //this.line++;
            if (this.collapseWhite) {
                found = "\n"; // collapse the run into a single newline
            }
            if (this.keepWhite) {
                // Pop the previous token and only re-push it when it is not
                // whitespace, so WHIT tokens do not pile up before a NEWLINE.
                // NOTE(review): this checks last.name != "WHIT", but "WHIT"
                // is the token *type*; names are "SPACE"/"NEWLINE" - as
                // written the pop/push looks like a no-op. Verify intent.
                var last = tokens ? tokens.pop() : false;
                if (last && last.name != "WHIT") {
                    tokens.push(last);
                }

                tokens.push(new Token(found, "WHIT", "NEWLINE", line));
            }
            return true;
        },
302
        /**
            Read a multi-line comment token, classifying it as a JSDOC
            doclet or a plain multi-line comment.
            @returns {Boolean} Was the token found?
         */
        read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "/" && stream.look(1) == "*") {
                var found = stream.next(2);
                var c = '';
                var line = this.line; // line the comment started on
                // Consume until the two most recently consumed characters
                // are "*" then "/" (negative look() offsets peek backwards
                // at already-consumed input).
                while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) {
                    c = stream.next();
                    if (c == "\n") this.line++; // keep the line counter in sync
                    found += c;
                }

                // to start doclet we allow /** or /*** but not /**/ or /****
                if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC", this.line));
                else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
                return true;
            }
            return false;
        },
324
        /**
            Read a single-line comment: either "//..." or an HTML-style
            comment opener "&lt;!--" to end of line.
            @returns {Boolean} Was the token found?
         */
        read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found;
            // NOTE: the condition has side effects - stream.next() consumes
            // the opener and seeds "found" when a branch matches.
            if (
                (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
                || 
                (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
            ) {
                var line = this.line;
                // consume to end of line
                while (!stream.look().eof && !Lang.isNewline(stream.look())) {
                    found += stream.next();
                }
                // include the trailing newline in the comment token
                if (!stream.look().eof) {
                    found += stream.next();
                }
                if (this.keepComments) {
                    tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
                }
                this.line++;
                return true;
            }
            return false;
        },
350
        /**
            Read a double-quoted string literal, handling escape sequences
            and backslash line continuations.
            @returns {Boolean} Was the token found?
         */
        read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "\"") {
                // find terminator
                var string = stream.next();

                while (!stream.look().eof) {
                    if (stream.look() == "\\") {
                        if (Lang.isNewline(stream.look(1))) {
                            // line continuation: swallow the newline run and
                            // normalise it to a single escaped newline
                            do {
                                stream.next();
                            } while (!stream.look().eof && Lang.isNewline(stream.look()));
                            string += "\\\n";
                        }
                        else {
                            // ordinary escape: take backslash + following char
                            string += stream.next(2);
                        }
                    }
                    else if (stream.look() == "\"") {
                        string += stream.next(); // closing quote
                        tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE", this.line));
                        return true;
                    }
                    else {
                        string += stream.next();
                    }
                }
            }
            return false; // error! unterminated string
        },
383
384         /**
385             @returns {Boolean} Was the token found?
386          */
387         read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
388             if (stream.look() == "'") {
389                 // find terminator
390                 var string = stream.next();
391                 
392                 while (!stream.look().eof) {
393                     if (stream.look() == "\\") { // escape sequence
394                         string += stream.next(2);
395                     }
396                     else if (stream.look() == "'") {
397                         string += stream.next();
398                         tokens.push(new Token(string, "STRN", "SINGLE_QUOTE", this.line));
399                         return true;
400                     }
401                     else {
402                         string += stream.next();
403                     }
404                 }
405             }
406             return false; // error! unterminated string
407         },
408
409         /**
410             @returns {Boolean} Was the token found?
411          */
412         read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
413             if (stream.look() === "0" && stream.look(1) == "x") {
414                 return this.read_hex(stream, tokens);
415             }
416             
417             var found = "";
418             
419             while (!stream.look().eof && Lang.isNumber(found+stream.look())){
420                 found += stream.next();
421             }
422             
423             if (found === "") {
424                 return false;
425             }
426             else {
427                 if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
428                 else tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
429                 return true;
430             }
431         },
432         /*t:
433             requires("../lib/JSDOC/TextStream.js");
434             requires("../lib/JSDOC/Token.js");
435             requires("../lib/JSDOC/Lang.js");
436             
437             plan(3, "testing read_numb");
438             
439             //// setup
440             var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
441             var tr = new TokenReader();
442             var tokens = tr.tokenize(new TextStream(src));
443             
444             var hexToken, octToken, decToken;
445             for (var i = 0; i < tokens.length; i++) {
446                 if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
447                 if (tokens[i].name == "OCTAL") octToken = tokens[i];
448                 if (tokens[i].name == "DECIMAL") decToken = tokens[i];
449             }
450             ////
451             
452             is(decToken.data, "8.0", "decimal number is found in source.");
453             is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
454             is(octToken.data, "0777", "octal number is found in source.");
455         */
456
457         /**
458             @returns {Boolean} Was the token found?
459          */
460         read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
461             var found = stream.next(2);
462             
463             while (!stream.look().eof) {
464                 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
465                     tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
466                     return true;
467                 }
468                 else {
469                     found += stream.next();
470                 }
471             }
472             return false;
473         },
474
        /**
            Read a regex literal. A "/" begins a regex (rather than
            division) only when the previous significant token cannot end
            an expression (number, name, ')' or ']').
            @returns {Boolean} Was the token found?
         */
        read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
            var last;
            if (
                stream.look() == "/"
                && 
                (
                    
                    (
                        !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
                        || 
                        (
                               !last.is("NUMB")
                            && !last.is("NAME")
                            && !last.is("RIGHT_PAREN")
                            && !last.is("RIGHT_BRACKET")
                        )
                    )
                )
            ) {
                var regex = stream.next();

                while (!stream.look().eof) {
                    if (stream.look() == "\\") { // escape sequence
                        regex += stream.next(2);
                    }
                    else if (stream.look() == "/") {
                        regex += stream.next();

                        // consume trailing flags (g, m, i)
                        while (/[gmi]/.test(stream.look())) {
                            regex += stream.next();
                        }

                        tokens.push(new Token(regex, "REGX", "REGX", this.line));
                        return true;
                    }
                    else {
                        regex += stream.next();
                    }
                }
                // error: unterminated regex
            }
            return false;
        }
521 });