git.roojs.org Git - gnome.introspection-doc-generator/blob - JSDOC/TokenReader.vala

   1 //<script type="text/javascript">
   2
   3
   4 // test code
   5
   6 //const Token   = imports.Token.Token;
   7 //const Lang    = imports.Lang.Lang;
   8
   9 /**
  10         @class Search a {@link JSDOC.TextStream} for language tokens.
  11 */
  12
  13
  14
  15
  16 namespace JSDOC {
  17
  /**
   * Ordered, growable list of {@link Token} objects with a few
   * stack-style helpers used while tokenizing.
   */
  public class TokenArray : Object {

      /** Backing list; public so callers can iterate or index it directly. */
      public Gee.ArrayList<Token> tokens;

      /** Number of tokens currently held. */
      public int length {
          get { return this.tokens.size; }
      }

      /** Creates an empty array. */
      public TokenArray()
      {
          this.tokens = new Gee.ArrayList<Token>();
      }

      /**
       * @return the most recently pushed token, or null when the array is empty
       */
      public Token? last()
      {
          var count = this.tokens.size;
          return count > 0 ? this.tokens.get(count - 1) : null;
      }

      /**
       * Searches backwards from the end for the first token that is neither
       * whitespace ("WHIT") nor a comment ("COMM").
       *
       * @return that token, or null when every token is whitespace/comment
       */
      public Token? lastSym()
      {
          for (var i = this.tokens.size - 1; i >= 0; i--) {
              var tok = this.tokens.get(i);
              if (!(tok.is("WHIT") || tok.is("COMM"))) {
                  return tok;
              }
          }
          return null;
      }

      /** Appends a token to the end of the array. */
      public void push(Token t)
      {
          this.tokens.add(t);
      }

      /**
       * Removes the last token.
       *
       * @return the removed token, or null when the array is empty
       */
      public Token? pop()
      {
          if (this.tokens.size > 0) {
              return this.tokens.remove_at(this.tokens.size - 1);
          }
          return null;
      }

      /**
       * Indexed access to the backing list.
       * Declared `new` because it hides an inherited member of the same name.
       */
      public new Token get(int i)
      {
          return this.tokens.get(i);
      }

      /** Prints every token, one per line, using Token.asString(). */
      public void dump()
      {
          foreach (var token in this.tokens) {
              // print() is printf-style: the token text must be an argument,
              // never the format, because source code routinely contains '%'.
              print("%s\n", token.asString());
          }
      }

  }
  66
  /**
   * Errors raised by TokenReader while scanning source text.
   */
  public errordomain TokenReader_Error {
      /**
       * Thrown by TokenReader.read_punc() when a comma directly precedes a
       * closing "}" or "]" and ignoreBadGrammer is not set.
       */
      ArgumentError
  }
  70
  71
  72     public class TokenReader : Object
  73     {
  74
  75
  76
  77         /*
  78          *
  79          * I wonder if this will accept the "prop: value, prop2: value" constructor syntax if we do not define one...
  80          */
  81
  /**
   * @cfg {Boolean} collapseWhite replace each run of spaces with a single " "
   * and each run of newlines with a single "\n" (only affects whitespace tokens)
   **/
  public bool collapseWhite = false;

  /** @cfg {Boolean} keepDocs emit JSDOC comments as tokens **/
  public bool keepDocs = true;

  /** @cfg {Boolean} keepWhite emit whitespace (SPACE / NEWLINE) tokens **/
  public bool keepWhite = false;

  /** @cfg {Boolean} keepComments emit single-line and non-doc multi-line comments as tokens **/
  public bool keepComments = false;

  /** @cfg {Boolean} sepIdents separate identifiers (e.g. a.b.c into ['a', '.', 'b', '.', 'c']) **/
  public bool sepIdents = false;

  /** @cfg {String} filename name of the file being parsed; only used in error messages **/
  public string filename = "";

  /**
   * @cfg {Boolean} ignoreBadGrammer print a message instead of throwing when
   * something that might break compression is found
   **/
  public bool ignoreBadGrammer = false;


  // Current line number; tokenize() resets it to 1 and the readers advance it.
  int line = 0;
  99
 /**
  * Tokenize a stream.
  *
  * Each pass of the loop offers the current position to the readers in a
  * fixed order; the first one that accepts it wins.  If none accepts, one
  * element is consumed and emitted as an UNKNOWN_TOKEN.
  *
  * Example:
  *   var tr = new TokenReader() { keepComments = true, keepWhite = true };
  *   var tokens = tr.tokenize(stream);
  *
  * @param stream the text to scan; it is consumed up to EOF
  * @return the tokens that were found
  * @throws TokenReader_Error propagated from read_punc() when bad grammar is
  *         found and ignoreBadGrammer is not set
  */
 public TokenArray tokenize(TextStream stream) throws TokenReader_Error
 {
     this.line = 1;
     var tokens = new TokenArray();

     while (!stream.lookEOF()) {

         // Order matters: comments and strings must be tried before regex
         // and punctuation, since they all may start with the same character.
         if (this.read_mlcomment(stream, tokens)
             || this.read_slcomment(stream, tokens)
             || this.read_dbquote(stream, tokens)
             || this.read_snquote(stream, tokens)
             || this.read_regx(stream, tokens)
             || this.read_numb(stream, tokens)
             || this.read_punc(stream, tokens)
             || this.read_newline(stream, tokens)
             || this.read_space(stream, tokens)
             || this.read_word(stream, tokens)) {
             continue;
         }

         // No reader recognised the input: consume it so the loop advances.
         tokens.push(
             new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line)
         );
     }

     return tokens;
 }
 139
 /**
  * findPuncToken - scan backwards for a token whose data equals the given
  * text, skipping over anything enclosed in a ")" / "}" ... "(" / "{" group
  * that is entered on the way.
  *
  * @param tokens the array of tokens
  * @param data   token text to look for (e.g. "(")
  * @param n      index to start from; any value <= 0 means "start at the last token"
  * @return index of the matching token, or -1 when none is found
  */
 public int findPuncToken(TokenArray tokens, string data, int n)
 {
     var depth = 0;

     for (var pos = (n > 0) ? n : tokens.length - 1; pos > -1; pos--) {
         var text = tokens.get(pos).data;

         // a match only counts when we are not inside a nested group
         if (depth < 1 && text == data) {
             return pos;
         }

         if (text == ")" || text == "}") {
             // walking backwards, a closer opens a nested group
             depth++;
         } else if (depth > 0 && (text == "{" || text == "(")) {
             depth--;
         }
     }
     return -1;
 }
 /**
  * lastSym - find the nearest token before a position that is neither
  * whitespace ("WHIT") nor a comment ("COMM").
  *
  * @param tokens the array of tokens
  * @param n      search starts at index n - 1 and moves towards index 0
  * @return the token, or null when there is none
  */
 public Token? lastSym(TokenArray tokens, int n)
 {
     var i = n;
     while (--i >= 0) {
         var candidate = tokens.get(i);
         if (candidate.is("WHIT") || candidate.is("COMM")) {
             continue;
         }
         return candidate;
     }
     return null;
 }
 192
 193
 194
 /**
  * Reads a run of word characters and emits it as a keyword ("KEYW") or
  * as a "NAME" token.  With sepIdents set, a name containing "." is split
  * into NAME / DOT / NAME ... tokens.
  *
  * @param stream source being scanned
  * @param tokens receives the token(s)
  * @return true when a word was consumed, false when the stream is not at a word
  */
 public bool read_word (TextStream stream, TokenArray tokens)
 {
     var word = new StringBuilder();
     while (!stream.lookEOF() && Lang.isWordChar(stream.look().to_string())) {
         word.append(stream.next());
     }

     if (word.len == 0) {
         return false;
     }
     string found = word.str;

     var name = Lang.keyword(found);
     if (name != null) {
         // A grammar check for "return" directly after ")" used to live
         // here but was disabled; the backward scans that fed it computed
         // values nobody read, so they have been removed.
         tokens.push(new Token(found, "KEYW", name, this.line));
         return true;
     }

     if (!this.sepIdents || found.index_of(".") < 0) {
         tokens.push(new Token(found, "NAME", "NAME", this.line));
         return true;
     }

     // a.b.c  ->  NAME(a) DOT NAME(b) DOT NAME(c)
     var parts = found.split(".");
     var first = true;
     foreach (unowned string part in parts) {
         if (!first) {
             tokens.push(new Token(".", "PUNC", "DOT", this.line));
         }
         first = false;
         tokens.push(new Token(part, "NAME", "NAME", this.line));
     }
     return true;
 }
 257
 /**
  * Reads the longest run of characters that Lang.punc() recognises and
  * emits it as a "PUNC" token.
  *
  * A "}" or "]" whose previous significant token is "," is treated as bad
  * grammar: it is reported with print() when ignoreBadGrammer is set,
  * otherwise an error is thrown.
  *
  * @param stream source being scanned
  * @param tokens receives the token
  * @return true when punctuation was consumed, false otherwise
  * @throws TokenReader_Error.ArgumentError on a comma before "}" / "]"
  *         when ignoreBadGrammer is not set
  */
 public bool read_punc (TextStream stream, TokenArray tokens) throws TokenReader_Error
 {
     string found = "";

     // grow the match while the extended text is still known punctuation
     while (!stream.lookEOF()) {
         var candidate = found + stream.look().to_string();
         if (Lang.punc(candidate) == null) {
             break;
         }
         found += stream.next();
     }

     if (found == "") {
         return false;
     }

     var ls = tokens.lastSym();

     if ((found == "}" || found == "]") && ls != null && ls.data == ",") {
         var location = this.filename + ":" + this.line.to_string();
         // print() and error constructors are printf-style; pass the message
         // as an argument so a '%' in the file name is not read as a format.
         if (this.ignoreBadGrammer) {
             print("%s", "\n" + location + " Error - comma found before " + found);
         } else {
             throw new TokenReader_Error.ArgumentError(
                 "%s", location + "  comma found before " + found
             );
         }
     }

     tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
     return true;
 }
 299
 /**
  * Reads a run of space characters that are not newlines.  The run is
  * always consumed; a "WHIT"/"SPACE" token is emitted only when keepWhite
  * is set, and its text is a single " " when collapseWhite is set.
  *
  * @param stream source being scanned
  * @param tokens receives the token
  * @return true when whitespace was consumed, false otherwise
  */
 public bool read_space (TextStream stream, TokenArray tokens)
 {
     var run = new StringBuilder();

     while (!stream.lookEOF()) {
         var c = stream.look();
         if (!Lang.isSpaceC(c) || Lang.isNewlineC(c)) {
             break;
         }
         run.append(stream.next());
     }

     if (run.len == 0) {
         return false;
     }

     if (this.keepWhite) {
         string text = this.collapseWhite ? " " : run.str;
         tokens.push(new Token(text, "WHIT", "SPACE", this.line));
     }
     return true;
 }
 326
 /**
  * Reads a run of newline characters, advancing the line counter once per
  * character.  A "WHIT"/"NEWLINE" token is emitted only when keepWhite is
  * set; it carries the line number the run started on, and its text is a
  * single "\n" when collapseWhite is set.
  *
  * @param stream source being scanned
  * @param tokens receives the token
  * @return true when at least one newline was consumed, false otherwise
  */
 public bool read_newline (TextStream stream, TokenArray tokens)
 {
     var startLine = this.line;
     var run = "";

     while (!stream.lookEOF() && Lang.isNewlineC(stream.look())) {
         run += stream.next();
         this.line++;
     }

     if (run == "") {
         return false;
     }
     if (!this.keepWhite) {
         return true;
     }

     // Take the previous token off and put it back unless its name is "WHIT".
     // NOTE(review): every whitespace token built in this file passes "WHIT"
     // as the second constructor argument and SPACE/NEWLINE as the third; if
     // `name` holds the third, this test is always true and nothing is ever
     // dropped - confirm against Token before relying on it.
     var previous = tokens.pop();
     if (previous != null && previous.name != "WHIT") {
         tokens.push(previous);
     }

     var text = this.collapseWhite ? "\n" : run;
     tokens.push(new Token(text, "WHIT", "NEWLINE", startLine));
     return true;
 }
 362
 /**
  * Reads a slash-star comment.  The comment is always consumed (up to its
  * terminator, or to EOF when unterminated) and the line counter is
  * advanced for every "\n" inside it.
  *
  * A comment starting with slash-star-star, longer than four bytes and
  * whose fourth character is not "/" is emitted as "COMM"/"JSDOC" when
  * keepDocs is set, tagged with the line the comment ENDS on.  Any other
  * comment is emitted as "COMM"/"MULTI_LINE_COMM" when keepComments is
  * set, tagged with the line it STARTS on.
  *
  * @param stream source being scanned
  * @param tokens receives the token
  * @return true when a comment was consumed, false when the stream is not at one
  */
 public bool read_mlcomment (TextStream stream, TokenArray tokens)
 {
     if (stream.look() != '/' || stream.look(1) != '*') {
         return false;
     }

     var startLine = this.line;
     var text = new StringBuilder(stream.next(2));

     // The terminator must not share its '*' with the opener, so at least
     // four characters are required; without the length guard "/" + "*" + "/"
     // would be accepted as a complete comment.
     while (!stream.lookEOF()
            && !(text.len > 3 && stream.look(-1) == '/' && stream.look(-2) == '*')) {
         var c = stream.next();
         if (c == "\n") {
             this.line++;
         }
         text.append(c);
     }

     string found = text.str;

     // doc comments: slash-star-star followed by anything but "/"
     if (this.keepDocs && found.length > 4 && found.has_prefix("/**") && found[3] != '/') {
         tokens.push(new Token(found, "COMM", "JSDOC", this.line));
     } else if (this.keepComments) {
         tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", startLine));
     }
     return true;
 }
 395
 /**
  * Reads a single-line comment introduced by "//" or by the four
  * characters "<", "!", "-", "-".  The comment runs to the end of the line
  * and includes the first newline character when one is present.  The
  * line counter is advanced by one.  A "COMM"/"SINGLE_LINE_COMM" token is
  * emitted only when keepComments is set.
  *
  * @param stream source being scanned
  * @param tokens receives the token
  * @return true when a comment was consumed, false when the stream is not at one
  */
 public bool read_slcomment (TextStream stream, TokenArray tokens)
 {
     int markerLen;
     if (stream.look() == '/' && stream.look(1) == '/') {
         markerLen = 2;
     } else if (stream.look() == '<' && stream.look(1) == '!'
                && stream.look(2) == '-' && stream.look(3) == '-') {
         markerLen = 4;
     } else {
         return false;
     }

     var found = stream.next(markerLen);
     var startLine = this.line;

     // everything up to, but not including, the line terminator
     while (!stream.lookEOF() && !Lang.isNewline(stream.look().to_string())) {
         found += stream.next();
     }
     // then the terminator itself, unless the comment ended at EOF
     if (!stream.lookEOF()) {
         found += stream.next();
     }

     if (this.keepComments) {
         tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", startLine));
     }
     this.line++;
     return true;
 }
 426
 /**
  * Reads a double-quoted string literal and emits it as
  * "STRN"/"DOUBLE_QUOTE", tagged with the line the literal starts on.
  *
  * A backslash followed by a newline (line continuation) is stored as
  * backslash + "\n", and every newline directly following it is swallowed.
  * Any other backslash is copied together with the character after it.
  *
  * @param stream source being scanned
  * @param tokens receives the token
  * @return true when a terminated string was read; false when the stream
  *         is not at a double quote or EOF is reached before the closing
  *         quote (the input has been consumed in that case)
  */
 public bool read_dbquote (TextStream stream, TokenArray tokens)
 {
     if (stream.look() != '"') {
         return false;
     }

     var startLine = this.line;
     var str = stream.next();

     while (!stream.lookEOF()) {
         if (stream.look() == '\\') {
             if (Lang.isNewline(stream.look(1).to_string())) {
                 // The first next() eats the backslash, the following ones
                 // eat the newline run.  Count each "\n" so that tokens
                 // after the string keep correct line numbers.
                 do {
                     if (stream.next() == "\n") {
                         this.line++;
                     }
                 } while (!stream.lookEOF() && Lang.isNewline(stream.look().to_string()));
                 str += "\\\n";
             } else {
                 // ordinary escape sequence: keep both characters verbatim
                 str += stream.next(2);
             }
             continue;
         }

         if (stream.look() == '"') {
             str += stream.next();
             tokens.push(new Token(str, "STRN", "DOUBLE_QUOTE", startLine));
             return true;
         }

         str += stream.next();
     }
     return false;
 }
 462
 /**
  * Reads a single-quoted string literal and emits it as
  * "STRN"/"SINGLE_QUOTE".  A backslash is copied together with the
  * character that follows it.
  *
  * @param stream source being scanned
  * @param tokens receives the token
  * @return true when a terminated string was read; false when the stream
  *         is not at a single quote or EOF is reached before the closing
  *         quote (the input has been consumed in that case)
  */
 public bool read_snquote (TextStream stream, TokenArray tokens)
 {
     if (stream.look() != '\'') {
         return false;
     }

     // opening quote
     var literal = new StringBuilder(stream.next());

     while (!stream.lookEOF()) {
         var c = stream.look();

         if (c == '\\') {
             // escape sequence: take the backslash and the escaped character
             literal.append(stream.next(2));
             continue;
         }

         literal.append(stream.next());

         if (c == '\'') {
             // that was the closing quote
             tokens.push(new Token(literal.str, "STRN", "SINGLE_QUOTE", this.line));
             return true;
         }
     }
     return false;
 }
 489
 490
 /**
  * Reads a numeric literal.  Input starting with "0x" is handed to
  * read_hex(); otherwise the longest text accepted by Lang.isNumber() is
  * consumed and emitted as "NUMB"/"OCTAL" when it starts with 0 followed
  * by a digit 0-7, or as "NUMB"/"DECIMAL" in every other case.
  *
  * @param stream source being scanned
  * @param tokens receives the token
  * @return true when a number was consumed, false otherwise
  */
 public bool read_numb (TextStream stream, TokenArray tokens)
 {
     if (stream.look() == '0' && stream.look(1) == 'x') {
         return this.read_hex(stream, tokens);
     }

     var digits = "";
     while (!stream.lookEOF()) {
         if (!Lang.isNumber(digits + stream.look().to_string())) {
             break;
         }
         digits += stream.next();
     }

     if (digits == "") {
         return false;
     }

     var kind = GLib.Regex.match_simple("^0[0-7]", digits) ? "OCTAL" : "DECIMAL";
     tokens.push(new Token(digits, "NUMB", kind, this.line));
     return true;
 }
 517
 /**
  * Reads a hexadecimal literal.  The caller has already checked that the
  * stream is positioned at "0x"; those two characters are consumed
  * unconditionally.  Characters are then appended until the text is
  * accepted by Lang.isHexDec() and one more character would make it
  * invalid, or until EOF.
  *
  * @param stream source being scanned
  * @param tokens receives the "NUMB"/"HEX_DEC" token
  * @return true when a valid literal was read (including one that ends
  *         exactly at EOF); false when EOF is reached without one
  */
 public bool read_hex (TextStream stream, TokenArray tokens)
 {
     var found = stream.next(2);

     while (!stream.lookEOF()
            && !(Lang.isHexDec(found) && !Lang.isHexDec(found + stream.look().to_string()))) {
         found += stream.next();
     }

     // Reached either because the next character cannot extend the literal
     // or because the input ended.  In both cases a valid literal must be
     // emitted; previously one that ran up to EOF was silently dropped.
     if (Lang.isHexDec(found)) {
         tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
         return true;
     }
     return false;
 }
 536
 /**
  * Reads a regular-expression literal and emits it as "REGX"/"REGX".
  *
  * A "/" only starts a regex when the previous significant token is
  * absent or is not a NUMB, NAME, RIGHT_PAREN or RIGHT_BRACKET.  The
  * literal ends at the first "/" that is neither escaped with a backslash
  * nor inside a [...] character class; any directly following g, m or i
  * flag characters are included.
  *
  * @param stream source being scanned
  * @param tokens receives the token
  * @return true when a terminated regex was read; false when the stream
  *         is not at a regex or EOF is reached before the closing "/"
  *         (the input has been consumed in that case)
  */
 public bool read_regx (TextStream stream, TokenArray tokens)
 {
     if (stream.look() != '/') {
         return false;
     }

     var last = tokens.lastSym();
     if (last != null
         && (last.is("NUMB") || last.is("NAME")
             || last.is("RIGHT_PAREN") || last.is("RIGHT_BRACKET"))) {
         // after a value, "/" is a division operator, not a regex
         return false;
     }

     var regex = stream.next();
     var inClass = false;

     while (!stream.lookEOF()) {
         var c = stream.look();

         if (c == '\\') { // escape sequence
             regex += stream.next(2);
             continue;
         }

         // An unescaped "/" inside [...] is an ordinary character and must
         // not terminate the literal.
         if (c == '[') {
             inClass = true;
         } else if (c == ']') {
             inClass = false;
         } else if (c == '/' && !inClass) {
             regex += stream.next();

             // trailing flags; stop at EOF instead of peeking past it
             while (!stream.lookEOF()) {
                 var flag = stream.look();
                 if (flag != 'g' && flag != 'm' && flag != 'i') {
                     break;
                 }
                 regex += stream.next();
             }

             tokens.push(new Token(regex, "REGX", "REGX", this.line));
             return true;
         }

         regex += stream.next();
     }
     // error: unterminated regex
     return false;
 }
 582     }
 583 }