git.roojs.org Git - gnome.introspection-doc-generator/blob - JSDOC/TokenReader.vala

   1 //<script type="text/javascript">
   2
   3
   4 // test code
   5
   6 //const Token   = imports.Token.Token;
   7 //const Lang    = imports.Lang.Lang;
   8
   9 /**
  10         @class Search a {@link JSDOC.TextStream} for language tokens.
  11 */
  12
  13 namespace JSDOC {
  14
  15     public class TokenArray: Object {
  16
  17         public Gee.ArrayList<Token> tokens;
  18         public int length {
  19             get { return this.tokens.size; }
  20         }
  21
  22         public TokenArray()
  23         {
  24             this.tokens = new Gee.ArrayList<Token>();
  25         }
  26
  27         public Token? last() {
  28             if (this.tokens.size > 0) {
  29                 return this.tokens.get(this.tokens.size-1);
  30             }
  31             return null;
  32         }
  33         public Token? lastSym () {
  34             for (var i = this.tokens.size-1; i >= 0; i--) {
  35                 if (!(this.tokens.get(i).is("WHIT") || this.tokens.get(i).is("COMM")))  {
  36                     return this.tokens.get(i);
  37                 }
  38             }
  39             return null;
  40         }
  41         public void push (Token t) {
  42             this.tokens.add(t);
  43         }
  44         public Token? pop ()
  45         {
  46             if (this.tokens.size > 0) {
  47                 return this.tokens.remove_at(this.tokens.size-1);
  48             }
  49             return null;
  50         }
  51
  52             public new Token get(int i) {
  53             return this.tokens.get(i);
  54         }
  55         public void dump()
  56         {
  57                 foreach(var token in this.tokens) {
  58                         print(token.asString() +"\n");
  59                 }
  60         }
  61
  62     }
  63
  64     public errordomain TokenReader_Error {
  65             ArgumentError
  66     }
  67
  68
  69     public class TokenReader : Object
  70     {
  71
  72
  73
  74         /*
  75          *
  76          * I wonder if this will accept the prop: value, prop2 :value construxtor if we do not define one...
  77          */
  78
  79         /** @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token **/
  80         public bool collapseWhite = false; // only reduces white space...
  81         /** @cfg {Boolean} keepDocs keep JSDOC comments **/
  82         public bool keepDocs = true;
  83         /** @cfg {Boolean} keepWhite keep White space **/
  84         public bool keepWhite = false;
  85         /** @cfg {Boolean} keepComments  keep all comments **/
  86         public bool keepComments = false;
  87         /** @cfg {Boolean} sepIdents seperate identifiers (eg. a.b.c into ['a', '.', 'b', '.', 'c'] ) **/
  88         public bool sepIdents = false;
  89         /** @cfg {String} filename name of file being parsed. **/
  90         public string filename = "";
  91         /** @config {Boolean} ignoreBadGrammer do not throw errors if we find stuff that might break compression **/
  92         public bool ignoreBadGrammer = false;
  93
  94
  95         int line = 0;
  96
  97         /**
  98          * tokenize a stream
  99          * @return {Array} of tokens
 100          *
 101          * ts = new TextStream(File.read(str));
 102          * tr = TokenReader({ keepComments : true, keepWhite : true });
 103          * tr.tokenize(ts)
 104          *
 105          */
 106         public TokenArray tokenize(TextStream stream)
 107         {
 108             this.line =1;
 109             var tokens = new TokenArray();
 110
 111
 112             while (!stream.lookEOF()) {
 113
 114
 115                 if (this.read_mlcomment(stream, tokens)) continue;
 116                 if (this.read_slcomment(stream, tokens)) continue;
 117                 if (this.read_dbquote(stream, tokens))   continue;
 118                 if (this.read_snquote(stream, tokens))   continue;
 119                 if (this.read_regx(stream, tokens))      continue;
 120                 if (this.read_numb(stream, tokens))      continue;
 121                 if (this.read_punc(stream, tokens))      continue;
 122                 if (this.read_newline(stream, tokens))   continue;
 123                 if (this.read_space(stream, tokens))     continue;
 124                 if (this.read_word(stream, tokens))      continue;
 125
 126                 // if execution reaches here then an error has happened
 127                 tokens.push(
 128                         new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line)
 129                 );
 130             }
 131
 132
 133
 134             return tokens;
 135         }
 136
 137         /**
 138          * findPuncToken - find the id of a token (previous to current)
 139          * need to back check syntax..
 140          *
 141          * @arg {Array} tokens the array of tokens.
 142          * @arg {String} token data (eg. '(')
 143          * @arg {Number} offset where to start reading from
 144          * @return {Number} position of token
 145          */
 146         public int findPuncToken(TokenArray tokens, string data, int n)
 147         {
 148             n = n > 0 ? n :  tokens.length -1;
 149             var stack = 0;
 150             while (n > -1) {
 151
 152                 if (stack < 1 && tokens.get(n).data == data) {
 153                     return n;
 154                 }
 155
 156                 if (tokens.get(n).data  == ")" || tokens.get(n).data  == "}") {
 157                     stack++;
 158                     n--;
 159                     continue;
 160                 }
 161                 if (stack > 0 && (tokens.get(n).data  == "{" || tokens.get(n).data  == "(")) {
 162                     stack--;
 163                     n--;
 164                     continue;
 165                 }
 166
 167
 168                 n--;
 169             }
 170             return -1;
 171         }
 172         /**
 173          * lastSym - find the last token symbol
 174          * need to back check syntax..
 175          *
 176          * @arg {Array} tokens the array of tokens.
 177          * @arg {Number} offset where to start..
 178          * @return {Token} the token
 179          */
 180         public Token? lastSym(TokenArray tokens, int n)
 181         {
 182             for (var i = n-1; i >= 0; i--) {
 183                 if (!(tokens.get(i).is("WHIT") || tokens.get(i).is("COMM"))) {
 184                     return tokens.get(i);
 185                 }
 186             }
 187             return null;
 188         }
 189
 190
 191
 192         /**
 193             @returns {Boolean} Was the token found?
 194          */
 195         public bool read_word (TextStream stream, TokenArray tokens)
 196         {
 197             string found = "";
 198             while (!stream.lookEOF() && Lang.isWordChar(stream.look().to_string())) {
 199                 found += stream.next();
 200             }
 201
 202             if (found == "") {
 203                 return false;
 204             }
 205
 206             var name = Lang.keyword(found);
 207             if (name != null) {
 208
 209                 // look for "()return" ?? why ???
 210                 var ls = tokens.lastSym();
 211                 if (found == "return" && ls != null && ls.data == ")") {
 212                     //Seed.print('@' + tokens.length);
 213                     var n = this.findPuncToken(tokens, ")", 0);
 214                     //Seed.print(')@' + n);
 215                     n = this.findPuncToken(tokens, "(", n-1);
 216                     //Seed.print('(@' + n);
 217
 218                     //var lt = this.lastSym(tokens, n);
 219                     /*
 220                     //print(JSON.stringify(lt));
 221                     if (lt.type != "KEYW" || ["IF", 'WHILE'].indexOf(lt.name) < -1) {
 222                         if (!this.ignoreBadGrammer) {
 223                             throw new TokenReader_Error.ArgumentError(
 224                                 this.filename + ":" + this.line + " Error - return found after )"
 225                             );
 226                         }
 227                     }
 228
 229                     */
 230
 231                 }
 232
 233                 tokens.push(new Token(found, "KEYW", name, this.line));
 234                 return true;
 235             }
 236
 237             if (!this.sepIdents || found.index_of(".") < 0 ) {
 238                 tokens.push(new Token(found, "NAME", "NAME", this.line));
 239                 return true;
 240             }
 241             var n = found.split(".");
 242             var p = false;
 243             foreach (unowned string nm in n) {
 244                 if (p) {
 245                     tokens.push(new Token(".", "PUNC", "DOT", this.line));
 246                 }
 247                 p=true;
 248                 tokens.push(new Token(nm, "NAME", "NAME", this.line));
 249             }
 250             return true;
 251
 252
 253         }
 254
 255         /**
 256             @returns {Boolean} Was the token found?
 257          */
 258         public bool read_punc (TextStream stream, TokenArray tokens) throws TokenReader_Error
 259         {
 260             string found = "";
 261
 262             while (!stream.lookEOF()) {
 263                         var ns = stream.look().to_string();
 264
 265                     if (null == Lang.punc(found + ns )) {
 266                                 break;
 267                         }
 268                 found += stream.next();
 269             }
 270
 271
 272             if (found == "") {
 273                 return false;
 274             }
 275
 276             var ls = tokens.lastSym();
 277
 278             if ((found == "}" || found == "]") && ls != null && ls.data == ",") {
 279                 //print("Error - comma found before " + found);
 280                 //print(JSON.stringify(tokens.lastSym(), null,4));
 281                 if (this.ignoreBadGrammer) {
 282                     print("\n" + this.filename + ":" + this.line.to_string() + " Error - comma found before " + found);
 283                 } else {
 284                     throw new TokenReader_Error.ArgumentError(
 285                                 this.filename + ":" + this.line.to_string() + "  comma found before " + found
 286
 287                     );
 288
 289                 }
 290             }
 291
 292             tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
 293             return true;
 294
 295         }
 296
 297         /**
 298             @returns {Boolean} Was the token found?
 299          */
 300         public bool read_space  (TextStream stream, TokenArray tokens)
 301         {
 302             var found = "";
 303
 304             while (!stream.lookEOF() && Lang.isSpaceC(  stream.look()) && !Lang.isNewlineC(stream.look())) {
 305                 found += stream.next();
 306             }
 307
 308             if (found == "") {
 309                 return false;
 310             }
 311             //print("WHITE = " + JSON.stringify(found));
 312
 313
 314             if (this.collapseWhite) {
 315                 found = " "; // this might work better if it was a '\n' ???
 316             }
 317             if (this.keepWhite) {
 318                 tokens.push(new Token(found, "WHIT", "SPACE", this.line));
 319             }
 320             return true;
 321
 322         }
 323
 324         /**
 325             @returns {Boolean} Was the token found?
 326          */
 327         public bool read_newline  (TextStream stream, TokenArray tokens)
 328         {
 329             var found = "";
 330             var line = this.line;
 331             while (!stream.lookEOF() && Lang.isNewlineC(stream.look())) {
 332                 this.line++;
 333                 found += stream.next();
 334             }
 335
 336             if (found == "") {
 337                 return false;
 338             }
 339
 340             // if we found a new line, then we could check if previous character was a ';' - if so we can drop it.
 341             // otherwise generally keep it.. in which case it should reduce our issue with stripping new lines..
 342
 343
 344             //this.line++;
 345             if (this.collapseWhite) {
 346                 found = "\n"; // reduces multiple line breaks into a single one...
 347             }
 348
 349             if (this.keepWhite) {
 350                 var last = tokens.pop();
 351                 if (last != null && last.name != "WHIT") {
 352                     tokens.push(last);
 353                 }
 354                 // replaces last new line...
 355                 tokens.push(new Token(found, "WHIT", "NEWLINE", line));
 356             }
 357             return true;
 358         }
 359
 360         /**
 361             @returns {Boolean} Was the token found?
 362          */
 363         public bool read_mlcomment  (TextStream stream, TokenArray tokens)
 364         {
 365             if (stream.look() != '/') {
 366                 return false;
 367             }
 368             if (stream.look(1) != '*') {
 369                 return false;
 370             }
 371             var found = stream.next(2);
 372             string  c = "";
 373             var line = this.line;
 374             while (!stream.lookEOF() && !(stream.look(-1) == '/' && stream.look(-2) == '*')) {
 375                 c = stream.next();
 376                 if (c == "\n") {
 377                     this.line++;
 378                 }
 379                 found += c;
 380             }
 381
 382             // to start doclet we allow /** or /*** but not /**/ or /****
 383             //if (found.length /^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) {
 384             if (this.keepDocs && found.length > 4 && found.index_of("/**") == 0 && found[3] != '/') {
 385                 tokens.push(new Token(found, "COMM", "JSDOC", this.line));
 386             } else if (this.keepComments) {
 387                 tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
 388             }
 389             return true;
 390
 391         }
 392
 393         /**
 394             @returns {Boolean} Was the token found?
 395          */
 396          public bool read_slcomment  (TextStream stream, TokenArray tokens)
 397          {
 398             var found = "";
 399             if (
 400                 (stream.look() == '/' && stream.look(1) == '/' && (""!=(found=stream.next(2))))
 401                 ||
 402                 (stream.look() == '<' && stream.look(1) == '!' && stream.look(2) == '-' && stream.look(3) == '-' && (""!=(found=stream.next(4))))
 403             ) {
 404                 var line = this.line;
 405                 while (!stream.lookEOF()) {
 406                                         //print(stream.look().to_string());
 407                         if ( Lang.isNewline(stream.look().to_string())) {
 408                                 break;
 409                         }
 410                     found += stream.next();
 411                 }
 412                 if (!stream.lookEOF()) { // lookinng for end  of line... if we got it, then do not eat the character..
 413                     found += stream.next();
 414                 }
 415                 if (this.keepComments) {
 416                     tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
 417                 }
 418                 this.line++;
 419                 return true;
 420             }
 421             return false;
 422         }
 423
 424         /**
 425             @returns {Boolean} Was the token found?
 426          */
 427         public bool read_dbquote  (TextStream stream, TokenArray tokens)
 428         {
 429             if (stream.look() != '"') {
 430                 return false;
 431             }
 432                 // find terminator
 433             var str = stream.next();
 434
 435             while (!stream.lookEOF()) {
 436                 if (stream.look() == '\\') {
 437                     if (Lang.isNewline(stream.look(1).to_string())) {
 438                         do {
 439                             stream.next();
 440                         } while (!stream.lookEOF() && Lang.isNewline(stream.look().to_string()));
 441                         str += "\\\n";
 442                     }
 443                     else {
 444                         str += stream.next(2);
 445                     }
 446                     continue;
 447                 }
 448                 if (stream.look() == '"') {
 449                     str += stream.next();
 450                     tokens.push(new Token(str, "STRN", "DOUBLE_QUOTE", this.line));
 451                     return true;
 452                 }
 453
 454                 str += stream.next();
 455
 456             }
 457             return false;
 458         }
 459
 460         /**
 461             @returns {Boolean} Was the token found?
 462          */
 463         public bool read_snquote  (TextStream stream, TokenArray tokens)
 464         {
 465             if (stream.look() != '\'') {
 466                 return false;
 467             }
 468             // find terminator
 469             var str = stream.next();
 470
 471             while (!stream.lookEOF()) {
 472                 if (stream.look() == '\\') { // escape sequence
 473                     str += stream.next(2);
 474                     continue;
 475                 }
 476                 if (stream.look() == '\'') {
 477                     str += stream.next();
 478                     tokens.push(new Token(str, "STRN", "SINGLE_QUOTE", this.line));
 479                     return true;
 480                 }
 481                 str += stream.next();
 482
 483             }
 484             return false;
 485         }
 486
 487
 488         /**
 489             @returns {Boolean} Was the token found?
 490          */
 491         public bool read_numb  (TextStream stream, TokenArray tokens)
 492         {
 493             if (stream.look() == '0' && stream.look(1) == 'x') {
 494                 return this.read_hex(stream, tokens);
 495             }
 496
 497             var found = "";
 498
 499             while (!stream.lookEOF() && Lang.isNumber(found+stream.look().to_string())){
 500                 found += stream.next();
 501             }
 502
 503             if (found == "") {
 504                 return false;
 505             }
 506             if (GLib.Regex.match_simple("^0[0-7]", found)) {
 507                 tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
 508                 return true;
 509             }
 510             tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
 511             return true;
 512
 513         }
 514
 515         /**
 516             @returns {Boolean} Was the token found?
 517          */
 518         public bool read_hex  (TextStream stream, TokenArray tokens)
 519         {
 520             var found = stream.next(2);
 521
 522             while (!stream.lookEOF()) {
 523                 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look().to_string())) { // done
 524                     tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
 525                     return true;
 526                 }
 527
 528                 found += stream.next();
 529
 530             }
 531             return false;
 532         }
 533
 534         /**
 535             @returns {Boolean} Was the token found?
 536          */
 537         public bool read_regx (TextStream stream, TokenArray tokens)
 538         {
 539
 540             if (stream.look() != '/') {
 541                 return false;
 542             }
 543             var  last = tokens.lastSym();
 544             if (
 545                 (last == null)
 546                 ||
 547                 (
 548                        !last.is("NUMB")   // stuff that can not appear before a regex..
 549                     && !last.is("NAME")
 550                     && !last.is("RIGHT_PAREN")
 551                     && !last.is("RIGHT_BRACKET")
 552                 )
 553             )  {
 554                 var regex = stream.next();
 555
 556                 while (!stream.lookEOF()) {
 557                     if (stream.look() == '\\') { // escape sequence
 558                         regex += stream.next(2);
 559                         continue;
 560                     }
 561                     if (stream.look() == '/') {
 562                         regex += stream.next();
 563
 564                         while (GLib.Regex.match_simple("[gmi]", stream.look().to_string())) {
 565                             regex += stream.next();
 566                         }
 567
 568                         tokens.push(new Token(regex, "REGX", "REGX", this.line));
 569                         return true;
 570                     }
 571
 572                     regex += stream.next();
 573
 574                 }
 575                 // error: unterminated regex
 576             }
 577             return false;
 578         }
 579     }
 580 }