JSDOC/TokenReader.vala
//<script type="text/javascript">


// test code

void main() {
        var lc = new JSDOC.Lang_Class();
        var tr = new JSDOC.TokenReader();
        tr.keepDocs = true;
        tr.keepWhite = true;
        tr.keepComments = true;
        tr.sepIdents = true;
        tr.collapseWhite = false;
        tr.filename = "test";
        string str;
        FileUtils.get_contents("/home/alan/gitlive/gnome.introspection-doc-generator/JSDOC/Walker2.js", out str);

        var toks = tr.tokenize(new JSDOC.TextStream(str)); // don't merge xxx + . + yyyy etc.
        toks.dump();
}

//const Token   = imports.Token.Token;
//const Lang    = imports.Lang.Lang;

/**
        @class Search a {@link JSDOC.TextStream} for language tokens.
*/

namespace JSDOC {

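    /**
     * TokenArray - a small wrapper around Gee.ArrayList<Token> providing the
     * stack-style helpers (push, pop, last, lastSym) that the tokenizer uses
     * while scanning a stream.
     */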
    public class TokenArray: Object {

        public Gee.ArrayList<Token> tokens;
        public int length {
            get { return this.tokens.size; }
        }

        public TokenArray()
        {
            this.tokens = new Gee.ArrayList<Token>();
        }

        public Token? last() {
            if (this.tokens.size > 0) {
                return this.tokens.get(this.tokens.size - 1);
            }
            return null;
        }
        public Token? lastSym () {
            for (var i = this.tokens.size - 1; i >= 0; i--) {
                if (!(this.tokens.get(i).is("WHIT") || this.tokens.get(i).is("COMM"))) {
                    return this.tokens.get(i);
                }
            }
            return null;
        }
        public void push (Token t) {
            this.tokens.add(t);
        }
        public Token? pop ()
        {
            if (this.tokens.size > 0) {
                return this.tokens.remove_at(this.tokens.size - 1);
            }
            return null;
        }

        public new Token get(int i) {
            return this.tokens.get(i);
        }
        public void dump()
        {
            foreach (var token in this.tokens) {
                print(token.asString() + "\n");
            }
        }

    }

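    /**
     * Error thrown by TokenReader when it finds grammar that might break
     * compression (see read_punc), unless ignoreBadGrammer is set.
     */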
    public errordomain TokenReader_Error {
        ArgumentError
    }

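    /**
     * TokenReader - walks a TextStream and produces a TokenArray of Token
     * objects; see tokenize(). The public flags below control how much
     * whitespace and comment information is kept in the output and whether
     * dotted identifiers are split up.
     */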
    public class TokenReader : Object
    {

        /*
         * I wonder if this will accept the prop: value, prop2: value constructor
         * if we do not define one...
         */

        /** @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token **/
        public bool collapseWhite = false; // only reduces white space...
        /** @cfg {Boolean} keepDocs keep JSDOC comments **/
        public bool keepDocs = true;
        /** @cfg {Boolean} keepWhite keep white space **/
        public bool keepWhite = false;
        /** @cfg {Boolean} keepComments keep all comments **/
        public bool keepComments = false;
        /** @cfg {Boolean} sepIdents separate identifiers (eg. a.b.c into ['a', '.', 'b', '.', 'c'] ) **/
        public bool sepIdents = false;
        /** @cfg {String} filename name of the file being parsed. **/
        public string filename = "";
        /** @cfg {Boolean} ignoreBadGrammer do not throw errors if we find stuff that might break compression **/
        public bool ignoreBadGrammer = false;

        int line = 0;

        /**
         * tokenize a stream
         * @return {TokenArray} of tokens
         *
         * var tr = new TokenReader();
         * tr.keepComments = true;
         * tr.keepWhite = true;
         * var toks = tr.tokenize(new TextStream(str));
         *
         */
        public TokenArray tokenize(TextStream stream)
        {
            this.line = 1;
            var tokens = new TokenArray();

            while (!stream.lookEOF()) {

                if (this.read_mlcomment(stream, tokens)) continue;
                if (this.read_slcomment(stream, tokens)) continue;
                if (this.read_dbquote(stream, tokens))   continue;
                if (this.read_snquote(stream, tokens))   continue;
                if (this.read_regx(stream, tokens))      continue;
                if (this.read_numb(stream, tokens))      continue;
                if (this.read_punc(stream, tokens))      continue;
                if (this.read_newline(stream, tokens))   continue;
                if (this.read_space(stream, tokens))     continue;
                if (this.read_word(stream, tokens))      continue;

                // if execution reaches here then an error has happened
                tokens.push(
                    new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line)
                );
            }

            return tokens;
        }

        /**
         * findPuncToken - find the index of a token (previous to the current one)
         * used to back-check syntax..
         *
         * @arg {TokenArray} tokens the array of tokens.
         * @arg {String} data the token data (eg. '(')
         * @arg {Number} n offset where to start reading from
         * @return {Number} position of the token, or -1 if not found
         */
        public int findPuncToken(TokenArray tokens, string data, int n)
        {
            n = n > 0 ? n : tokens.length - 1;
            var stack = 0;
            while (n > -1) {

                if (stack < 1 && tokens.get(n).data == data) {
                    return n;
                }

                if (tokens.get(n).data == ")" || tokens.get(n).data == "}") {
                    stack++;
                    n--;
                    continue;
                }
                if (stack > 0 && (tokens.get(n).data == "{" || tokens.get(n).data == "(")) {
                    stack--;
                    n--;
                    continue;
                }

                n--;
            }
            return -1;
        }
        /**
         * lastSym - find the last token symbol (skipping whitespace and comments)
         * used to back-check syntax..
         *
         * @arg {TokenArray} tokens the array of tokens.
         * @arg {Number} n offset where to start..
         * @return {Token} the token, or null if none was found
         */
        public Token? lastSym(TokenArray tokens, int n)
        {
            for (var i = n - 1; i >= 0; i--) {
                if (!(tokens.get(i).is("WHIT") || tokens.get(i).is("COMM"))) {
                    return tokens.get(i);
                }
            }
            return null;
        }


        /**
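            read_word - read a run of word characters and push either a KEYW
            token (for keywords) or NAME token(s); dotted identifiers are split
            into NAME / DOT / NAME tokens when sepIdents is set.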
            @returns {Boolean} Was the token found?
         */
        public bool read_word (TextStream stream, TokenArray tokens)
        {
            string found = "";
            while (!stream.lookEOF() && Lang.isWordChar(stream.look().to_string())) {
                found += stream.next();
            }

            if (found == "") {
                return false;
            }

            var name = Lang.keyword(found);
            if (name != null) {

                // look for "()return" ?? why ???
                var ls = tokens.lastSym();
                if (found == "return" && ls != null && ls.data == ")") {
                    //Seed.print('@' + tokens.length);
                    var n = this.findPuncToken(tokens, ")", 0);
                    //Seed.print(')@' + n);
                    n = this.findPuncToken(tokens, "(", n - 1);
                    //Seed.print('(@' + n);

                    //var lt = this.lastSym(tokens, n);
                    /*
                    //print(JSON.stringify(lt));
                    if (lt.type != "KEYW" || ["IF", 'WHILE'].indexOf(lt.name) < -1) {
                        if (!this.ignoreBadGrammer) {
                            throw new TokenReader_Error.ArgumentError(
                                this.filename + ":" + this.line + " Error - return found after )"
                            );
                        }
                    }

                    */

                }

                tokens.push(new Token(found, "KEYW", name, this.line));
                return true;
            }

            if (!this.sepIdents || found.index_of(".") < 0) {
                tokens.push(new Token(found, "NAME", "NAME", this.line));
                return true;
            }
            var n = found.split(".");
            var p = false;
            foreach (unowned string nm in n) {
                if (p) {
                    tokens.push(new Token(".", "PUNC", "DOT", this.line));
                }
                p = true;
                tokens.push(new Token(nm, "NAME", "NAME", this.line));
            }
            return true;
        }

        /**
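            read_punc - greedily read the longest punctuation sequence known to
            Lang.punc() and push it as a PUNC token; a comma found immediately
            before '}' or ']' is printed as a warning when ignoreBadGrammer is
            set, otherwise a TokenReader_Error is thrown.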
            @returns {Boolean} Was the token found?
         */
        public bool read_punc (TextStream stream, TokenArray tokens) throws TokenReader_Error
        {
            string found = "";

            while (!stream.lookEOF()) {
                var ns = stream.look().to_string();

                if (null == Lang.punc(found + ns)) {
                    break;
                }
                found += stream.next();
            }

            if (found == "") {
                return false;
            }

            var ls = tokens.lastSym();

            if ((found == "}" || found == "]") && ls != null && ls.data == ",") {
                //print("Error - comma found before " + found);
                //print(JSON.stringify(tokens.lastSym(), null,4));
                if (this.ignoreBadGrammer) {
                    print("\n" + this.filename + ":" + this.line.to_string() + " Error - comma found before " + found);
                } else {
                    throw new TokenReader_Error.ArgumentError(
                        this.filename + ":" + this.line.to_string() + " comma found before " + found
                    );
                }
            }

            tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
            return true;
        }

        /**
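            read_space - read a run of non-newline whitespace; collapsed to a
            single space when collapseWhite is set, and only pushed as a
            WHIT/SPACE token when keepWhite is set.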
            @returns {Boolean} Was the token found?
         */
        public bool read_space (TextStream stream, TokenArray tokens)
        {
            var found = "";

            while (!stream.lookEOF() && Lang.isSpaceC(stream.look()) && !Lang.isNewlineC(stream.look())) {
                found += stream.next();
            }

            if (found == "") {
                return false;
            }
            //print("WHITE = " + JSON.stringify(found));

            if (this.collapseWhite) {
                found = " "; // this might work better if it was a '\n' ???
            }
            if (this.keepWhite) {
                tokens.push(new Token(found, "WHIT", "SPACE", this.line));
            }
            return true;
        }

        /**
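            read_newline - read a run of newline characters, keeping the line
            counter up to date; collapsed to a single '\n' when collapseWhite
            is set, and pushed as a WHIT/NEWLINE token when keepWhite is set.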
            @returns {Boolean} Was the token found?
         */
        public bool read_newline (TextStream stream, TokenArray tokens)
        {
            var found = "";
            var line = this.line;
            while (!stream.lookEOF() && Lang.isNewlineC(stream.look())) {
                this.line++;
                found += stream.next();
            }

            if (found == "") {
                return false;
            }

            // if we found a new line, we could check whether the previous character was a ';'
            // - if so we can drop it. Otherwise we generally keep it, which should reduce
            // our issues with stripping new lines.

            //this.line++;
            if (this.collapseWhite) {
                found = "\n"; // reduces multiple line breaks into a single one...
            }

            if (this.keepWhite) {
                var last = tokens.pop();
                if (last != null && last.name != "WHIT") {
                    tokens.push(last);
                }
                // replaces the last new line...
                tokens.push(new Token(found, "WHIT", "NEWLINE", line));
            }
            return true;
        }

        /**
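            read_mlcomment - read a multi-line comment; '/**' doclets are pushed
            as COMM/JSDOC tokens when keepDocs is set, other block comments are
            pushed as COMM/MULTI_LINE_COMM only when keepComments is set.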
            @returns {Boolean} Was the token found?
         */
        public bool read_mlcomment (TextStream stream, TokenArray tokens)
        {
            if (stream.look() != '/') {
                return false;
            }
            if (stream.look(1) != '*') {
                return false;
            }
            var found = stream.next(2);
            string c = "";
            var line = this.line;
            while (!stream.lookEOF() && !(stream.look(-1) == '/' && stream.look(-2) == '*')) {
                c = stream.next();
                if (c == "\n") {
                    this.line++;
                }
                found += c;
            }

            // to start doclet we allow /** or /*** but not /**/ or /****
            //if (found.length /^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) {
            if (this.keepDocs && found.length > 4 && found.index_of("/**") == 0 && found[3] != '/') {
                tokens.push(new Token(found, "COMM", "JSDOC", this.line));
            } else if (this.keepComments) {
                tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
            }
            return true;
        }

        /**
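            read_slcomment - read a single-line comment ('//' or the HTML-style
            '<!--') up to the end of the line and push it as a
            COMM/SINGLE_LINE_COMM token when keepComments is set.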
            @returns {Boolean} Was the token found?
         */
        public bool read_slcomment (TextStream stream, TokenArray tokens)
        {
            var found = "";
            if (
                (stream.look() == '/' && stream.look(1) == '/' && ("" != (found = stream.next(2))))
                ||
                (stream.look() == '<' && stream.look(1) == '!' && stream.look(2) == '-' && stream.look(3) == '-' && ("" != (found = stream.next(4))))
            ) {
                var line = this.line;
                while (!stream.lookEOF()) {
                    //print(stream.look().to_string());
                    if (Lang.isNewline(stream.look().to_string())) {
                        break;
                    }
                    found += stream.next();
                }
                if (!stream.lookEOF()) { // found the end of line - consume the newline as part of the comment
                    found += stream.next();
                }
                if (this.keepComments) {
                    tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
                }
                this.line++;
                return true;
            }
            return false;
        }

        /**
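            read_dbquote - read a double-quoted string literal (honouring
            backslash escapes and escaped line breaks) and push it as a
            STRN/DOUBLE_QUOTE token.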
            @returns {Boolean} Was the token found?
         */
        public bool read_dbquote (TextStream stream, TokenArray tokens)
        {
            if (stream.look() != '"') {
                return false;
            }
            // find terminator
            var str = stream.next();

            while (!stream.lookEOF()) {
                if (stream.look() == '\\') {
                    if (Lang.isNewline(stream.look(1).to_string())) {
                        do {
                            stream.next();
                        } while (!stream.lookEOF() && Lang.isNewline(stream.look().to_string()));
                        str += "\\\n";
                    }
                    else {
                        str += stream.next(2);
                    }
                    continue;
                }
                if (stream.look() == '"') {
                    str += stream.next();
                    tokens.push(new Token(str, "STRN", "DOUBLE_QUOTE", this.line));
                    return true;
                }

                str += stream.next();

            }
            return false;
        }

        /**
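            read_snquote - read a single-quoted string literal (honouring
            backslash escapes) and push it as a STRN/SINGLE_QUOTE token.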
            @returns {Boolean} Was the token found?
         */
        public bool read_snquote (TextStream stream, TokenArray tokens)
        {
            if (stream.look() != '\'') {
                return false;
            }
            // find terminator
            var str = stream.next();

            while (!stream.lookEOF()) {
                if (stream.look() == '\\') { // escape sequence
                    str += stream.next(2);
                    continue;
                }
                if (stream.look() == '\'') {
                    str += stream.next();
                    tokens.push(new Token(str, "STRN", "SINGLE_QUOTE", this.line));
                    return true;
                }
                str += stream.next();

            }
            return false;
        }


        /**
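            read_numb - read a numeric literal and push it as a NUMB token
            (OCTAL if it matches ^0[0-7], otherwise DECIMAL); '0x...' literals
            are handed off to read_hex.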
            @returns {Boolean} Was the token found?
         */
        public bool read_numb (TextStream stream, TokenArray tokens)
        {
            if (stream.look() == '0' && stream.look(1) == 'x') {
                return this.read_hex(stream, tokens);
            }

            var found = "";

            while (!stream.lookEOF() && Lang.isNumber(found + stream.look().to_string())) {
                found += stream.next();
            }

            if (found == "") {
                return false;
            }
            if (GLib.Regex.match_simple("^0[0-7]", found)) {
                tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
                return true;
            }
            tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
            return true;
        }

        /**
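            read_hex - read the rest of a '0x...' hexadecimal literal and push
            it as a NUMB/HEX_DEC token.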
            @returns {Boolean} Was the token found?
         */
        public bool read_hex (TextStream stream, TokenArray tokens)
        {
            var found = stream.next(2);

            while (!stream.lookEOF()) {
                if (Lang.isHexDec(found) && !Lang.isHexDec(found + stream.look().to_string())) { // done
                    tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
                    return true;
                }

                found += stream.next();

            }
            return false;
        }

        /**
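            read_regx - read a regular expression literal (plus any trailing
            g/m/i flags) and push it as a REGX token; only attempted when the
            previous significant token could not end an expression (i.e. it is
            not a NUMB, NAME, RIGHT_PAREN or RIGHT_BRACKET token).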
            @returns {Boolean} Was the token found?
         */
        public bool read_regx (TextStream stream, TokenArray tokens)
        {
            if (stream.look() != '/') {
                return false;
            }
            var last = tokens.lastSym();
            if (
                (last == null)
                ||
                (
                       !last.is("NUMB")   // stuff that can not appear before a regex..
                    && !last.is("NAME")
                    && !last.is("RIGHT_PAREN")
                    && !last.is("RIGHT_BRACKET")
                )
            ) {
                var regex = stream.next();

                while (!stream.lookEOF()) {
                    if (stream.look() == '\\') { // escape sequence
                        regex += stream.next(2);
                        continue;
                    }
                    if (stream.look() == '/') {
                        regex += stream.next();

                        while (GLib.Regex.match_simple("[gmi]", stream.look().to_string())) {
                            regex += stream.next();
                        }

                        tokens.push(new Token(regex, "REGX", "REGX", this.line));
                        return true;
                    }

                    regex += stream.next();

                }
                // error: unterminated regex
            }
            return false;
        }
    }
}