JSDOC/TokenReader.vala
//<script type="text/javascript">


// test code

void main() {
    var tr = new JSDOC.TokenReader();
    tr.keepDocs = true;
    tr.keepWhite = true;
    tr.keepComments = true;
    tr.sepIdents = true;
    tr.collapseWhite = false;
    tr.filename = "test";

    // sample source text to tokenize
    var str = "var a = 1;";
    var toks = tr.tokenize(new JSDOC.TextStream(str)); // don't merge xxx + . + yyyy etc.
    print("%d tokens\n", toks.length);
}

//const Token   = imports.Token.Token;
//const Lang    = imports.Lang.Lang;

/**
    @class Search a {@link JSDOC.TextStream} for language tokens.
*/
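
// Usage sketch (assumes Token exposes the `data` field and the is() check
// used later in this file):
//
//     var tr = new JSDOC.TokenReader();
//     tr.keepComments = true;
//     var toks = tr.tokenize(new JSDOC.TextStream("var a = 1;"));
//     for (var i = 0; i < toks.length; i++) {
//         if (!toks.get(i).is("WHIT")) {
//             print("%s\n", toks.get(i).data);
//         }
//     }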


namespace JSDOC {

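    /**
     * Array-like wrapper around Gee.ArrayList<Token>: push/pop/last/get,
     * plus lastSym() to fetch the last token that is not whitespace or a comment.
     */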
    public class TokenArray: Object {
        
        public Gee.ArrayList<Token> tokens;
        public int length {
            get { return this.tokens.size; }
        }
        
        public TokenArray()
        {
            this.tokens = new Gee.ArrayList<Token>();
        }
        
        public Token? last() {
            if (this.tokens.size > 0) {
                return this.tokens.get(this.tokens.size-1);
            }
            return null;
        }
        
        public Token? lastSym () {
            for (var i = this.tokens.size-1; i >= 0; i--) {
                if (!(this.tokens.get(i).is("WHIT") || this.tokens.get(i).is("COMM"))) {
                    return this.tokens.get(i);
                }
            }
            return null;
        }
        
        public void push (Token t) {
            this.tokens.add(t);
        }
        
        public Token? pop ()
        {
            if (this.tokens.size > 0) {
                return this.tokens.remove_at(this.tokens.size-1);
            }
            return null;
        }
        
        public new Token get(int i) {
            return this.tokens.get(i);
        }
    }

    public errordomain TokenReader_Error {
        ArgumentError
    }
    

    public class TokenReader : Object
    {
        
        /*
         * I wonder if this will accept the prop: value, prop2: value constructor
         * if we do not define one...
         */
        
        /** @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token **/
        public bool collapseWhite = false; // only reduces white space...
        /** @cfg {Boolean} keepDocs keep JSDOC comments **/
        public bool keepDocs = true;
        /** @cfg {Boolean} keepWhite keep white space **/
        public bool keepWhite = false;
        /** @cfg {Boolean} keepComments keep all comments **/
        public bool keepComments = false;
        /** @cfg {Boolean} sepIdents separate identifiers (eg. a.b.c into ['a', '.', 'b', '.', 'c'] ) **/
        public bool sepIdents = false;
        /** @cfg {String} filename name of file being parsed. **/
        public string filename = "";
        /** @cfg {Boolean} ignoreBadGrammer do not throw errors if we find stuff that might break compression **/
        public bool ignoreBadGrammer = false;
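        
        // Example (sketch): with sepIdents = true the input "a.b.c" is emitted as
        // NAME("a"), PUNC DOT, NAME("b"), PUNC DOT, NAME("c"); with it left false
        // the whole "a.b.c" becomes a single NAME token (see read_word() below).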
        
        
        int line = 0;
        
        /**
         * tokenize a stream
         * @return {TokenArray} of tokens
         * 
         * Usage (Vala sketch; file_contents holds the source text):
         *   var ts = new TextStream(file_contents);
         *   var tr = new TokenReader();
         *   tr.keepComments = true;
         *   tr.keepWhite = true;
         *   tr.tokenize(ts);
         * 
         */
        public TokenArray tokenize(TextStream stream)
        {
            this.line = 1;
            var tokens = new TokenArray();
            
            while (!stream.lookEOF()) {
                
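                // note: the two comment readers must be tried before read_regx()
                // and read_punc(), since all of them can start with a '/'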
                if (this.read_mlcomment(stream, tokens)) continue;
                if (this.read_slcomment(stream, tokens)) continue;
                if (this.read_dbquote(stream, tokens))   continue;
                if (this.read_snquote(stream, tokens))   continue;
                if (this.read_regx(stream, tokens))      continue;
                if (this.read_numb(stream, tokens))      continue;
                if (this.read_punc(stream, tokens))      continue;
                if (this.read_newline(stream, tokens))   continue;
                if (this.read_space(stream, tokens))     continue;
                if (this.read_word(stream, tokens))      continue;
                
                // if execution reaches here then an error has happened
                tokens.push(
                    new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line)
                );
            }
            
            return tokens;
        }

        /**
         * findPuncToken - find the index of a token (previous to current)
         * needed to back-check syntax..
         * 
         * @arg {TokenArray} tokens the array of tokens.
         * @arg {String} data the token data to look for (eg. '(')
         * @arg {Number} n offset to start reading back from (0 or less means the end of the array)
         * @return {Number} position of the token, or -1 if it is not found
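         *
         * Example (sketch), mirroring how read_word() uses it below:
         *   var n = this.findPuncToken(tokens, ")", 0);    // index of the last ")"
         *   n = this.findPuncToken(tokens, "(", n - 1);    // its matching "(", skipping nested ()/{} pairs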
         */
        public int findPuncToken(TokenArray tokens, string data, int n)
        {
            n = n > 0 ? n : tokens.length - 1;
            var stack = 0;
            while (n > -1) {
                
                if (stack < 1 && tokens.get(n).data == data) {
                    return n;
                }
                
                if (tokens.get(n).data == ")" || tokens.get(n).data == "}") {
                    stack++;
                    n--;
                    continue;
                }
                if (stack > 0 && (tokens.get(n).data == "{" || tokens.get(n).data == "(")) {
                    stack--;
                    n--;
                    continue;
                }
                
                n--;
            }
            return -1;
        }
        /**
         * lastSym - find the last "symbol" token (skipping whitespace and comments)
         * before a given offset; needed to back-check syntax..
         * 
         * @arg {TokenArray} tokens the array of tokens.
         * @arg {Number} n offset to start looking back from
         * @return {Token} the token, or null if none is found
         */
        public Token? lastSym(TokenArray tokens, int n)
        {
            for (var i = n-1; i >= 0; i--) {
                if (!(tokens.get(i).is("WHIT") || tokens.get(i).is("COMM"))) {
                    return tokens.get(i);
                }
            }
            return null;
        }
        
        
        /**
            @returns {Boolean} Was the token found?
         */
        public bool read_word (TextStream stream, TokenArray tokens)
        {
            string found = "";
            while (!stream.lookEOF() && Lang.isWordChar((string)stream.look())) {
                found += stream.next();
            }
            
            if (found == "") {
                return false;
            }
            
            var name = Lang.keyword(found);
            if (name != null) {
                
                // special case: "return" straight after ")" - the disabled check below
                // treated this as suspect grammar unless the parentheses belonged to an
                // if/while condition
                var ls = tokens.lastSym();
                if (found == "return" && ls != null && ls.data == ")") {
                    //Seed.print('@' + tokens.length);
                    var n = this.findPuncToken(tokens, ")", 0);
                    //Seed.print(')@' + n);
                    n = this.findPuncToken(tokens, "(", n-1);
                    //Seed.print('(@' + n);
                    
                    //var lt = this.lastSym(tokens, n);
                    /*
                    //print(JSON.stringify(lt));
                    if (lt.type != "KEYW" || ["IF", 'WHILE'].indexOf(lt.name) < -1) {
                        if (!this.ignoreBadGrammer) {
                            throw new TokenReader_Error.ArgumentError(
                                this.filename + ":" + this.line + " Error - return found after )"
                            );
                        }
                    }
                    */
                    
                }
                
                tokens.push(new Token(found, "KEYW", name, this.line));
                return true;
            }
            
            if (!this.sepIdents || found.index_of(".") < 0) {
                tokens.push(new Token(found, "NAME", "NAME", this.line));
                return true;
            }
            
            // sepIdents: split "a.b.c" into NAME, DOT, NAME, DOT, NAME
            var n = found.split(".");
            var p = false;
            foreach (unowned string nm in n) {
                if (p) {
                    tokens.push(new Token(".", "PUNC", "DOT", this.line));
                }
                p = true;
                tokens.push(new Token(nm, "NAME", "NAME", this.line));
            }
            return true;
        }

        /**
            @returns {Boolean} Was the token found?
         */
        public bool read_punc (TextStream stream, TokenArray tokens) throws TokenReader_Error
        {
            string found = "";
            
            while (!stream.lookEOF() && Lang.punc(found + (string)stream.look()).length > 0) {
                found += stream.next();
            }
            
            if (found == "") {
                return false;
            }
            
            var ls = tokens.lastSym();
            
            if ((found == "}" || found == "]") && ls != null && ls.data == ",") {
                //print("Error - comma found before " + found);
                //print(JSON.stringify(tokens.lastSym(), null,4));
                if (this.ignoreBadGrammer) {
                    print("\n" + this.filename + ":" + this.line.to_string() + " Error - comma found before " + found);
                } else {
                    throw new TokenReader_Error.ArgumentError(
                        this.filename + ":" + this.line.to_string() + " comma found before " + found
                    );
                }
            }
            
            tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
            return true;
        }

        /**
            @returns {Boolean} Was the token found?
         */
        public bool read_space (TextStream stream, TokenArray tokens)
        {
            var found = "";
            
            while (!stream.lookEOF() && Lang.isSpaceC(stream.look()) && !Lang.isNewlineC(stream.look())) {
                found += stream.next();
            }
            
            if (found == "") {
                return false;
            }
            //print("WHITE = " + JSON.stringify(found));
            
            if (this.collapseWhite) {
                found = " "; // this might work better if it was a '\n' ???
            }
            if (this.keepWhite) {
                tokens.push(new Token(found, "WHIT", "SPACE", this.line));
            }
            return true;
        }

        /**
            @returns {Boolean} Was the token found?
         */
        public bool read_newline (TextStream stream, TokenArray tokens)
        {
            var found = "";
            var line = this.line;
            while (!stream.lookEOF() && Lang.isNewlineC(stream.look())) {
                this.line++;
                found += stream.next();
            }
            
            if (found == "") {
                return false;
            }
            
            // if we found a new line, then we could check if the previous character was a ';' - if so we can drop it.
            // otherwise generally keep it.. in which case it should reduce our issue with stripping new lines..
            
            //this.line++;
            if (this.collapseWhite) {
                found = "\n"; // reduces multiple line breaks into a single one...
            }
            
            if (this.keepWhite) {
                var last = tokens.pop();
                if (last != null && last.name != "WHIT") {
                    tokens.push(last);
                }
                // replaces last new line... 
                tokens.push(new Token(found, "WHIT", "NEWLINE", line));
            }
            return true;
        }

        /**
            @returns {Boolean} Was the token found?
         */
        public bool read_mlcomment (TextStream stream, TokenArray tokens)
        {
            if (stream.look() != '/') {
                return false;
            }
            if (stream.look(1) != '*') {
                return false;
            }
            var found = stream.next(2);
            string c = "";
            var line = this.line;
            while (!stream.lookEOF() && !(stream.look(-1) == '/' && stream.look(-2) == '*')) {
                c = stream.next();
                if (c == "\n") {
                    this.line++;
                }
                found += c;
            }
            
            // to start a doclet we allow /** or /*** but not /**/ or /****
            //if (found.length /^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) {
            if (this.keepDocs && found.length > 4 && found.index_of("/**") == 0 && found[3] != '/') {
                tokens.push(new Token(found, "COMM", "JSDOC", this.line));
            } else if (this.keepComments) {
                tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
            }
            return true;
        }

        /**
            @returns {Boolean} Was the token found?
         */
        public bool read_slcomment (TextStream stream, TokenArray tokens)
        {
            var found = "";
            if (
                (stream.look() == '/' && stream.look(1) == '/' && ("" != (found = stream.next(2))))
                || 
                (stream.look() == '<' && stream.look(1) == '!' && stream.look(2) == '-' && stream.look(3) == '-' && ("" != (found = stream.next(4))))
            ) {
                var line = this.line;
                while (!stream.lookEOF() && !Lang.isNewline((string)stream.look())) {
                    found += stream.next();
                }
                //if (!stream.lookEOF()) { // what? << eat the EOL?
                    found += stream.next();
                //}
                if (this.keepComments) {
                    tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
                }
                this.line++;
                return true;
            }
            return false;
        }

        /**
            @returns {Boolean} Was the token found?
         */
        public bool read_dbquote (TextStream stream, TokenArray tokens)
        {
            if (stream.look() != '"') {
                return false;
            }
            // find terminator
            var str = stream.next();
            
            while (!stream.lookEOF()) {
                if (stream.look() == '\\') {
                    if (Lang.isNewline((string)stream.look(1))) {
                        // line continuation: collapse "\" + newline(s) into "\\\n"
                        do {
                            stream.next();
                        } while (!stream.lookEOF() && Lang.isNewline((string)stream.look()));
                        str += "\\\n";
                    } else {
                        str += stream.next(2);
                    }
                    continue;
                }
                if (stream.look() == '"') {
                    str += stream.next();
                    tokens.push(new Token(str, "STRN", "DOUBLE_QUOTE", this.line));
                    return true;
                }
                str += stream.next();
            }
            // error: unterminated string
            return false;
        }

        /**
            @returns {Boolean} Was the token found?
         */
        public bool read_snquote (TextStream stream, TokenArray tokens)
        {
            if (stream.look() != '\'') {
                return false;
            }
            // find terminator
            var str = stream.next();
            
            while (!stream.lookEOF()) {
                if (stream.look() == '\\') { // escape sequence
                    str += stream.next(2);
                    continue;
                }
                if (stream.look() == '\'') {
                    str += stream.next();
                    tokens.push(new Token(str, "STRN", "SINGLE_QUOTE", this.line));
                    return true;
                }
                str += stream.next();
            }
            // error: unterminated string
            return false;
        }
        

        /**
            @returns {Boolean} Was the token found?
         */
        public bool read_numb (TextStream stream, TokenArray tokens)
        {
            if (stream.look() == '0' && stream.look(1) == 'x') {
                return this.read_hex(stream, tokens);
            }
            
            var found = "";
            
            while (!stream.lookEOF() && Lang.isNumber(found + (string)stream.look())) {
                found += stream.next();
            }
            
            if (found == "") {
                return false;
            }
            if (GLib.Regex.match_simple("^0[0-7]", found)) {
                tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
                return true;
            }
            tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
            return true;
        }
        
        /**
            @returns {Boolean} Was the token found?
         */
        public bool read_hex (TextStream stream, TokenArray tokens)
        {
            var found = stream.next(2);
            
            while (!stream.lookEOF()) {
                if (Lang.isHexDec(found) && !Lang.isHexDec(found + (string)stream.look())) { // done
                    tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
                    return true;
                }
                
                found += stream.next();
            }
            return false;
        }

        /**
            @returns {Boolean} Was the token found?
         */
        public bool read_regx (TextStream stream, TokenArray tokens)
        {
            
            if (stream.look() != '/') {
                return false;
            }
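            // a leading '/' only starts a regex literal when the previous significant
            // token could not end an expression; after a NAME, a number, ')' or ']'
            // the '/' is division, so no REGX token is produced here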
            var last = tokens.lastSym();
            if (
                (last == null)
                || 
                (
                       !last.is("NUMB")   // stuff that can not appear before a regex..
                    && !last.is("NAME")
                    && !last.is("RIGHT_PAREN")
                    && !last.is("RIGHT_BRACKET")
                )
            ) {
                var regex = stream.next();
                
                while (!stream.lookEOF()) {
                    if (stream.look() == '\\') { // escape sequence
                        regex += stream.next(2);
                        continue;
                    }
                    if (stream.look() == '/') {
                        regex += stream.next();
                        
                        while (GLib.Regex.match_simple("[gmi]", (string)stream.look())) {
                            regex += stream.next();
                        }
                        
                        tokens.push(new Token(regex, "REGX", "REGX", this.line));
                        return true;
                    }
                    
                    regex += stream.next();
                }
                // error: unterminated regex
            }
            return false;
        }
    }
}