// JSDOC/TokenReader.vala
// [gnome.introspection-doc-generator] / JSDOC / TokenReader.vala
1 //<script type="text/javascript">
2
3  
4
5
6 //const Token   = imports.Token.Token;
7 //const Lang    = imports.Lang.Lang;
8
9 /**
10         @class Search a {@link JSDOC.TextStream} for language tokens.
11 */
12
13 namespace JSDOC {
14
15     public class TokenArray: Object {
16         
17         Gee.ArrayList<Token> tokens;
18         
19         public TokenArray()
20         {
21             this.items = new Gee.ArrayList<Token>();
22         }
23         
24         public Token? last() {
25             if (this.tokens > 0) {
26                 return this.tokens[this.tokens.length-1];
27             }
28             return null;
29         }
30         public Token? lastSym = function() {
31             for (var i = this.tokens.length-1; i >= 0; i--) {
32                 if (!(this.tokens.get(i).is("WHIT") || this.tokens.get(i).is("COMM")))  {
33                     return this.tokens.get(i);
34                 }
35             }
36             return null;
37         }
38     }
39
40
41     public class TokenReader : Object
42     {
43         
44         
45         
46         /*
47          *
48          * I wonder if this will accept the prop: value, prop2 :value construxtor if we do not define one...
49          */
50         
51         /** @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token **/
52         public bool collapseWhite = false, // only reduces white space...
53         /** @cfg {Boolean} keepDocs keep JSDOC comments **/
54         public bool keepDocs = true,
55         /** @cfg {Boolean} keepWhite keep White space **/
56         public bool keepWhite = false,
57         /** @cfg {Boolean} keepComments  keep all comments **/
58         public bool keepComments = false,
59         /** @cfg {Boolean} sepIdents seperate identifiers (eg. a.b.c into ['a', '.', 'b', '.', 'c'] ) **/
60         public bool sepIdents = false,
61         /** @cfg {String} filename name of file being parsed. **/
62         public string filename = "";
63         /** @config {Boolean} ignoreBadGrammer do not throw errors if we find stuff that might break compression **/
64         public bool ignoreBadGrammer = false,
65         
66         
67         int line = 0;
68         
69         /**
70          * tokenize a stream
71          * @return {Array} of tokens
72          * 
73          * ts = new TextStream(File.read(str));
74          * tr = TokenReader({ keepComments : true, keepWhite : true });
75          * tr.tokenize(ts)
76          * 
77          */
78         public TokenArray tokenize(TextStream stream)
79         {
80             this.line =1;
81             var tokens = new TokenArray();
82            
83             bool eof;
84             while (true) {
85                 
86                 stream.look(0, out eof) 
87                 if (eof) {
88                     break;
89                 }
90                 if (this.read_mlcomment(stream, tokens)) continue;
91                 if (this.read_slcomment(stream, tokens)) continue;
92                 if (this.read_dbquote(stream, tokens))   continue;
93                 if (this.read_snquote(stream, tokens))   continue;
94                 if (this.read_regx(stream, tokens))      continue;
95                 if (this.read_numb(stream, tokens))      continue;
96                 if (this.read_punc(stream, tokens))      continue;
97                 if (this.read_newline(stream, tokens))   continue;
98                 if (this.read_space(stream, tokens))     continue;
99                 if (this.read_word(stream, tokens))      continue;
100                 
101                 // if execution reaches here then an error has happened
102                 tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line));
103             }
104             
105             
106             
107             return tokens;
108         },
109
110         /**
111          * findPuncToken - find the id of a token (previous to current)
112          * need to back check syntax..
113          * 
114          * @arg {Array} tokens the array of tokens.
115          * @arg {String} token data (eg. '(')
116          * @arg {Number} offset where to start reading from
117          * @return {Number} position of token
118          */
119         findPuncToken : function(tokens, data, n) {
120             n = n || tokens.length -1;
121             var stack = 0;
122             while (n > -1) {
123                 
124                 if (!stack && tokens[n].data == data) {
125                     return n;
126                 }
127                 
128                 if (tokens[n].data  == ')' || tokens[n].data  == '}') {
129                     stack++;
130                     n--;
131                     continue;
132                 }
133                 if (stack && (tokens[n].data  == '{' || tokens[n].data  == '(')) {
134                     stack--;
135                     n--;
136                     continue;
137                 }
138                 
139                 
140                 n--;
141             }
142             return -1;
143         },
144         /**
145          * lastSym - find the last token symbol
146          * need to back check syntax..
147          * 
148          * @arg {Array} tokens the array of tokens.
149          * @arg {Number} offset where to start..
150          * @return {Token} the token
151          */
152         lastSym : function(tokens, n) {
153             for (var i = n-1; i >= 0; i--) {
154                 if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
155             }
156             return null;
157         },
158         
159          
160         
161         /**
162             @returns {Boolean} Was the token found?
163          */
164         read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
165             var found = "";
166             while (!stream.look().eof && Lang.isWordChar(stream.look())) {
167                 found += stream.next();
168             }
169             
170             if (found === "") {
171                 return false;
172             }
173             
174             var name;
175             if ((name = Lang.keyword(found))) {
176                 if (found == 'return' && tokens.lastSym().data == ')') {
177                     //Seed.print('@' + tokens.length);
178                     var n = this.findPuncToken(tokens, ')');
179                     //Seed.print(')@' + n);
180                     n = this.findPuncToken(tokens, '(', n-1);
181                     //Seed.print('(@' + n);
182                     
183                     var lt = this.lastSym(tokens, n);
184                     print(JSON.stringify(lt));
185                     if (lt.type != 'KEYW' || ['IF', 'WHILE'].indexOf(lt.name) < -1) {
186                         if (!this.ignoreBadGrammer) {
187                             throw {
188                                 name : "ArgumentError", 
189                                 message: "\n" + this.filename + ':' + this.line + " Error - return found after )"
190                             }
191                         }
192                     }
193                     
194                     
195                     
196                 }
197                 
198                 tokens.push(new Token(found, "KEYW", name, this.line));
199                 return true;
200             }
201             if (!this.sepIdents || found.indexOf('.') < 0 ) {
202                 tokens.push(new Token(found, "NAME", "NAME", this.line));
203                 return true;
204             }
205             var n = found.split('.');
206             var p = false;
207             var _this = this;
208             n.forEach(function(nm) {
209                 if (p) {
210                     tokens.push(new Token('.', "PUNC", "DOT", _this.line));
211                 }
212                 p=true;
213                 tokens.push(new Token(nm, "NAME", "NAME", _this.line));
214             });
215             return true;
216                 
217
218         },
219
220         /**
221             @returns {Boolean} Was the token found?
222          */
223         read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
224             var found = "";
225             var name;
226             while (!stream.look().eof && Lang.punc(found+stream.look())) {
227                 found += stream.next();
228             }
229             
230             
231             if (found === "") {
232                 return false;
233             }
234             
235             if ((found == '}' || found == ']') && tokens.lastSym().data == ',') {
236                 //print("Error - comma found before " + found);
237                 //print(JSON.stringify(tokens.lastSym(), null,4));
238                 if (this.ignoreBadGrammer) {
239                     print("\n" + this.filename + ':' + this.line + " Error - comma found before " + found);
240                 } else {
241                     
242                     throw {
243                         name : "ArgumentError", 
244                         message: "\n" + this.filename + ':' + this.line + " Error - comma found before " + found
245                     }
246                 }
247             }
248             
249             tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
250             return true;
251             
252         },
253
254         /**
255             @returns {Boolean} Was the token found?
256          */
257         read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
258             var found = "";
259             
260             while (!stream.look().eof && Lang.isSpace(stream.look()) && !Lang.isNewline(stream.look())) {
261                 found += stream.next();
262             }
263             
264             if (found === "") {
265                 return false;
266             }
267             //print("WHITE = " + JSON.stringify(found)); 
268             if (this.collapseWhite) found = " ";
269             if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE", this.line));
270             return true;
271         
272         },
273
        /**
            Read a run of newline characters, advancing the line counter
            once per newline consumed.
            @returns {Boolean} Was the token found?
         */
        read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";
            var line = this.line; // tag the token with the line it started on
            while (!stream.look().eof && Lang.isNewline(stream.look())) {
                this.line++;
                found += stream.next();
            }

            if (found === "") {
                return false;
            }
            //this.line++;
            if (this.collapseWhite) {
                found = "\n"; // collapse the run into a single newline
            }
            if (this.keepWhite) {
                // Pop the previous token and only re-push it when it is not
                // whitespace, so WHIT tokens do not pile up before a NEWLINE.
                // NOTE(review): this checks last.name != "WHIT", but "WHIT"
                // is the token *type*; names are "SPACE"/"NEWLINE" - as
                // written the pop/push looks like a no-op. Verify intent.
                var last = tokens ? tokens.pop() : false;
                if (last && last.name != "WHIT") {
                    tokens.push(last);
                }

                tokens.push(new Token(found, "WHIT", "NEWLINE", line));
            }
            return true;
        },
302
        /**
            Read a multi-line comment token, classifying it as a JSDOC
            doclet or a plain multi-line comment.
            @returns {Boolean} Was the token found?
         */
        read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "/" && stream.look(1) == "*") {
                var found = stream.next(2);
                var c = '';
                var line = this.line; // line the comment started on
                // Consume until the two most recently consumed characters
                // are "*" then "/" (negative look() offsets peek backwards
                // at already-consumed input).
                while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) {
                    c = stream.next();
                    if (c == "\n") this.line++; // keep the line counter in sync
                    found += c;
                }

                // to start doclet we allow /** or /*** but not /**/ or /****
                if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC", this.line));
                else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
                return true;
            }
            return false;
        },
324
        /**
            Read a single-line comment: either "//..." or an HTML-style
            comment opener "&lt;!--" to end of line.
            @returns {Boolean} Was the token found?
         */
        read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found;
            // NOTE: the condition has side effects - stream.next() consumes
            // the opener and seeds "found" when a branch matches.
            if (
                (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
                || 
                (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
            ) {
                var line = this.line;
                // consume to end of line
                while (!stream.look().eof && !Lang.isNewline(stream.look())) {
                    found += stream.next();
                }
                // include the trailing newline in the comment token
                if (!stream.look().eof) {
                    found += stream.next();
                }
                if (this.keepComments) {
                    tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
                }
                this.line++;
                return true;
            }
            return false;
        },
350
        /**
            Read a double-quoted string literal, handling escape sequences
            and backslash line continuations.
            @returns {Boolean} Was the token found?
         */
        read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "\"") {
                // find terminator
                var string = stream.next();

                while (!stream.look().eof) {
                    if (stream.look() == "\\") {
                        if (Lang.isNewline(stream.look(1))) {
                            // line continuation: swallow the newline run and
                            // normalise it to a single escaped newline
                            do {
                                stream.next();
                            } while (!stream.look().eof && Lang.isNewline(stream.look()));
                            string += "\\\n";
                        }
                        else {
                            // ordinary escape: take backslash + following char
                            string += stream.next(2);
                        }
                    }
                    else if (stream.look() == "\"") {
                        string += stream.next(); // closing quote
                        tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE", this.line));
                        return true;
                    }
                    else {
                        string += stream.next();
                    }
                }
            }
            return false; // error! unterminated string
        },
383
384         /**
385             @returns {Boolean} Was the token found?
386          */
387         read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
388             if (stream.look() == "'") {
389                 // find terminator
390                 var string = stream.next();
391                 
392                 while (!stream.look().eof) {
393                     if (stream.look() == "\\") { // escape sequence
394                         string += stream.next(2);
395                     }
396                     else if (stream.look() == "'") {
397                         string += stream.next();
398                         tokens.push(new Token(string, "STRN", "SINGLE_QUOTE", this.line));
399                         return true;
400                     }
401                     else {
402                         string += stream.next();
403                     }
404                 }
405             }
406             return false; // error! unterminated string
407         },
408
409         /**
410             @returns {Boolean} Was the token found?
411          */
412         read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
413             if (stream.look() === "0" && stream.look(1) == "x") {
414                 return this.read_hex(stream, tokens);
415             }
416             
417             var found = "";
418             
419             while (!stream.look().eof && Lang.isNumber(found+stream.look())){
420                 found += stream.next();
421             }
422             
423             if (found === "") {
424                 return false;
425             }
426             else {
427                 if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
428                 else tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
429                 return true;
430             }
431         },
432         /*t:
433             requires("../lib/JSDOC/TextStream.js");
434             requires("../lib/JSDOC/Token.js");
435             requires("../lib/JSDOC/Lang.js");
436             
437             plan(3, "testing read_numb");
438             
439             //// setup
440             var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
441             var tr = new TokenReader();
442             var tokens = tr.tokenize(new TextStream(src));
443             
444             var hexToken, octToken, decToken;
445             for (var i = 0; i < tokens.length; i++) {
446                 if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
447                 if (tokens[i].name == "OCTAL") octToken = tokens[i];
448                 if (tokens[i].name == "DECIMAL") decToken = tokens[i];
449             }
450             ////
451             
452             is(decToken.data, "8.0", "decimal number is found in source.");
453             is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
454             is(octToken.data, "0777", "octal number is found in source.");
455         */
456
457         /**
458             @returns {Boolean} Was the token found?
459          */
460         read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
461             var found = stream.next(2);
462             
463             while (!stream.look().eof) {
464                 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
465                     tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
466                     return true;
467                 }
468                 else {
469                     found += stream.next();
470                 }
471             }
472             return false;
473         },
474
        /**
            Read a regex literal. A "/" begins a regex (rather than
            division) only when the previous significant token cannot end
            an expression (number, name, ')' or ']').
            @returns {Boolean} Was the token found?
         */
        read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
            var last;
            if (
                stream.look() == "/"
                && 
                (
                    
                    (
                        !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
                        || 
                        (
                               !last.is("NUMB")
                            && !last.is("NAME")
                            && !last.is("RIGHT_PAREN")
                            && !last.is("RIGHT_BRACKET")
                        )
                    )
                )
            ) {
                var regex = stream.next();

                while (!stream.look().eof) {
                    if (stream.look() == "\\") { // escape sequence
                        regex += stream.next(2);
                    }
                    else if (stream.look() == "/") {
                        regex += stream.next();

                        // consume trailing flags (g, m, i)
                        while (/[gmi]/.test(stream.look())) {
                            regex += stream.next();
                        }

                        tokens.push(new Token(regex, "REGX", "REGX", this.line));
                        return true;
                    }
                    else {
                        regex += stream.next();
                    }
                }
                // error: unterminated regex
            }
            return false;
        }
521 });