// b9fcbf79850fb0e1be5e37f10ca34873964cb825
// [gnome.introspection-doc-generator] / JSDOC / TokenReader.vala
//<script type="text/javascript">



//const Token   = imports.Token.Token;
//const Lang    = imports.Lang.Lang;

/**
    @class Search a {@link JSDOC.TextStream} for language tokens.
*/

namespace JSDOC {

15     public class TokenArray: Object {
16         
17         Gee.ArrayList<Token> tokens;
18         
19         public TokenArray()
20         {
21             this.items = new Gee.ArrayList<Token>();
22         }
23         
24         public Token? last() {
25             if (this.tokens > 0) {
26                 return this.tokens[this.tokens.length-1];
27             }
28             return null;
29         }
30         public Token? lastSym = function() {
31             for (var i = this.tokens.length-1; i >= 0; i--) {
32                 if (!(this.tokens.get(i).is("WHIT") || this.tokens.get(i).is("COMM")))  {
33                     return this.tokens.get(i);
34                 }
35             }
36             return null;
37         }
38     }
41     public class TokenReader : Object
42     {
43         
44         
45         
46         /*
47          *
48          * I wonder if this will accept the prop: value, prop2 :value construxtor if we do not define one...
49          */
50         
51         /** @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token **/
52         public bool collapseWhite = false, // only reduces white space...
53         /** @cfg {Boolean} keepDocs keep JSDOC comments **/
54         public bool keepDocs = true,
55         /** @cfg {Boolean} keepWhite keep White space **/
56         public bool keepWhite = false,
57         /** @cfg {Boolean} keepComments  keep all comments **/
58         public bool keepComments = false,
59         /** @cfg {Boolean} sepIdents seperate identifiers (eg. a.b.c into ['a', '.', 'b', '.', 'c'] ) **/
60         public bool sepIdents = false,
61         /** @cfg {String} filename name of file being parsed. **/
62         public string filename = "";
63         /** @config {Boolean} ignoreBadGrammer do not throw errors if we find stuff that might break compression **/
64         public bool ignoreBadGrammer = false,
65         
66         
67         int line = 0;
68         
69         /**
70          * tokenize a stream
71          * @return {Array} of tokens
72          * 
73          * ts = new TextStream(File.read(str));
74          * tr = TokenReader({ keepComments : true, keepWhite : true });
75          * tr.tokenize(ts)
76          * 
77          */
78         public TokenArray tokenize(TextStream stream)
79         {
80             this.line =1;
81             var tokens = new TokenArray();
82            
83
84             while (!stream.look().eof) {
85                 if (this.read_mlcomment(stream, tokens)) continue;
86                 if (this.read_slcomment(stream, tokens)) continue;
87                 if (this.read_dbquote(stream, tokens))   continue;
88                 if (this.read_snquote(stream, tokens))   continue;
89                 if (this.read_regx(stream, tokens))      continue;
90                 if (this.read_numb(stream, tokens))      continue;
91                 if (this.read_punc(stream, tokens))      continue;
92                 if (this.read_newline(stream, tokens))   continue;
93                 if (this.read_space(stream, tokens))     continue;
94                 if (this.read_word(stream, tokens))      continue;
95                 
96                 // if execution reaches here then an error has happened
97                 tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line));
98             }
99             
100             
101             
102             return tokens;
103         },
104
105         /**
106          * findPuncToken - find the id of a token (previous to current)
107          * need to back check syntax..
108          * 
109          * @arg {Array} tokens the array of tokens.
110          * @arg {String} token data (eg. '(')
111          * @arg {Number} offset where to start reading from
112          * @return {Number} position of token
113          */
114         findPuncToken : function(tokens, data, n) {
115             n = n || tokens.length -1;
116             var stack = 0;
117             while (n > -1) {
118                 
119                 if (!stack && tokens[n].data == data) {
120                     return n;
121                 }
122                 
123                 if (tokens[n].data  == ')' || tokens[n].data  == '}') {
124                     stack++;
125                     n--;
126                     continue;
127                 }
128                 if (stack && (tokens[n].data  == '{' || tokens[n].data  == '(')) {
129                     stack--;
130                     n--;
131                     continue;
132                 }
133                 
134                 
135                 n--;
136             }
137             return -1;
138         },
139         /**
140          * lastSym - find the last token symbol
141          * need to back check syntax..
142          * 
143          * @arg {Array} tokens the array of tokens.
144          * @arg {Number} offset where to start..
145          * @return {Token} the token
146          */
147         lastSym : function(tokens, n) {
148             for (var i = n-1; i >= 0; i--) {
149                 if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
150             }
151             return null;
152         },
153         
154          
155         
156         /**
157             @returns {Boolean} Was the token found?
158          */
159         read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
160             var found = "";
161             while (!stream.look().eof && Lang.isWordChar(stream.look())) {
162                 found += stream.next();
163             }
164             
165             if (found === "") {
166                 return false;
167             }
168             
169             var name;
170             if ((name = Lang.keyword(found))) {
171                 if (found == 'return' && tokens.lastSym().data == ')') {
172                     //Seed.print('@' + tokens.length);
173                     var n = this.findPuncToken(tokens, ')');
174                     //Seed.print(')@' + n);
175                     n = this.findPuncToken(tokens, '(', n-1);
176                     //Seed.print('(@' + n);
177                     
178                     var lt = this.lastSym(tokens, n);
179                     print(JSON.stringify(lt));
180                     if (lt.type != 'KEYW' || ['IF', 'WHILE'].indexOf(lt.name) < -1) {
181                         if (!this.ignoreBadGrammer) {
182                             throw {
183                                 name : "ArgumentError", 
184                                 message: "\n" + this.filename + ':' + this.line + " Error - return found after )"
185                             }
186                         }
187                     }
188                     
189                     
190                     
191                 }
192                 
193                 tokens.push(new Token(found, "KEYW", name, this.line));
194                 return true;
195             }
196             if (!this.sepIdents || found.indexOf('.') < 0 ) {
197                 tokens.push(new Token(found, "NAME", "NAME", this.line));
198                 return true;
199             }
200             var n = found.split('.');
201             var p = false;
202             var _this = this;
203             n.forEach(function(nm) {
204                 if (p) {
205                     tokens.push(new Token('.', "PUNC", "DOT", _this.line));
206                 }
207                 p=true;
208                 tokens.push(new Token(nm, "NAME", "NAME", _this.line));
209             });
210             return true;
211                 
212
213         },
214
215         /**
216             @returns {Boolean} Was the token found?
217          */
218         read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
219             var found = "";
220             var name;
221             while (!stream.look().eof && Lang.punc(found+stream.look())) {
222                 found += stream.next();
223             }
224             
225             
226             if (found === "") {
227                 return false;
228             }
229             
230             if ((found == '}' || found == ']') && tokens.lastSym().data == ',') {
231                 //print("Error - comma found before " + found);
232                 //print(JSON.stringify(tokens.lastSym(), null,4));
233                 if (this.ignoreBadGrammer) {
234                     print("\n" + this.filename + ':' + this.line + " Error - comma found before " + found);
235                 } else {
236                     
237                     throw {
238                         name : "ArgumentError", 
239                         message: "\n" + this.filename + ':' + this.line + " Error - comma found before " + found
240                     }
241                 }
242             }
243             
244             tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
245             return true;
246             
247         },
248
249         /**
250             @returns {Boolean} Was the token found?
251          */
252         read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
253             var found = "";
254             
255             while (!stream.look().eof && Lang.isSpace(stream.look()) && !Lang.isNewline(stream.look())) {
256                 found += stream.next();
257             }
258             
259             if (found === "") {
260                 return false;
261             }
262             //print("WHITE = " + JSON.stringify(found)); 
263             if (this.collapseWhite) found = " ";
264             if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE", this.line));
265             return true;
266         
267         },
268
269         /**
270             @returns {Boolean} Was the token found?
271          */
272         read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
273             var found = "";
274             var line = this.line;
275             while (!stream.look().eof && Lang.isNewline(stream.look())) {
276                 this.line++;
277                 found += stream.next();
278             }
279             
280             if (found === "") {
281                 return false;
282             }
283             //this.line++;
284             if (this.collapseWhite) {
285                 found = "\n";
286             }
287              if (this.keepWhite) {
288                 var last = tokens ? tokens.pop() : false;
289                 if (last && last.name != "WHIT") {
290                     tokens.push(last);
291                 }
292                 
293                 tokens.push(new Token(found, "WHIT", "NEWLINE", line));
294             }
295             return true;
296         },
297
298         /**
299             @returns {Boolean} Was the token found?
300          */
301         read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
302             if (stream.look() == "/" && stream.look(1) == "*") {
303                 var found = stream.next(2);
304                 var c = '';
305                 var line = this.line;
306                 while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) {
307                     c = stream.next();
308                     if (c == "\n") this.line++;
309                     found += c;
310                 }
311                 
312                 // to start doclet we allow /** or /*** but not /**/ or /****
313                 if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC", this.line));
314                 else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
315                 return true;
316             }
317             return false;
318         },
319
320         /**
321             @returns {Boolean} Was the token found?
322          */
323         read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
324             var found;
325             if (
326                 (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
327                 || 
328                 (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
329             ) {
330                 var line = this.line;
331                 while (!stream.look().eof && !Lang.isNewline(stream.look())) {
332                     found += stream.next();
333                 }
334                 if (!stream.look().eof) {
335                     found += stream.next();
336                 }
337                 if (this.keepComments) {
338                     tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
339                 }
340                 this.line++;
341                 return true;
342             }
343             return false;
344         },
345
346         /**
347             @returns {Boolean} Was the token found?
348          */
349         read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
350             if (stream.look() == "\"") {
351                 // find terminator
352                 var string = stream.next();
353                 
354                 while (!stream.look().eof) {
355                     if (stream.look() == "\\") {
356                         if (Lang.isNewline(stream.look(1))) {
357                             do {
358                                 stream.next();
359                             } while (!stream.look().eof && Lang.isNewline(stream.look()));
360                             string += "\\\n";
361                         }
362                         else {
363                             string += stream.next(2);
364                         }
365                     }
366                     else if (stream.look() == "\"") {
367                         string += stream.next();
368                         tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE", this.line));
369                         return true;
370                     }
371                     else {
372                         string += stream.next();
373                     }
374                 }
375             }
376             return false; // error! unterminated string
377         },
378
379         /**
380             @returns {Boolean} Was the token found?
381          */
382         read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
383             if (stream.look() == "'") {
384                 // find terminator
385                 var string = stream.next();
386                 
387                 while (!stream.look().eof) {
388                     if (stream.look() == "\\") { // escape sequence
389                         string += stream.next(2);
390                     }
391                     else if (stream.look() == "'") {
392                         string += stream.next();
393                         tokens.push(new Token(string, "STRN", "SINGLE_QUOTE", this.line));
394                         return true;
395                     }
396                     else {
397                         string += stream.next();
398                     }
399                 }
400             }
401             return false; // error! unterminated string
402         },
403
404         /**
405             @returns {Boolean} Was the token found?
406          */
407         read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
408             if (stream.look() === "0" && stream.look(1) == "x") {
409                 return this.read_hex(stream, tokens);
410             }
411             
412             var found = "";
413             
414             while (!stream.look().eof && Lang.isNumber(found+stream.look())){
415                 found += stream.next();
416             }
417             
418             if (found === "") {
419                 return false;
420             }
421             else {
422                 if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
423                 else tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
424                 return true;
425             }
426         },
427         /*t:
428             requires("../lib/JSDOC/TextStream.js");
429             requires("../lib/JSDOC/Token.js");
430             requires("../lib/JSDOC/Lang.js");
431             
432             plan(3, "testing read_numb");
433             
434             //// setup
435             var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
436             var tr = new TokenReader();
437             var tokens = tr.tokenize(new TextStream(src));
438             
439             var hexToken, octToken, decToken;
440             for (var i = 0; i < tokens.length; i++) {
441                 if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
442                 if (tokens[i].name == "OCTAL") octToken = tokens[i];
443                 if (tokens[i].name == "DECIMAL") decToken = tokens[i];
444             }
445             ////
446             
447             is(decToken.data, "8.0", "decimal number is found in source.");
448             is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
449             is(octToken.data, "0777", "octal number is found in source.");
450         */
451
452         /**
453             @returns {Boolean} Was the token found?
454          */
455         read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
456             var found = stream.next(2);
457             
458             while (!stream.look().eof) {
459                 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
460                     tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
461                     return true;
462                 }
463                 else {
464                     found += stream.next();
465                 }
466             }
467             return false;
468         },
469
470         /**
471             @returns {Boolean} Was the token found?
472          */
473         read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
474             var last;
475             if (
476                 stream.look() == "/"
477                 && 
478                 (
479                     
480                     (
481                         !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
482                         || 
483                         (
484                                !last.is("NUMB")
485                             && !last.is("NAME")
486                             && !last.is("RIGHT_PAREN")
487                             && !last.is("RIGHT_BRACKET")
488                         )
489                     )
490                 )
491             ) {
492                 var regex = stream.next();
493                 
494                 while (!stream.look().eof) {
495                     if (stream.look() == "\\") { // escape sequence
496                         regex += stream.next(2);
497                     }
498                     else if (stream.look() == "/") {
499                         regex += stream.next();
500                         
501                         while (/[gmi]/.test(stream.look())) {
502                             regex += stream.next();
503                         }
504                         
505                         tokens.push(new Token(regex, "REGX", "REGX", this.line));
506                         return true;
507                     }
508                     else {
509                         regex += stream.next();
510                     }
511                 }
512                 // error: unterminated regex
513             }
514             return false;
515         }
516 });