fb64a1e29075e1ca5cc9987353b543fc7fbe2149
[gnome.introspection-doc-generator] / JSDOC / TokenReader.vala
1 //<script type="text/javascript">
2
3  
4
5
6 //const Token   = imports.Token.Token;
7 //const Lang    = imports.Lang.Lang;
8
9 /**
10         @class Search a {@link JSDOC.TextStream} for language tokens.
11 */
12
13 namespace JSDOC {
14
15     public class TokenArray: Object {
16         
17         Gee.ArrayList<Token> tokens;
18         
19         public TokenArray()
20         {
21             this.items = new Gee.ArrayList<Token>();
22         }
23         
24         public Token? last() {
25             if (this.tokens > 0) {
26                 return this.tokens[this.tokens.length-1];
27             }
28             return null;
29         }
30         public Token? lastSym () {
31             for (var i = this.tokens.length-1; i >= 0; i--) {
32                 if (!(this.tokens.get(i).is("WHIT") || this.tokens.get(i).is("COMM")))  {
33                     return this.tokens.get(i);
34                 }
35             }
36             return null;
37         }
38         public void push (Token t) {
39             this.tokens.add(t);
40         }
41     }
42
43
44     public class TokenReader : Object
45     {
46         
47         
48         
49         /*
50          *
51          * I wonder if this will accept the prop: value, prop2 :value construxtor if we do not define one...
52          */
53         
54         /** @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token **/
55         public bool collapseWhite = false, // only reduces white space...
56         /** @cfg {Boolean} keepDocs keep JSDOC comments **/
57         public bool keepDocs = true,
58         /** @cfg {Boolean} keepWhite keep White space **/
59         public bool keepWhite = false,
60         /** @cfg {Boolean} keepComments  keep all comments **/
61         public bool keepComments = false,
62         /** @cfg {Boolean} sepIdents seperate identifiers (eg. a.b.c into ['a', '.', 'b', '.', 'c'] ) **/
63         public bool sepIdents = false,
64         /** @cfg {String} filename name of file being parsed. **/
65         public string filename = "";
66         /** @config {Boolean} ignoreBadGrammer do not throw errors if we find stuff that might break compression **/
67         public bool ignoreBadGrammer = false,
68         
69         
70         int line = 0;
71         
72         /**
73          * tokenize a stream
74          * @return {Array} of tokens
75          * 
76          * ts = new TextStream(File.read(str));
77          * tr = TokenReader({ keepComments : true, keepWhite : true });
78          * tr.tokenize(ts)
79          * 
80          */
81         public TokenArray tokenize(TextStream stream)
82         {
83             this.line =1;
84             var tokens = new TokenArray();
85            
86             bool eof;
87             while (true) {
88                 
89                 stream.look(0, out eof) 
90                 if (eof) {
91                     break;
92                 }
93                 if (this.read_mlcomment(stream, tokens)) continue;
94                 if (this.read_slcomment(stream, tokens)) continue;
95                 if (this.read_dbquote(stream, tokens))   continue;
96                 if (this.read_snquote(stream, tokens))   continue;
97                 if (this.read_regx(stream, tokens))      continue;
98                 if (this.read_numb(stream, tokens))      continue;
99                 if (this.read_punc(stream, tokens))      continue;
100                 if (this.read_newline(stream, tokens))   continue;
101                 if (this.read_space(stream, tokens))     continue;
102                 if (this.read_word(stream, tokens))      continue;
103                 
104                 // if execution reaches here then an error has happened
105                 tokens.push(
106                         new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line)
107                 );
108             }
109             
110             
111             
112             return tokens;
113         },
114
115         /**
116          * findPuncToken - find the id of a token (previous to current)
117          * need to back check syntax..
118          * 
119          * @arg {Array} tokens the array of tokens.
120          * @arg {String} token data (eg. '(')
121          * @arg {Number} offset where to start reading from
122          * @return {Number} position of token, or -1 when not found
123          */
        // NOTE(review): this member (and everything below it) is still
        // JavaScript object-literal syntax — not yet ported to Vala like
        // tokenize() above.
124         findPuncToken : function(tokens, data, n) {
            // default to scanning from the end of the token list.
            // NOTE(review): `||` coercion means n == 0 also falls back to the
            // default — confirm that is intended.
125             n = n || tokens.length -1;
            // depth of nested (..) / {..} groups crossed while scanning backwards
126             var stack = 0;
127             while (n > -1) {
128                 
            // only match `data` at the nesting depth we started at
129                 if (!stack && tokens[n].data == data) {
130                     return n;
131                 }
132                 
            // scanning right-to-left, a closer opens a nested group...
133                 if (tokens[n].data  == ')' || tokens[n].data  == '}') {
134                     stack++;
135                     n--;
136                     continue;
137                 }
            // ...and the matching opener closes it again
138                 if (stack && (tokens[n].data  == '{' || tokens[n].data  == '(')) {
139                     stack--;
140                     n--;
141                     continue;
142                 }
143                 
144                 
145                 n--;
146             }
147             return -1;
148         },
149         /**
150          * lastSym - find the last token symbol
151          * need to back check syntax..
152          * 
153          * @arg {Array} tokens the array of tokens.
154          * @arg {Number} offset where to start..
155          * @return {Token} the token, or null when nothing but whitespace/comments precedes n
156          */
        // Array-indexed twin of TokenArray.lastSym(): scans backwards from
        // position n-1, skipping whitespace (WHIT) and comment (COMM) tokens.
157         lastSym : function(tokens, n) {
158             for (var i = n-1; i >= 0; i--) {
159                 if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
160             }
161             return null;
162         },
163         
164          
165         
166         /**
167             @returns {Boolean} Was the token found?
168          */
169         read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
170             var found = "";
171             while (!stream.look().eof && Lang.isWordChar(stream.look())) {
172                 found += stream.next();
173             }
174             
175             if (found === "") {
176                 return false;
177             }
178             
179             var name;
180             if ((name = Lang.keyword(found))) {
181                 if (found == 'return' && tokens.lastSym().data == ')') {
182                     //Seed.print('@' + tokens.length);
183                     var n = this.findPuncToken(tokens, ')');
184                     //Seed.print(')@' + n);
185                     n = this.findPuncToken(tokens, '(', n-1);
186                     //Seed.print('(@' + n);
187                     
188                     var lt = this.lastSym(tokens, n);
189                     print(JSON.stringify(lt));
190                     if (lt.type != 'KEYW' || ['IF', 'WHILE'].indexOf(lt.name) < -1) {
191                         if (!this.ignoreBadGrammer) {
192                             throw {
193                                 name : "ArgumentError", 
194                                 message: "\n" + this.filename + ':' + this.line + " Error - return found after )"
195                             }
196                         }
197                     }
198                     
199                     
200                     
201                 }
202                 
203                 tokens.push(new Token(found, "KEYW", name, this.line));
204                 return true;
205             }
206             if (!this.sepIdents || found.indexOf('.') < 0 ) {
207                 tokens.push(new Token(found, "NAME", "NAME", this.line));
208                 return true;
209             }
210             var n = found.split('.');
211             var p = false;
212             var _this = this;
213             n.forEach(function(nm) {
214                 if (p) {
215                     tokens.push(new Token('.', "PUNC", "DOT", _this.line));
216                 }
217                 p=true;
218                 tokens.push(new Token(nm, "NAME", "NAME", _this.line));
219             });
220             return true;
221                 
222
223         },
224
225         /**
226             @returns {Boolean} Was the token found?
227          */
228         read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
229             var found = "";
230             var name;
231             while (!stream.look().eof && Lang.punc(found+stream.look())) {
232                 found += stream.next();
233             }
234             
235             
236             if (found === "") {
237                 return false;
238             }
239             
240             if ((found == '}' || found == ']') && tokens.lastSym().data == ',') {
241                 //print("Error - comma found before " + found);
242                 //print(JSON.stringify(tokens.lastSym(), null,4));
243                 if (this.ignoreBadGrammer) {
244                     print("\n" + this.filename + ':' + this.line + " Error - comma found before " + found);
245                 } else {
246                     
247                     throw {
248                         name : "ArgumentError", 
249                         message: "\n" + this.filename + ':' + this.line + " Error - comma found before " + found
250                     }
251                 }
252             }
253             
254             tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
255             return true;
256             
257         },
258
259         /**
260             @returns {Boolean} Was the token found?
261          */
        // Consumes a run of intra-line whitespace; newlines are left for
        // read_newline() so line counting stays in one place.
262         read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
263             var found = "";
264             
265             while (!stream.look().eof && Lang.isSpace(stream.look()) && !Lang.isNewline(stream.look())) {
266                 found += stream.next();
267             }
268             
269             if (found === "") {
270                 return false;
271             }
272             //print("WHITE = " + JSON.stringify(found)); 
            // collapseWhite: reduce the whole run to a single space
273             if (this.collapseWhite) found = " ";
            // only emit a token when whitespace is kept; either way the input
            // was consumed, so report success
274             if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE", this.line));
275             return true;
276         
277         },
278
279         /**
280             @returns {Boolean} Was the token found?
281          */
        // Consumes a run of newline characters, advancing this.line per
        // newline; the emitted token is stamped with the line the run started on.
282         read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
283             var found = "";
284             var line = this.line;
285             while (!stream.look().eof && Lang.isNewline(stream.look())) {
286                 this.line++;
287                 found += stream.next();
288             }
289             
290             if (found === "") {
291                 return false;
292             }
293             //this.line++;
            // collapseWhite: reduce the run to a single newline
294             if (this.collapseWhite) {
295                 found = "\n";
296             }
297              if (this.keepWhite) {
            // drop a whitespace token that immediately precedes this newline
298                 var last = tokens ? tokens.pop() : false;
            // NOTE(review): whitespace tokens are created with type "WHIT" but
            // name "SPACE"/"NEWLINE", so `last.name != "WHIT"` is always true
            // and the preceding token is always pushed back — the collapse
            // branch looks dead. Presumably `!last.is("WHIT")` was intended;
            // confirm before changing.
299                 if (last && last.name != "WHIT") {
300                     tokens.push(last);
301                 }
302                 
303                 tokens.push(new Token(found, "WHIT", "NEWLINE", line));
304             }
305             return true;
306         },
307
308         /**
309             @returns {Boolean} Was the token found?
310          */
        // Reads a multi-line comment "/* ... */", tracking line numbers for
        // the newlines it spans.
311         read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
312             if (stream.look() == "/" && stream.look(1) == "*") {
313                 var found = stream.next(2);
314                 var c = '';
315                 var line = this.line;
            // NOTE(review): look(-1)/look(-2) presumably peek at the two most
            // recently consumed characters, i.e. the loop stops once "*/" has
            // been read — confirm against the TextStream implementation.
316                 while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) {
317                     c = stream.next();
318                     if (c == "\n") this.line++;
319                     found += c;
320                 }
321                 
322                 // to start doclet we allow /** or /*** but not /**/ or /****
            // NOTE(review): the JSDOC token is stamped with this.line (end of
            // comment) while MULTI_LINE_COMM uses `line` (start) — possibly an
            // inconsistency; confirm intended.
323                 if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC", this.line));
324                 else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
325                 return true;
326             }
327             return false;
328         },
329
330         /**
331             @returns {Boolean} Was the token found?
332          */
        // Reads a single-line comment: either "//" or an HTML-style "<!--"
        // opener; consumes through the end of the line.
333         read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
334             var found;
335             if (
336                 (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
337                 || 
338                 (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
339             ) {
340                 var line = this.line;
            // consume up to the newline...
341                 while (!stream.look().eof && !Lang.isNewline(stream.look())) {
342                     found += stream.next();
343                 }
            // ...and the newline itself (when not at EOF)
344                 if (!stream.look().eof) {
345                     found += stream.next();
346                 }
347                 if (this.keepComments) {
348                     tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
349                 }
            // the consumed newline means we are now on the next line
350                 this.line++;
351                 return true;
352             }
353             return false;
354         },
355
356         /**
357             @returns {Boolean} Was the token found?
358          */
        // Reads a double-quoted string literal, preserving escape sequences.
359         read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
360             if (stream.look() == "\"") {
361                 // find terminator
362                 var string = stream.next();
363                 
364                 while (!stream.look().eof) {
365                     if (stream.look() == "\\") {
            // backslash-newline line continuation: swallow the newline run and
            // normalize it to a single "\\\n" in the token text
366                         if (Lang.isNewline(stream.look(1))) {
367                             do {
368                                 stream.next();
369                             } while (!stream.look().eof && Lang.isNewline(stream.look()));
370                             string += "\\\n";
371                         }
            // ordinary escape: keep backslash + escaped char verbatim
372                         else {
373                             string += stream.next(2);
374                         }
375                     }
376                     else if (stream.look() == "\"") {
377                         string += stream.next();
378                         tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE", this.line));
379                         return true;
380                     }
381                     else {
382                         string += stream.next();
383                     }
384                 }
385             }
            // NOTE(review): on an unterminated string the consumed characters
            // are discarded (no token pushed) and false is returned.
386             return false; // error! unterminated string
387         },
388
389         /**
390             @returns {Boolean} Was the token found?
391          */
        // Reads a single-quoted string literal; simpler than read_dbquote
        // (no line-continuation handling), escapes kept verbatim.
392         read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
393             if (stream.look() == "'") {
394                 // find terminator
395                 var string = stream.next();
396                 
397                 while (!stream.look().eof) {
398                     if (stream.look() == "\\") { // escape sequence
399                         string += stream.next(2);
400                     }
401                     else if (stream.look() == "'") {
402                         string += stream.next();
403                         tokens.push(new Token(string, "STRN", "SINGLE_QUOTE", this.line));
404                         return true;
405                     }
406                     else {
407                         string += stream.next();
408                     }
409                 }
410             }
            // NOTE(review): consumed characters of an unterminated string are
            // discarded — no token is pushed.
411             return false; // error! unterminated string
412         },
413
414         /**
415             @returns {Boolean} Was the token found?
416          */
        // Reads a numeric literal: delegates "0x..." to read_hex(), otherwise
        // greedily extends while Lang.isNumber() accepts the candidate.
417         read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
418             if (stream.look() === "0" && stream.look(1) == "x") {
419                 return this.read_hex(stream, tokens);
420             }
421             
422             var found = "";
423             
424             while (!stream.look().eof && Lang.isNumber(found+stream.look())){
425                 found += stream.next();
426             }
427             
428             if (found === "") {
429                 return false;
430             }
431             else {
            // a leading 0 followed by an octal digit marks an octal literal
432                 if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
433                 else tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
434                 return true;
435             }
436         },
437         /*t:
438             requires("../lib/JSDOC/TextStream.js");
439             requires("../lib/JSDOC/Token.js");
440             requires("../lib/JSDOC/Lang.js");
441             
442             plan(3, "testing read_numb");
443             
444             //// setup
445             var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
446             var tr = new TokenReader();
447             var tokens = tr.tokenize(new TextStream(src));
448             
449             var hexToken, octToken, decToken;
450             for (var i = 0; i < tokens.length; i++) {
451                 if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
452                 if (tokens[i].name == "OCTAL") octToken = tokens[i];
453                 if (tokens[i].name == "DECIMAL") decToken = tokens[i];
454             }
455             ////
456             
457             is(decToken.data, "8.0", "decimal number is found in source.");
458             is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
459             is(octToken.data, "0777", "octal number is found in source.");
460         */
461
462         /**
463             @returns {Boolean} Was the token found?
464          */
        // Reads a hex literal. Called by read_numb() once "0x" has been seen;
        // consumes the "0x" prefix then extends while the next char keeps the
        // candidate a valid hex number.
465         read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
466             var found = stream.next(2);
467             
468             while (!stream.look().eof) {
469                 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
470                     tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
471                     return true;
472                 }
473                 else {
474                     found += stream.next();
475                 }
476             }
            // EOF hit before the literal terminated: nothing is pushed
477             return false;
478         },
479
480         /**
481             @returns {Boolean} Was the token found?
482          */
        // Reads a regex literal. Disambiguates '/' (division vs regex) by the
        // previous symbol: after a number, name, ')' or ']' a '/' must be the
        // division operator, so a regex is only read in the remaining cases.
483         read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
484             var last;
485             if (
486                 stream.look() == "/"
487                 && 
488                 (
489                     
490                     (
491                         !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
492                         || 
493                         (
494                                !last.is("NUMB")
495                             && !last.is("NAME")
496                             && !last.is("RIGHT_PAREN")
497                             && !last.is("RIGHT_BRACKET")
498                         )
499                     )
500                 )
501             ) {
502                 var regex = stream.next();
503                 
504                 while (!stream.look().eof) {
505                     if (stream.look() == "\\") { // escape sequence
506                         regex += stream.next(2);
507                     }
508                     else if (stream.look() == "/") {
509                         regex += stream.next();
510                         
            // trailing flags (g, m, i) become part of the token text
511                         while (/[gmi]/.test(stream.look())) {
512                             regex += stream.next();
513                         }
514                         
515                         tokens.push(new Token(regex, "REGX", "REGX", this.line));
516                         return true;
517                     }
518                     else {
519                         regex += stream.next();
520                     }
521                 }
522                 // error: unterminated regex
523             }
524             return false;
525         }
526 });