JSDOC/TokenReader.js
[gnome.introspection-doc-generator] / JSDOC / TokenReader.js
1 //<script type="text/javascript">
2
3  
4 XObject = imports.XObject.XObject;
5 console = imports.console.console;
6
7
8 Token   = imports.Token.Token;
9 Lang    = imports.Lang.Lang;
10
11 /**
12         @class Search a {@link JSDOC.TextStream} for language tokens.
13 */
14 TokenReader = XObject.define(
15     function(o) {
16         
17         XObject.extend(this, o || {});
18         
19     },
20     Object,
21     {
22         /** @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token **/
23         collapseWhite : false, // only reduces white space...
24         /** @cfg {Boolean} keepDocs keep JSDOC comments **/
25         keepDocs : true,
26         /** @cfg {Boolean} keepWhite keep White space **/
27         keepWhite : false,
28         /** @cfg {Boolean} keepComments  keep all comments **/
29         keepComments : false,
        /** @cfg {Boolean} sepIdents separate identifiers (eg. a.b.c into ['a', '.', 'b', '.', 'c'] ) **/
31         sepIdents : false,
32         /** @cfg {String} filename name of file being parsed. **/
33         filename : '',
        /** @cfg {Boolean} ignoreBadGrammer do not throw errors if we find stuff that might break compression **/
35         ignoreBadGrammer : false,
36         /**
37          * tokenize a stream
38          * @return {Array} of tokens
39          * 
40          * ts = new TextStream(File.read(str));
         * tr = new TokenReader({ keepComments : true, keepWhite : true });
42          * tr.tokenize(ts)
43          * 
44          */
45         tokenize : function(/**JSDOC.TextStream*/stream) {
46             this.line =1;
47             var tokens = [];
48             /**@ignore*/ 
49             tokens.last    = function() { return tokens[tokens.length-1]; }
50             /**@ignore*/ 
51             tokens.lastSym = function() {
52                 for (var i = tokens.length-1; i >= 0; i--) {
53                     if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
54                 }
55             }
56
57             while (!stream.look().eof) {
58                 if (this.read_mlcomment(stream, tokens)) continue;
59                 if (this.read_slcomment(stream, tokens)) continue;
60                 if (this.read_dbquote(stream, tokens))   continue;
61                 if (this.read_snquote(stream, tokens))   continue;
62                 if (this.read_regx(stream, tokens))      continue;
63                 if (this.read_numb(stream, tokens))      continue;
64                 if (this.read_punc(stream, tokens))      continue;
65                 if (this.read_newline(stream, tokens))   continue;
66                 if (this.read_space(stream, tokens))     continue;
67                 if (this.read_word(stream, tokens))      continue;
68                 
69                 // if execution reaches here then an error has happened
70                 tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line));
71             }
72             
73             
74             
75             return tokens;
76         },
77
78         /**
79          * findPuncToken - find the id of a token (previous to current)
80          * need to back check syntax..
81          * 
82          * @arg {Array} tokens the array of tokens.
83          * @arg {String} token data (eg. '(')
84          * @arg {Number} offset where to start reading from
85          * @return {Number} position of token
86          */
87         findPuncToken : function(tokens, data, n) {
88             n = n || tokens.length -1;
89             var stack = 0;
90             while (n > -1) {
91                 
92                 if (!stack && tokens[n].data == data) {
93                     return n;
94                 }
95                 
96                 if (tokens[n].data  == ')' || tokens[n].data  == '}') {
97                     stack++;
98                     n--;
99                     continue;
100                 }
101                 if (stack && (tokens[n].data  == '{' || tokens[n].data  == '(')) {
102                     stack--;
103                     n--;
104                     continue;
105                 }
106                 
107                 
108                 n--;
109             }
110             return -1;
111         },
112         /**
113          * lastSym - find the last token symbol
114          * need to back check syntax..
115          * 
116          * @arg {Array} tokens the array of tokens.
117          * @arg {Number} offset where to start..
118          * @return {Token} the token
119          */
120         lastSym : function(tokens, n) {
121             for (var i = n-1; i >= 0; i--) {
122                 if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
123             }
124         },
125         
126          
127         
128         /**
129             @returns {Boolean} Was the token found?
130          */
131         read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
132             var found = "";
133             while (!stream.look().eof && Lang.isWordChar(stream.look())) {
134                 found += stream.next();
135             }
136             
137             if (found === "") {
138                 return false;
139             }
140             
141             var name;
142             if ((name = Lang.keyword(found))) {
143                 if (found == 'return' && tokens.lastSym().data == ')') {
144                     //Seed.print('@' + tokens.length);
145                     var n = this.findPuncToken(tokens, ')');
146                     //Seed.print(')@' + n);
147                     n = this.findPuncToken(tokens, '(', n-1);
148                     //Seed.print('(@' + n);
149                     
150                     var lt = this.lastSym(tokens, n);
151                    Seed.print(JSON.stringify(lt));
152                     if (lt.type != 'KEYW' || ['IF', 'WHILE'].indexOf(lt.name) < -1) {
153                         if (!this.ignoreBadGrammer) {
154                             throw {
155                                 name : "ArgumentError", 
156                                 message: "\n" + this.filename + ':' + this.line + " Error - return found after )"
157                             }
158                         }
159                     }
160                     
161                     
162                     
163                 }
164                 
165                 tokens.push(new Token(found, "KEYW", name, this.line));
166                 return true;
167             }
168             if (!this.sepIdents || found.indexOf('.') < 0 ) {
169                 tokens.push(new Token(found, "NAME", "NAME", this.line));
170                 return true;
171             }
172             var n = found.split('.');
173             var p = false;
174             var _this = this;
175             n.forEach(function(nm) {
176                 if (p) {
177                     tokens.push(new Token('.', "PUNC", "DOT", _this.line));
178                 }
179                 p=true;
180                 tokens.push(new Token(nm, "NAME", "NAME", _this.line));
181             });
182             return true;
183                 
184
185         },
186
187         /**
188             @returns {Boolean} Was the token found?
189          */
190         read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
191             var found = "";
192             var name;
193             while (!stream.look().eof && Lang.punc(found+stream.look())) {
194                 found += stream.next();
195             }
196             
197             
198             if (found === "") {
199                 return false;
200             }
201             
202             if ((found == '}' || found == ']') && tokens.lastSym().data == ',') {
203                 //print("Error - comma found before " + found);
204                 //print(JSON.stringify(tokens.lastSym(), null,4));
205                 throw {
206                     name : "ArgumentError", 
207                     message: "\n" + this.filename + ':' + this.line + " Error - comma found before " + found
208                 }   
209             }
210             
211             tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
212             return true;
213             
214         },
215
216         /**
217             @returns {Boolean} Was the token found?
218          */
219         read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
220             var found = "";
221             
222             while (!stream.look().eof && Lang.isSpace(stream.look()) && !Lang.isNewline(stream.look())) {
223                 found += stream.next();
224             }
225             
226             if (found === "") {
227                 return false;
228             }
229             //print("WHITE = " + JSON.stringify(found)); 
230             if (this.collapseWhite) found = " ";
231             if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE", this.line));
232             return true;
233         
234         },
235
236         /**
237             @returns {Boolean} Was the token found?
238          */
239         read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
240             var found = "";
241             var line = this.line;
242             while (!stream.look().eof && Lang.isNewline(stream.look())) {
243                 this.line++;
244                 found += stream.next();
245             }
246             
247             if (found === "") {
248                 return false;
249             }
250             //this.line++;
251             if (this.collapseWhite) {
252                 found = "\n";
253             }
254             if (this.keepWhite) {
255                 var last = tokens.pop();
256                 if (last && last.name != "WHIT") {
257                     tokens.push(last);
258                 }
259                 
260                 tokens.push(new Token(found, "WHIT", "NEWLINE", line));
261             }
262             return true;
263         },
264
265         /**
266             @returns {Boolean} Was the token found?
267          */
268         read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
269             if (stream.look() == "/" && stream.look(1) == "*") {
270                 var found = stream.next(2);
271                 var c = '';
272                 var line = this.line;
273                 while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) {
274                     c = stream.next();
275                     if (c == "\n") this.line++;
276                     found += c;
277                 }
278                 
279                 // to start doclet we allow /** or /*** but not /**/ or /****
280                 if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC", this.line));
281                 else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
282                 return true;
283             }
284             return false;
285         },
286
287         /**
288             @returns {Boolean} Was the token found?
289          */
290         read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
291             var found;
292             if (
293                 (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
294                 || 
295                 (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
296             ) {
297                 var line = this.line;
298                 while (!stream.look().eof && !Lang.isNewline(stream.look())) {
299                     found += stream.next();
300                 }
301                 if (!stream.look().eof) {
302                     found += stream.next();
303                 }
304                 if (this.keepComments) {
305                     tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
306                 }
307                 this.line++;
308                 return true;
309             }
310             return false;
311         },
312
313         /**
314             @returns {Boolean} Was the token found?
315          */
316         read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
317             if (stream.look() == "\"") {
318                 // find terminator
319                 var string = stream.next();
320                 
321                 while (!stream.look().eof) {
322                     if (stream.look() == "\\") {
323                         if (Lang.isNewline(stream.look(1))) {
324                             do {
325                                 stream.next();
326                             } while (!stream.look().eof && Lang.isNewline(stream.look()));
327                             string += "\\\n";
328                         }
329                         else {
330                             string += stream.next(2);
331                         }
332                     }
333                     else if (stream.look() == "\"") {
334                         string += stream.next();
335                         tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE", this.line));
336                         return true;
337                     }
338                     else {
339                         string += stream.next();
340                     }
341                 }
342             }
343             return false; // error! unterminated string
344         },
345
346         /**
347             @returns {Boolean} Was the token found?
348          */
349         read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
350             if (stream.look() == "'") {
351                 // find terminator
352                 var string = stream.next();
353                 
354                 while (!stream.look().eof) {
355                     if (stream.look() == "\\") { // escape sequence
356                         string += stream.next(2);
357                     }
358                     else if (stream.look() == "'") {
359                         string += stream.next();
360                         tokens.push(new Token(string, "STRN", "SINGLE_QUOTE", this.line));
361                         return true;
362                     }
363                     else {
364                         string += stream.next();
365                     }
366                 }
367             }
368             return false; // error! unterminated string
369         },
370
371         /**
372             @returns {Boolean} Was the token found?
373          */
374         read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
375             if (stream.look() === "0" && stream.look(1) == "x") {
376                 return this.read_hex(stream, tokens);
377             }
378             
379             var found = "";
380             
381             while (!stream.look().eof && Lang.isNumber(found+stream.look())){
382                 found += stream.next();
383             }
384             
385             if (found === "") {
386                 return false;
387             }
388             else {
389                 if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
390                 else tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
391                 return true;
392             }
393         },
394         /*t:
395             requires("../lib/JSDOC/TextStream.js");
396             requires("../lib/JSDOC/Token.js");
397             requires("../lib/JSDOC/Lang.js");
398             
399             plan(3, "testing read_numb");
400             
401             //// setup
402             var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
403             var tr = new TokenReader();
404             var tokens = tr.tokenize(new TextStream(src));
405             
406             var hexToken, octToken, decToken;
407             for (var i = 0; i < tokens.length; i++) {
408                 if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
409                 if (tokens[i].name == "OCTAL") octToken = tokens[i];
410                 if (tokens[i].name == "DECIMAL") decToken = tokens[i];
411             }
412             ////
413             
414             is(decToken.data, "8.0", "decimal number is found in source.");
415             is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
416             is(octToken.data, "0777", "octal number is found in source.");
417         */
418
419         /**
420             @returns {Boolean} Was the token found?
421          */
422         read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
423             var found = stream.next(2);
424             
425             while (!stream.look().eof) {
426                 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
427                     tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
428                     return true;
429                 }
430                 else {
431                     found += stream.next();
432                 }
433             }
434             return false;
435         },
436
437         /**
438             @returns {Boolean} Was the token found?
439          */
440         read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
441             var last;
442             if (
443                 stream.look() == "/"
444                 && 
445                 (
446                     
447                     (
448                         !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
449                         || 
450                         (
451                                !last.is("NUMB")
452                             && !last.is("NAME")
453                             && !last.is("RIGHT_PAREN")
454                             && !last.is("RIGHT_BRACKET")
455                         )
456                     )
457                 )
458             ) {
459                 var regex = stream.next();
460                 
461                 while (!stream.look().eof) {
462                     if (stream.look() == "\\") { // escape sequence
463                         regex += stream.next(2);
464                     }
465                     else if (stream.look() == "/") {
466                         regex += stream.next();
467                         
468                         while (/[gmi]/.test(stream.look())) {
469                             regex += stream.next();
470                         }
471                         
472                         tokens.push(new Token(regex, "REGX", "REGX", this.line));
473                         return true;
474                     }
475                     else {
476                         regex += stream.next();
477                     }
478                 }
479                 // error: unterminated regex
480             }
481             return false;
482         }
483 });