41513a190f49bfa2cbb8b8256b9f58c06daaa735
[gnome.introspection-doc-generator] / JSDOC / TokenReader.js
1 //<script type="text/javascript">
2
3  
4 XObject = imports.XObject.XObject;
5 console = imports.console.console;
6
7
8 Token   = imports.Token.Token;
9 Lang    = imports.Lang.Lang;
10
11 /**
12         @class Search a {@link JSDOC.TextStream} for language tokens.
13 */
14 TokenReader = XObject.define(
15     function(o) {
16         
17         this.keepDocs = true;
18         this.keepWhite = false;
19         this.keepComments = false;
20         this.sepIdents = false; // seperate '.' in identifiers..
21         XObject.extend(this, o || {});
22         
23     },
24     Object,
25     {
26         collapseWhite : false, // only reduces white space...
27
        /**
         * Tokenize a stream.
         * @return {Array} of tokens
         * 
         * Example:
         * ts = new TextStream(File.read(str));
         * tr = new TokenReader({ keepComments : true, keepWhite : true });
         * tr.tokenize(ts);
         */
37             
38
39
40         tokenize : function(/**JSDOC.TextStream*/stream) {
41             this.line =1;
42             var tokens = [];
43             /**@ignore*/ 
44             tokens.last    = function() { return tokens[tokens.length-1]; }
45             /**@ignore*/ 
46             tokens.lastSym = function() {
47                 for (var i = tokens.length-1; i >= 0; i--) {
48                     if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
49                 }
50             }
51
52             while (!stream.look().eof) {
53                 if (this.read_mlcomment(stream, tokens)) continue;
54                 if (this.read_slcomment(stream, tokens)) continue;
55                 if (this.read_dbquote(stream, tokens))   continue;
56                 if (this.read_snquote(stream, tokens))   continue;
57                 if (this.read_regx(stream, tokens))      continue;
58                 if (this.read_numb(stream, tokens))      continue;
59                 if (this.read_punc(stream, tokens))      continue;
60                 if (this.read_newline(stream, tokens))   continue;
61                 if (this.read_space(stream, tokens))     continue;
62                 if (this.read_word(stream, tokens))      continue;
63                 
64                 // if execution reaches here then an error has happened
65                 tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line));
66             }
67             
68             
69             
70             return tokens;
71         },
72
73         /**
74             @returns {Boolean} Was the token found?
75          */
76         read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
77             var found = "";
78             while (!stream.look().eof && Lang.isWordChar(stream.look())) {
79                 found += stream.next();
80             }
81             
82             if (found === "") {
83                 return false;
84             }
85             else {
86                 var name;
87                 if ((name = Lang.keyword(found))) {
88                     tokens.push(new Token(found, "KEYW", name, this.line));
89                     return true;
90                 }
91                 if (!this.sepIdents || found.indexOf('.') < 0 ) {
92                     tokens.push(new Token(found, "NAME", "NAME", this.line));
93                     return true;
94                 }
95                 var n = found.split('.');
96                 var p = false;
97                 var _this = this;
98                 n.forEach(function(nm) {
99                     if (p) {
100                         tokens.push(new Token('.', "PUNC", "DOT", _this.line));
101                     }
102                     p=true;
103                     tokens.push(new Token(nm, "NAME", "NAME", _this.line));
104                 });
105                 return true;
106                 
107             }
108         },
109
110         /**
111             @returns {Boolean} Was the token found?
112          */
113         read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
114             var found = "";
115             var name;
116             while (!stream.look().eof && Lang.punc(found+stream.look())) {
117                 found += stream.next();
118             }
119             
120             
121             if (found === "") {
122                 return false;
123             }
124             
125             if ((found == '}' || found == ']') && tokens.lastSym().data == ',') {
126                 //print("Error - comma found before " + found);
127                 //print(JSON.stringify(tokens.lastSym(), null,4));
128                 throw {
129                     name : "ArgumentError", 
130                     message: "Error - comma found before " + found + " on line " + this.line + "\n"
131                 }   
132             }
133             
134             tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
135             return true;
136             
137         },
138
139         /**
140             @returns {Boolean} Was the token found?
141          */
142         read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
143             var found = "";
144             
145             while (!stream.look().eof && Lang.isSpace(stream.look()) && !Lang.isNewline(stream.look())) {
146                 found += stream.next();
147             }
148             
149             if (found === "") {
150                 return false;
151             }
152             //print("WHITE = " + JSON.stringify(found)); 
153             if (this.collapseWhite) found = " ";
154             if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE", this.line));
155             return true;
156         
157         },
158
159         /**
160             @returns {Boolean} Was the token found?
161          */
162         read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
163             var found = "";
164             var line = this.line;
165             while (!stream.look().eof && Lang.isNewline(stream.look())) {
166                 this.line++;
167                 found += stream.next();
168             }
169             
170             if (found === "") {
171                 return false;
172             }
173             //this.line++;
174             if (this.collapseWhite) {
175                 found = "\n";
176             }
177             if (this.keepWhite) {
178                 var last = tokens.pop();
179                 if (last && last.name != "WHIT") {
180                     tokens.push(last);
181                 }
182                 
183                 tokens.push(new Token(found, "WHIT", "NEWLINE", line));
184             }
185             return true;
186         },
187
188         /**
189             @returns {Boolean} Was the token found?
190          */
191         read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
192             if (stream.look() == "/" && stream.look(1) == "*") {
193                 var found = stream.next(2);
194                 var c = '';
195                 var line = this.line;
196                 while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) {
197                     c = stream.next();
198                     if (c == "\n") this.line++;
199                     found += c;
200                 }
201                 
202                 // to start doclet we allow /** or /*** but not /**/ or /****
203                 if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC", this.line));
204                 else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
205                 return true;
206             }
207             return false;
208         },
209
210         /**
211             @returns {Boolean} Was the token found?
212          */
213         read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
214             var found;
215             if (
216                 (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
217                 || 
218                 (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
219             ) {
220                 var line = this.line;
221                 while (!stream.look().eof && !Lang.isNewline(stream.look())) {
222                     found += stream.next();
223                 }
224                 if (!stream.look().eof) {
225                     found += stream.next();
226                 }
227                 if (this.keepComments) {
228                     tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
229                 }
230                 this.line++;
231                 return true;
232             }
233             return false;
234         },
235
236         /**
237             @returns {Boolean} Was the token found?
238          */
239         read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
240             if (stream.look() == "\"") {
241                 // find terminator
242                 var string = stream.next();
243                 
244                 while (!stream.look().eof) {
245                     if (stream.look() == "\\") {
246                         if (Lang.isNewline(stream.look(1))) {
247                             do {
248                                 stream.next();
249                             } while (!stream.look().eof && Lang.isNewline(stream.look()));
250                             string += "\\\n";
251                         }
252                         else {
253                             string += stream.next(2);
254                         }
255                     }
256                     else if (stream.look() == "\"") {
257                         string += stream.next();
258                         tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE", this.line));
259                         return true;
260                     }
261                     else {
262                         string += stream.next();
263                     }
264                 }
265             }
266             return false; // error! unterminated string
267         },
268
269         /**
270             @returns {Boolean} Was the token found?
271          */
272         read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
273             if (stream.look() == "'") {
274                 // find terminator
275                 var string = stream.next();
276                 
277                 while (!stream.look().eof) {
278                     if (stream.look() == "\\") { // escape sequence
279                         string += stream.next(2);
280                     }
281                     else if (stream.look() == "'") {
282                         string += stream.next();
283                         tokens.push(new Token(string, "STRN", "SINGLE_QUOTE", this.line));
284                         return true;
285                     }
286                     else {
287                         string += stream.next();
288                     }
289                 }
290             }
291             return false; // error! unterminated string
292         },
293
294         /**
295             @returns {Boolean} Was the token found?
296          */
297         read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
298             if (stream.look() === "0" && stream.look(1) == "x") {
299                 return this.read_hex(stream, tokens);
300             }
301             
302             var found = "";
303             
304             while (!stream.look().eof && Lang.isNumber(found+stream.look())){
305                 found += stream.next();
306             }
307             
308             if (found === "") {
309                 return false;
310             }
311             else {
312                 if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
313                 else tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
314                 return true;
315             }
316         },
317         /*t:
318             requires("../lib/JSDOC/TextStream.js");
319             requires("../lib/JSDOC/Token.js");
320             requires("../lib/JSDOC/Lang.js");
321             
322             plan(3, "testing read_numb");
323             
324             //// setup
325             var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
326             var tr = new TokenReader();
327             var tokens = tr.tokenize(new TextStream(src));
328             
329             var hexToken, octToken, decToken;
330             for (var i = 0; i < tokens.length; i++) {
331                 if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
332                 if (tokens[i].name == "OCTAL") octToken = tokens[i];
333                 if (tokens[i].name == "DECIMAL") decToken = tokens[i];
334             }
335             ////
336             
337             is(decToken.data, "8.0", "decimal number is found in source.");
338             is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
339             is(octToken.data, "0777", "octal number is found in source.");
340         */
341
342         /**
343             @returns {Boolean} Was the token found?
344          */
345         read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
346             var found = stream.next(2);
347             
348             while (!stream.look().eof) {
349                 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
350                     tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
351                     return true;
352                 }
353                 else {
354                     found += stream.next();
355                 }
356             }
357             return false;
358         },
359
360         /**
361             @returns {Boolean} Was the token found?
362          */
363         read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
364             var last;
365             if (
366                 stream.look() == "/"
367                 && 
368                 (
369                     
370                     (
371                         !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
372                         || 
373                         (
374                                !last.is("NUMB")
375                             && !last.is("NAME")
376                             && !last.is("RIGHT_PAREN")
377                             && !last.is("RIGHT_BRACKET")
378                         )
379                     )
380                 )
381             ) {
382                 var regex = stream.next();
383                 
384                 while (!stream.look().eof) {
385                     if (stream.look() == "\\") { // escape sequence
386                         regex += stream.next(2);
387                     }
388                     else if (stream.look() == "/") {
389                         regex += stream.next();
390                         
391                         while (/[gmi]/.test(stream.look())) {
392                             regex += stream.next();
393                         }
394                         
395                         tokens.push(new Token(regex, "REGX", "REGX", this.line));
396                         return true;
397                     }
398                     else {
399                         regex += stream.next();
400                     }
401                 }
402                 // error: unterminated regex
403             }
404             return false;
405         }
406 });