JSDOC/TokenReader.js
[gnome.introspection-doc-generator] / JSDOC / TokenReader.js
1 //<script type="text/javascript">
2
3 //imports['Object.js'].load(Object);
4 XObject = imports.XObject.XObject;
5 console = imports['console.js'].console;
6
7 JSDOC   = imports['JSDOC.js'].JSDOC;
8 Token   = imports['JSDOC/Token.js'].Token;
9 Lang    = imports['JSDOC/Lang.js'].Lang;
10
/**
 * @class Search a {@link JSDOC.TextStream} for language tokens.
 *
 * Configuration flags (each can be overridden through the object handed to
 * the constructor):
 *
 *   keepDocs      - emit doc comments as COMM/JSDOC tokens (default true)
 *   keepWhite     - emit WHIT tokens for spaces and newlines (default false)
 *   keepComments  - emit COMM tokens for ordinary comments (default false)
 *   sepIdents     - split words containing '.' into NAME / DOT / NAME tokens
 *                   (default false)
 *   collapseWhite - replace each run of spaces with " " and each run of
 *                   newlines with "\n" before emitting it (default false)
 */
TokenReader = XObject.define(
    function(o) {

        this.keepDocs = true;
        this.keepWhite = false;
        this.keepComments = false;
        this.sepIdents = false; // separate '.' in identifiers..
        // read by read_space / read_newline; given an explicit default so the
        // flag is visible on every instance instead of being undefined
        this.collapseWhite = false;
        XObject.extend(this, o || {});

    },
    Object,
    {

        /**
         * Turn the remaining content of a stream into a list of tokens.
         *
         * At every position the readers below are tried in a fixed order and
         * the first one that reports a match wins. The order matters: the
         * comment readers and the regex reader must see a "/" before the
         * punctuation reader does. When no reader matches, a single character
         * is consumed and recorded as TOKN / UNKNOWN_TOKEN.
         *
         * @param {JSDOC.TextStream} stream  the source text
         * @returns {JSDOC.Token[]} the tokens; the array also carries two
         *      helpers, last() (final token) and lastSym() (final token that
         *      is neither WHIT nor COMM, or undefined when there is none)
         */
        tokenize : function(/**JSDOC.TextStream*/stream) {
            var tokens = [];
            /**@ignore*/ tokens.last    = function() { return tokens[tokens.length-1]; };
            /**@ignore*/ tokens.lastSym = function() {
                for (var i = tokens.length-1; i >= 0; i--) {
                    if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
                }
            };

            while (!stream.look().eof) {
                if (this.read_mlcomment(stream, tokens)) continue;
                if (this.read_slcomment(stream, tokens)) continue;
                if (this.read_dbquote(stream, tokens))   continue;
                if (this.read_snquote(stream, tokens))   continue;
                if (this.read_regx(stream, tokens))      continue;
                if (this.read_numb(stream, tokens))      continue;
                if (this.read_punc(stream, tokens))      continue;
                if (this.read_newline(stream, tokens))   continue;
                if (this.read_space(stream, tokens))     continue;
                if (this.read_word(stream, tokens))      continue;

                // if execution reaches here then an error has happened
                tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN"));
            }

            return tokens;
        },

        /**
         * Read a run of word characters (as judged by Lang.isWordChar).
         *
         * The whole run is first looked up with Lang.keyword; a hit becomes a
         * KEYW token. Otherwise it becomes a NAME token, unless sepIdents is
         * set and the run contains '.', in which case every segment becomes
         * its own NAME token with a PUNC / DOT token between segments.
         *
         * @param {JSDOC.TextStream} stream
         * @param {JSDOC.Token[]} tokens  receives the new token(s)
         * @returns {Boolean} Was the token found?
         */
        read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";
            while (!stream.look().eof && Lang.isWordChar(stream.look())) {
                found += stream.next();
            }

            if (found === "") {
                return false;
            }

            var name = Lang.keyword(found);
            if (name) {
                tokens.push(new Token(found, "KEYW", name));
                return true;
            }

            if (!this.sepIdents || found.indexOf('.') < 0) {
                tokens.push(new Token(found, "NAME", "NAME"));
                return true;
            }

            var parts = found.split('.');
            for (var i = 0; i < parts.length; i++) {
                if (i > 0) {
                    tokens.push(new Token('.', "PUNC", "DOT"));
                }
                tokens.push(new Token(parts[i], "NAME", "NAME"));
            }
            return true;
        },

        /**
         * Read the longest run of characters that Lang.punc still recognises
         * and emit it as a PUNC token named by Lang.punc.
         *
         * @param {JSDOC.TextStream} stream
         * @param {JSDOC.Token[]} tokens  receives the new token
         * @returns {Boolean} Was the token found?
         */
        read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";
            // grow the candidate one character at a time while it is still punctuation
            while (!stream.look().eof && Lang.punc(found+stream.look())) {
                found += stream.next();
            }

            if (found === "") {
                return false;
            }

            tokens.push(new Token(found, "PUNC", Lang.punc(found)));
            return true;
        },

        /**
         * Consume a run of space characters (as judged by Lang.isSpace).
         *
         * The run is always consumed; a WHIT / SPACE token is only emitted
         * when keepWhite is set. With collapseWhite the token text is " ".
         *
         * @param {JSDOC.TextStream} stream
         * @param {JSDOC.Token[]} tokens  receives the new token, if any
         * @returns {Boolean} Was the token found?
         */
        read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";

            while (!stream.look().eof && Lang.isSpace(stream.look())) {
                found += stream.next();
            }

            if (found === "") {
                return false;
            }

            if (this.collapseWhite) found = " ";
            if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE"));
            return true;
        },

        /**
         * Consume a run of newline characters (as judged by Lang.isNewline).
         *
         * The run is always consumed; a WHIT / NEWLINE token is only emitted
         * when keepWhite is set. With collapseWhite the token text is "\n".
         *
         * @param {JSDOC.TextStream} stream
         * @param {JSDOC.Token[]} tokens  receives the new token, if any
         * @returns {Boolean} Was the token found?
         */
        read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";

            while (!stream.look().eof && Lang.isNewline(stream.look())) {
                found += stream.next();
            }

            if (found === "") {
                return false;
            }

            if (this.collapseWhite) found = "\n";
            if (this.keepWhite) tokens.push(new Token(found, "WHIT", "NEWLINE"));
            return true;
        },

        /**
         * Consume a block comment.
         *
         * The comment runs up to and including its terminator, or to the end
         * of the stream when it is never closed. Doc comments are emitted as
         * COMM / JSDOC when keepDocs is set; every other block comment (and a
         * doc comment when keepDocs is off) is emitted as
         * COMM / MULTI_LINE_COMM when keepComments is set. The text is
         * consumed even when nothing is emitted.
         *
         * @param {JSDOC.TextStream} stream
         * @param {JSDOC.Token[]} tokens  receives the new token, if any
         * @returns {Boolean} Was the token found?
         */
        read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "/" && stream.look(1) == "*") {
                var found = stream.next(2);

                // The terminator is recognised by looking back at the two
                // characters just consumed. The length guard stops the "*" of
                // the opener from being reused as the "*" of the terminator,
                // which previously closed the comment after only "/" "*" "/".
                while (!stream.look().eof && !(found.length >= 4 && stream.look(-1) == "/" && stream.look(-2) == "*")) {
                    found += stream.next();
                }

                // to start doclet we allow /** or /*** but not /**/ or /****
                // NOTE(review): [^\/] also matches "*", so /**** is accepted by
                // this pattern as written - confirm whether that is intended.
                if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC"));
                else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM"));
                return true;
            }
            return false;
        },

        /**
         * Consume a single-line comment, introduced either by two slashes or
         * by the four characters "<!--". The comment runs up to, but not
         * including, the next newline. A COMM / SINGLE_LINE_COMM token is only
         * emitted when keepComments is set.
         *
         * @param {JSDOC.TextStream} stream
         * @param {JSDOC.Token[]} tokens  receives the new token, if any
         * @returns {Boolean} Was the token found?
         */
        read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found;
            if (stream.look() == "/" && stream.look(1) == "/") {
                found = stream.next(2);
            }
            else if (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-") {
                found = stream.next(4);
            }
            else {
                return false;
            }

            while (!stream.look().eof && !Lang.isNewline(stream.look())) {
                found += stream.next();
            }

            if (this.keepComments) {
                tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM"));
            }
            return true;
        },

        /**
         * Read a double-quoted string literal, quotes included, and emit it
         * as STRN / DOUBLE_QUOTE.
         *
         * A backslash followed by a newline (line continuation) is recorded
         * as backslash + "\n" no matter how many newline characters follow.
         * Any other backslash takes the next character with it.
         *
         * If the stream ends before the closing quote, nothing is emitted and
         * false is returned, but the characters read so far stay consumed.
         *
         * @param {JSDOC.TextStream} stream
         * @param {JSDOC.Token[]} tokens  receives the new token
         * @returns {Boolean} Was the token found?
         */
        read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "\"") {
                // find terminator
                var string = stream.next();

                while (!stream.look().eof) {
                    if (stream.look() == "\\") {
                        if (Lang.isNewline(stream.look(1))) {
                            // first pass drops the backslash, later passes the newline run
                            do {
                                stream.next();
                            } while (!stream.look().eof && Lang.isNewline(stream.look()));
                            string += "\\\n";
                        }
                        else {
                            string += stream.next(2);
                        }
                    }
                    else if (stream.look() == "\"") {
                        string += stream.next();
                        tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE"));
                        return true;
                    }
                    else {
                        string += stream.next();
                    }
                }
            }
            return false; // error! unterminated string
        },

        /**
         * Read a single-quoted string literal, quotes included, and emit it
         * as STRN / SINGLE_QUOTE. A backslash takes the next character with it.
         *
         * If the stream ends before the closing quote, nothing is emitted and
         * false is returned, but the characters read so far stay consumed.
         *
         * @param {JSDOC.TextStream} stream
         * @param {JSDOC.Token[]} tokens  receives the new token
         * @returns {Boolean} Was the token found?
         */
        read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "'") {
                // find terminator
                var string = stream.next();

                while (!stream.look().eof) {
                    if (stream.look() == "\\") { // escape sequence
                        string += stream.next(2);
                    }
                    else if (stream.look() == "'") {
                        string += stream.next();
                        tokens.push(new Token(string, "STRN", "SINGLE_QUOTE"));
                        return true;
                    }
                    else {
                        string += stream.next();
                    }
                }
            }
            return false; // error! unterminated string
        },

        /**
         * Read a numeric literal.
         *
         * Input starting with "0x" is delegated to read_hex. Otherwise the
         * longest prefix accepted by Lang.isNumber is taken and emitted as
         * NUMB / OCTAL when it starts with 0 followed by an octal digit, else
         * as NUMB / DECIMAL.
         *
         * NOTE(review): only the lowercase "0x" prefix is routed to read_hex;
         * whether "0X" should be as well depends on what Lang.isHexDec
         * accepts - confirm before extending.
         *
         * @param {JSDOC.TextStream} stream
         * @param {JSDOC.Token[]} tokens  receives the new token
         * @returns {Boolean} Was the token found?
         */
        read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() === "0" && stream.look(1) == "x") {
                return this.read_hex(stream, tokens);
            }

            var found = "";

            while (!stream.look().eof && Lang.isNumber(found+stream.look())){
                found += stream.next();
            }

            if (found === "") {
                return false;
            }

            if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL"));
            else tokens.push(new Token(found, "NUMB", "DECIMAL"));
            return true;
        },
        /*t:
            requires("../lib/JSDOC/TextStream.js");
            requires("../lib/JSDOC/Token.js");
            requires("../lib/JSDOC/Lang.js");
            
            plan(3, "testing read_numb");
            
            //// setup
            var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
            var tr = new TokenReader();
            var tokens = tr.tokenize(new TextStream(src));
            
            var hexToken, octToken, decToken;
            for (var i = 0; i < tokens.length; i++) {
                if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
                if (tokens[i].name == "OCTAL") octToken = tokens[i];
                if (tokens[i].name == "DECIMAL") decToken = tokens[i];
            }
            ////
            
            is(decToken.data, "8.0", "decimal number is found in source.");
            is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
            is(octToken.data, "0777", "octal number is found in source.");
        */

        /**
         * Read a hexadecimal literal. The caller (read_numb) has already
         * checked that the stream is positioned on the "0x" prefix.
         *
         * Characters are appended until the text read so far satisfies
         * Lang.isHexDec and appending one more character would break that;
         * the text is then emitted as NUMB / HEX_DEC. A literal that runs up
         * to the very end of the stream is emitted as well.
         *
         * @param {JSDOC.TextStream} stream
         * @param {JSDOC.Token[]} tokens  receives the new token
         * @returns {Boolean} Was the token found?
         */
        read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = stream.next(2);

            while (!stream.look().eof) {
                if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
                    tokens.push(new Token(found, "NUMB", "HEX_DEC"));
                    return true;
                }
                found += stream.next();
            }

            // The literal was the last thing in the stream, so there was no
            // following character to signal "done" inside the loop.
            if (Lang.isHexDec(found)) {
                tokens.push(new Token(found, "NUMB", "HEX_DEC"));
                return true;
            }
            return false;
        },

        /**
         * Read a regular-expression literal and emit it as REGX / REGX.
         *
         * A "/" is only treated as the start of a regex when the previous
         * significant token (tokens.lastSym()) is absent or is none of NUMB,
         * NAME, RIGHT_PAREN, RIGHT_BRACKET; after those it is left for the
         * punctuation reader. Inside the literal a backslash takes the next
         * character with it, and a "/" within a [...] character class does not
         * end the literal. Trailing flag letters are included in the token.
         *
         * If the stream ends before the closing "/", nothing is emitted and
         * false is returned, but the characters read so far stay consumed.
         *
         * @param {JSDOC.TextStream} stream
         * @param {JSDOC.Token[]} tokens  receives the new token; must provide lastSym()
         * @returns {Boolean} Was the token found?
         */
        read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
            var last;
            if (
                stream.look() == "/"
                &&
                (
                    !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
                    ||
                    (
                           !last.is("NUMB")
                        && !last.is("NAME")
                        && !last.is("RIGHT_PAREN")
                        && !last.is("RIGHT_BRACKET")
                    )
                )
            ) {
                var regex = stream.next();
                var inClass = false; // true between an unescaped "[" and the next "]"

                while (!stream.look().eof) {
                    var ch = stream.look();

                    if (ch == "\\") { // escape sequence
                        regex += stream.next(2);
                    }
                    else if (ch == "/" && !inClass) {
                        regex += stream.next();

                        // flags: d, g, i, m, s, u, v, y
                        while (!stream.look().eof && /^[dgimsuvy]$/.test(stream.look())) {
                            regex += stream.next();
                        }

                        tokens.push(new Token(regex, "REGX", "REGX"));
                        return true;
                    }
                    else {
                        if (ch == "[") inClass = true;
                        else if (ch == "]") inClass = false;
                        regex += stream.next();
                    }
                }
                // error: unterminated regex
            }
            return false;
        }
});