Initial import
[gnome.introspection-doc-generator] / JSDOC / TokenReader.js
1 //<script type="text/javascript">
2 JSDOC   = imports['JSDOC.js'].JSDOC;
3 Roo     = imports['Roo.js'].Roo;
4 console = imports['console.js'].console;
5
6
7
8 Token = imports['JSDOC/Token.js'].Token;
9 Lang = imports['JSDOC/Token.js'].Lang;
10
/**
 * @class Search a {@link JSDOC.TextStream} for language tokens.
 *
 * The defaults below are set first and the optional settings object is then
 * handed to Roo.apply() together with the instance, so a caller can override
 * any of them (assuming Roo.apply copies properties onto its first argument).
 *
 * Options read by the reader methods:
 *   keepDocs      - push "/**" comments as COMM/JSDOC tokens (default true).
 *   keepComments  - push other multi-line and single-line comments (default false).
 *   keepWhite     - push WHIT tokens for runs of spaces / newlines (default false).
 *   collapseWhite - when whitespace is kept, replace each run by a single " "
 *                   or "\n" (default false).
 *
 * @param {Object} [o] Optional settings overriding the defaults.
 */
TokenReader = function(o) {
    this.keepDocs = true;
    this.keepWhite = false;
    this.keepComments = false;
    // Read by read_space()/read_newline(); made explicit so the option is
    // discoverable instead of silently defaulting to undefined.
    this.collapseWhite = false;

    Roo.apply(this, o || {});
};
22
23
24 Roo.apply(TokenReader.prototype, {
25         
26
27     /**
28         @type {JSDOC.Token[]}
29      */
30
31
32     tokenize : function(/**JSDOC.TextStream*/stream) {
33         var tokens = [];
34         /**@ignore*/ tokens.last    = function() { return tokens[tokens.length-1]; }
35         /**@ignore*/ tokens.lastSym = function() {
36             for (var i = tokens.length-1; i >= 0; i--) {
37                 if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
38             }
39         }
40
41         while (!stream.look().eof) {
42             if (this.read_mlcomment(stream, tokens)) continue;
43             if (this.read_slcomment(stream, tokens)) continue;
44             if (this.read_dbquote(stream, tokens))   continue;
45             if (this.read_snquote(stream, tokens))   continue;
46             if (this.read_regx(stream, tokens))      continue;
47             if (this.read_numb(stream, tokens))      continue;
48             if (this.read_punc(stream, tokens))      continue;
49             if (this.read_newline(stream, tokens))   continue;
50             if (this.read_space(stream, tokens))     continue;
51             if (this.read_word(stream, tokens))      continue;
52             
53             // if execution reaches here then an error has happened
54             tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN"));
55         }
56         
57         
58         
59         return tokens;
60     },
61
62     /**
63         @returns {Boolean} Was the token found?
64      */
65     read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
66         var found = "";
67         while (!stream.look().eof && Lang.isWordChar(stream.look())) {
68             found += stream.next();
69         }
70         
71         if (found === "") {
72             return false;
73         }
74         else {
75             var name;
76             if ((name = Lang.keyword(found))) tokens.push(new Token(found, "KEYW", name));
77             else tokens.push(new Token(found, "NAME", "NAME"));
78             return true;
79         }
80     },
81
82     /**
83         @returns {Boolean} Was the token found?
84      */
85     read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
86         var found = "";
87         var name;
88         while (!stream.look().eof && Lang.punc(found+stream.look())) {
89             found += stream.next();
90         }
91         
92         if (found === "") {
93             return false;
94         }
95         else {
96             tokens.push(new Token(found, "PUNC", Lang.punc(found)));
97             return true;
98         }
99     },
100
101     /**
102         @returns {Boolean} Was the token found?
103      */
104     read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
105         var found = "";
106         
107         while (!stream.look().eof && Lang.isSpace(stream.look())) {
108             found += stream.next();
109         }
110         
111         if (found === "") {
112             return false;
113         }
114         else {
115             if (this.collapseWhite) found = " ";
116             if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE"));
117             return true;
118         }
119     },
120
121     /**
122         @returns {Boolean} Was the token found?
123      */
124     read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
125         var found = "";
126         
127         while (!stream.look().eof && Lang.isNewline(stream.look())) {
128             found += stream.next();
129         }
130         
131         if (found === "") {
132             return false;
133         }
134         else {
135             if (this.collapseWhite) found = "\n";
136             if (this.keepWhite) tokens.push(new Token(found, "WHIT", "NEWLINE"));
137             return true;
138         }
139     },
140
141     /**
142         @returns {Boolean} Was the token found?
143      */
144     read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
145         if (stream.look() == "/" && stream.look(1) == "*") {
146             var found = stream.next(2);
147             
148             while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) {
149                 found += stream.next();
150             }
151             
152             // to start doclet we allow /** or /*** but not /**/ or /****
153             if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC"));
154             else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM"));
155             return true;
156         }
157         return false;
158     },
159
160     /**
161         @returns {Boolean} Was the token found?
162      */
163     read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
164         var found;
165         if (
166             (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
167             || 
168             (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
169         ) {
170             
171             while (!stream.look().eof && !Lang.isNewline(stream.look())) {
172                 found += stream.next();
173             }
174             
175             if (this.keepComments) {
176                 tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM"));
177             }
178             return true;
179         }
180         return false;
181     },
182
183     /**
184         @returns {Boolean} Was the token found?
185      */
186     read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
187         if (stream.look() == "\"") {
188             // find terminator
189             var string = stream.next();
190             
191             while (!stream.look().eof) {
192                 if (stream.look() == "\\") {
193                     if (Lang.isNewline(stream.look(1))) {
194                         do {
195                             stream.next();
196                         } while (!stream.look().eof && Lang.isNewline(stream.look()));
197                         string += "\\\n";
198                     }
199                     else {
200                         string += stream.next(2);
201                     }
202                 }
203                 else if (stream.look() == "\"") {
204                     string += stream.next();
205                     tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE"));
206                     return true;
207                 }
208                 else {
209                     string += stream.next();
210                 }
211             }
212         }
213         return false; // error! unterminated string
214     },
215
216     /**
217         @returns {Boolean} Was the token found?
218      */
219     read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
220         if (stream.look() == "'") {
221             // find terminator
222             var string = stream.next();
223             
224             while (!stream.look().eof) {
225                 if (stream.look() == "\\") { // escape sequence
226                     string += stream.next(2);
227                 }
228                 else if (stream.look() == "'") {
229                     string += stream.next();
230                     tokens.push(new Token(string, "STRN", "SINGLE_QUOTE"));
231                     return true;
232                 }
233                 else {
234                     string += stream.next();
235                 }
236             }
237         }
238         return false; // error! unterminated string
239     },
240
241     /**
242         @returns {Boolean} Was the token found?
243      */
244     read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
245         if (stream.look() === "0" && stream.look(1) == "x") {
246             return this.read_hex(stream, tokens);
247         }
248         
249         var found = "";
250         
251         while (!stream.look().eof && Lang.isNumber(found+stream.look())){
252             found += stream.next();
253         }
254         
255         if (found === "") {
256             return false;
257         }
258         else {
259             if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL"));
260             else tokens.push(new Token(found, "NUMB", "DECIMAL"));
261             return true;
262         }
263     },
264     /*t:
265         requires("../lib/JSDOC/TextStream.js");
266         requires("../lib/JSDOC/Token.js");
267         requires("../lib/JSDOC/Lang.js");
268         
269         plan(3, "testing read_numb");
270         
271         //// setup
272         var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
273         var tr = new TokenReader();
274         var tokens = tr.tokenize(new TextStream(src));
275         
276         var hexToken, octToken, decToken;
277         for (var i = 0; i < tokens.length; i++) {
278             if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
279             if (tokens[i].name == "OCTAL") octToken = tokens[i];
280             if (tokens[i].name == "DECIMAL") decToken = tokens[i];
281         }
282         ////
283         
284         is(decToken.data, "8.0", "decimal number is found in source.");
285         is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
286         is(octToken.data, "0777", "octal number is found in source.");
287     */
288
289     /**
290         @returns {Boolean} Was the token found?
291      */
292     read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
293         var found = stream.next(2);
294         
295         while (!stream.look().eof) {
296             if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
297                 tokens.push(new Token(found, "NUMB", "HEX_DEC"));
298                 return true;
299             }
300             else {
301                 found += stream.next();
302             }
303         }
304         return false;
305     },
306
307     /**
308         @returns {Boolean} Was the token found?
309      */
310     read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
311         var last;
312         if (
313             stream.look() == "/"
314             && 
315             (
316                 
317                 (
318                     !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
319                     || 
320                     (
321                            !last.is("NUMB")
322                         && !last.is("NAME")
323                         && !last.is("RIGHT_PAREN")
324                         && !last.is("RIGHT_BRACKET")
325                     )
326                 )
327             )
328         ) {
329             var regex = stream.next();
330             
331             while (!stream.look().eof) {
332                 if (stream.look() == "\\") { // escape sequence
333                     regex += stream.next(2);
334                 }
335                 else if (stream.look() == "/") {
336                     regex += stream.next();
337                     
338                     while (/[gmi]/.test(stream.look())) {
339                         regex += stream.next();
340                     }
341                     
342                     tokens.push(new Token(regex, "REGX", "REGX"));
343                     return true;
344                 }
345                 else {
346                     regex += stream.next();
347                 }
348             }
349             // error: unterminated regex
350         }
351         return false;
352     }
353 });