JSDOC/TokenReader.js
[gnome.introspection-doc-generator] / JSDOC / TokenReader.js
1 //<script type="text/javascript">
2
3 //imports['Object.js'].load(Object);
4 XObject = imports.XObject.XObject;
5 console = imports['console.js'].console;
6
7 JSDOC   = imports['JSDOC.js'].JSDOC;
8 Token   = imports['JSDOC/Token.js'].Token;
9 Lang    = imports['JSDOC/Token.js'].Lang;
10
11 /**
12         @class Search a {@link JSDOC.TextStream} for language tokens.
13 */
14 TokenReader = XObject.define(
15     function(o) {
16         
17         this.keepDocs = true;
18         this.keepWhite = false;
19         this.keepComments = false;
20         XObject.extend(this, o || {});
21         
22     },
23     Object,
24     {
25             
26
27         /**
28             @type {JSDOC.Token[]}
29          */
30
31
32         tokenize : function(/**JSDOC.TextStream*/stream) {
33             var tokens = [];
34             /**@ignore*/ tokens.last    = function() { return tokens[tokens.length-1]; }
35             /**@ignore*/ tokens.lastSym = function() {
36                 for (var i = tokens.length-1; i >= 0; i--) {
37                     if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
38                 }
39             }
40
41             while (!stream.look().eof) {
42                 if (this.read_mlcomment(stream, tokens)) continue;
43                 if (this.read_slcomment(stream, tokens)) continue;
44                 if (this.read_dbquote(stream, tokens))   continue;
45                 if (this.read_snquote(stream, tokens))   continue;
46                 if (this.read_regx(stream, tokens))      continue;
47                 if (this.read_numb(stream, tokens))      continue;
48                 if (this.read_punc(stream, tokens))      continue;
49                 if (this.read_newline(stream, tokens))   continue;
50                 if (this.read_space(stream, tokens))     continue;
51                 if (this.read_word(stream, tokens))      continue;
52                 
53                 // if execution reaches here then an error has happened
54                 tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN"));
55             }
56             
57             
58             
59             return tokens;
60         },
61
62         /**
63             @returns {Boolean} Was the token found?
64          */
65         read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
66             var found = "";
67             while (!stream.look().eof && Lang.isWordChar(stream.look())) {
68                 found += stream.next();
69             }
70             
71             if (found === "") {
72                 return false;
73             }
74             else {
75                 var name;
76                 if ((name = Lang.keyword(found))) tokens.push(new Token(found, "KEYW", name));
77                 else tokens.push(new Token(found, "NAME", "NAME"));
78                 return true;
79             }
80         },
81
82         /**
83             @returns {Boolean} Was the token found?
84          */
85         read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
86             var found = "";
87             var name;
88             while (!stream.look().eof && Lang.punc(found+stream.look())) {
89                 found += stream.next();
90             }
91             
92             if (found === "") {
93                 return false;
94             }
95             else {
96                 tokens.push(new Token(found, "PUNC", Lang.punc(found)));
97                 return true;
98             }
99         },
100
101         /**
102             @returns {Boolean} Was the token found?
103          */
104         read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
105             var found = "";
106             
107             while (!stream.look().eof && Lang.isSpace(stream.look())) {
108                 found += stream.next();
109             }
110             
111             if (found === "") {
112                 return false;
113             }
114             else {
115                 if (this.collapseWhite) found = " ";
116                 if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE"));
117                 return true;
118             }
119         },
120
121         /**
122             @returns {Boolean} Was the token found?
123          */
124         read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
125             var found = "";
126             
127             while (!stream.look().eof && Lang.isNewline(stream.look())) {
128                 found += stream.next();
129             }
130             
131             if (found === "") {
132                 return false;
133             }
134             else {
135                 if (this.collapseWhite) found = "\n";
136                 if (this.keepWhite) tokens.push(new Token(found, "WHIT", "NEWLINE"));
137                 return true;
138             }
139         },
140
141         /**
142             @returns {Boolean} Was the token found?
143          */
144         read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
145             if (stream.look() == "/" && stream.look(1) == "*") {
146                 var found = stream.next(2);
147                 
148                 while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) {
149                     found += stream.next();
150                 }
151                 
152                 // to start doclet we allow /** or /*** but not /**/ or /****
153                 if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC"));
154                 else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM"));
155                 return true;
156             }
157             return false;
158         },
159
160         /**
161             @returns {Boolean} Was the token found?
162          */
163         read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
164             var found;
165             if (
166                 (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
167                 || 
168                 (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
169             ) {
170                 
171                 while (!stream.look().eof && !Lang.isNewline(stream.look())) {
172                     found += stream.next();
173                 }
174                 
175                 if (this.keepComments) {
176                     tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM"));
177                 }
178                 return true;
179             }
180             return false;
181         },
182
183         /**
184             @returns {Boolean} Was the token found?
185          */
186         read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
187             if (stream.look() == "\"") {
188                 // find terminator
189                 var string = stream.next();
190                 
191                 while (!stream.look().eof) {
192                     if (stream.look() == "\\") {
193                         if (Lang.isNewline(stream.look(1))) {
194                             do {
195                                 stream.next();
196                             } while (!stream.look().eof && Lang.isNewline(stream.look()));
197                             string += "\\\n";
198                         }
199                         else {
200                             string += stream.next(2);
201                         }
202                     }
203                     else if (stream.look() == "\"") {
204                         string += stream.next();
205                         tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE"));
206                         return true;
207                     }
208                     else {
209                         string += stream.next();
210                     }
211                 }
212             }
213             return false; // error! unterminated string
214         },
215
216         /**
217             @returns {Boolean} Was the token found?
218          */
219         read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
220             if (stream.look() == "'") {
221                 // find terminator
222                 var string = stream.next();
223                 
224                 while (!stream.look().eof) {
225                     if (stream.look() == "\\") { // escape sequence
226                         string += stream.next(2);
227                     }
228                     else if (stream.look() == "'") {
229                         string += stream.next();
230                         tokens.push(new Token(string, "STRN", "SINGLE_QUOTE"));
231                         return true;
232                     }
233                     else {
234                         string += stream.next();
235                     }
236                 }
237             }
238             return false; // error! unterminated string
239         },
240
241         /**
242             @returns {Boolean} Was the token found?
243          */
244         read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
245             if (stream.look() === "0" && stream.look(1) == "x") {
246                 return this.read_hex(stream, tokens);
247             }
248             
249             var found = "";
250             
251             while (!stream.look().eof && Lang.isNumber(found+stream.look())){
252                 found += stream.next();
253             }
254             
255             if (found === "") {
256                 return false;
257             }
258             else {
259                 if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL"));
260                 else tokens.push(new Token(found, "NUMB", "DECIMAL"));
261                 return true;
262             }
263         },
264         /*t:
265             requires("../lib/JSDOC/TextStream.js");
266             requires("../lib/JSDOC/Token.js");
267             requires("../lib/JSDOC/Lang.js");
268             
269             plan(3, "testing read_numb");
270             
271             //// setup
272             var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
273             var tr = new TokenReader();
274             var tokens = tr.tokenize(new TextStream(src));
275             
276             var hexToken, octToken, decToken;
277             for (var i = 0; i < tokens.length; i++) {
278                 if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
279                 if (tokens[i].name == "OCTAL") octToken = tokens[i];
280                 if (tokens[i].name == "DECIMAL") decToken = tokens[i];
281             }
282             ////
283             
284             is(decToken.data, "8.0", "decimal number is found in source.");
285             is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
286             is(octToken.data, "0777", "octal number is found in source.");
287         */
288
289         /**
290             @returns {Boolean} Was the token found?
291          */
292         read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
293             var found = stream.next(2);
294             
295             while (!stream.look().eof) {
296                 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
297                     tokens.push(new Token(found, "NUMB", "HEX_DEC"));
298                     return true;
299                 }
300                 else {
301                     found += stream.next();
302                 }
303             }
304             return false;
305         },
306
307         /**
308             @returns {Boolean} Was the token found?
309          */
310         read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
311             var last;
312             if (
313                 stream.look() == "/"
314                 && 
315                 (
316                     
317                     (
318                         !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
319                         || 
320                         (
321                                !last.is("NUMB")
322                             && !last.is("NAME")
323                             && !last.is("RIGHT_PAREN")
324                             && !last.is("RIGHT_BRACKET")
325                         )
326                     )
327                 )
328             ) {
329                 var regex = stream.next();
330                 
331                 while (!stream.look().eof) {
332                     if (stream.look() == "\\") { // escape sequence
333                         regex += stream.next(2);
334                     }
335                     else if (stream.look() == "/") {
336                         regex += stream.next();
337                         
338                         while (/[gmi]/.test(stream.look())) {
339                             regex += stream.next();
340                         }
341                         
342                         tokens.push(new Token(regex, "REGX", "REGX"));
343                         return true;
344                     }
345                     else {
346                         regex += stream.next();
347                     }
348                 }
349                 // error: unterminated regex
350             }
351             return false;
352         }
353 });