JSDOC/TokenReader.js
[gnome.introspection-doc-generator] / JSDOC / TokenReader.js
//<script type="text/javascript">

// Module dependencies, resolved through the runtime's global `imports` loader.
// Each line binds the object exported by the module of the same name.
//imports['Object.js'].load(Object);
XObject = imports.XObject.XObject;
console = imports.console.console; // was `imports.console..console`, a syntax error


Token   = imports.Token.Token;
Lang    = imports.Lang.Lang;
10
/**
    @class Search a {@link JSDOC.TextStream} for language tokens.

    Options (all may be overridden through the constructor argument):
      keepDocs      - emit JSDOC comment tokens (default true)
      keepWhite     - emit SPACE / NEWLINE tokens (default false)
      keepComments  - emit non-doc comment tokens (default false)
      collapseWhite - replace a run of spaces by " " and a run of newlines
                      by "\n" in emitted whitespace tokens (default false)
      sepIdents     - split dotted words into NAME / DOT / NAME tokens
                      (default false)

    While tokenizing, this.line holds the current 1-based source line and
    is passed as the last argument to every Token that is created.
*/
TokenReader = XObject.define(
    /**
        @constructor
        @param {Object} [o] Option overrides copied onto the new reader.
     */
    function(o) {

        this.keepDocs = true;
        this.keepWhite = false;
        this.keepComments = false;
        this.collapseWhite = false; // read by read_space / read_newline
        this.sepIdents = false; // separate '.' in identifiers
        XObject.extend(this, o || {});

    },
    Object,
    {

        /**
            Split the whole stream into tokens.

            Each read_* method is tried in turn; the first one that
            recognises the input at the cursor consumes it and (depending
            on the keep* options) appends a token. If none matches, one
            character is consumed as an UNKNOWN_TOKEN so the loop always
            makes progress.

            @param {JSDOC.TextStream} stream The source text.
            @returns {JSDOC.Token[]} The tokens, with two helpers attached:
                last() (most recent token) and lastSym() (most recent token
                that is neither whitespace nor a comment).
         */
        tokenize : function(/**JSDOC.TextStream*/stream) {
            this.line = 1;
            var tokens = [];
            /**@ignore*/ tokens.last    = function() { return tokens[tokens.length-1]; };
            /**@ignore*/ tokens.lastSym = function() {
                for (var i = tokens.length-1; i >= 0; i--) {
                    if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
                }
            };

            while (!stream.look().eof) {
                if (this.read_mlcomment(stream, tokens)) continue;
                if (this.read_slcomment(stream, tokens)) continue;
                if (this.read_dbquote(stream, tokens))   continue;
                if (this.read_snquote(stream, tokens))   continue;
                if (this.read_regx(stream, tokens))      continue;
                if (this.read_numb(stream, tokens))      continue;
                if (this.read_punc(stream, tokens))      continue;
                if (this.read_newline(stream, tokens))   continue;
                if (this.read_space(stream, tokens))     continue;
                if (this.read_word(stream, tokens))      continue;

                // if execution reaches here then an error has happened
                tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line));
            }

            return tokens;
        },

        /**
            Read a run of word characters as a keyword or a name.

            With sepIdents set, a word containing '.' is emitted as
            alternating NAME and DOT tokens instead of one NAME.

            @returns {Boolean} Was the token found?
         */
        read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";
            while (!stream.look().eof && Lang.isWordChar(stream.look())) {
                found += stream.next();
            }

            if (found === "") {
                return false;
            }

            var name = Lang.keyword(found);
            if (name) {
                tokens.push(new Token(found, "KEYW", name, this.line));
                return true;
            }
            if (!this.sepIdents || found.indexOf('.') < 0) {
                tokens.push(new Token(found, "NAME", "NAME", this.line));
                return true;
            }

            // A plain loop (rather than a forEach callback) keeps `this`
            // bound to the reader, so every part gets the real line number.
            var parts = found.split('.');
            for (var i = 0; i < parts.length; i++) {
                if (i > 0) {
                    tokens.push(new Token('.', "PUNC", "DOT", this.line));
                }
                tokens.push(new Token(parts[i], "NAME", "NAME", this.line));
            }
            return true;
        },

        /**
            Read the longest run of characters that Lang.punc() recognises.

            @returns {Boolean} Was the token found?
         */
        read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";
            while (!stream.look().eof && Lang.punc(found+stream.look())) {
                found += stream.next();
            }

            if (found === "") {
                return false;
            }

            tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
            return true;
        },

        /**
            Read a run of space characters. The run is always consumed; a
            token is only emitted when keepWhite is set.

            @returns {Boolean} Was the token found?
         */
        read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";

            while (!stream.look().eof && Lang.isSpace(stream.look())) {
                found += stream.next();
            }

            if (found === "") {
                return false;
            }

            if (this.collapseWhite) found = " ";
            if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE", this.line));
            return true;
        },

        /**
            Read a run of newline characters, advancing this.line once per
            character consumed. A token is only emitted when keepWhite is
            set; it carries the line number reached after the run.

            @returns {Boolean} Was the token found?
         */
        read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";

            while (!stream.look().eof && Lang.isNewline(stream.look())) {
                this.line++;
                found += stream.next();
            }

            if (found === "") {
                return false;
            }

            if (this.collapseWhite) found = "\n";
            if (this.keepWhite) tokens.push(new Token(found, "WHIT", "NEWLINE", this.line));
            return true;
        },

        /**
            Read a multi-line comment, counting the "\n" characters inside
            it. The comment is emitted as JSDOC (when keepDocs is set) or
            as MULTI_LINE_COMM (when keepComments is set); otherwise it is
            consumed silently. An unterminated comment runs to the end of
            the stream.

            @returns {Boolean} Was the token found?
         */
        read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "/" && stream.look(1) == "*") {
                var found = stream.next(2);
                var c = '';
                // Stop once the last two consumed characters are the
                // terminator. The length guard prevents the "*" of the
                // opener from doubling as the "*" of the terminator, which
                // would end a comment that begins with slash-star-slash
                // after only three characters.
                while (
                    !stream.look().eof
                    && !(found.length >= 4 && stream.look(-1) == "/" && stream.look(-2) == "*")
                ) {
                    c = stream.next();
                    if (c == "\n") this.line++;
                    found += c;
                }

                // A doclet opens with slash-star-star followed by any
                // character other than "/".
                // NOTE(review): the pattern therefore also accepts banners
                // made of four or more stars; kept as is because changing
                // it would alter which comments are treated as doclets.
                if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC", this.line));
                else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", this.line));
                return true;
            }
            return false;
        },

        /**
            Read a single-line comment, introduced by two slashes or by the
            HTML comment opener, up to but not including the line end.

            The terminating newline is left in the stream for read_newline,
            which is also what advances this.line for it.

            @returns {Boolean} Was the token found?
         */
        read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found;
            if (
                (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
                ||
                (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
            ) {

                while (!stream.look().eof && !Lang.isNewline(stream.look())) {
                    found += stream.next();
                }

                if (this.keepComments) {
                    tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", this.line));
                }
                // No this.line++ here: the newline has not been consumed,
                // so incrementing would count the same line end twice.
                return true;
            }
            return false;
        },

        /**
            Read a double-quoted string literal.

            A backslash followed by one or more newlines (a line
            continuation) is stored as backslash + "\n"; the swallowed
            newlines still advance this.line.

            @returns {Boolean} Was the token found? False also when the
                string is unterminated, in which case the rest of the
                stream has already been consumed.
         */
        read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "\"") {
                // find terminator
                var string = stream.next();

                while (!stream.look().eof) {
                    if (stream.look() == "\\") {
                        if (Lang.isNewline(stream.look(1))) {
                            stream.next(); // drop the backslash itself
                            while (!stream.look().eof && Lang.isNewline(stream.look())) {
                                stream.next();
                                this.line++; // keep the line counter in step
                            }
                            string += "\\\n";
                        }
                        else {
                            string += stream.next(2);
                        }
                    }
                    else if (stream.look() == "\"") {
                        string += stream.next();
                        tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE", this.line));
                        return true;
                    }
                    else {
                        string += stream.next();
                    }
                }
            }
            return false; // error! unterminated string
        },

        /**
            Read a single-quoted string literal. A backslash and the
            character after it are copied through unchanged.

            @returns {Boolean} Was the token found? False also when the
                string is unterminated, in which case the rest of the
                stream has already been consumed.
         */
        read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "'") {
                // find terminator
                var string = stream.next();

                while (!stream.look().eof) {
                    if (stream.look() == "\\") { // escape sequence
                        string += stream.next(2);
                    }
                    else if (stream.look() == "'") {
                        string += stream.next();
                        tokens.push(new Token(string, "STRN", "SINGLE_QUOTE", this.line));
                        return true;
                    }
                    else {
                        string += stream.next();
                    }
                }
            }
            return false; // error! unterminated string
        },

        /**
            Read a numeric literal. Input starting with "0x" is handed to
            read_hex; otherwise the longest prefix accepted by
            Lang.isNumber() is emitted as OCTAL (leading 0 followed by an
            octal digit) or DECIMAL.

            @returns {Boolean} Was the token found?
         */
        read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() === "0" && stream.look(1) == "x") {
                return this.read_hex(stream, tokens);
            }

            var found = "";

            while (!stream.look().eof && Lang.isNumber(found+stream.look())){
                found += stream.next();
            }

            if (found === "") {
                return false;
            }

            if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
            else tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
            return true;
        },
        /*t:
            requires("../lib/JSDOC/TextStream.js");
            requires("../lib/JSDOC/Token.js");
            requires("../lib/JSDOC/Lang.js");
            
            plan(3, "testing read_numb");
            
            //// setup
            var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
            var tr = new TokenReader();
            var tokens = tr.tokenize(new TextStream(src));
            
            var hexToken, octToken, decToken;
            for (var i = 0; i < tokens.length; i++) {
                if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
                if (tokens[i].name == "OCTAL") octToken = tokens[i];
                if (tokens[i].name == "DECIMAL") decToken = tokens[i];
            }
            ////
            
            is(decToken.data, "8.0", "decimal number is found in source.");
            is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
            is(octToken.data, "0777", "octal number is found in source.");
        */

        /**
            Read a hexadecimal literal; the caller has already checked that
            the stream is positioned on "0x".

            Characters are appended until the text so far satisfies
            Lang.isHexDec() and adding the next character would break it.

            @returns {Boolean} Was the token found?
         */
        read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = stream.next(2);

            while (!stream.look().eof) {
                if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
                    tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
                    return true;
                }
                found += stream.next();
            }

            // The stream ended inside the literal: still emit it when what
            // was collected is a complete hex number, instead of dropping
            // the already consumed text.
            if (Lang.isHexDec(found)) {
                tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
                return true;
            }
            return false;
        },

        /**
            Read a regular-expression literal.

            A "/" starts a regex only when it cannot be a division sign:
            either nothing significant precedes it, or the preceding
            significant token is not a number, a name, ")" or "]".

            @returns {Boolean} Was the token found? False also when the
                regex is unterminated, in which case the rest of the stream
                has already been consumed.
         */
        read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
            var last;
            if (
                stream.look() == "/"
                &&
                (
                    !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
                    ||
                    (
                           !last.is("NUMB")
                        && !last.is("NAME")
                        && !last.is("RIGHT_PAREN")
                        && !last.is("RIGHT_BRACKET")
                    )
                )
            ) {
                var regex = stream.next();

                while (!stream.look().eof) {
                    if (stream.look() == "\\") { // escape sequence
                        regex += stream.next(2);
                    }
                    else if (stream.look() == "/") {
                        regex += stream.next();

                        // trailing flags
                        while (/[gmi]/.test(stream.look())) {
                            regex += stream.next();
                        }

                        tokens.push(new Token(regex, "REGX", "REGX", this.line));
                        return true;
                    }
                    else {
                        regex += stream.next();
                    }
                }
                // error: unterminated regex
            }
            return false;
        }
});