JSDOC/TokenReader.js
[gnome.introspection-doc-generator] / JSDOC / TokenReader.js
1 //<script type="text/javascript">
2
3  
4 XObject = imports.XObject.XObject;
5 console = imports.console.console;
6
7
8 Token   = imports.Token.Token;
9 Lang    = imports.Lang.Lang;
10
11 /**
12         @class Search a {@link JSDOC.TextStream} for language tokens.
13 */
14 TokenReader = XObject.define(
15     function(o) {
16         
17         this.keepDocs = true;
18         this.keepWhite = false;
19         this.keepComments = false;
20         this.sepIdents = false; // seperate '.' in identifiers..
21         XObject.extend(this, o || {});
22         
23     },
24     Object,
25     {
26             
27
28         /**
29          * tokenize a stream
30          *  @return {Array} of tokens
31          */
32             @type {JSDOC.Token[]}
33            
34                 
35          */
36
37
38         tokenize : function(/**JSDOC.TextStream*/stream) {
39             this.line =1;
40             var tokens = [];
41             /**@ignore*/ tokens.last    = function() { return tokens[tokens.length-1]; }
42             /**@ignore*/ tokens.lastSym = function() {
43                 for (var i = tokens.length-1; i >= 0; i--) {
44                     if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
45                 }
46             }
47
48             while (!stream.look().eof) {
49                 if (this.read_mlcomment(stream, tokens)) continue;
50                 if (this.read_slcomment(stream, tokens)) continue;
51                 if (this.read_dbquote(stream, tokens))   continue;
52                 if (this.read_snquote(stream, tokens))   continue;
53                 if (this.read_regx(stream, tokens))      continue;
54                 if (this.read_numb(stream, tokens))      continue;
55                 if (this.read_punc(stream, tokens))      continue;
56                 if (this.read_newline(stream, tokens))   continue;
57                 if (this.read_space(stream, tokens))     continue;
58                 if (this.read_word(stream, tokens))      continue;
59                 
60                 // if execution reaches here then an error has happened
61                 tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line));
62             }
63             
64             
65             
66             return tokens;
67         },
68
69         /**
70             @returns {Boolean} Was the token found?
71          */
72         read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
73             var found = "";
74             while (!stream.look().eof && Lang.isWordChar(stream.look())) {
75                 found += stream.next();
76             }
77             
78             if (found === "") {
79                 return false;
80             }
81             else {
82                 var name;
83                 if ((name = Lang.keyword(found))) {
84                     tokens.push(new Token(found, "KEYW", name, this.line));
85                     return true;
86                 }
87                 if (!this.sepIdents || found.indexOf('.') < 0 ) {
88                     tokens.push(new Token(found, "NAME", "NAME", this.line));
89                     return true;
90                 }
91                 var n = found.split('.');
92                 var p = false;
93                 n.forEach(function(nm) {
94                     if (p) {
95                         tokens.push(new Token('.', "PUNC", "DOT", this.line));
96                     }
97                     p=true;
98                     tokens.push(new Token(nm, "NAME", "NAME", this.line));
99                 });
100                 return true;
101                 
102             }
103         },
104
105         /**
106             @returns {Boolean} Was the token found?
107          */
108         read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
109             var found = "";
110             var name;
111             while (!stream.look().eof && Lang.punc(found+stream.look())) {
112                 found += stream.next();
113             }
114             
115             if (found === "") {
116                 return false;
117             }
118             else {
119                 tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
120                 return true;
121             }
122         },
123
124         /**
125             @returns {Boolean} Was the token found?
126          */
127         read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
128             var found = "";
129             
130             while (!stream.look().eof && Lang.isSpace(stream.look())) {
131                 found += stream.next();
132             }
133             
134             if (found === "") {
135                 return false;
136             }
137             else {
138                 if (this.collapseWhite) found = " ";
139                 if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE", this.line));
140                 return true;
141             }
142         },
143
144         /**
145             @returns {Boolean} Was the token found?
146          */
147         read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
148             var found = "";
149             
150             while (!stream.look().eof && Lang.isNewline(stream.look())) {
151                 this.line++;
152                 found += stream.next();
153             }
154             
155             if (found === "") {
156                 return false;
157             }
158             else {
159                 if (this.collapseWhite) found = "\n";
160                 if (this.keepWhite) tokens.push(new Token(found, "WHIT", "NEWLINE", this.line));
161                 return true;
162             }
163         },
164
165         /**
166             @returns {Boolean} Was the token found?
167          */
168         read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
169             if (stream.look() == "/" && stream.look(1) == "*") {
170                 var found = stream.next(2);
171                 var c = '';
172                 while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) {
173                     c = stream.next();
174                     if (c == "\n") this.line++;
175                     found += c;
176                 }
177                 
178                 // to start doclet we allow /** or /*** but not /**/ or /****
179                 if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC", this.line));
180                 else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", this.line));
181                 return true;
182             }
183             return false;
184         },
185
186         /**
187             @returns {Boolean} Was the token found?
188          */
189         read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
190             var found;
191             if (
192                 (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
193                 || 
194                 (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
195             ) {
196                 
197                 while (!stream.look().eof && !Lang.isNewline(stream.look())) {
198                     found += stream.next();
199                 }
200                 
201                 if (this.keepComments) {
202                     tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", this.line));
203                 }
204                 this.line++;
205                 return true;
206             }
207             return false;
208         },
209
210         /**
211             @returns {Boolean} Was the token found?
212          */
213         read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
214             if (stream.look() == "\"") {
215                 // find terminator
216                 var string = stream.next();
217                 
218                 while (!stream.look().eof) {
219                     if (stream.look() == "\\") {
220                         if (Lang.isNewline(stream.look(1))) {
221                             do {
222                                 stream.next();
223                             } while (!stream.look().eof && Lang.isNewline(stream.look()));
224                             string += "\\\n";
225                         }
226                         else {
227                             string += stream.next(2);
228                         }
229                     }
230                     else if (stream.look() == "\"") {
231                         string += stream.next();
232                         tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE", this.line));
233                         return true;
234                     }
235                     else {
236                         string += stream.next();
237                     }
238                 }
239             }
240             return false; // error! unterminated string
241         },
242
243         /**
244             @returns {Boolean} Was the token found?
245          */
246         read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
247             if (stream.look() == "'") {
248                 // find terminator
249                 var string = stream.next();
250                 
251                 while (!stream.look().eof) {
252                     if (stream.look() == "\\") { // escape sequence
253                         string += stream.next(2);
254                     }
255                     else if (stream.look() == "'") {
256                         string += stream.next();
257                         tokens.push(new Token(string, "STRN", "SINGLE_QUOTE", this.line));
258                         return true;
259                     }
260                     else {
261                         string += stream.next();
262                     }
263                 }
264             }
265             return false; // error! unterminated string
266         },
267
268         /**
269             @returns {Boolean} Was the token found?
270          */
271         read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
272             if (stream.look() === "0" && stream.look(1) == "x") {
273                 return this.read_hex(stream, tokens);
274             }
275             
276             var found = "";
277             
278             while (!stream.look().eof && Lang.isNumber(found+stream.look())){
279                 found += stream.next();
280             }
281             
282             if (found === "") {
283                 return false;
284             }
285             else {
286                 if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
287                 else tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
288                 return true;
289             }
290         },
291         /*t:
292             requires("../lib/JSDOC/TextStream.js");
293             requires("../lib/JSDOC/Token.js");
294             requires("../lib/JSDOC/Lang.js");
295             
296             plan(3, "testing read_numb");
297             
298             //// setup
299             var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
300             var tr = new TokenReader();
301             var tokens = tr.tokenize(new TextStream(src));
302             
303             var hexToken, octToken, decToken;
304             for (var i = 0; i < tokens.length; i++) {
305                 if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
306                 if (tokens[i].name == "OCTAL") octToken = tokens[i];
307                 if (tokens[i].name == "DECIMAL") decToken = tokens[i];
308             }
309             ////
310             
311             is(decToken.data, "8.0", "decimal number is found in source.");
312             is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
313             is(octToken.data, "0777", "octal number is found in source.");
314         */
315
316         /**
317             @returns {Boolean} Was the token found?
318          */
319         read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
320             var found = stream.next(2);
321             
322             while (!stream.look().eof) {
323                 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
324                     tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
325                     return true;
326                 }
327                 else {
328                     found += stream.next();
329                 }
330             }
331             return false;
332         },
333
334         /**
335             @returns {Boolean} Was the token found?
336          */
337         read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
338             var last;
339             if (
340                 stream.look() == "/"
341                 && 
342                 (
343                     
344                     (
345                         !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
346                         || 
347                         (
348                                !last.is("NUMB")
349                             && !last.is("NAME")
350                             && !last.is("RIGHT_PAREN")
351                             && !last.is("RIGHT_BRACKET")
352                         )
353                     )
354                 )
355             ) {
356                 var regex = stream.next();
357                 
358                 while (!stream.look().eof) {
359                     if (stream.look() == "\\") { // escape sequence
360                         regex += stream.next(2);
361                     }
362                     else if (stream.look() == "/") {
363                         regex += stream.next();
364                         
365                         while (/[gmi]/.test(stream.look())) {
366                             regex += stream.next();
367                         }
368                         
369                         tokens.push(new Token(regex, "REGX", "REGX", this.line));
370                         return true;
371                     }
372                     else {
373                         regex += stream.next();
374                     }
375                 }
376                 // error: unterminated regex
377             }
378             return false;
379         }
380 });