JSDOC/TokenReader.js
[gnome.introspection-doc-generator] / JSDOC / TokenReader.js
1 //<script type="text/javascript">
2
3  
4 XObject = imports.XObject.XObject;
5 console = imports.console.console;
6
7
8 Token   = imports.Token.Token;
9 Lang    = imports.Lang.Lang;
10
11 /**
12         @class Search a {@link JSDOC.TextStream} for language tokens.
13 */
14 TokenReader = XObject.define(
15     function(o) {
16         
17         this.keepDocs = true;
18         this.keepWhite = false;
19         this.keepComments = false;
20         this.sepIdents = false; // seperate '.' in identifiers..
21         XObject.extend(this, o || {});
22         
23     },
24     Object,
25     {
26             
27
        /**
         * Tokenize a stream.
         * @return {JSDOC.Token[]} array of tokens
         *
         * Example:
         * ts = new TextStream();
         * tr = new TokenReader({ keepComments : true, keepWhite : true });
         * tr.tokenize(ts)
         */
41
42
43         tokenize : function(/**JSDOC.TextStream*/stream) {
44             this.line =1;
45             var tokens = [];
46             /**@ignore*/ tokens.last    = function() { return tokens[tokens.length-1]; }
47             /**@ignore*/ tokens.lastSym = function() {
48                 for (var i = tokens.length-1; i >= 0; i--) {
49                     if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
50                 }
51             }
52
53             while (!stream.look().eof) {
54                 if (this.read_mlcomment(stream, tokens)) continue;
55                 if (this.read_slcomment(stream, tokens)) continue;
56                 if (this.read_dbquote(stream, tokens))   continue;
57                 if (this.read_snquote(stream, tokens))   continue;
58                 if (this.read_regx(stream, tokens))      continue;
59                 if (this.read_numb(stream, tokens))      continue;
60                 if (this.read_punc(stream, tokens))      continue;
61                 if (this.read_newline(stream, tokens))   continue;
62                 if (this.read_space(stream, tokens))     continue;
63                 if (this.read_word(stream, tokens))      continue;
64                 
65                 // if execution reaches here then an error has happened
66                 tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line));
67             }
68             
69             
70             
71             return tokens;
72         },
73
74         /**
75             @returns {Boolean} Was the token found?
76          */
77         read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
78             var found = "";
79             while (!stream.look().eof && Lang.isWordChar(stream.look())) {
80                 found += stream.next();
81             }
82             
83             if (found === "") {
84                 return false;
85             }
86             else {
87                 var name;
88                 if ((name = Lang.keyword(found))) {
89                     tokens.push(new Token(found, "KEYW", name, this.line));
90                     return true;
91                 }
92                 if (!this.sepIdents || found.indexOf('.') < 0 ) {
93                     tokens.push(new Token(found, "NAME", "NAME", this.line));
94                     return true;
95                 }
96                 var n = found.split('.');
97                 var p = false;
98                 n.forEach(function(nm) {
99                     if (p) {
100                         tokens.push(new Token('.', "PUNC", "DOT", this.line));
101                     }
102                     p=true;
103                     tokens.push(new Token(nm, "NAME", "NAME", this.line));
104                 });
105                 return true;
106                 
107             }
108         },
109
110         /**
111             @returns {Boolean} Was the token found?
112          */
113         read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
114             var found = "";
115             var name;
116             while (!stream.look().eof && Lang.punc(found+stream.look())) {
117                 found += stream.next();
118             }
119             
120             if (found === "") {
121                 return false;
122             }
123             else {
124                 tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
125                 return true;
126             }
127         },
128
129         /**
130             @returns {Boolean} Was the token found?
131          */
132         read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
133             var found = "";
134             
135             while (!stream.look().eof && Lang.isSpace(stream.look())) {
136                 found += stream.next();
137             }
138             
139             if (found === "") {
140                 return false;
141             }
142             else {
143                 if (this.collapseWhite) found = " ";
144                 if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE", this.line));
145                 return true;
146             }
147         },
148
149         /**
150             @returns {Boolean} Was the token found?
151          */
152         read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
153             var found = "";
154             
155             while (!stream.look().eof && Lang.isNewline(stream.look())) {
156                 this.line++;
157                 found += stream.next();
158             }
159             
160             if (found === "") {
161                 return false;
162             }
163             else {
164                 if (this.collapseWhite) found = "\n";
165                 if (this.keepWhite) tokens.push(new Token(found, "WHIT", "NEWLINE", this.line));
166                 return true;
167             }
168         },
169
170         /**
171             @returns {Boolean} Was the token found?
172          */
173         read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
174             if (stream.look() == "/" && stream.look(1) == "*") {
175                 var found = stream.next(2);
176                 var c = '';
177                 while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) {
178                     c = stream.next();
179                     if (c == "\n") this.line++;
180                     found += c;
181                 }
182                 
183                 // to start doclet we allow /** or /*** but not /**/ or /****
184                 if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC", this.line));
185                 else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", this.line));
186                 return true;
187             }
188             return false;
189         },
190
191         /**
192             @returns {Boolean} Was the token found?
193          */
194         read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
195             var found;
196             if (
197                 (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
198                 || 
199                 (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
200             ) {
201                 
202                 while (!stream.look().eof && !Lang.isNewline(stream.look())) {
203                     found += stream.next();
204                 }
205                 
206                 if (this.keepComments) {
207                     tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", this.line));
208                 }
209                 this.line++;
210                 return true;
211             }
212             return false;
213         },
214
215         /**
216             @returns {Boolean} Was the token found?
217          */
218         read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
219             if (stream.look() == "\"") {
220                 // find terminator
221                 var string = stream.next();
222                 
223                 while (!stream.look().eof) {
224                     if (stream.look() == "\\") {
225                         if (Lang.isNewline(stream.look(1))) {
226                             do {
227                                 stream.next();
228                             } while (!stream.look().eof && Lang.isNewline(stream.look()));
229                             string += "\\\n";
230                         }
231                         else {
232                             string += stream.next(2);
233                         }
234                     }
235                     else if (stream.look() == "\"") {
236                         string += stream.next();
237                         tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE", this.line));
238                         return true;
239                     }
240                     else {
241                         string += stream.next();
242                     }
243                 }
244             }
245             return false; // error! unterminated string
246         },
247
248         /**
249             @returns {Boolean} Was the token found?
250          */
251         read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
252             if (stream.look() == "'") {
253                 // find terminator
254                 var string = stream.next();
255                 
256                 while (!stream.look().eof) {
257                     if (stream.look() == "\\") { // escape sequence
258                         string += stream.next(2);
259                     }
260                     else if (stream.look() == "'") {
261                         string += stream.next();
262                         tokens.push(new Token(string, "STRN", "SINGLE_QUOTE", this.line));
263                         return true;
264                     }
265                     else {
266                         string += stream.next();
267                     }
268                 }
269             }
270             return false; // error! unterminated string
271         },
272
273         /**
274             @returns {Boolean} Was the token found?
275          */
276         read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
277             if (stream.look() === "0" && stream.look(1) == "x") {
278                 return this.read_hex(stream, tokens);
279             }
280             
281             var found = "";
282             
283             while (!stream.look().eof && Lang.isNumber(found+stream.look())){
284                 found += stream.next();
285             }
286             
287             if (found === "") {
288                 return false;
289             }
290             else {
291                 if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
292                 else tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
293                 return true;
294             }
295         },
296         /*t:
297             requires("../lib/JSDOC/TextStream.js");
298             requires("../lib/JSDOC/Token.js");
299             requires("../lib/JSDOC/Lang.js");
300             
301             plan(3, "testing read_numb");
302             
303             //// setup
304             var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
305             var tr = new TokenReader();
306             var tokens = tr.tokenize(new TextStream(src));
307             
308             var hexToken, octToken, decToken;
309             for (var i = 0; i < tokens.length; i++) {
310                 if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
311                 if (tokens[i].name == "OCTAL") octToken = tokens[i];
312                 if (tokens[i].name == "DECIMAL") decToken = tokens[i];
313             }
314             ////
315             
316             is(decToken.data, "8.0", "decimal number is found in source.");
317             is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
318             is(octToken.data, "0777", "octal number is found in source.");
319         */
320
321         /**
322             @returns {Boolean} Was the token found?
323          */
324         read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
325             var found = stream.next(2);
326             
327             while (!stream.look().eof) {
328                 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
329                     tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
330                     return true;
331                 }
332                 else {
333                     found += stream.next();
334                 }
335             }
336             return false;
337         },
338
339         /**
340             @returns {Boolean} Was the token found?
341          */
342         read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
343             var last;
344             if (
345                 stream.look() == "/"
346                 && 
347                 (
348                     
349                     (
350                         !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
351                         || 
352                         (
353                                !last.is("NUMB")
354                             && !last.is("NAME")
355                             && !last.is("RIGHT_PAREN")
356                             && !last.is("RIGHT_BRACKET")
357                         )
358                     )
359                 )
360             ) {
361                 var regex = stream.next();
362                 
363                 while (!stream.look().eof) {
364                     if (stream.look() == "\\") { // escape sequence
365                         regex += stream.next(2);
366                     }
367                     else if (stream.look() == "/") {
368                         regex += stream.next();
369                         
370                         while (/[gmi]/.test(stream.look())) {
371                             regex += stream.next();
372                         }
373                         
374                         tokens.push(new Token(regex, "REGX", "REGX", this.line));
375                         return true;
376                     }
377                     else {
378                         regex += stream.next();
379                     }
380                 }
381                 // error: unterminated regex
382             }
383             return false;
384         }
385 });