sync fixes
[app.jsdoc] / JSDOC / TokenReader.js
//<script type="text/javascript">


XObject = imports.XObject.XObject;
console = imports.console.console;


Token   = imports.Token.Token;
Lang    = imports.Lang.Lang;

/**
        @class Search a {@link JSDOC.TextStream} for language tokens.
        @scope JSDOC

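        Example usage (a minimal sketch; assumes a JSDOC.TextStream built from the source text):

        var tr = new TokenReader({ keepComments : true, keepWhite : true });
        var toks = tr.tokenize(new TextStream("var a = 1;"));
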
*/
TokenReader = XObject.define(
    function(o) {
        
        XObject.extend(this, o || {});
        //print("READER: " + JSON.stringify(o, null,4));
    },
    Object,
    {
        /**
         * @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token
         */
        collapseWhite : false, // only reduces white space...
        /**
         * @cfg {Boolean} keepDocs keep JSDOC comments
         */
        keepDocs : true,
        /**
         * @cfg {Boolean} keepWhite keep white space
         */
        keepWhite : false,
        /**
         * @cfg {Boolean} keepComments keep all comments
         */
        keepComments : false,
        /**
         * @cfg {Boolean} sepIdents separate identifiers (e.g. a.b.c into ['a', '.', 'b', '.', 'c'])
         */
        sepIdents : false,
        /**
         * @cfg {String} filename name of the file being parsed.
         */
        filename : '',
        /**
         * @cfg {Boolean} ignoreBadGrammer do not throw errors when constructs that might break compression are found
         */
        ignoreBadGrammer : false,
        /**
         * @cfg {String} lang which language to parse ('js' or 'php').
         */
        lang : 'js',
        /**
         * tokenize a stream
         * @param {JSDOC.TextStream} stream The text stream to tokenize
         * @return {Array} the array of tokens
         * 
         * ts = new TextStream(File.read(str));
         * tr = new TokenReader({ keepComments : true, keepWhite : true });
         * tr.tokenize(ts);
         * 
         */
        tokenize : function(stream) {
            this.line = 1;
            var tokens = [];
            /**@ignore*/
            tokens.last    = function() { return tokens[tokens.length-1]; };
            /**@ignore*/
            tokens.lastSym = function() {
                for (var i = tokens.length-1; i >= 0; i--) {
                    if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
                }
                return null;
            };
            
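            // incode tracks the PHP code-block state:
            //   -1 : plain JS source, always "in code"
            //    0 : PHP source, currently outside a <?php ... ?> block
            //    1 : PHP source, currently inside a <?php ... ?> block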
            var incode = this.lang == 'php' ? 0 : -1;
            
            while (!stream.look().eof) {
                
                if (incode == 0) {
                    if (this.read_codestart(stream, tokens)) {
                        incode = 1;
                    }
                }
                if (incode == 1) {
                    if (this.read_codeend(stream, tokens)) {
                        incode = 0;
                        continue;
                    }
                }
                if (this.read_shebang(stream, tokens))    continue;
                
                if (this.read_mlcomment(stream, tokens)) continue;
                if (this.read_slcomment(stream, tokens)) continue;
                if (this.read_dbquote(stream, tokens))   continue;
                if (this.read_snquote(stream, tokens))   continue;
                if (this.read_regx(stream, tokens))      continue;
                if (this.read_numb(stream, tokens))      continue;
                if (this.read_punc(stream, tokens))      continue;
                if (this.read_newline(stream, tokens))   continue;
                if (this.read_space(stream, tokens))     continue;
                if (this.read_word(stream, tokens))      continue;
                // if execution reaches here then an error has happened
                tokens.push(new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line));
            }
            
            return tokens;
        },
        
        /**
         * findPuncToken - find the index of a matching token (searching backwards
         * from the current position), balancing nested brackets along the way.
         * 
         * @arg {Array} tokens the array of tokens.
         * @arg {String} data the token data to look for (e.g. '(')
         * @arg {Number} n offset to start reading back from (defaults to the last token)
         * @return {Number} position of the token, or -1 if not found
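         * 
         * e.g. with the tokens for "a(b(c))" and n pointing just before the final ')',
         * findPuncToken(tokens, '(', n) skips the nested '(' ... ')' pair and returns
         * the index of the outer '(' rather than the inner one.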
         */
        findPuncToken : function(tokens, data, n) {
            n = n || tokens.length - 1;
            var stack = 0;
            while (n > -1) {
                
                if (!stack && tokens[n].data == data) {
                    return n;
                }
                
                if (tokens[n].data == ')' || tokens[n].data == '}') {
                    stack++;
                    n--;
                    continue;
                }
                if (stack && (tokens[n].data == '{' || tokens[n].data == '(')) {
                    stack--;
                    n--;
                    continue;
                }
                
                n--;
            }
            return -1;
        },
        /**
         * lastSym - find the last token symbol before a given offset,
         * skipping whitespace and comment tokens.
         * 
         * @arg {Array} tokens the array of tokens.
         * @arg {Number} n offset to start looking back from.
         * @return {Token} the token, or null if none is found
         */
        lastSym : function(tokens, n) {
            for (var i = n-1; i >= 0; i--) {
                if (!(tokens[i].is("WHIT") || tokens[i].is("COMM"))) return tokens[i];
            }
            return null;
        },
        
        /**
            @returns {Boolean} Was the token found?
         */
        read_word : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";
            while (!stream.look().eof && Lang.isWordChar(stream.look())) {
                found += stream.next();
            }
            
            if (found === "") {
                return false;
            }
            
            var name;
            if ((name = Lang.keyword(found))) {
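                // "return" immediately after ')' is only valid after an if/while
                // condition; anywhere else it usually means a missing semicolon or
                // brace that would break once whitespace is stripped, so walk back
                // to the matching '(' and check which keyword (if any) precedes it.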
                if (found == 'return' && tokens.lastSym().data == ')') {
                    //Seed.print('@' + tokens.length);
                    var n = this.findPuncToken(tokens, ')');
                    //Seed.print(')@' + n);
                    n = this.findPuncToken(tokens, '(', n-1);
                    //Seed.print('(@' + n);
                    
                    var lt = this.lastSym(tokens, n);
                    //Seed.print(JSON.stringify(lt));
                    if (!lt || lt.type != 'KEYW' || ['IF', 'WHILE'].indexOf(lt.name) < 0) {
                        if (!this.ignoreBadGrammer) {
                            throw {
                                name : "ArgumentError", 
                                message: "\n" + this.filename + ':' + this.line + " Error - return found after )"
                            };
                        }
                    }
                    
                }
                
                tokens.push(new Token(found, "KEYW", name, this.line));
                return true;
            }
            if (!this.sepIdents || found.indexOf('.') < 0) {
                tokens.push(new Token(found, "NAME", "NAME", this.line));
                return true;
            }
            var n = found.split('.');
            var p = false;
            var _this = this;
            n.forEach(function(nm) {
                if (p) {
                    tokens.push(new Token('.', "PUNC", "DOT", _this.line));
                }
                p = true;
                tokens.push(new Token(nm, "NAME", "NAME", _this.line));
            });
            return true;
        },

        /**
            @returns {Boolean} Was the token found?
         */
        read_punc : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";
            var name;
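            // greedily extend the punctuation while the longer string is still a
            // known operator (e.g. '<', then '<<', then '<<=')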
            while (!stream.look().eof && Lang.punc(found+stream.look())) {
                found += stream.next();
            }
            
            if (found === "") {
                return false;
            }
            
            if ((found == '}' || found == ']') && tokens.lastSym().data == ',') {
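                // a trailing comma before '}' or ']' (e.g. "[1, 2, ]") breaks older
                // JavaScript engines, so flag it as bad grammar here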
                //print("Error - comma found before " + found);
                //print(JSON.stringify(tokens.lastSym(), null,4));
                if (this.ignoreBadGrammer) {
                    print("\n" + this.filename + ':' + this.line + " Error - comma found before " + found);
                } else {
                    throw {
                        name : "ArgumentError", 
                        message: "\n" + this.filename + ':' + this.line + " Error - comma found before " + found
                    };
                }
            }
            
            tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
            return true;
        },

        /**
            @returns {Boolean} Was the token found?
         */
        read_space : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";
            
            while (!stream.look().eof && Lang.isSpace(stream.look()) && !Lang.isNewline(stream.look())) {
                found += stream.next();
            }
            
            if (found === "") {
                return false;
            }
            //print("WHITE = " + JSON.stringify(found));
            if (this.collapseWhite) found = " ";
            if (this.keepWhite) tokens.push(new Token(found, "WHIT", "SPACE", this.line));
            return true;
        },

        /**
            @returns {Boolean} Was the token found?
         */
        read_newline : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = "";
            var line = this.line;
            while (!stream.look().eof && Lang.isNewline(stream.look())) {
                this.line++;
                found += stream.next();
            }
            
            if (found === "") {
                return false;
            }
            //this.line++;
            if (this.collapseWhite) {
                found = "\n";
            }
            if (this.keepWhite) {
                var last = tokens.pop();
                if (last && last.name != "WHIT") {
                    tokens.push(last);
                }
                
                tokens.push(new Token(found, "WHIT", "NEWLINE", line));
            }
            return true;
        },

        /**
            @returns {Boolean} Was the token found?
         */
        read_mlcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "/" && stream.look(1) == "*") {
                var found = stream.next(2);
                var c = '';
                var line = this.line;
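                // keep reading until the two characters just consumed were "*/"
                // (negative look() offsets peek back at characters already read)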
                while (!stream.look().eof && !(stream.look(-1) == "/" && stream.look(-2) == "*")) {
                    c = stream.next();
                    if (c == "\n") this.line++;
                    found += c;
                }
                
                // to start doclet we allow /** or /*** but not /**/ or /****
                if (/^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) tokens.push(new Token(found, "COMM", "JSDOC", this.line));
                else if (this.keepComments) tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
                return true;
            }
            return false;
        },

        /**
            @returns {Boolean} Was the token found?
         */
        read_slcomment : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found;
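            // matches "//" comments, and also HTML-style "<!--" openers, which are
            // treated the same way (read to the end of the line)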
            if (
                (stream.look() == "/" && stream.look(1) == "/" && (found=stream.next(2)))
                || 
                (stream.look() == "<" && stream.look(1) == "!" && stream.look(2) == "-" && stream.look(3) == "-" && (found=stream.next(4)))
            ) {
                var line = this.line;
                while (!stream.look().eof && !Lang.isNewline(stream.look())) {
                    found += stream.next();
                }
                if (!stream.look().eof) {
                    found += stream.next();
                }
                if (this.keepComments) {
                    tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
                }
                this.line++;
                return true;
            }
            return false;
        },

        /**
            @returns {Boolean} Was the token found?
         */
        read_dbquote : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "\"") {
                // find terminator
                var string = stream.next();
                
                while (!stream.look().eof) {
                    if (stream.look() == "\\") {
                        if (Lang.isNewline(stream.look(1))) {
                            do {
                                stream.next();
                            } while (!stream.look().eof && Lang.isNewline(stream.look()));
                            string += "\\\n";
                        }
                        else {
                            string += stream.next(2);
                        }
                    }
                    else if (stream.look() == "\"") {
                        string += stream.next();
                        tokens.push(new Token(string, "STRN", "DOUBLE_QUOTE", this.line));
                        return true;
                    }
                    else {
                        string += stream.next();
                    }
                }
            }
            return false; // error! unterminated string
        },

        /**
            @returns {Boolean} Was the token found?
         */
        read_snquote : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() == "'") {
                // find terminator
                var string = stream.next();
                
                while (!stream.look().eof) {
                    if (stream.look() == "\\") { // escape sequence
                        string += stream.next(2);
                    }
                    else if (stream.look() == "'") {
                        string += stream.next();
                        tokens.push(new Token(string, "STRN", "SINGLE_QUOTE", this.line));
                        return true;
                    }
                    else {
                        string += stream.next();
                    }
                }
            }
            return false; // error! unterminated string
        },

        /**
            @returns {Boolean} Was the token found?
         */
        read_numb : function(/**JSDOC.TokenStream*/stream, tokens) {
            if (stream.look() === "0" && stream.look(1) == "x") {
                return this.read_hex(stream, tokens);
            }
            
            var found = "";
            
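            // accumulate characters while the string so far still parses as a
            // number, so decimals like "8.0" come out as a single token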
            while (!stream.look().eof && Lang.isNumber(found+stream.look())) {
                found += stream.next();
            }
            
            if (found === "") {
                return false;
            }
            else {
                if (/^0[0-7]/.test(found)) tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
                else tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
                return true;
            }
        },
        /*t:
            requires("../lib/JSDOC/TextStream.js");
            requires("../lib/JSDOC/Token.js");
            requires("../lib/JSDOC/Lang.js");
            
            plan(3, "testing read_numb");
            
            //// setup
            var src = "function foo(num){while (num+8.0 >= 0x20 && num < 0777){}}";
            var tr = new TokenReader();
            var tokens = tr.tokenize(new TextStream(src));
            
            var hexToken, octToken, decToken;
            for (var i = 0; i < tokens.length; i++) {
                if (tokens[i].name == "HEX_DEC") hexToken = tokens[i];
                if (tokens[i].name == "OCTAL") octToken = tokens[i];
                if (tokens[i].name == "DECIMAL") decToken = tokens[i];
            }
            ////
            
            is(decToken.data, "8.0", "decimal number is found in source.");
            is(hexToken.data, "0x20", "hexdec number is found in source (issue #99).");
            is(octToken.data, "0777", "octal number is found in source.");
        */

        /**
            @returns {Boolean} Was the token found?
         */
        read_hex : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = stream.next(2);
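            // "0x" has just been consumed; keep pulling characters until adding the
            // next one would no longer form a valid hex literal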
            
            while (!stream.look().eof) {
                if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look())) { // done
                    tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
                    return true;
                }
                else {
                    found += stream.next();
                }
            }
            return false;
        },
        /**
         * read_shebang - read a shebang line (e.g. #!/usr/bin/seed)
         * @param {JSDOC.TokenStream} stream the text stream
         * @param {Array} tokens
         * @returns {Boolean} Was the token found?
         */
        read_shebang : function(/**JSDOC.TokenStream*/stream, tokens)
        {
            if (stream.cursor != 0) {
                return false;
            }
            
            if (stream.look() == '#' && stream.look(1) == '!') {
                while (!stream.look().eof && stream.next() != "\n") { }
                return true;
            }
            return false;
        },
        
        /**
         * read_codeend - read a PHP code-end marker "?>" (only used when lang is 'php')
         * @returns {Boolean} Was the token found?
         */
        read_codeend : function(/**JSDOC.TokenStream*/stream, tokens)
        {
            if (stream.look() == '?' && stream.look(1) == '>') {
                stream.next(2);
                return true;
            }
            return false;
        },
        
        /**
         * read_codestart - scan forward to a PHP "<?php" code-start marker (only used when lang is 'php')
         * @returns {Boolean} Was the token found?
         */
        read_codestart : function(/**JSDOC.TokenStream*/stream, tokens) {
            var found = '';
            
            while (!stream.look().eof) {
                var c = stream.next(1);
                
                if (c != '<') {
                    found += c;
                    continue;
                }
                // got '<'
                found += c;
                c = stream.next(1);
                if (c != '?') {
                    found += c;
                    continue;
                }
                found += c;
                var php = stream.next(3).toLowerCase();
                //print(php);
                if (php != 'php') {
                    found += php;
                    continue;
                }
                // got <?php
                return true;
            }
            return false;
        },
        
        /**
            @returns {Boolean} Was the token found?
         */
        read_regx : function(/**JSDOC.TokenStream*/stream, tokens) {
            var last;
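            // heuristic: a "/" starts a regex literal only when the previous symbol
            // could not have ended an expression; otherwise it is the division
            // operator (e.g. "a = /b/" vs "a / b")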
            if (
                stream.look() == "/"
                && 
                (
                    !(last = tokens.lastSym()) // there is no last, the regex is the first symbol
                    || 
                    (
                           !last.is("NUMB")
                        && !last.is("NAME")
                        && !last.is("RIGHT_PAREN")
                        && !last.is("RIGHT_BRACKET")
                    )
                )
            ) {
                var regex = stream.next();
                
                while (!stream.look().eof) {
                    if (stream.look() == "\\") { // escape sequence
                        regex += stream.next(2);
                    }
                    else if (stream.look() == "/") {
                        regex += stream.next();
                        
                        while (/[gmi]/.test(stream.look())) {
                            regex += stream.next();
                        }
                        
                        tokens.push(new Token(regex, "REGX", "REGX", this.line));
                        return true;
                    }
                    else {
                        regex += stream.next();
                    }
                }
                // error: unterminated regex
            }
            return false;
        }
});