JSDOC/Packer.vala
[gnome.introspection-doc-generator] / JSDOC / TokenReader.vala
1 //<script type="text/javascript">
2
3  
4 // test code
5  
6 //const Token   = imports.Token.Token;
7 //const Lang    = imports.Lang.Lang;
8
9 /**
10         @class Search a {@link JSDOC.TextStream} for language tokens.
11 */
12
13
14
15
16 namespace JSDOC {
17
18     public class TokenArray: Object {
19         
20         public Gee.ArrayList<Token> tokens;
21         public int length {
22             get { return this.tokens.size; }
23         }
24         
25         public TokenArray()
26         {
27             this.tokens = new Gee.ArrayList<Token>();
28         }
29         
30         public Token? last() {
31             if (this.tokens.size > 0) {
32                 return this.tokens.get(this.tokens.size-1);
33             }
34             return null;
35         }
36         public Token? lastSym () {
37             for (var i = this.tokens.size-1; i >= 0; i--) {
38                 if (!(this.tokens.get(i).is("WHIT") || this.tokens.get(i).is("COMM")))  {
39                     return this.tokens.get(i);
40                 }
41             }
42             return null;
43         }
44         public void push (Token t) {
45             this.tokens.add(t);
46         }
47         public Token? pop ()
48         {
49             if (this.tokens.size > 0) {
50                 return this.tokens.remove_at(this.tokens.size-1);
51             }
52             return null;
53         }
54         
55             public new Token get(int i) {
56             return this.tokens.get(i);
57         }
58         public void dump()
59         {
60                 foreach(var token in this.tokens) {
61                         print(token.asString() +"\n");
62                 }
63         }
64         
65     }
66
67     public errordomain TokenReader_Error {
68             ArgumentError
69     }
70     
71
72     public class TokenReader : Object
73     {
74         
75         
76         
77         /*
78          *
79          * I wonder if this will accept the prop: value, prop2 :value construxtor if we do not define one...
80          */
81         
82         /** @cfg {Boolean} collapseWhite merge multiple whitespace/comments into a single token **/
83         public bool collapseWhite = false; // only reduces white space...
84         /** @cfg {Boolean} keepDocs keep JSDOC comments **/
85         public bool keepDocs = true;
86         /** @cfg {Boolean} keepWhite keep White space **/
87         public bool keepWhite = false;
88         /** @cfg {Boolean} keepComments  keep all comments **/
89         public bool keepComments = false;
90         /** @cfg {Boolean} sepIdents seperate identifiers (eg. a.b.c into ['a', '.', 'b', '.', 'c'] ) **/
91         public bool sepIdents = false;
92         /** @cfg {String} filename name of file being parsed. **/
93         public string filename = "";
94         /** @config {Boolean} ignoreBadGrammer do not throw errors if we find stuff that might break compression **/
95         public bool ignoreBadGrammer = false;
96         
97         
98         int line = 0;
99         
100         /**
101          * tokenize a stream
102          * @return {Array} of tokens
103          * 
104          * ts = new TextStream(File.read(str));
105          * tr = TokenReader({ keepComments : true, keepWhite : true });
106          * tr.tokenize(ts)
107          * 
108          */
109         public TokenArray tokenize(TextStream stream)
110         {
111             this.line =1;
112             var tokens = new TokenArray();
113            
114          
115             while (!stream.lookEOF()) {
116                 
117
118                 if (this.read_mlcomment(stream, tokens)) continue;
119                 if (this.read_slcomment(stream, tokens)) continue;
120                 if (this.read_dbquote(stream, tokens))   continue;
121                 if (this.read_snquote(stream, tokens))   continue;
122                 if (this.read_regx(stream, tokens))      continue;
123                 if (this.read_numb(stream, tokens))      continue;
124                 if (this.read_punc(stream, tokens))      continue;
125                 if (this.read_newline(stream, tokens))   continue;
126                 if (this.read_space(stream, tokens))     continue;
127                 if (this.read_word(stream, tokens))      continue;
128                 
129                 // if execution reaches here then an error has happened
130                 tokens.push(
131                         new Token(stream.next(), "TOKN", "UNKNOWN_TOKEN", this.line)
132                 );
133             }
134             
135             
136             
137             return tokens;
138         }
139
140         /**
141          * findPuncToken - find the id of a token (previous to current)
142          * need to back check syntax..
143          * 
144          * @arg {Array} tokens the array of tokens.
145          * @arg {String} token data (eg. '(')
146          * @arg {Number} offset where to start reading from
147          * @return {Number} position of token
148          */
149         public int findPuncToken(TokenArray tokens, string data, int n)
150         {
151             n = n > 0 ? n :  tokens.length -1;
152             var stack = 0;
153             while (n > -1) {
154                 
155                 if (stack < 1 && tokens.get(n).data == data) {
156                     return n;
157                 }
158                 
159                 if (tokens.get(n).data  == ")" || tokens.get(n).data  == "}") {
160                     stack++;
161                     n--;
162                     continue;
163                 }
164                 if (stack > 0 && (tokens.get(n).data  == "{" || tokens.get(n).data  == "(")) {
165                     stack--;
166                     n--;
167                     continue;
168                 }
169                 
170                 
171                 n--;
172             }
173             return -1;
174         }
175         /**
176          * lastSym - find the last token symbol
177          * need to back check syntax..
178          * 
179          * @arg {Array} tokens the array of tokens.
180          * @arg {Number} offset where to start..
181          * @return {Token} the token
182          */
183         public Token? lastSym(TokenArray tokens, int n)
184         {
185             for (var i = n-1; i >= 0; i--) {
186                 if (!(tokens.get(i).is("WHIT") || tokens.get(i).is("COMM"))) {
187                     return tokens.get(i);
188                 }
189             }
190             return null;
191         }
192         
193          
194         
195         /**
196             @returns {Boolean} Was the token found?
197          */
198         public bool read_word (TextStream stream, TokenArray tokens)
199         {
200             string found = "";
201             while (!stream.lookEOF() && Lang.isWordChar(stream.look().to_string())) {
202                 found += stream.next();
203             }
204             
205             if (found == "") {
206                 return false;
207             }
208             
209             var name = Lang.keyword(found);
210             if (name != null) {
211                 
212                 // look for "()return" ?? why ???
213                 var ls = tokens.lastSym();
214                 if (found == "return" && ls != null && ls.data == ")") {
215                     //Seed.print('@' + tokens.length);
216                     var n = this.findPuncToken(tokens, ")", 0);
217                     //Seed.print(')@' + n);
218                     n = this.findPuncToken(tokens, "(", n-1);
219                     //Seed.print('(@' + n);
220                     
221                     //var lt = this.lastSym(tokens, n);
222                     /*
223                     //print(JSON.stringify(lt));
224                     if (lt.type != "KEYW" || ["IF", 'WHILE'].indexOf(lt.name) < -1) {
225                         if (!this.ignoreBadGrammer) {
226                             throw new TokenReader_Error.ArgumentError(
227                                 this.filename + ":" + this.line + " Error - return found after )"
228                             );
229                         }
230                     }
231                     
232                     */
233                     
234                 }
235                 
236                 tokens.push(new Token(found, "KEYW", name, this.line));
237                 return true;
238             }
239             
240             if (!this.sepIdents || found.index_of(".") < 0 ) {
241                 tokens.push(new Token(found, "NAME", "NAME", this.line));
242                 return true;
243             }
244             var n = found.split(".");
245             var p = false;
246             foreach (unowned string nm in n) {
247                 if (p) {
248                     tokens.push(new Token(".", "PUNC", "DOT", this.line));
249                 }
250                 p=true;
251                 tokens.push(new Token(nm, "NAME", "NAME", this.line));
252             }
253             return true;
254                 
255
256         }
257
258         /**
259             @returns {Boolean} Was the token found?
260          */
261         public bool read_punc (TextStream stream, TokenArray tokens) throws TokenReader_Error
262         {
263             string found = "";
264             
265             while (!stream.lookEOF()) {
266                         var ns = stream.look().to_string();
267
268                     if (null == Lang.punc(found + ns )) {
269                                 break;
270                         }
271                 found += stream.next();
272             }
273             
274             
275             if (found == "") {
276                 return false;
277             }
278             
279             var ls = tokens.lastSym();
280             
281             if ((found == "}" || found == "]") && ls != null && ls.data == ",") {
282                 //print("Error - comma found before " + found);
283                 //print(JSON.stringify(tokens.lastSym(), null,4));
284                 if (this.ignoreBadGrammer) {
285                     print("\n" + this.filename + ":" + this.line.to_string() + " Error - comma found before " + found);
286                 } else {
287                     throw new TokenReader_Error.ArgumentError(
288                                 this.filename + ":" + this.line.to_string() + "  comma found before " + found
289                   
290                     );
291                      
292                 }
293             }
294             
295             tokens.push(new Token(found, "PUNC", Lang.punc(found), this.line));
296             return true;
297             
298         } 
299
300         /**
301             @returns {Boolean} Was the token found?
302          */
303         public bool read_space  (TextStream stream, TokenArray tokens)
304         {
305             var found = "";
306             
307             while (!stream.lookEOF() && Lang.isSpaceC(  stream.look()) && !Lang.isNewlineC(stream.look())) {
308                 found += stream.next();
309             }
310             
311             if (found == "") {
312                 return false;
313             }
314             //print("WHITE = " + JSON.stringify(found));
315             
316              
317             if (this.collapseWhite) {
318                 found = " "; // this might work better if it was a '\n' ???
319             }
320             if (this.keepWhite) {
321                 tokens.push(new Token(found, "WHIT", "SPACE", this.line));
322             }
323             return true;
324         
325         }
326
327         /**
328             @returns {Boolean} Was the token found?
329          */
330         public bool read_newline  (TextStream stream, TokenArray tokens)
331         {
332             var found = "";
333             var line = this.line;
334             while (!stream.lookEOF() && Lang.isNewlineC(stream.look())) {
335                 this.line++;
336                 found += stream.next();
337             }
338             
339             if (found == "") {
340                 return false;
341             }
342             
343             // if we found a new line, then we could check if previous character was a ';' - if so we can drop it.
344             // otherwise generally keep it.. in which case it should reduce our issue with stripping new lines..
345            
346             
347             //this.line++;
348             if (this.collapseWhite) {
349                 found = "\n"; // reduces multiple line breaks into a single one...
350             }
351             
352             if (this.keepWhite) {
353                 var last = tokens.pop();
354                 if (last != null && last.name != "WHIT") {
355                     tokens.push(last);
356                 }
357                 // replaces last new line... 
358                 tokens.push(new Token(found, "WHIT", "NEWLINE", line));
359             }
360             return true;
361         }
362
363         /**
364             @returns {Boolean} Was the token found?
365          */
366         public bool read_mlcomment  (TextStream stream, TokenArray tokens)
367         {
368             if (stream.look() != '/') {
369                 return false;
370             }
371             if (stream.look(1) != '*') {
372                 return false;
373             }
374             var found = stream.next(2);
375             string  c = "";
376             var line = this.line;
377             while (!stream.lookEOF() && !(stream.look(-1) == '/' && stream.look(-2) == '*')) {
378                 c = stream.next();
379                 if (c == "\n") {
380                     this.line++;
381                 }
382                 found += c;
383             }
384             
385             // to start doclet we allow /** or /*** but not /**/ or /****
386             //if (found.length /^\/\*\*([^\/]|\*[^*])/.test(found) && this.keepDocs) {
387             if (this.keepDocs && found.length > 4 && found.index_of("/**") == 0 && found[3] != '/') {
388                 tokens.push(new Token(found, "COMM", "JSDOC", this.line));
389             } else if (this.keepComments) {
390                 tokens.push(new Token(found, "COMM", "MULTI_LINE_COMM", line));
391             }
392             return true;
393         
394         } 
395
396         /**
397             @returns {Boolean} Was the token found?
398          */
399          public bool read_slcomment  (TextStream stream, TokenArray tokens)
400          {
401             var found = "";
402             if (
403                 (stream.look() == '/' && stream.look(1) == '/' && (""!=(found=stream.next(2))))
404                 || 
405                 (stream.look() == '<' && stream.look(1) == '!' && stream.look(2) == '-' && stream.look(3) == '-' && (""!=(found=stream.next(4))))
406             ) {
407                 var line = this.line;
408                 while (!stream.lookEOF()) {
409                                         //print(stream.look().to_string());
410                         if ( Lang.isNewline(stream.look().to_string())) {
411                                 break;
412                         }
413                     found += stream.next();
414                 }
415                 if (!stream.lookEOF()) { // lookinng for end  of line... if we got it, then do not eat the character..
416                     found += stream.next();
417                 }
418                 if (this.keepComments) {
419                     tokens.push(new Token(found, "COMM", "SINGLE_LINE_COMM", line));
420                 }
421                 this.line++;
422                 return true;
423             }
424             return false;
425         }
426
427         /**
428             @returns {Boolean} Was the token found?
429          */
430         public bool read_dbquote  (TextStream stream, TokenArray tokens)
431         {
432             if (stream.look() != '"') {
433                 return false;
434             }
435                 // find terminator
436             var str = stream.next();
437             
438             while (!stream.lookEOF()) {
439                 if (stream.look() == '\\') {
440                     if (Lang.isNewline(stream.look(1).to_string())) {
441                         do {
442                             stream.next();
443                         } while (!stream.lookEOF() && Lang.isNewline(stream.look().to_string()));
444                         str += "\\\n";
445                     }
446                     else {
447                         str += stream.next(2);
448                     }
449                     continue;
450                 }
451                 if (stream.look() == '"') {
452                     str += stream.next();
453                     tokens.push(new Token(str, "STRN", "DOUBLE_QUOTE", this.line));
454                     return true;
455                 }
456             
457                 str += stream.next();
458                 
459             }
460             return false;
461         }
462
463         /**
464             @returns {Boolean} Was the token found?
465          */
466         public bool read_snquote  (TextStream stream, TokenArray tokens)
467         {
468             if (stream.look() != '\'') {
469                 return false;
470             }
471             // find terminator
472             var str = stream.next();
473             
474             while (!stream.lookEOF()) {
475                 if (stream.look() == '\\') { // escape sequence
476                     str += stream.next(2);
477                     continue;
478                 }
479                 if (stream.look() == '\'') {
480                     str += stream.next();
481                     tokens.push(new Token(str, "STRN", "SINGLE_QUOTE", this.line));
482                     return true;
483                 }
484                 str += stream.next();
485                 
486             }
487             return false;
488         }
489         
490
491         /**
492             @returns {Boolean} Was the token found?
493          */
494         public bool read_numb  (TextStream stream, TokenArray tokens)
495         {
496             if (stream.look() == '0' && stream.look(1) == 'x') {
497                 return this.read_hex(stream, tokens);
498             }
499             
500             var found = "";
501             
502             while (!stream.lookEOF() && Lang.isNumber(found+stream.look().to_string())){
503                 found += stream.next();
504             }
505             
506             if (found == "") {
507                 return false;
508             }
509             if (GLib.Regex.match_simple("^0[0-7]", found)) {
510                 tokens.push(new Token(found, "NUMB", "OCTAL", this.line));
511                 return true;
512             }
513             tokens.push(new Token(found, "NUMB", "DECIMAL", this.line));
514             return true;
515         
516         }
517        
518         /**
519             @returns {Boolean} Was the token found?
520          */
521         public bool read_hex  (TextStream stream, TokenArray tokens)
522         {
523             var found = stream.next(2);
524             
525             while (!stream.lookEOF()) {
526                 if (Lang.isHexDec(found) && !Lang.isHexDec(found+stream.look().to_string())) { // done
527                     tokens.push(new Token(found, "NUMB", "HEX_DEC", this.line));
528                     return true;
529                 }
530                 
531                 found += stream.next();
532                
533             }
534             return false;
535         }
536
537         /**
538             @returns {Boolean} Was the token found?
539          */
540         public bool read_regx (TextStream stream, TokenArray tokens)
541         {
542               
543             if (stream.look() != '/') {
544                 return false;
545             }
546             var  last = tokens.lastSym();
547             if (
548                 (last == null)
549                 || 
550                 (
551                        !last.is("NUMB")   // stuff that can not appear before a regex..
552                     && !last.is("NAME")
553                     && !last.is("RIGHT_PAREN")
554                     && !last.is("RIGHT_BRACKET")
555                 )
556             )  {
557                 var regex = stream.next();
558                 
559                 while (!stream.lookEOF()) {
560                     if (stream.look() == '\\') { // escape sequence
561                         regex += stream.next(2);
562                         continue;
563                     }
564                     if (stream.look() == '/') {
565                         regex += stream.next();
566                         
567                         while (GLib.Regex.match_simple("[gmi]", stream.look().to_string())) {
568                             regex += stream.next();
569                         }
570                         
571                         tokens.push(new Token(regex, "REGX", "REGX", this.line));
572                         return true;
573                     }
574                      
575                     regex += stream.next();
576                      
577                 }
578                 // error: unterminated regex
579             }
580             return false;
581         }
582     }
583 }