sync
[roojs1] / Roo / rtf / Parser.js
/**
 *
 *
 * Based on https://github.com/iarna/rtf-parser
 * It is really only designed to extract pict groups from pasted RTF.
 *
 * usage:
 *
 *  var images = new Roo.rtf.Parser(a_string).groups.filter(function(g) { return g.type == 'pict'; });
 *
 *
 */
13
14  
15
16
17
/**
 * Tokenise an RTF string one character at a time and build its groups.
 *
 * Every character is handed to the current state function
 * (this.parserState), which starts out as parseText. Groups whose
 * `ignorable` flag is exactly false are appended to this.groups when
 * they are closed.
 *
 * @param {String} text RTF source; null/undefined is treated as an empty string
 */
Roo.rtf.Parser = function(text) {
    this.text = '';
    this.parserState = this.parseText;

    // state used by the cmd* handlers
    this.groupStack = [];
    this.hexStore = [];
    this.doc = false; // replaced by a Roo.rtf.Document on the first '{'

    this.groups = []; // where we put the return.

    // a missing argument used to fail on text.length - parse nothing instead
    var input = (text === undefined || text === null) ? '' : text;

    for (var ii = 0; ii < input.length; ++ii) {
        var c = input[ii];

        // keep the position counters that are copied into every emitted event
        ++this.cpos;
        if (c === '\n') {
            ++this.row;
            this.col = 1;
        } else {
            ++this.col;
        }
        this.parserState(c);
    }
};
/**
 * Tokenizer states (parse*), event emitters (emit*) and event handlers
 * (cmd*) of the RTF parser.
 *
 * parse* functions consume one character each and pick the next state,
 * emit* functions turn the collected characters into an event object and
 * hand it to push(), which calls the matching cmd* handler.
 */
Roo.rtf.Parser.prototype = {
    text : '', // plain text collected since the last emitted event
    controlWord : '', // letters of the control word being read
    controlWordParam : '', // sign / digits following the control word
    hexChar : '', // hex digits collected for the current \'hh escape
    doc : false, // root group, created on the first '{'
    group : false, // group that currently receives content
    groupStack : false, // enclosing open groups (array, set in the constructor)
    hexStore : false, // hexchar events not yet flushed (array, set in the constructor)


    cpos : 0, // copied into events as pos / char
    row : 1, // copied into events as row
    col : 1, // copied into events as col


    /**
     * Dispatch an event to the handler named 'cmd' + el.type.
     * Events without a handler are logged with Roo.log and dropped.
     * @param {Object} el event object produced by one of the emit* methods
     */
    push : function (el)
    {
        var handler = 'cmd' + el.type;
        if (typeof this[handler] === 'undefined') {
            Roo.log('invalid cmd:' + el.type);
            return;
        }
        this[handler](el);
    },
    /**
     * Join the values of all buffered hexchar events into one string and add
     * it to the current group as a single Roo.rtf.Hex. Does nothing when the
     * buffer is empty.
     */
    flushHexStore : function()
    {
        if (this.hexStore.length < 1) {
            return;
        }
        var hexstr = this.hexStore.map(function(cmd) {
            return cmd.value;
        }).join('');

        // empty the buffer in place so the same array keeps being used
        this.hexStore.splice(0);

        if (!this.group) {
            return; // hex data outside of any group has nowhere to go
        }
        this.group.addContent(new Roo.rtf.Hex(hexstr));
    },

    /**
     * '{' : open a group. The first one becomes the document, later ones
     * are Roo.rtf.Group instances created with the enclosing group.
     */
    cmdgroupstart : function()
    {
        this.flushHexStore();
        if (this.group) {
            this.groupStack.push(this.group);
        }
        // parent..
        if (this.doc === false) {
            this.group = this.doc = new Roo.rtf.Document();
            return;
        }
        this.group = new Roo.rtf.Group(this.group);
    },
    /**
     * '\*' : flag the current group as ignorable.
     */
    cmdignorable : function()
    {
        this.flushHexStore();
        if (!this.group) {
            return; // marker outside of any group
        }
        this.group.ignorable = true;
    },
    /**
     * Backslash followed by CR or LF : add a Roo.rtf.Paragraph to the
     * current group.
     */
    cmdendparagraph : function()
    {
        this.flushHexStore();
        if (!this.group) {
            return;
        }
        this.group.addContent(new Roo.rtf.Paragraph());
    },
    /**
     * '}' : close the current group, attach it to its parent (if any) and
     * record it in this.groups when its ignorable flag is exactly false.
     */
    cmdgroupend : function ()
    {
        this.flushHexStore();
        var endingGroup = this.group;
        if (!endingGroup) {
            return; // unbalanced '}' - there is no open group to close
        }

        this.group = this.groupStack.pop();
        if (this.group) {
            this.group.addChild(endingGroup);
        }

        if (endingGroup.ignorable === false) {
            this.groups.push(endingGroup);
        }
    },
    /**
     * Plain text : add a Roo.rtf.Span built from the event to the current group.
     * @param {Object} cmd text event
     */
    cmdtext : function (cmd)
    {
        this.flushHexStore();
        if (!this.group) {
            // an RTF fragment, missing the {\rtf1 header - there is no group
            // to attach the text to, so it is dropped instead of failing.
            return;
        }
        this.group.addContent(new Roo.rtf.Span(cmd));
    },
    /**
     * Control word : the first one seen in a group without a type becomes
     * that group's type, every other one is stored as a Roo.rtf.Ctrl.
     * @param {Object} cmd controlword event
     */
    cmdcontrolword : function (cmd)
    {
        this.flushHexStore();
        if (!this.group) {
            return; // control word outside of any group
        }
        if (!this.group.type) {
            this.group.type = cmd.value;
            return;
        }
        // we actually don't care about ctrl words...
        this.group.addContent(new Roo.rtf.Ctrl(cmd));
    },
    /**
     * Hex escape : buffered until the next other event so that consecutive
     * escapes end up in one Roo.rtf.Hex (see flushHexStore).
     * @param {Object} cmd hexchar event
     */
    cmdhexchar : function(cmd) {
        this.hexStore.push(cmd);
    },
    /**
     * Tokenizer error : abort parsing.
     * @param {Object} cmd error event, cmd.value holds the message
     * @throws {Error} always
     */
    cmderror : function(cmd) {
        // 'Exception' is not a JavaScript global; Error carries the message
        throw new Error(cmd.value);
    },
    /**
     * '\|' formula character : only flushes pending hex data, the marker
     * itself is not stored.
     */
    cmdformula : function()
    {
        this.flushHexStore();
    },
    /**
     * '\:' index sub-entry : only flushes pending hex data, the marker
     * itself is not stored.
     */
    cmdindexsubentry : function()
    {
        this.flushHexStore();
    },


    /**
     * Default state: collect plain text until a backslash or brace shows up.
     * @param {String} c current character
     */
    parseText : function(c)
    {
        if (c === '\\') {
            this.parserState = this.parseEscapes;
        } else if (c === '{') {
            this.emitStartGroup();
        } else if (c === '}') {
            this.emitEndGroup();
        } else if (c === '\x0A' || c === '\x0D') {
            // cr/lf are noise chars
        } else {
            this.text += c;
        }
    },

    /**
     * State after a backslash: an escaped backslash or brace is literal
     * text, anything else is handed on to parseControlSymbol.
     * @param {String} c current character
     */
    parseEscapes: function (c)
    {
        if (c === '\\' || c === '{' || c === '}') {
            this.text += c;
            this.parserState = this.parseText;
        } else {
            this.parserState = this.parseControlSymbol;
            this.parseControlSymbol(c);
        }
    },
    /**
     * Handle the single-character control symbols; any other character
     * starts a control word.
     * @param {String} c character following the backslash
     */
    parseControlSymbol: function(c)
    {
        if (c === '~') {
            this.text += '\u00a0'; // nbsp
            this.parserState = this.parseText;
        } else if (c === '-') {
            this.text += '\u00ad'; // soft hyphen
            // the symbol is complete - go back to plain text
            this.parserState = this.parseText;
        } else if (c === '_') {
            this.text += '\u2011'; // non-breaking hyphen
            this.parserState = this.parseText;
        } else if (c === '*') {
            this.emitIgnorable();
            this.parserState = this.parseText;
        } else if (c === "'") {
            this.parserState = this.parseHexChar;
        } else if (c === '|') { // formula character
            this.emitFormula();
            this.parserState = this.parseText;
        } else if (c === ':') { // subentry in an index entry
            this.emitIndexSubEntry();
            this.parserState = this.parseText;
        } else if (c === '\x0a') {
            this.emitEndParagraph();
            this.parserState = this.parseText;
        } else if (c === '\x0d') {
            this.emitEndParagraph();
            this.parserState = this.parseText;
        } else {
            this.parserState = this.parseControlWord;
            this.parseControlWord(c);
        }
    },
    /**
     * State after \' : collect two hex digits and emit them as one hexchar
     * event. Any other character is reported through emitError.
     * @param {String} c current character
     */
    parseHexChar: function (c)
    {
        if (/^[A-Fa-f0-9]$/.test(c)) {
            this.hexChar += c;
            if (this.hexChar.length >= 2) {
                this.emitHexChar();
                this.parserState = this.parseText;
            }
            return;
        }
        // drop the partial digit so it cannot leak into a later escape
        this.hexChar = '';
        this.emitError("Invalid character \"" + c + "\" in hex literal.");
        this.parserState = this.parseText;
    },
    /**
     * Collect the letters of a control word. A space ends the word and is
     * consumed, '-' or a digit starts the parameter, anything else ends the
     * word and is processed again as plain text.
     * @param {String} c current character
     */
    parseControlWord : function(c)
    {
        if (c === ' ') {
            this.emitControlWord();
            this.parserState = this.parseText;
        } else if (/^[-\d]$/.test(c)) {
            this.parserState = this.parseControlWordParam;
            this.controlWordParam += c;
        } else if (/^[A-Za-z]$/.test(c)) {
            this.controlWord += c;
        } else {
            this.emitControlWord();
            this.parserState = this.parseText;
            this.parseText(c);
        }
    },
    /**
     * Collect the digits of a control word parameter. A space ends it and
     * is consumed, anything else ends it and is processed again as plain text.
     * @param {String} c current character
     */
    parseControlWordParam : function (c) {
        if (/^\d$/.test(c)) {
            this.controlWordParam += c;
        } else if (c === ' ') {
            this.emitControlWord();
            this.parserState = this.parseText;
        } else {
            this.emitControlWord();
            this.parserState = this.parseText;
            this.parseText(c);
        }
    },


    /**
     * Emit the collected plain text (if any) as a 'text' event and reset it.
     */
    emitText : function () {
        if (this.text === '') {
            return;
        }
        this.push({
            type: 'text',
            value: this.text,
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
        this.text = '';
    },
    /**
     * Emit the collected control word as a 'controlword' event; param is the
     * numeric parameter or false when there was none. An empty word is
     * reported through emitError. Word and parameter are reset afterwards.
     */
    emitControlWord : function ()
    {
        this.emitText();
        if (this.controlWord === '') {
            this.emitError('empty control word');
        } else {
            this.push({
                type: 'controlword',
                value: this.controlWord,
                param: this.controlWordParam !== '' && Number(this.controlWordParam),
                pos: this.cpos,
                row: this.row,
                col: this.col
            });
        }
        this.controlWord = '';
        this.controlWordParam = '';
    },
    /**
     * Emit pending text followed by a 'groupstart' event.
     */
    emitStartGroup : function ()
    {
        this.emitText();
        this.push({
            type: 'groupstart',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    },
    /**
     * Emit pending text followed by a 'groupend' event.
     */
    emitEndGroup : function ()
    {
        this.emitText();
        this.push({
            type: 'groupend',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    },
    /**
     * Emit pending text followed by an 'ignorable' event.
     */
    emitIgnorable : function ()
    {
        this.emitText();
        this.push({
            type: 'ignorable',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    },
    /**
     * Emit pending text followed by a 'hexchar' event carrying the collected
     * hex digits, then reset the digits.
     */
    emitHexChar : function ()
    {
        this.emitText();
        this.push({
            type: 'hexchar',
            value: this.hexChar,
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
        this.hexChar = '';
    },
    /**
     * Emit pending text followed by an 'error' event.
     * @param {String} message description of the problem
     */
    emitError : function (message)
    {
        this.emitText();
        this.push({
            type: 'error',
            value: message,
            row: this.row,
            col: this.col,
            char: this.cpos
        });
    },
    /**
     * Emit pending text followed by an 'endparagraph' event.
     */
    emitEndParagraph : function () {
        this.emitText();
        this.push({
            type: 'endparagraph',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    },
    /**
     * Emit pending text followed by a 'formula' event.
     */
    emitFormula : function ()
    {
        this.emitText();
        this.push({
            type: 'formula',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    },
    /**
     * Emit pending text followed by an 'indexsubentry' event.
     */
    emitIndexSubEntry : function ()
    {
        this.emitText();
        this.push({
            type: 'indexsubentry',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    }

} ;