6a3d7082f3a881c77caa9508f2e4ef9c96b4755a
[roojs1] / Roo / rtf / Parser.js
/**
 *
 *
 * Based on https://github.com/iarna/rtf-parser
 * It is really only designed to extract pict groups from pasted RTF.
 *
 * Usage (parsing happens in the constructor; closed groups are collected in .groups):
 *
 *  var images = new Roo.rtf.Parser(a_string).groups.filter(function(g) { return g.type == 'pict'; });
 *
 *
 */
13
14  
15
16
17
/**
 * Tokenizes an RTF string and builds its group tree while being constructed.
 *
 * Each character of the input is handed to the current parser-state method
 * (starting with parseText). The state methods emit tokens through push(),
 * and the cmd* handlers fill in `doc` (the first group opened) and `groups`
 * (every closed group whose `ignorable` is strictly false).
 *
 * @param {String} text  RTF source; a missing (undefined/null) value is
 *                       treated as empty input instead of throwing
 */
Roo.rtf.Parser = function(text) {
    var input = (text === undefined || text === null) ? '' : text;

    this.text = '';
    this.parserState = this.parseText;

    // these are for the interpreter...
    this.groupStack = [];
    this.hexStore = [];
    this.doc = false; // stays false until the first group start creates the document

    this.groups = []; // where we put the return.

    for (var ii = 0; ii < input.length; ++ii) {
        ++this.cpos;

        // row/col are only carried along on tokens for reporting
        if (input[ii] === '\n') {
            ++this.row;
            this.col = 1;
        } else {
            ++this.col;
        }
        this.parserState(input[ii]);
    }
};
47 Roo.rtf.Parser.prototype = {
    text : '', // plain-text characters accumulated since the last emitted token
    controlWord : '', // letters of the control word currently being read
    controlWordParam :  '', // '-'/digits of the control word's numeric parameter
    hexChar : '', // hex digits collected so far for the current \'hh escape
    doc : false, // first group opened (the document); false until then
    group: false, // group currently receiving content
    groupStack : false, // enclosing groups of `group`; replaced by an array in the constructor
    hexStore : false, // pending hexchar tokens; replaced by an array in the constructor
    
    
    cpos : 0, // number of characters consumed so far
    row : 1, // line counter, carried on tokens for reporting
    col : 1, // column counter, carried on tokens for reporting
61
62      
63     push : function (el)
64     {
65         var m = 'cmd'+ el.type;
66         if (typeof(this[m]) == 'undefined') {
67             Roo.log('invalid cmd:' + el.type);
68             return;
69         }
70         this[m](el);
71         //Roo.log(el);
72     },
73     flushHexStore : function()
74     {
75         if (this.hexStore.length < 1) {
76             return;
77         }
78         var hexstr = this.hexStore.map(
79             function(cmd) {
80                 return cmd.value;
81         }).join('');
82         
83         this.group.addContent( new Roo.rtf.Hex( hexstr ));
84               
85             
86         this.hexStore.splice(0)
87         
88     },
89     
90     cmdgroupstart : function()
91     {
92         this.flushHexStore();
93         if (this.group) {
94             this.groupStack.push(this.group);
95         }
96          // parent..
97         if (this.doc === false) {
98             this.group = this.doc = new Roo.rtf.Document();
99             return;
100             
101         }
102         this.group = new Roo.rtf.Group(this.group);
103     },
104     cmdignorable : function()
105     {
106         this.flushHexStore();
107         this.group.ignorable = true;
108     },
109     cmdendparagraph : function()
110     {
111         this.flushHexStore();
112         this.group.addContent(new Roo.rtf.Paragraph());
113     },
114     cmdgroupend : function ()
115     {
116         this.flushHexStore();
117         var endingGroup = this.group;
118         
119         
120         this.group = this.groupStack.pop();
121         if (this.group) {
122             this.group.addChild(endingGroup);
123         }
124         
125         
126         
127         var doc = this.group || this.doc;
128         //if (endingGroup instanceof FontTable) {
129         //  doc.fonts = endingGroup.table
130         //} else if (endingGroup instanceof ColorTable) {
131         //  doc.colors = endingGroup.table
132         //} else if (endingGroup !== this.doc && !endingGroup.get('ignorable')) {
133         if (endingGroup.ignorable === false) {
134             //code
135             this.groups.push(endingGroup);
136            // Roo.log( endingGroup );
137         }
138             //Roo.each(endingGroup.content, function(item)) {
139             //    doc.addContent(item);
140             //}
141             //process.emit('debug', 'GROUP END', endingGroup.type, endingGroup.get('ignorable'))
142         //}
143     },
144     cmdtext : function (cmd)
145     {
146         this.flushHexStore();
147         if (!this.group) { // an RTF fragment, missing the {\rtf1 header
148             //this.group = this.doc
149             return;  // we really don't care about stray text...
150         }
151         this.group.addContent(new Roo.rtf.Span(cmd));
152     },
153     cmdcontrolword : function (cmd)
154     {
155         this.flushHexStore();
156         if (!this.group.type) {
157             this.group.type = cmd.value;
158             return;
159         }
160         this.group.addContent(new Roo.rtf.Ctrl(cmd));
161         // we actually don't care about ctrl words...
162         return ;
163         /*
164         var method = 'ctrl$' + cmd.value.replace(/-(.)/g, (_, char) => char.toUpperCase())
165         if (this[method]) {
166             this[method](cmd.param)
167         } else {
168             if (!this.group.get('ignorable')) process.emit('debug', method, cmd.param)
169         }
170         */
171     },
172     cmdhexchar : function(cmd) {
173         this.hexStore.push(cmd);
174     },
175     cmderror : function(cmd) {
176         throw new Exception (cmd.value);
177     },
178     
179     /*
180       _flush (done) {
181         if (this.text !== '\u0000') this.emitText()
182         done()
183       }
184       */
185       
186       
187     parseText : function(c)
188     {
189         if (c === '\\') {
190             this.parserState = this.parseEscapes;
191         } else if (c === '{') {
192             this.emitStartGroup();
193         } else if (c === '}') {
194             this.emitEndGroup();
195         } else if (c === '\x0A' || c === '\x0D') {
196             // cr/lf are noise chars
197         } else {
198             this.text += c;
199         }
200     },
201     
202     parseEscapes: function (c)
203     {
204         if (c === '\\' || c === '{' || c === '}') {
205             this.text += c;
206             this.parserState = this.parseText;
207         } else {
208             this.parserState = this.parseControlSymbol;
209             this.parseControlSymbol(c);
210         }
211     },
212     parseControlSymbol: function(c)
213     {
214         if (c === '~') {
215             this.text += '\u00a0'; // nbsp
216             this.parserState = this.parseText
217         } else if (c === '-') {
218              this.text += '\u00ad'; // soft hyphen
219         } else if (c === '_') {
220             this.text += '\u2011'; // non-breaking hyphen
221         } else if (c === '*') {
222             this.emitIgnorable();
223             this.parserState = this.parseText;
224         } else if (c === "'") {
225             this.parserState = this.parseHexChar;
226         } else if (c === '|') { // formula cacter
227             this.emitFormula();
228             this.parserState = this.parseText;
229         } else if (c === ':') { // subentry in an index entry
230             this.emitIndexSubEntry();
231             this.parserState = this.parseText;
232         } else if (c === '\x0a') {
233             this.emitEndParagraph();
234             this.parserState = this.parseText;
235         } else if (c === '\x0d') {
236             this.emitEndParagraph();
237             this.parserState = this.parseText;
238         } else {
239             this.parserState = this.parseControlWord;
240             this.parseControlWord(c);
241         }
242     },
243     parseHexChar: function (c)
244     {
245         if (/^[A-Fa-f0-9]$/.test(c)) {
246             this.hexChar += c;
247             if (this.hexChar.length >= 2) {
248               this.emitHexChar();
249               this.parserState = this.parseText;
250             }
251             return;
252         }
253         this.emitError("Invalid character \"" + c + "\" in hex literal.");
254         this.parserState = this.parseText;
255         
256     },
257     parseControlWord : function(c)
258     {
259         if (c === ' ') {
260             this.emitControlWord();
261             this.parserState = this.parseText;
262         } else if (/^[-\d]$/.test(c)) {
263             this.parserState = this.parseControlWordParam;
264             this.controlWordParam += c;
265         } else if (/^[A-Za-z]$/.test(c)) {
266           this.controlWord += c;
267         } else {
268           this.emitControlWord();
269           this.parserState = this.parseText;
270           this.parseText(c);
271         }
272     },
273     parseControlWordParam : function (c) {
274         if (/^\d$/.test(c)) {
275           this.controlWordParam += c;
276         } else if (c === ' ') {
277           this.emitControlWord();
278           this.parserState = this.parseText;
279         } else {
280           this.emitControlWord();
281           this.parserState = this.parseText;
282           this.parseText(c);
283         }
284     },
285     
286     
287     
288     
289     emitText : function () {
290         if (this.text === '') {
291             return;
292         }
293         this.push({
294             type: 'text',
295             value: this.text,
296             pos: this.cpos,
297             row: this.row,
298             col: this.col
299         });
300         this.text = ''
301     },
302     emitControlWord : function ()
303     {
304         this.emitText();
305         if (this.controlWord === '') {
306             this.emitError('empty control word');
307         } else {
308             this.push({
309                   type: 'controlword',
310                   value: this.controlWord,
311                   param: this.controlWordParam !== '' && Number(this.controlWordParam),
312                   pos: this.cpos,
313                   row: this.row,
314                   col: this.col
315             });
316         }
317         this.controlWord = '';
318         this.controlWordParam = '';
319     },
320     emitStartGroup : function ()
321     {
322         this.emitText();
323         this.push({
324             type: 'groupstart',
325             pos: this.cpos,
326             row: this.row,
327             col: this.col
328         });
329     },
330     emitEndGroup : function ()
331     {
332         this.emitText();
333         this.push({
334             type: 'groupend',
335             pos: this.cpos,
336             row: this.row,
337             col: this.col
338         });
339     },
340     emitIgnorable : function ()
341     {
342         this.emitText();
343         this.push({
344             type: 'ignorable',
345             pos: this.cpos,
346             row: this.row,
347             col: this.col
348         });
349     },
350     emitHexChar : function ()
351     {
352         this.emitText();
353         this.push({
354             type: 'hexchar',
355             value: this.hexChar,
356             pos: this.cpos,
357             row: this.row,
358             col: this.col
359         });
360         this.hexChar = ''
361     },
362     emitError : function (message)
363     {
364       this.emitText();
365       this.push({
366             type: 'error',
367             value: message,
368             row: this.row,
369             col: this.col,
370             char: this.cpos //,
371             //stack: new Error().stack
372         });
373     },
374     emitEndParagraph : function () {
375         this.emitText();
376         this.push({
377             type: 'endparagraph',
378             pos: this.cpos,
379             row: this.row,
380             col: this.col
381         });
382     }
383      
384 } ;