fix error in rtf
[roojs1] / Roo / rtf / Parser.js
/**
 * @class Roo.rtf.Parser
 *
 * based on this https://github.com/iarna/rtf-parser
 * it's really only designed to extract pict from pasted RTF
 *
 * The constructor parses its argument immediately, one character at a time;
 * there is no separate parse() call. Afterwards:
 *   - this.doc    is the root Roo.rtf.Document (false if no '{' was seen)
 *   - this.groups lists every closed group whose 'ignorable' flag is false
 *
 * usage:
 *
 *  var images = new Roo.rtf.Parser(a_string).groups.filter(function(g) { return g.type == 'pict'; });
 *
 * @constructor
 * @param {String} text the RTF source; when omitted (undefined/null) nothing
 *                      is parsed and the instance is left empty.
 * @throws whatever cmderror() throws when the tokenizer reports an error.
 */
Roo.rtf.Parser = function(text) {
    this.text = '';
    this.parserState = this.parseText;

    // these are for the interpreter...
    this.groupStack = [];
    this.hexStore = [];
    this.doc = false; // created by the first group start

    this.groups = []; // where we put the return.

    // a missing argument used to blow up on text.length - treat it as empty input.
    var src = (text === undefined || text === null) ? '' : text;

    for (var ii = 0; ii < src.length; ++ii) {
        ++this.cpos;

        // row / col are only used for position reporting in emitted tokens
        if (src[ii] === '\n') {
            ++this.row;
            this.col = 1;
        } else {
            ++this.col;
        }
        // parserState is swapped by the parseXXX methods as the state machine advances
        this.parserState(src[ii]);
    }
};
/**
 * Tokenizer (parseXXX / emitXXX) and interpreter (cmdXXX) for Roo.rtf.Parser.
 *
 * The constructor feeds each input character to this.parserState, which is
 * always one of the parseXXX methods. Those accumulate state and call emitXXX,
 * which builds a token and hands it to push(); push() dispatches the token to
 * the cmd<type> method that builds the group tree.
 */
Roo.rtf.Parser.prototype = {
    text : '', // literal text accumulated since the last emitted token
    controlWord : '', // letters of the control word currently being read
    controlWordParam :  '', // sign / digits of that control word's parameter
    hexChar : '', // hex digits collected so far for a \'hh escape
    doc : false, // root Roo.rtf.Document - created by the first group start
    group: false, // group currently receiving content
    groupStack : false, // enclosing groups (array, set per instance by the constructor)
    hexStore : false, // pending hexchar tokens (array, set per instance by the constructor)


    cpos : 0, // number of characters consumed
    row : 1, // position reporting
    col : 1, //


    /**
     * Dispatch a token to the matching cmd<type> handler.
     * Tokens without a handler are logged and dropped.
     * @param {Object} el token; el.type selects the handler
     */
    push : function (el)
    {
        var m = 'cmd'+ el.type;
        if (typeof(this[m]) == 'undefined') {
            Roo.log('invalid cmd:' + el.type);
            return;
        }
        this[m](el);
    },

    /**
     * Join the values of all pending hexchar tokens into a single
     * Roo.rtf.Hex and add it to the current group, then empty the store.
     * Called at the start of every other cmdXXX handler so that consecutive
     * hex escapes end up as one node.
     */
    flushHexStore : function()
    {
        if (this.hexStore.length < 1) {
            return;
        }
        if (!this.group) {
            // hex escapes outside of any group (RTF fragment) - nowhere to put them.
            this.hexStore.splice(0);
            return;
        }
        var hexstr = this.hexStore.map(
            function(cmd) {
                return cmd.value;
        }).join('');

        this.group.addContent( new Roo.rtf.Hex( hexstr ));

        this.hexStore.splice(0);
    },

    /**
     * '{' - open a group. The very first one becomes the document;
     * later ones are Roo.rtf.Group instances created with the current group
     * as constructor argument, the current group being stacked meanwhile.
     */
    cmdgroupstart : function()
    {
        this.flushHexStore();
        if (this.group) {
            this.groupStack.push(this.group);
        }
         // parent..
        if (this.doc === false) {
            this.group = this.doc = new Roo.rtf.Document();
            return;
        }
        this.group = new Roo.rtf.Group(this.group);
    },

    /**
     * '\*' - flag the current group as ignorable.
     */
    cmdignorable : function()
    {
        this.flushHexStore();
        if (!this.group) { // fragment without an enclosing group
            return;
        }
        this.group.ignorable = true;
    },

    /**
     * Escaped CR / LF - add a Roo.rtf.Paragraph to the current group.
     */
    cmdendparagraph : function()
    {
        this.flushHexStore();
        if (!this.group) { // fragment without an enclosing group
            return;
        }
        this.group.addContent(new Roo.rtf.Paragraph());
    },

    /**
     * '}' - close the current group: attach it to its parent (if any) and
     * record it in this.groups when its 'ignorable' property is exactly false.
     */
    cmdgroupend : function ()
    {
        this.flushHexStore();
        var endingGroup = this.group;
        if (!endingGroup) {
            // unbalanced '}' - nothing is open, so there is nothing to close.
            return;
        }

        this.group = this.groupStack.pop();
        if (this.group) {
            this.group.addChild(endingGroup);
        }

        // upstream also collected font / colour tables here - we don't need them.
        if (endingGroup.ignorable === false) {
            this.groups.push(endingGroup);
        }
    },

    /**
     * Literal text - add a Roo.rtf.Span built from the token to the current group.
     * @param {Object} cmd text token
     */
    cmdtext : function (cmd)
    {
        this.flushHexStore();
        if (!this.group) { // an RTF fragment, missing the {\rtf1 header
            return;  // we really don't care about stray text...
        }
        this.group.addContent(new Roo.rtf.Span(cmd));
    },

    /**
     * Control word - the first one seen in a group (while group.type is
     * still falsy) names the group; any later one is stored as a
     * Roo.rtf.Ctrl content item.
     * @param {Object} cmd controlword token
     */
    cmdcontrolword : function (cmd)
    {
        this.flushHexStore();
        if (!this.group) { // fragment without an enclosing group
            return;
        }
        if (!this.group.type) {
            this.group.type = cmd.value;
            return;
        }
        // we actually don't care about ctrl words... just keep them.
        this.group.addContent(new Roo.rtf.Ctrl(cmd));
    },

    /**
     * \'hh - queue the token; flushHexStore() turns the queue into one Hex node.
     * @param {Object} cmd hexchar token
     */
    cmdhexchar : function(cmd) {
        this.hexStore.push(cmd);
    },

    /**
     * Tokenizer error - aborts parsing by throwing the message itself
     * (a string, not an Error object).
     * @param {Object} cmd error token
     */
    cmderror : function(cmd) {
        throw cmd.value;
    },

    /**
     * '\|' (formula character) - not used for picture extraction; only
     * flushes pending hex data.
     */
    cmdformula : function() {
        this.flushHexStore();
    },

    /**
     * '\:' (index sub-entry) - not used for picture extraction; only
     * flushes pending hex data.
     */
    cmdindexsubentry : function() {
        this.flushHexStore();
    },


    /**
     * Default state: plain text, waiting for '\', '{' or '}'.
     * @param {String} c current character
     */
    parseText : function(c)
    {
        if (c === '\\') {
            this.parserState = this.parseEscapes;
        } else if (c === '{') {
            this.emitStartGroup();
        } else if (c === '}') {
            this.emitEndGroup();
        } else if (c === '\x0A' || c === '\x0D') {
            // cr/lf are noise chars
        } else {
            this.text += c;
        }
    },

    /**
     * State after a backslash: '\\', '\{' and '\}' are literal characters,
     * anything else is a control symbol or control word.
     * @param {String} c current character
     */
    parseEscapes: function (c)
    {
        if (c === '\\' || c === '{' || c === '}') {
            this.text += c;
            this.parserState = this.parseText;
        } else {
            this.parserState = this.parseControlSymbol;
            this.parseControlSymbol(c);
        }
    },

    /**
     * Handle the single-character control symbols; anything else starts a
     * control word. Every symbol branch hands control back to parseText -
     * otherwise the character following the symbol would be read as another
     * control symbol.
     * @param {String} c current character
     */
    parseControlSymbol: function(c)
    {
        if (c === '~') {
            this.text += '\u00a0'; // nbsp
            this.parserState = this.parseText;
        } else if (c === '-') {
            this.text += '\u00ad'; // soft hyphen
            this.parserState = this.parseText;
        } else if (c === '_') {
            this.text += '\u2011'; // non-breaking hyphen
            this.parserState = this.parseText;
        } else if (c === '*') {
            this.emitIgnorable();
            this.parserState = this.parseText;
        } else if (c === "'") {
            this.parserState = this.parseHexChar;
        } else if (c === '|') { // formula character
            this.emitFormula();
            this.parserState = this.parseText;
        } else if (c === ':') { // subentry in an index entry
            this.emitIndexSubEntry();
            this.parserState = this.parseText;
        } else if (c === '\x0a' || c === '\x0d') {
            this.emitEndParagraph();
            this.parserState = this.parseText;
        } else {
            this.parserState = this.parseControlWord;
            this.parseControlWord(c);
        }
    },

    /**
     * Collect the two hex digits of a \'hh escape; a non-hex character is
     * reported through emitError().
     * @param {String} c current character
     */
    parseHexChar: function (c)
    {
        if (/^[A-Fa-f0-9]$/.test(c)) {
            this.hexChar += c;
            if (this.hexChar.length >= 2) {
              this.emitHexChar();
              this.parserState = this.parseText;
            }
            return;
        }
        // reset before reporting: cmderror throws, so nothing after emitError() runs.
        this.hexChar = '';
        this.parserState = this.parseText;
        this.emitError("Invalid character \"" + c + "\" in hex literal.");
    },

    /**
     * Collect the letters of a control word. A space ends it (and is
     * swallowed), '-' or a digit starts the numeric parameter, anything else
     * ends it and is re-read as text.
     * @param {String} c current character
     */
    parseControlWord : function(c)
    {
        if (c === ' ') {
            this.emitControlWord();
            this.parserState = this.parseText;
        } else if (/^[-\d]$/.test(c)) {
            this.parserState = this.parseControlWordParam;
            this.controlWordParam += c;
        } else if (/^[A-Za-z]$/.test(c)) {
            this.controlWord += c;
        } else {
            this.emitControlWord();
            this.parserState = this.parseText;
            this.parseText(c);
        }
    },

    /**
     * Collect the digits of a control word parameter. A space ends it (and
     * is swallowed), anything else ends it and is re-read as text.
     * @param {String} c current character
     */
    parseControlWordParam : function (c) {
        if (/^\d$/.test(c)) {
            this.controlWordParam += c;
        } else if (c === ' ') {
            this.emitControlWord();
            this.parserState = this.parseText;
        } else {
            this.emitControlWord();
            this.parserState = this.parseText;
            this.parseText(c);
        }
    },


    /**
     * Emit the accumulated text (if any) as a 'text' token and reset it.
     * Every other emitXXX calls this first so tokens stay in input order.
     */
    emitText : function () {
        if (this.text === '') {
            return;
        }
        this.push({
            type: 'text',
            value: this.text,
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
        this.text = '';
    },

    /**
     * Emit the collected control word (param is a Number, or false when the
     * word had no parameter) and reset the collectors. An empty control word
     * is silently dropped.
     */
    emitControlWord : function ()
    {
        this.emitText();
        if (this.controlWord === '') {
            // do we want to track this - it seems just to cause problems.
            //this.emitError('empty control word');
        } else {
            this.push({
                  type: 'controlword',
                  value: this.controlWord,
                  param: this.controlWordParam !== '' && Number(this.controlWordParam),
                  pos: this.cpos,
                  row: this.row,
                  col: this.col
            });
        }
        this.controlWord = '';
        this.controlWordParam = '';
    },

    /** Emit a 'groupstart' token for '{'. */
    emitStartGroup : function ()
    {
        this.emitText();
        this.push({
            type: 'groupstart',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    },

    /** Emit a 'groupend' token for '}'. */
    emitEndGroup : function ()
    {
        this.emitText();
        this.push({
            type: 'groupend',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    },

    /** Emit an 'ignorable' token for '\*'. */
    emitIgnorable : function ()
    {
        this.emitText();
        this.push({
            type: 'ignorable',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    },

    /** Emit a 'hexchar' token carrying the collected hex digits, then reset them. */
    emitHexChar : function ()
    {
        this.emitText();
        this.push({
            type: 'hexchar',
            value: this.hexChar,
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
        this.hexChar = '';
    },

    /**
     * Emit an 'error' token (cmderror() throws its value).
     * Note the position is reported under 'char', not 'pos'.
     * @param {String} message description of the problem
     */
    emitError : function (message)
    {
        this.emitText();
        this.push({
            type: 'error',
            value: message,
            row: this.row,
            col: this.col,
            char: this.cpos
        });
    },

    /** Emit an 'endparagraph' token for an escaped CR / LF. */
    emitEndParagraph : function () {
        this.emitText();
        this.push({
            type: 'endparagraph',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    },

    /** Emit a 'formula' token for '\|'. */
    emitFormula : function () {
        this.emitText();
        this.push({
            type: 'formula',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    },

    /** Emit an 'indexsubentry' token for '\:'. */
    emitIndexSubEntry : function () {
        this.emitText();
        this.push({
            type: 'indexsubentry',
            pos: this.cpos,
            row: this.row,
            col: this.col
        });
    }

};