4 * based on this https://github.com/iarna/rtf-parser
5 * it's really only designed to extract pict from pasted RTF
9 * var images = new Roo.rtf.Parser().parse(a_string).filter(function(g) { return g.type == 'pict'; });
18 Roo.rtf.Parser = function(text) {
19 //super({objectMode: true})
21 this.parserState = this.parseText;
23 // these are for interpreter...
25 ///this.parserState = this.parseTop
30 this.groups = []; // where we put the return.
32 for (var ii = 0; ii < text.length; ++ii) {
35 if (text[ii] === '\n') {
41 this.parserState(text[ii]);
47 Roo.rtf.Parser.prototype = {
48 text : '', // string being parsed..
50 controlWordParam : '',
65 var m = 'cmd'+ el.type;
66 if (typeof(this[m]) == 'undefined') {
67 Roo.log('invalid cmd:' + el.type);
73 flushHexStore : function()
75 if (this.hexStore.length < 1) {
78 var hexstr = this.hexStore.map(
83 this.group.addContent( new Roo.rtf.Hex( hexstr ));
86 this.hexStore.splice(0)
90 cmdgroupstart : function()
94 this.groupStack.push(this.group);
97 if (this.doc === false) {
98 this.group = this.doc = new Roo.rtf.Document();
102 this.group = new Roo.rtf.Group(this.group);
104 cmdignorable : function()
106 this.flushHexStore();
107 this.group.ignorable = true;
109 cmdendparagraph : function()
111 this.flushHexStore();
112 this.group.addContent(new Roo.rtf.Paragraph());
114 cmdgroupend : function ()
116 this.flushHexStore();
117 var endingGroup = this.group;
120 this.group = this.groupStack.pop();
122 this.group.addChild(endingGroup);
127 var doc = this.group || this.doc;
128 //if (endingGroup instanceof FontTable) {
129 // doc.fonts = endingGroup.table
130 //} else if (endingGroup instanceof ColorTable) {
131 // doc.colors = endingGroup.table
132 //} else if (endingGroup !== this.doc && !endingGroup.get('ignorable')) {
133 if (endingGroup.ignorable === false) {
135 this.groups.push(endingGroup);
136 // Roo.log( endingGroup );
138 //Roo.each(endingGroup.content, function(item)) {
139 // doc.addContent(item);
141 //process.emit('debug', 'GROUP END', endingGroup.type, endingGroup.get('ignorable'))
144 cmdtext : function (cmd)
146 this.flushHexStore();
147 if (!this.group) { // an RTF fragment, missing the {\rtf1 header
148 //this.group = this.doc
149 return; // we really don't care about stray text...
151 this.group.addContent(new Roo.rtf.Span(cmd));
153 cmdcontrolword : function (cmd)
155 this.flushHexStore();
156 if (!this.group.type) {
157 this.group.type = cmd.value;
160 this.group.addContent(new Roo.rtf.Ctrl(cmd));
161 // we actually don't care about ctrl words...
164 var method = 'ctrl$' + cmd.value.replace(/-(.)/g, (_, char) => char.toUpperCase())
166 this[method](cmd.param)
168 if (!this.group.get('ignorable')) process.emit('debug', method, cmd.param)
172 cmdhexchar : function(cmd) {
173 this.hexStore.push(cmd);
175 cmderror : function(cmd) {
181 if (this.text !== '\u0000') this.emitText()
187 parseText : function(c)
190 this.parserState = this.parseEscapes;
191 } else if (c === '{') {
192 this.emitStartGroup();
193 } else if (c === '}') {
195 } else if (c === '\x0A' || c === '\x0D') {
196 // cr/lf are noise chars
202 parseEscapes: function (c)
204 if (c === '\\' || c === '{' || c === '}') {
206 this.parserState = this.parseText;
208 this.parserState = this.parseControlSymbol;
209 this.parseControlSymbol(c);
212 parseControlSymbol: function(c)
215 this.text += '\u00a0'; // nbsp
216 this.parserState = this.parseText
217 } else if (c === '-') {
218 this.text += '\u00ad'; // soft hyphen
219 } else if (c === '_') {
220 this.text += '\u2011'; // non-breaking hyphen
221 } else if (c === '*') {
222 this.emitIgnorable();
223 this.parserState = this.parseText;
224 } else if (c === "'") {
225 this.parserState = this.parseHexChar;
226 } else if (c === '|') { // formula character
228 this.parserState = this.parseText;
229 } else if (c === ':') { // subentry in an index entry
230 this.emitIndexSubEntry();
231 this.parserState = this.parseText;
232 } else if (c === '\x0a') {
233 this.emitEndParagraph();
234 this.parserState = this.parseText;
235 } else if (c === '\x0d') {
236 this.emitEndParagraph();
237 this.parserState = this.parseText;
239 this.parserState = this.parseControlWord;
240 this.parseControlWord(c);
243 parseHexChar: function (c)
245 if (/^[A-Fa-f0-9]$/.test(c)) {
247 if (this.hexChar.length >= 2) {
249 this.parserState = this.parseText;
253 this.emitError("Invalid character \"" + c + "\" in hex literal.");
254 this.parserState = this.parseText;
257 parseControlWord : function(c)
260 this.emitControlWord();
261 this.parserState = this.parseText;
262 } else if (/^[-\d]$/.test(c)) {
263 this.parserState = this.parseControlWordParam;
264 this.controlWordParam += c;
265 } else if (/^[A-Za-z]$/.test(c)) {
266 this.controlWord += c;
268 this.emitControlWord();
269 this.parserState = this.parseText;
273 parseControlWordParam : function (c) {
274 if (/^\d$/.test(c)) {
275 this.controlWordParam += c;
276 } else if (c === ' ') {
277 this.emitControlWord();
278 this.parserState = this.parseText;
280 this.emitControlWord();
281 this.parserState = this.parseText;
289 emitText : function () {
290 if (this.text === '') {
302 emitControlWord : function ()
305 if (this.controlWord === '') {
306 // do we want to track this - it seems just to cause problems.
307 //this.emitError('empty control word');
311 value: this.controlWord,
312 param: this.controlWordParam !== '' && Number(this.controlWordParam),
318 this.controlWord = '';
319 this.controlWordParam = '';
321 emitStartGroup : function ()
331 emitEndGroup : function ()
341 emitIgnorable : function ()
351 emitHexChar : function ()
363 emitError : function (message)
372 //stack: new Error().stack
375 emitEndParagraph : function () {
378 type: 'endparagraph',