git.roojs.org Git - roojs1/blob - Roo/htmleditor/FilterWord.js

   1 /**
   2  * @class Roo.htmleditor.FilterWord
   3  * try and clean up all the mess that Word generates.
   4  *
   5  * This is the 'nice version' - see 'Heavy' that white lists a very short list of elements, and multi-filters
   6
   7  * @constructor
   8  * Run a new Span Filter
   9  * @param {Object} config Configuration options
  10  */
  11
  12 Roo.htmleditor.FilterWord = function(cfg)
  13 {
  14     // no need to apply config.
  15     this.replaceDocBullets(cfg.node);
  16
  17     // this is disabled as the removal is done by other filters;
  18    // this.walk(cfg.node);
  19
  20
  21 }
  22
  23 Roo.extend(Roo.htmleditor.FilterWord, Roo.htmleditor.Filter,
  24 {
  25     tag: true,
  26
  27
  28     /**
  29      * Clean up MS wordisms...
  30      */
  31     replaceTag : function(node)
  32     {
  33
  34         // no idea what this does - span with text, replaceds with just text.
  35         if(
  36                 node.nodeName == 'SPAN' &&
  37                 !node.hasAttributes() &&
  38                 node.childNodes.length == 1 &&
  39                 node.firstChild.nodeName == "#text"
  40         ) {
  41             var textNode = node.firstChild;
  42             node.removeChild(textNode);
  43             if (node.getAttribute('lang') != 'zh-CN') {   // do not space pad on chinese characters..
  44                 node.parentNode.insertBefore(node.ownerDocument.createTextNode(" "), node);
  45             }
  46             node.parentNode.insertBefore(textNode, node);
  47             if (node.getAttribute('lang') != 'zh-CN') {   // do not space pad on chinese characters..
  48                 node.parentNode.insertBefore(node.ownerDocument.createTextNode(" ") , node);
  49             }
  50
  51             node.parentNode.removeChild(node);
  52             return false; // dont do chidren - we have remove our node - so no need to do chdhilren?
  53         }
  54
  55
  56
  57         if (node.tagName.toLowerCase().match(/^(style|script|applet|embed|noframes|noscript)$/)) {
  58             node.parentNode.removeChild(node);
  59             return false; // dont do chidlren
  60         }
  61         //Roo.log(node.tagName);
  62         // remove - but keep children..
  63         if (node.tagName.toLowerCase().match(/^(meta|link|\\?xml:|st1:|o:|v:|font)/)) {
  64             //Roo.log('-- removed');
  65             while (node.childNodes.length) {
  66                 var cn = node.childNodes[0];
  67                 node.removeChild(cn);
  68                 node.parentNode.insertBefore(cn, node);
  69                 // move node to parent - and clean it..
  70                 if (cn.nodeType == 1) {
  71                     this.replaceTag(cn);
  72                 }
  73
  74             }
  75             node.parentNode.removeChild(node);
  76             /// no need to iterate chidlren = it's got none..
  77             //this.iterateChildren(node, this.cleanWord);
  78             return false; // no need to iterate children.
  79         }
  80         // clean styles
  81         if (node.className.length) {
  82
  83             var cn = node.className.split(/\W+/);
  84             var cna = [];
  85             Roo.each(cn, function(cls) {
  86                 if (cls.match(/Mso[a-zA-Z]+/)) {
  87                     return;
  88                 }
  89                 cna.push(cls);
  90             });
  91             node.className = cna.length ? cna.join(' ') : '';
  92             if (!cna.length) {
  93                 node.removeAttribute("class");
  94             }
  95         }
  96
  97         if (node.hasAttribute("lang")) {
  98             node.removeAttribute("lang");
  99         }
 100
 101         if (node.hasAttribute("style")) {
 102
 103             var styles = node.getAttribute("style").split(";");
 104             var nstyle = [];
 105             Roo.each(styles, function(s) {
 106                 if (!s.match(/:/)) {
 107                     return;
 108                 }
 109                 var kv = s.split(":");
 110                 if (kv[0].match(/^(mso-|line|font|background|margin|padding|color)/)) {
 111                     return;
 112                 }
 113                 // what ever is left... we allow.
 114                 nstyle.push(s);
 115             });
 116             node.setAttribute("style", nstyle.length ? nstyle.join(';') : '');
 117             if (!nstyle.length) {
 118                 node.removeAttribute('style');
 119             }
 120         }
 121         return true; // do children
 122
 123
 124
 125     },
 126
 127     styleToObject: function(node)
 128     {
 129         var styles = (node.getAttribute("style") || '').split(";");
 130         var ret = {};
 131         Roo.each(styles, function(s) {
 132             if (!s.match(/:/)) {
 133                 return;
 134             }
 135             var kv = s.split(":");
 136
 137             // what ever is left... we allow.
 138             ret[kv[0].trim()] = kv[1];
 139         });
 140         return ret;
 141     },
 142
 143     replaceDocBullets : function(doc)
 144     {
 145         // this is a bit odd - but it appears some indents use ql-indent-1
 146         //Roo.log(doc.innerHTML);
 147
 148         var listpara = doc.getElementsByClassName('MsoListParagraphCxSpFirst');
 149         for( var i = 0; i < listpara.length; i ++) {
 150             listpara.item(i).className = "MsoListParagraph";
 151         }
 152         // this is a bit hacky - we had one word document where h2 had a miso-list attribute.
 153         var htwo = doc.getElementsByTagName('h2');
 154         for( var i = 0; i < htwo.length; i ++) {
 155             if (htwo.item(i).hasAttribute('style') && htwo.item(i).getAttribute('style').match(/mso-list:/)) {
 156                 htwo.item(i).className = "MsoListParagraph";
 157             }
 158         }
 159         listpara = doc.getElementsByClassName('MsoNormal');
 160         while(listpara.length) {
 161             if (listpara.item(0).hasAttribute('style') && listpara.item(0).getAttribute('style').match(/mso-list:/)) {
 162                 listpara.item(0).className = "MsoListParagraph";
 163             } else {
 164                 listpara.item(0).className = "MsoNormalx";
 165             }
 166         }
 167         listpara = doc.getElementsByClassName('ql-indent-1');
 168         while(listpara.length) {
 169             this.replaceDocBullet(listpara.item(0));
 170         }
 171         listpara = doc.getElementsByClassName('MsoListParagraph');
 172         while(listpara.length) {
 173
 174             this.replaceDocBullet(listpara.item(0));
 175         }
 176
 177     },
 178
 179
 180
 181     replaceDocBullet : function(p)
 182     {
 183         // gather all the siblings.
 184         var ns = p,
 185             parent = p.parentNode,
 186             doc = parent.ownerDocument,
 187             items = [];
 188
 189
 190         while (ns) {
 191             if (ns.nodeType != 1) {
 192                 ns = ns.nextSibling;
 193                 continue;
 194             }
 195             if (!ns.className.match(/(MsoListParagraph|ql-indent-1)/i)) {
 196                 break;
 197             }
 198             if (ns.getAttribute('style').match(/mso-list/)) {
 199                 items.push(ns);
 200                 ns = ns.nextSibling;
 201                 has_list = true;
 202                 continue;
 203             }
 204             var spans = ns.getElementsByTagName('span');
 205             if (!spans.length) {
 206                 break;
 207             }
 208             var has_list  = false;
 209             for(var i = 0; i < spans.length; i++) {
 210                 if (spans[i].getAttribute('style').match(/mso-list/)) {
 211                     has_list = true;
 212                     break;
 213                 }
 214             }
 215             if (!has_list) {
 216                 break;
 217             }
 218             items.push(ns);
 219             ns = ns.nextSibling;
 220
 221
 222         }
 223         if (!items.length) {
 224             ns.className = "";
 225             return;
 226         }
 227
 228         var ul = parent.ownerDocument.createElement('ul'); // what about number lists...
 229         parent.insertBefore(ul, p);
 230         var lvl = 0;
 231         var stack = [ ul ];
 232         var last_li = false;
 233
 234         var margin_to_depth = {};
 235         max_margins = -1;
 236
 237         items.forEach(function(n, ipos) {
 238             //Roo.log("got innertHMLT=" + n.innerHTML);
 239
 240             var spans = n.getElementsByTagName('span');
 241             if (!spans.length) {
 242                 //Roo.log("No spans found");
 243
 244                 parent.removeChild(n);
 245
 246
 247                 return; // skip it...
 248             }
 249
 250
 251
 252             var style = {};
 253             for(var i = 0; i < spans.length; i++) {
 254
 255                 style = this.styleToObject(spans[i]);
 256                 if (typeof(style['mso-list']) == 'undefined') {
 257                     continue;
 258                 }
 259
 260                 spans[i].parentNode.removeChild(spans[i]); // remove the fake bullet.
 261                 break;
 262             }
 263             //Roo.log("NOW GOT innertHMLT=" + n.innerHTML);
 264             style = this.styleToObject(n); // mo-list is from the parent node.
 265             if (typeof(style['mso-list']) == 'undefined') {
 266                 //Roo.log("parent is missing level");
 267
 268                 parent.removeChild(n);
 269
 270                 return;
 271             }
 272
 273             var margin = style['margin-left'];
 274             if (typeof(margin_to_depth[margin]) == 'undefined') {
 275                 max_margins++;
 276                 margin_to_depth[margin] = max_margins;
 277             }
 278             nlvl = margin_to_depth[margin] ;
 279
 280             if (nlvl > lvl) {
 281                 //new indent
 282                 var nul = doc.createElement('ul'); // what about number lists...
 283                 if (!last_li) {
 284                     last_li = doc.createElement('li');
 285                     stack[lvl].appendChild(last_li);
 286                 }
 287                 last_li.appendChild(nul);
 288                 stack[nlvl] = nul;
 289
 290             }
 291             lvl = nlvl;
 292
 293             var nli = stack[nlvl].appendChild(doc.createElement('li'));
 294             last_li = nli;
 295             nli.innerHTML = n.innerHTML;
 296             //Roo.log("innerHTML = " + n.innerHTML);
 297             parent.removeChild(n);
 298
 299
 300
 301
 302         },this);
 303
 304
 305
 306
 307     }
 308
 309
 310
 311 });