// no need to apply config.
this.replaceDocBullets(cfg.node);
+ this.replaceAname(cfg.node);
+ // this is disabled as the removal is done by other filters;
// this.walk(cfg.node);
var kv = s.split(":");
// what ever is left... we allow.
- ret[kv[0]] = kv[1];
+ ret[kv[0].trim()] = kv[1];
});
return ret;
},
+ replaceAname : function (doc)
+ {
+ // replace all the a/name without..
+ var aa = Array.from(doc.getElementsByTagName('a'));
+ for (var i = 0; i < aa.length; i++) {
+ var a = aa[i];
+ if (a.hasAttribute("name")) {
+ a.removeAttribute("name");
+ }
+ if (a.hasAttribute("href")) {
+ continue;
+ }
+ // reparent children.
+ this.removeNodeKeepChildren(a);
+
+ }
+
+
+
+ },
+
+
+
replaceDocBullets : function(doc)
{
// this is a bit odd - but it appears some indents use ql-indent-1
+ // Purpose: give every list-like paragraph under doc (a node offering getElementsByClassName/getElementsByTagName) the single class "MsoListParagraph", then hand each one to replaceDocBullet().
- var listpara = doc.getElementsByClassName('ql-indent-1');
- while(listpara.length) {
- this.replaceDocBullet(listpara.item(0));
+ var listpara = Array.from(doc.getElementsByClassName('MsoListParagraphCxSpFirst'));
+ for( var i = 0; i < listpara.length; i ++) {
+ listpara[i].className = "MsoListParagraph";
+ }
+ // CxSpFirst (above), CxSpMiddle, CxSpLast and ql-indent-1 all collapse to the same class; Array.from() snapshots each result so changing className cannot skip entries. Note className is overwritten, so any other classes are dropped.
+ listpara = Array.from(doc.getElementsByClassName('MsoListParagraphCxSpMiddle'));
+ for( var i = 0; i < listpara.length; i ++) {
+ listpara[i].className = "MsoListParagraph";
}
+ listpara = Array.from(doc.getElementsByClassName('MsoListParagraphCxSpLast'));
+ for( var i = 0; i < listpara.length; i ++) {
+ listpara[i].className = "MsoListParagraph";
+ }
+ listpara = Array.from(doc.getElementsByClassName('ql-indent-1'));
+ for( var i = 0; i < listpara.length; i ++) {
+ listpara[i].className = "MsoListParagraph";
+ }
+ // An inline style containing "mso-list:" also marks a list item: checked first on h2, then on MsoNormal (MsoNormal elements without it are renamed "MsoNormalx" - presumably so they stop matching MsoNormal; TODO confirm).
+ // the h2 case is a bit hacky - we had one word document where h2 had a mso-list attribute.
+ var htwo = Array.from(doc.getElementsByTagName('h2'));
+ for( var i = 0; i < htwo.length; i ++) {
+ if (htwo[i].hasAttribute('style') && htwo[i].getAttribute('style').match(/mso-list:/)) {
+ htwo[i].className = "MsoListParagraph";
+ }
+ }
+ listpara = Array.from(doc.getElementsByClassName('MsoNormal'));
+ for( var i = 0; i < listpara.length; i ++) {
+ if (listpara[i].hasAttribute('style') && listpara[i].getAttribute('style').match(/mso-list:/)) {
+ listpara[i].className = "MsoListParagraph";
+ } else {
+ listpara[i].className = "MsoNormalx";
+ }
+ }
+ // Deliberately NOT wrapped in Array.from(): the loop below needs the live collection, and only terminates if each replaceDocBullet() call takes item(0) out of it (by removing the node or changing its class).
+ listpara = doc.getElementsByClassName('MsoListParagraph');
+ // Roo.log(doc.innerHTML);
+
+
- var listpara = doc.getElementsByClassName('MsoListParagraph');
while(listpara.length) {
+ // item(0) is the first paragraph still carrying the class, i.e. the start of the next unconverted run.
this.replaceDocBullet(listpara.item(0));
}
+
},
+
+
replaceDocBullet : function(p)
{
// gather all the siblings.
var ns = p,
parent = p.parentNode,
doc = parent.ownerDocument,
- items = [];
+ items = [];
+
+ //Roo.log("Parsing: " + p.innerText) ;
+ var listtype = 'ul';
while (ns) {
if (ns.nodeType != 1) {
ns = ns.nextSibling;
continue;
}
if (!ns.className.match(/(MsoListParagraph|ql-indent-1)/i)) {
+ //Roo.log("Missing para r q1indent - got:" + ns.className);
+ break;
+ }
+ var spans = ns.getElementsByTagName('span');
+
+ if (ns.hasAttribute('style') && ns.getAttribute('style').match(/mso-list/)) {
+ items.push(ns);
+ ns = ns.nextSibling;
+ has_list = true;
+ if (!spans.length) {
+ continue;
+ }
+ var ff = spans[0].style.fontFamily;
+ if (!spans[0].hasAttribute('style') && spans.length > 1 && spans[1].hasAttribute('style')) {
+
+ ff = spans[1].style.fontFamily;
+ }
+ //var style = this.styleToObject(spans[0]);
+ //Roo.log("got font family: " + ff);
+ if (typeof(ff) != 'undefined' && !ff.match(/Symbol/)) {
+ listtype = 'ol';
+ }
+
+ continue;
+ }
+ //Roo.log("no mso-list?");
+
+ var spans = ns.getElementsByTagName('span');
+ if (!spans.length) {
+ break;
+ }
+ var has_list = false;
+ for(var i = 0; i < spans.length; i++) {
+ if (spans[i].hasAttribute('style') && spans[i].getAttribute('style').match(/mso-list/)) {
+ has_list = true;
+ break;
+ }
+ }
+ if (!has_list) {
break;
}
items.push(ns);
ns = ns.nextSibling;
+
}
- var ul = parent.ownerDocument.createElement('ul'); // what about number lists...
+ if (!items.length) {
+ ns.className = "";
+ return;
+ }
+
+ var ul = parent.ownerDocument.createElement(listtype); // what about number lists...
parent.insertBefore(ul, p);
var lvl = 0;
var stack = [ ul ];
var last_li = false;
- items.forEach(function(n) {
+ var margin_to_depth = {};
+ max_margins = -1;
+
+ items.forEach(function(n, ipos) {
//Roo.log("got innertHMLT=" + n.innerHTML);
var spans = n.getElementsByTagName('span');
if (!spans.length) {
//Roo.log("No spans found");
-
+
parent.removeChild(n);
+
+
return; // skip it...
}
-
+ var num = 1;
var style = {};
for(var i = 0; i < spans.length; i++) {
if (typeof(style['mso-list']) == 'undefined') {
continue;
}
-
+ if (listtype == 'ol') {
+ num = spans[i].innerText.replace(/[^0-9]+]/g,'') * 1;
+ }
spans[i].parentNode.removeChild(spans[i]); // remove the fake bullet.
break;
}
style = this.styleToObject(n); // mo-list is from the parent node.
if (typeof(style['mso-list']) == 'undefined') {
//Roo.log("parent is missing level");
+
parent.removeChild(n);
+
return;
}
- var nlvl = Math.min(
- stack.length-1,
- (style['mso-list'].split(' ')[1].replace(/level/,'') *1) - 1
- );
-
-
-
+ var margin = style['margin-left'];
+ if (typeof(margin_to_depth[margin]) == 'undefined') {
+ max_margins++;
+ margin_to_depth[margin] = max_margins;
+ }
+ nlvl = margin_to_depth[margin] ;
+
if (nlvl > lvl) {
//new indent
- var nul = doc.createElement('ul'); // what about number lists...
- last_li.appendChild(nul);
- stack[nlvl] = nul;
+ var nul = doc.createElement(listtype); // what about number lists...
+ if (!last_li) {
+ last_li = doc.createElement('li');
+ stack[lvl].appendChild(last_li);
+ }
+ last_li.appendChild(nul);
+ stack[nlvl] = nul;
}
lvl = nlvl;
+ // not starting at 1..
+ if (!stack[nlvl].hasAttribute("start") && listtype == "ol") {
+ stack[nlvl].setAttribute("start", num);
+ }
+
var nli = stack[nlvl].appendChild(doc.createElement('li'));
last_li = nli;
nli.innerHTML = n.innerHTML;
//Roo.log("innerHTML = " + n.innerHTML);
parent.removeChild(n);
- // copy children of p into nli
- /*while(n.firstChild) {
- var fc = n.firstChild;
- n.removeChild(fc);
- nli.appendChild(fc);
- }*/
+
},this);