- $tmpDoc.find("style").remove(); // sanitize all style tags present prior to the transformation
-
- var newHTML = $tmpDoc.find("html").html();
- tmpWindow.close();
-
- var relevantPatterns = [];
- isPatternRelevant(newHTML, "href=\"", relevantPatterns);
- isPatternRelevant(newHTML, "src=\"", relevantPatterns);
- return sanitize( newHTML, relevantPatterns );
- };
-
/**
 * Runs the markup through san() once per pattern, feeding each pass the
 * output of the previous one.
 *
 * @param {string} html - markup to clean up
 * @param {Array} patterns - patterns handed to san() one at a time, in order
 * @returns {string} the result of the last san() pass, or `html` itself
 *                   when `patterns` is empty
 */
function sanitize(html, patterns) {
    var cleaned = html;
    var idx = 0;
    while (idx < patterns.length) {
        cleaned = san(cleaned, patterns[idx]);
        idx += 1;
    }
    return cleaned;
}
-
/**
 * Rewrites the double-quoted value that follows every match of `pattern`
 * in `html`, turning each `&amp;` inside that value back into a bare `&`.
 * Text outside those values is copied through unchanged.
 *
 * Each match is re-emitted as the pattern's own source text (for example
 * `href="`), followed by the cleaned value and its closing quote.
 *
 * @param {string} html - markup to scan
 * @param {RegExp|string} pattern - matcher for the attribute prefix, up to and
 *        including the opening double quote (e.g. /href="/i)
 * @returns {string} the markup with the matched values rewritten
 */
function san(html, pattern) {
    // Text written back in place of each match. For a RegExp this is its
    // source (what the old code recovered by slicing String(pattern) and
    // assuming exactly one flag); for a plain string it is the string itself.
    var patternText = (pattern && typeof pattern.source === "string")
        ? pattern.source
        : String(pattern);

    var ret = "";
    var pos = 0; // absolute offset into html of the next unprocessed character

    while (pos < html.length) {
        var rest = html.substring(pos);
        var relIndex = rest.search(pattern);

        if (relIndex < 0) {
            // no further match: keep the tail as it is and stop
            ret += rest;
            break;
        }

        // Copy everything in front of the match. relIndex is relative to
        // `rest`, so every offset below is converted to an absolute one;
        // mixing the two made the scan jump backwards and never finish once
        // a second match was present.
        ret += rest.substring(0, relIndex);
        var matchStart = pos + relIndex;

        // The value to clean sits between the first double quote at or after
        // the match start and the next double quote.
        var openQuote = html.indexOf("\"", matchStart);
        var closeQuote = openQuote < 0 ? -1 : html.indexOf("\"", openQuote + 1);

        if (closeQuote < 0) {
            // unterminated value: nothing can be rewritten safely, keep the rest
            ret += html.substring(matchStart);
            break;
        }

        // value text plus its closing quote
        var value = html.substring(openQuote + 1, closeQuote + 1);

        // NOTE(review): the previous replacement was `&` -> `&`, a no-op;
        // `&amp;` is assumed to be the intended entity -- confirm.
        ret += patternText + value.replace(/&amp;/g, "&");

        pos = closeQuote + 1;
    }

    return ret;
}
+ var style = window.getComputedStyle(dom, null);
+ if (style.display == 'none' || dom.nodeName == "NOSCRIPT" || dom.nodeName == "SCRIPT" ) {
+ dom.parentElement.removeChild(dom);
+ return;
+ }
+ var cn = [];
+
+ if (dom.childNodes.length > 100) {
+ console.log(dom);
+ throw "too many child nodes?" + dom.childNodes.length ;
+ }
+ for (var i = 0;i < dom.childNodes.length;i++) {
+ cn.push(dom.childNodes[i]);
+ }
+
+ if (cn.length > 100) {
+ console.log(dom);
+ throw "too many child nodes? cn";
+ }
+ for (var i = 0;i < cn.length;i++) {
+
+ //console.log( i + ':'+ cn[i].nodeName);
+ fillStyle(cn[i], style);
+ }
+
+
+ //var pstyle = false;
+ //if (dom.nodeName != 'BODY') {
+ // pstyle = window.getComputedStyle(dom.parentElement, null);
+ //}
+
+ if (dom.nodeName == 'SPAM') {
+ pstyle = false; //?? others??
+ }
+
+ //if (dom.nodeName == 'LI') { throw 'done'; }
+
+
+ for(var i=0;i<style.length;i++){
+ var prop = style[i];
+ var camel = prop.replace(/\-([a-z])/g, camelize);
+ var val = style.getPropertyValue(prop);
+ //returns[camel] = val;
+
+ // idea... if the parent has the same style.. then do not apply it to the child?
+ if (pstyle && pstyle[prop] == val) {
+ continue;
+ }
+ //console.log(prop + '=' + val);
+
+ dom.style[camel] = val;
+
+
+ }
+ var es = dom.getAttribute('style');
+ //console.log(dom.nodeName + '::' + es);
+ dom.setAttribute('style', es);
+
+ //return returns;
+ }