--- /dev/null
+<?php
+
+/**
+ * This is a PHP implementation of the Roo HTMLEditorCore onPaste method - that cleans up HTML
+ * and replaces things like tables etc..
+ */
+
+class HTML_Clean {
+
+    /** @var DOMDocument the document being cleaned (supplied through $opts['dom']) */
+    var $dom;
+
+    /** @var array blacklist of element names - handed to the 'Black' filter */
+    var $black = array(
+        'APPLET', //
+        'BASE', 'BASEFONT', 'BGSOUND', 'BLINK', 'BODY',
+        'FRAME', 'FRAMESET', 'HEAD', 'HTML', 'ILAYER',
+        'IFRAME', 'LAYER', 'LINK', 'META', 'OBJECT',
+        'SCRIPT', 'STYLE' ,'TITLE', 'XML',
+        //'FONT' // CLEAN LATER..
+        'COLGROUP', 'COL' // messy tables.
+    );
+
+    /**
+     * Parse an HTML string and run the clean-up pipeline over it.
+     *
+     * @param string $str  raw HTML
+     * @param array  $opts extra properties copied onto the new instance
+     * @return HTML_Clean  use toString() to fetch the cleaned markup
+     */
+    static function fromHTML($str, $opts = array())
+    {
+        $str = self::cleanWordChars($str);
+        $dom = new DOMDocument('1.0', 'utf8');
+        // loadHTML() refuses an empty string on PHP 8, so leave the document empty in that case.
+        if (trim($str) !== '') {
+            // pasted markup is rarely valid: collect parser errors instead of raising warnings.
+            $previous = libxml_use_internal_errors(true);
+            $dom->loadHTML($str);
+            libxml_clear_errors();
+            libxml_use_internal_errors($previous);
+        }
+        $opts['dom'] = $dom;
+        return new HTML_Clean($opts);
+    }
+
+    /**
+     * Swap the typographic characters Word likes to insert for plain equivalents.
+     *
+     * @param string $str UTF-8 text
+     * @return string
+     */
+    static function cleanWordChars($str)
+    {
+        $swapCodes = array(
+            8211 => "–" ,
+            8212 => "—" ,
+            8216 => "'" ,
+            8217 => "'" ,
+            8220 => '"' ,
+            8221 => '"' ,
+            8226 => "*" ,
+            8230 => "..."
+        );
+        foreach ($swapCodes as $code => $plain) {
+            // be explicit about the encoding rather than trusting mb_internal_encoding()
+            $str = str_replace(mb_chr($code, 'UTF-8'), $plain, $str);
+        }
+        return $str;
+    }
+
+    /**
+     * Copy $opts onto the instance, then run every filter over $this->dom.
+     *
+     * @param array $opts must contain 'dom' (DOMDocument)
+     */
+    function __construct($opts)
+    {
+        foreach ($opts as $k => $v) {
+            $this->{$k} = $v;
+        }
+        $d = $this->dom ? $this->dom->documentElement : null;
+        if (!$d) {
+            return; // empty document - nothing to clean.
+        }
+        $this->filter('Word', array('node' => $d));
+
+        $this->filter('StyleToTag', array(
+            'node' => $d // this could add nodes to tree, so not very good to nest the walk.
+        ));
+
+        // NOTE(review): the class visible for this filter is HTML_Clean_FilterAttribute (singular);
+        // confirm the file / class name actually matches 'Attributes'.
+        $this->filter('Attributes', array( // does walk as well.
+            'node' => $d,
+            'attrib_white' => array('href', 'src', 'name', 'align', 'colspan', 'rowspan', 'data-display', 'data-width', 'start'),
+            'attrib_clean' => array('href', 'src' ),
+
+            'replaceComment' => true // this is sneaked in here - as walk will get rid of comments at the same time.
+        ));
+
+        // loadHTML() wraps the content in <html><body>; blacklisting those wrappers would
+        // delete the whole document, so they are left out of the list handed to the filter.
+        $this->filter('Black', array(
+            'node' => $d,
+            'tag' => array_values(array_diff($this->black, array('HTML', 'BODY')))
+        ));
+        // we don't use the whitelist?
+
+        // should be fonts..
+        $this->filter('KeepChildren', array('node' => $d, 'tag' => array('FONT', ':')));
+        $this->filter('Paragraph', array('node' => $d));
+        $this->filter('Span', array('node' => $d));
+        $this->filter('LongBr', array('node' => $d));
+
+        // wrap every image that is not already inside a <figure>.
+        // snapshot first: updateElement() changes the tree while we iterate.
+        $imgs = iterator_to_array($d->getElementsByTagName('img'), false);
+        foreach ($imgs as $img) {
+            if ($this->findParent($img, 'figure')) {
+                continue;
+            }
+            require_once 'HTML/Clean/BlockFigure.php';
+            $fig = new HTML_Clean_BlockFigure(array(
+                'image_src' => $img->getAttribute('src')
+            ));
+            $fig->updateElement($img);
+        }
+
+        require_once 'HTML/Clean/Block.php';
+        HTML_Clean_Block::initAll($d);
+    }
+
+    /**
+     * Find the closest ancestor element with the given tag name.
+     *
+     * @param DOMNode $node
+     * @param string  $tag  tag name, compared case-insensitively
+     * @return DOMNode|false the ancestor, or false when there is none
+     */
+    function findParent($node, $tag)
+    {
+        for ($p = $node->parentNode; $p; $p = $p->parentNode) {
+            if ($p->nodeType == XML_ELEMENT_NODE && strcasecmp($p->nodeName, $tag) === 0) {
+                return $p;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Load HTML/Clean/Filter{$type}.php and instantiate HTML_Clean_Filter{$type};
+     * the filters do their work from the constructor.
+     *
+     * @param string $type filter suffix, eg. 'Word'
+     * @param array  $args configuration passed to the filter constructor
+     */
+    function filter($type, $args)
+    {
+        require_once 'HTML/Clean/Filter'. $type .'.php';
+        $cls = 'HTML_Clean_Filter'. $type;
+        new $cls($args);
+    }
+
+    /**
+     * @return string the cleaned document serialised as HTML
+     */
+    function toString()
+    {
+        return $this->dom->saveHTML();
+    }
+
+}
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ * This is used in the HTML editor to make content editable
+ *
+ * In our case it's used to render images and tables correctly.
+ */
+require_once 'Filter.php';
+abstract class HTML_Clean_Block extends HTML_Clean_Filter
+{
+    var $node;
+
+    // used by context menu
+    var $context = false; // ??
+
+    /**
+     * Build the block object for an element.
+     *
+     * The block type comes from the element's data-block attribute, falling back
+     * to the tag name (eg. <table> => HTML_Clean_BlockTable).
+     *
+     * @param DOMElement $node
+     * @return HTML_Clean_Block
+     */
+    static function factory ($node)
+    {
+        $db = $node->hasAttribute('data-block') ? $node->getAttribute('data-block') : '';
+        // data-block comes from the document and ends up in a file path: only accept a plain identifier.
+        if (!preg_match('/^[A-Za-z][A-Za-z0-9]*$/', $db)) {
+            $db = ucfirst(strtolower($node->nodeName));
+        }
+        require_once 'HTML/Clean/Block'.$db . '.php';
+        $cls = 'HTML_Clean_Block'. $db;
+
+        return new $cls(array('node' => $node ));
+    }
+
+    /**
+     * Initialize all elements from content that are 'blockable'
+     * @static
+     * @param DOMElement   $body the element to search in
+     * @param string|false $type tag name to initialise, or false for table, td and figure
+     */
+    static function initAll ($body, $type=false)
+    {
+        if ($type === false) {
+            self::initAll($body,'table');
+            self::initAll($body,'td');
+            self::initAll($body,'figure');
+            return;
+        }
+        // snapshot - the block constructors rewrite the tree while we iterate.
+        $ar = iterator_to_array($body->getElementsByTagName($type), false);
+        foreach ($ar as $a) {
+            self::factory($a);
+        }
+    }
+
+    /**
+     * Update a node with values from this object
+     * @param DOMElement $node element to update ($this->node when empty)
+     */
+    function updateElement ($node)
+    {
+        $target = empty($node) ? $this->node : $node;
+        // build the replacement in the target's own document so nodes can be moved across freely.
+        self::updateNode($target, self::createDom($this->toObject(), false, $target->ownerDocument));
+    }
+
+    /**
+     * convert to plain HTML for calling insertAtCursor..
+     * @return string
+     */
+    function toHTML ()
+    {
+        return self::createHTML($this->toObject());
+    }
+
+    /**
+     * used by readElement to extract data from a node
+     * may need improving as it's pretty basic
+     *
+     * @param DOMElement  $node
+     * @param string|true $tag   tag to find, eg. IMG - or true to use $node itself
+     * @param string|false $attr attribute name; 'html' for the contents, 'style' to read the
+     *                           style property named by $style, or false to return the node
+     * @param string|false $style the style property - eg. text-align
+     * @return mixed '' when the element / attribute / style is missing
+     */
+    function getVal($node, $tag , $attr = false, $style = false)
+    {
+        $n = $node;
+        // parsed HTML has lower-case tag names, so compare without regard to case.
+        if ($tag !== true && strcasecmp($n->nodeName, $tag) !== 0) {
+            // in theory we could do figure[3] << 3rd figure? or some more complex search..?
+            // but kiss for now.
+            $n = $node->getElementsByTagName(strtolower($tag))->item(0);
+        }
+        if (!$n) {
+            return '';
+        }
+        if ($attr === false) {
+            return $n;
+        }
+        if ($attr == 'html') {
+            return $this->innerHTML($n);
+        }
+        if ($attr == 'style') {
+            $styles = $this->styleToObject($n, true);
+            $key = strtolower((string) $style);
+            return is_array($styles) && isset($styles[$key]) ? $styles[$key] : '';
+        }
+
+        return $n->hasAttribute($attr) ? $n->getAttribute($attr) : '';
+    }
+
+    /**
+     * create a DomHelper friendly object - for use with
+     * Roo.DomHelper.markup / overwrite / etc..
+     * (override this)
+     */
+    abstract function toObject();
+
+    /**
+     * Read a node that has a 'data-block' property - and extract the values from it.
+     * @param DOMElement $node - the node
+     */
+    abstract function readElement ($node);
+
+    /**
+     * Normalise the 'cn' / 'children' entry of a spec into a list of child specs.
+     *
+     * @param array $o element spec (already cast to array)
+     * @return array list of child specs (strings, arrays or objects)
+     */
+    protected static function childSpecs($o)
+    {
+        $cn = isset($o['cn']) ? $o['cn'] : (isset($o['children']) ? $o['children'] : false);
+        if ($cn === false || $cn === null || $cn === array()) {
+            return array();
+        }
+        // a single child may be given directly: a string, an object, or an array spec.
+        if (!is_array($cn) || isset($cn['tag']) || isset($cn['html']) || isset($cn['cn'])) {
+            return array($cn);
+        }
+        return $cn;
+    }
+
+    /**
+     * Turn the 'style' entry of a spec (string or property => value map) into CSS text.
+     *
+     * @param mixed $style
+     * @return string '' when $style is neither a string nor an array
+     */
+    protected static function styleString($style)
+    {
+        if (is_string($style)) {
+            return $style;
+        }
+        if (!is_array($style)) {
+            return '';
+        }
+        $css = '';
+        foreach ($style as $kk => $vv) {
+            $css .= $kk . ":" . $vv . ";";
+        }
+        return $css;
+    }
+
+    /**
+     * Render an element spec as an HTML string.
+     *
+     * A spec is a string (returned as is) or an array / object with 'tag', attributes,
+     * 'style', 'cn' / 'children' and 'html' (raw inner markup).
+     *
+     * @param mixed $o
+     * @return string
+     */
+    static function createHTML($o)
+    {
+        if (is_string($o)) {
+            return $o;
+        }
+        $o = (array) $o;
+        $tag = empty($o['tag']) ? 'div' : $o['tag'];
+        $b = "<" . $tag;
+
+        foreach ($o as $attr => $val) {
+            $attr = (string) $attr;
+            if (in_array($attr, array('tag', 'children', 'cn', 'html'), true)) {
+                continue;
+            }
+            if ($attr == 'style') {
+                if (is_string($val) || is_array($val)) {
+                    $b .= ' style="' . htmlspecialchars(self::styleString($val), ENT_QUOTES, 'UTF-8') . '"';
+                }
+                continue;
+            }
+            if (is_array($val) || is_object($val)) {
+                continue; // not representable as an attribute value
+            }
+            $name = $attr == 'cls' ? 'class' : ($attr == 'htmlFor' ? 'for' : $attr);
+            // attribute values originate from the document being cleaned - escape them.
+            $b .= ' ' . $name . '="' . htmlspecialchars((string) $val, ENT_QUOTES, 'UTF-8') . '"';
+        }
+        if (preg_match('/^(?:br|frame|hr|img|input|link|meta|range|spacer|wbr|area|param|col)$/i', $tag)) {
+            return $b . "/>"; // empty
+        }
+        $b .= ">";
+        foreach (self::childSpecs($o) as $child) {
+            $b .= self::createHTML($child);
+        }
+        if (isset($o['html'])) {
+            $b .= $o['html'];
+        }
+        return $b . "</" . $tag . ">";
+    }
+
+    /**
+     * Build DOM nodes from an element spec (see createHTML for the format).
+     *
+     * @param mixed              $o          spec, or a string for a text node
+     * @param DOMNode|false      $parentNode node to append the result to
+     * @param DOMDocument|null   $doc        document to create nodes in; defaults to the
+     *                                       parent's document, or a new one
+     * @return DOMNode the created node
+     */
+    static function createDom ($o, $parentNode = false, $doc = null) {
+
+        if (!$doc) {
+            // children must be created in the same document as their parent.
+            $doc = ($parentNode && $parentNode->ownerDocument)
+                ? $parentNode->ownerDocument
+                : new DOMDocument('1.0', 'utf8');
+        }
+
+        if (is_string($o)) {
+            $text = $doc->createTextNode($o);
+            return $parentNode ? $parentNode->appendChild($text) : $text;
+        }
+        $o = (array) $o;
+        $el = $doc->createElement(empty($o['tag']) ? 'div' : $o['tag']);
+
+        foreach ($o as $attr => $val) {
+            $attr = (string) $attr;
+            if (in_array($attr, array('tag', 'ns', 'xmlns', 'children', 'cn', 'html', 'style'), true)) {
+                continue;
+            }
+            if (is_array($val) || is_object($val)) {
+                continue;
+            }
+            $el->setAttribute($attr == 'cls' ? 'class' : $attr, (string) $val);
+        }
+        if (!empty($o['style'])) {
+            $css = self::styleString($o['style']);
+            if ($css !== '') {
+                $el->setAttribute('style', $css);
+            }
+        }
+        foreach (self::childSpecs($o) as $child) {
+            self::createDom($child, $el, $doc);
+        }
+        if (isset($o['html']) && (string) $o['html'] !== '') {
+            // appendXML() only accepts well-formed XML; an empty or broken fragment is skipped.
+            $f = $doc->createDocumentFragment();
+            if ($f->appendXML((string) $o['html']) && $f->hasChildNodes()) {
+                $el->appendChild($f);
+            }
+        }
+        if ($parentNode) {
+            $parentNode->appendChild($el);
+        }
+        return $el;
+    }
+
+    /**
+     * Make $from look like $to, changing as little of the existing tree as possible.
+     *
+     * Nodes of a different type / tag are replaced; otherwise attributes are synced
+     * (an existing id is always kept) and the children are updated pairwise.
+     *
+     * @param DOMNode $from node in the document being cleaned
+     * @param DOMNode $to   freshly built node describing the wanted result
+     */
+    static function updateNode ($from, $to)
+    {
+        // nodes can only be inserted into the document that owns them.
+        if ($from->ownerDocument && $to->ownerDocument !== $from->ownerDocument) {
+            $to = $from->ownerDocument->importNode($to, true);
+        }
+
+        if ($from->nodeType != $to->nodeType) {
+            if ($from->parentNode) {
+                $from->parentNode->replaceChild($to, $from);
+            }
+            return; // $from is gone - nothing left to compare.
+        }
+
+        if ($from->nodeType == XML_TEXT_NODE) {
+            if ($from->data != $to->data) {
+                $from->data = $to->data;
+            }
+            return;
+        }
+        if (!$from->parentNode) {
+            return;
+        }
+        if ($from->nodeType != XML_ELEMENT_NODE || strcasecmp($from->tagName, $to->tagName) !== 0) {
+            $from->parentNode->replaceChild($to, $from);
+            return;
+        }
+
+        // drop attributes that are not wanted any more (snapshot - we remove while looping)
+        foreach (iterator_to_array($from->attributes, false) as $attr) {
+            if ($attr->nodeName == 'id' || $to->hasAttribute($attr->nodeName)) { // always keep ids?
+                continue;
+            }
+            $from->removeAttribute($attr->nodeName);
+        }
+        // add / change the rest
+        foreach (iterator_to_array($to->attributes, false) as $attr) {
+            if ($from->hasAttribute($attr->nodeName) && $from->getAttribute($attr->nodeName) === $attr->nodeValue) {
+                continue;
+            }
+            $from->setAttribute($attr->nodeName, $attr->nodeValue);
+        }
+
+        // children - compared position by position
+        $far = iterator_to_array($from->childNodes, false);
+        $tar = iterator_to_array($to->childNodes, false);
+        $total = max(count($far), count($tar));
+
+        for ($i = 0; $i < $total; $i++) {
+            if ($i >= count($far)) {
+                $from->appendChild($tar[$i]);
+                continue;
+            }
+            if ($i >= count($tar)) {
+                $from->removeChild($far[$i]);
+                continue;
+            }
+            self::updateNode($far[$i], $tar[$i]);
+        }
+    }
+
+};
--- /dev/null
+<?php
+
+/**
+ * This handles 'figure' tags (which is what img's are auto wrapped with.)
+ */
+
+
+require_once 'Block.php';
+class HTML_Clean_BlockFigure extends HTML_Clean_Block
+{
+
+    /**
+     * @param array $cfg property values; when 'node' is given the values are read from
+     *                   that element and the element is rewritten straight away
+     */
+    function __construct($cfg) {
+        if (!empty($cfg['node'])) {
+            $this->readElement($cfg['node']);
+            $this->updateElement($cfg['node']);
+        }
+        parent::__construct($cfg);
+    }
+
+    // setable values.
+    var $image_src= '';
+    var $align= 'center';
+    var $caption = '';
+    var $caption_display = 'block';
+    var $width = '100%';
+    var $cls = '';
+    var $href = '';
+    var $video_url = '';
+
+    // margin: '2%', not used
+
+    var $text_align = 'left'; // (left|right) alignment for the text caption default left. - not used at present
+
+    /**
+     * create a DomHelper friendly object - for use with
+     * Roo.DomHelper.markup / overwrite / etc..
+     *
+     * @return array nested element spec: figure > (img | a > img | div > ...) + figcaption
+     */
+    function toObject ()
+    {
+        // plain-text version of the caption (which may contain markup) for the alt attribute.
+        $caption_plain = '';
+        if ($this->caption_display == "block") {
+            $text = html_entity_decode(strip_tags((string) $this->caption), ENT_QUOTES, 'UTF-8');
+            $caption_plain = trim(preg_replace('/\s+/', ' ', $text));
+        }
+
+        $centered = $this->align == 'center';
+        $m = $this->width != '100%' && $centered ? '0 auto' : 0;
+        $iw = $centered ? $this->width : '100%';
+
+        $img = array(
+            'tag' => 'img',
+            'src' => $this->image_src,
+            'alt' => $caption_plain,
+            'style'=> array(
+                'width' => $iw,
+                'max-width' => $iw . ' !important', // this is not getting rendered?
+                'margin' => $m
+            )
+        );
+
+        if (!empty($this->href)) {
+            $img = array(
+                'tag' => 'a',
+                'href' => $this->href,
+                'cn' => array(
+                    $img
+                )
+            );
+        }
+
+        if (!empty($this->video_url)) {
+            $img = array(
+                'tag' => 'div',
+                'cls' => $this->cls,
+                'frameborder' => 0,
+                'allowfullscreen' => true,
+                'width' => 420, // these are for video tricks - that we replace the outer
+                'height' => 315,
+                'src' => $this->video_url,
+                'cn' => array(
+                    $img
+                )
+            );
+        }
+        // we remove caption totally if its hidden... - will delete data.. but otherwise we end up with fake caption
+        $captionhtml = $this->caption_display == 'none' || !strlen($this->caption) ? '' : $this->caption;
+
+        return array(
+            'tag' => 'figure',
+            'data-block' => 'Figure',
+            'data-width' => $this->width,
+
+            'style' => array(
+                'display' => 'block',
+                'float' => $this->align ,
+                'max-width' => $centered ? '100% !important' : ($this->width . ' !important'),
+                'width' => $centered ? '100%' : $this->width,
+                'margin' => '0px',
+                'padding' => $centered ? '0' : '0 10px' ,
+                'text-align' => $this->align // seems to work for email..
+            ),
+
+            'align' => $this->align,
+            'cn' => array(
+                $img,
+
+                array (
+                    'tag'=> 'figcaption',
+                    'data-display' => $this->caption_display,
+                    'style' => array(
+                        'text-align' => 'left',
+                        'font-size' => '16px',
+                        'line-height' => '24px',
+                        'display' => $this->caption_display,
+                        'max-width' => ($centered ? $this->width : '100%' ) . ' !important',
+                        'margin'=> $m,
+                        'width'=> $centered ? $this->width : '100%'
+                    ),
+                    'cls' => strlen($this->cls) > 0 ? ($this->cls . '-thumbnail' ) : '',
+                    'cn' => array(
+                        array(
+                            'tag' => 'div',
+                            'style' => array(
+                                'margin-top' => '16px',
+                                'text-align' => 'left'
+                            ),
+                            'align'=> 'left',
+                            'cn' => array(
+                                array(
+                                    // we can not rely on yahoo syndication to use CSS elements - so have to use '<i>' to encase stuff.
+                                    'tag' => 'i',
+                                    'html' => $captionhtml
+                                )
+                            )
+                        )
+                    )
+                )
+            )
+        );
+    }
+
+    /**
+     * Read the settable values back from an existing figure element.
+     * Values the element does not carry keep their current (default) value.
+     *
+     * @param DOMElement $node
+     */
+    function readElement ($node)
+    {
+        // this should not really come from the link...
+        $this->video_url = $this->getVal($node, 'div', 'src');
+        $this->cls = $this->getVal($node, 'div', 'class');
+        $this->href = $this->getVal($node, 'a', 'href');
+
+        $this->image_src = $this->getVal($node, 'img', 'src');
+
+        $align = $this->getVal($node, 'figure', 'align');
+        if ($align !== '') {
+            $this->align = $align;
+        }
+
+        $figcaption = $this->getVal($node, 'figcaption', false);
+        if ($figcaption !== '') {
+            $this->caption = $this->getVal($figcaption, 'i', 'html');
+        }
+
+        $display = $this->getVal($node, 'figcaption', 'data-display');
+        if ($display !== '') {
+            $this->caption_display = $display;
+        }
+        //$this->text_align = $this->getVal(node, 'figcaption', 'style','text-align');
+        $width = $this->getVal($node, true, 'data-width');
+        if ($width !== '') {
+            $this->width = $width;
+        }
+        //$this->margin = $this->getVal(node, 'figure', 'style', 'margin');
+    }
+
+}
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ * This handles 'table' tags (and puts nice borders on them.)
+ */
+
+
+require_once 'Block.php';
+require_once 'BlockTd.php';
+class HTML_Clean_BlockTable extends HTML_Clean_Block
+{
+
+    var $rows = array(); // row cols {array} - list of rows, each a list of HTML_Clean_BlockTd
+    var $no_col = 1;
+    var $no_row = 1;
+    var $width = '100%';
+
+    /**
+     * @param array $cfg property values; with 'node' the table is read from (and rewritten in)
+     *                   that element, without it an empty no_row x no_col grid is created
+     */
+    function __construct($cfg) {
+
+        if (!empty($cfg['node'])) {
+            $this->readElement($cfg['node']);
+            $this->updateElement($cfg['node']);
+        }
+        parent::__construct($cfg);
+        if (!$this->node) {
+            for ($r = 0; $r < $this->no_row; $r++) {
+                $this->rows[$r] = array();
+                for ($c = 0; $c < $this->no_col; $c++) {
+                    $this->rows[$r][$c] = $this->emptyCell();
+                }
+            }
+        }
+    }
+
+    /**
+     * @return array element spec: table > tbody > tr* > td*
+     */
+    function toObject ()
+    {
+        $trs = array();
+        foreach ($this->rows as $row) {
+            $tr = array(
+                'tag' => 'tr',
+                'style' => array(
+                    'margin' => '6px',
+                    'border' => 'solid 1px #000',
+                    'text-align' => 'left'
+                ),
+                'cn' => array()
+            );
+            // does the row have any properties? ?? height?
+            foreach ($row as $cell) {
+                $tr['cn'][] = $cell->toObject();
+            }
+            $trs[] = $tr;
+        }
+
+        return array(
+            'tag' => 'table',
+            'data-block' => 'Table',
+            'style' => array(
+                'width'=> $this->width,
+                'border' => 'solid 1px #000', // ??? hard coded?
+                'border-collapse' => 'collapse'
+            ),
+            'cn' => array(
+                array( 'tag' => 'tbody' , 'cn' => $trs )
+            )
+        );
+    }
+
+    /**
+     * Tell whether a row belongs to the given table rather than to a table nested in it.
+     *
+     * @param DOMNode $tr
+     * @param DOMNode $table
+     * @return bool
+     */
+    function isRowOf($tr, $table)
+    {
+        for ($p = $tr->parentNode; $p; $p = $p->parentNode) {
+            if ($p->nodeType == XML_ELEMENT_NODE && strtolower($p->nodeName) == 'table') {
+                return $p->isSameNode($table);
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Read width, rows and cells from a table element.
+     *
+     * @param DOMElement $node the table ($this->node when empty)
+     */
+    function readElement($node)
+    {
+        $node = $node ? $node : $this->node ;
+        $width = $this->getVal($node, true, 'style', 'width');
+        $this->width = ($width === '' || $width === null) ? '100%' : $width;
+
+        $this->rows = array();
+        $this->no_row = 0;
+        $trs = $this->arrayFrom($node->getElementsByTagName('tr'));
+        foreach ($trs as $tr) {
+            if (!$this->isRowOf($tr, $node)) {
+                continue; // row of a nested table - it is part of a cell's content.
+            }
+            $row = array();
+            $this->no_row++;
+            $no_column = 0;
+            // only the cells of this row; snapshot as the cell blocks rewrite their element.
+            foreach ($this->arrayFrom($tr->childNodes) as $td) {
+                if ($td->nodeType != XML_ELEMENT_NODE
+                    || !in_array(strtolower($td->nodeName), array('td', 'th'), true)) {
+                    continue;
+                }
+                $add = new HTML_Clean_BlockTd(array('node' => $td));
+                $no_column += $add->colspan;
+                $row[] = $add;
+            }
+            $this->rows[] = $row;
+            $this->no_col = max($this->no_col, $no_column);
+        }
+    }
+
+    /**
+     * @return HTML_Clean_BlockTd a cell with default settings
+     */
+    function emptyCell () {
+        return new HTML_Clean_BlockTd(array());
+    }
+}
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ * This handles 'td' tags (and puts nice borders on them.)
+ */
+
+
+require_once 'Block.php';
+class HTML_Clean_BlockTd extends HTML_Clean_Block
+{
+    var $width = ''; // should be a percent.!
+    var $textAlign = 'left';
+    var $valign = 'top';
+
+    var $colspan = 1;
+    var $rowspan = 1;
+
+    var $html = ''; // cell contents (markup)
+
+    /**
+     * @param array $cfg property values; when 'node' is given the values are read from
+     *                   that cell and the cell is rewritten straight away
+     */
+    function __construct($cfg) {
+
+        if (!empty($cfg['node'])) {
+            $this->readElement($cfg['node']);
+            $this->updateElement($cfg['node']);
+        }
+        parent::__construct($cfg);
+    }
+
+    /**
+     * @return array element spec for the <td>
+     */
+    function toObject ()
+    {
+        $ret = array(
+            'tag' => 'td',
+            'data-block' => 'Td',
+            'valign' => $this->valign,
+            'style' => array(
+                'text-align' => $this->textAlign,
+                'border' => 'solid 1px rgb(0, 0, 0)', // ??? hard coded?
+                'border-collapse' => 'collapse',
+                'padding' => '6px', // 8 for desktop / 4 for mobile
+                'vertical-align'=> $this->valign
+            ),
+            'html' => $this->html
+        );
+        if ($this->width != '') {
+            $ret['width'] = $this->width;
+            $ret['style']['width'] = $this->width;
+        }
+
+        if ($this->colspan > 1) {
+            $ret['colspan'] = $this->colspan ;
+        }
+        if ($this->rowspan > 1) {
+            $ret['rowspan'] = $this->rowspan ;
+        }
+
+        return $ret;
+    }
+
+    /**
+     * Read width, spans, alignment and contents from a cell element.
+     *
+     * @param DOMElement $node the cell ($this->node when empty)
+     */
+    function readElement ($node)
+    {
+        $node = $node ? $node : $this->node ;
+
+        $this->width = $node->getAttribute('width');
+        // getAttribute() gives '' for a missing attribute; cast rather than multiply,
+        // as arithmetic on a non-numeric string throws on PHP 8.
+        $this->colspan = max(1, (int) $node->getAttribute('colspan'));
+        $this->rowspan = max(1, (int) $node->getAttribute('rowspan'));
+        $this->html = $this->innerHTML($node);
+        $styles = $this->styleToObject($node,true);
+
+        if (is_array($styles) && !empty($styles['text-align'])) {
+            $this->textAlign = $styles['text-align'];
+        }
+        if ($node->hasAttribute('valign')) {
+            $this->valign = $node->getAttribute('valign');
+        }
+    }
+
+}
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ * Base class for all the filtering, contains a few useful routines along with the walk code
+ *
+ * doesnt really use filter parent much
+ *
+ */
+
+class HTML_Clean_Filter
+{
+    var $replaceComment = false; // anything other than false: comments met while walking are removed
+
+    var $node = false;
+    var $tag = false; // true = every element, or a tag name / list of tag names (':' = any namespaced tag)
+
+    /**
+     * @param array $cfg property name => value, copied onto the filter
+     */
+    function __construct($cfg)
+    {
+        foreach($cfg as $k=>$v) {
+            $this->$k = $v;
+        }
+    }
+
+    /**
+     * Visit every descendant of $dom, calling replaceTag() on elements that match $this->tag.
+     * The children of a matching element are skipped when replaceTag() returns false.
+     *
+     * @param DOMNode $dom
+     */
+    function walk ($dom)
+    {
+        // snapshot: replaceTag() implementations add and remove nodes while we iterate.
+        foreach ($this->arrayFrom($dom->childNodes) as $e) {
+            if ($this->isTagMatch($e) && false === $this->replaceTag($e)) {
+                continue; // handled - go on with the next sibling.
+            }
+            if ($e->hasChildNodes()) {
+                $this->walk($e);
+            }
+        }
+    }
+
+    /**
+     * Tell whether a node is an element selected by $this->tag (compared case-insensitively).
+     * Side effect: a comment node is passed to replaceComment() when $this->replaceComment is not false.
+     *
+     * @param DOMNode $e
+     * @return bool
+     */
+    function isTagMatch($e) {
+        if ($e->nodeType == XML_COMMENT_NODE && $this->replaceComment !== false) {
+            $this->replaceComment($e);
+            return false;
+        }
+        if ($e->nodeType != XML_ELEMENT_NODE) { //not a node.
+            return false;
+        }
+        if ($this->tag === true) { // everything
+            return true;
+        }
+        if (is_array($this->tag)) {
+            $tags = $this->tag;
+        } else {
+            $tags = is_string($this->tag) ? array($this->tag) : array();
+        }
+        // parsed HTML has lower-case tag names while the filter lists use upper case.
+        $name = strtoupper($e->tagName);
+        if (strpos($name, ':') !== false && in_array(':', $tags, true)) {
+            return true;
+        }
+        foreach ($tags as $t) {
+            if (is_string($t) && strtoupper($t) === $name) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Remove a comment node (called by isTagMatch when $this->replaceComment is not false).
+     *
+     * @param DOMNode $n
+     */
+    function replaceComment($n)
+    {
+        if ($n->parentNode) {
+            $n->parentNode->removeChild($n);
+        }
+    }
+
+    // dummy version - implementations should return false to not walk children.
+    function replaceTag($e) {
+        // if we avoid filtering here -> we could just call walk on all the child names.
+
+        return true;
+    }
+
+    /**
+     * Replace a node by its children.
+     *
+     * @param DOMNode $node
+     */
+    function removeNodeKeepChildren ( $node)
+    {
+        $ar = $this->arrayFrom($node->childNodes);
+        foreach($ar as $n) {
+            $node->removeChild($n);
+            $node->parentNode->insertBefore($n, $node);
+        }
+        $node->parentNode->removeChild($node);
+    }
+
+    /**
+     * Copy a (live) node list / map into a plain array so it can be looped over while the tree changes.
+     *
+     * @param iterable $list
+     * @return array
+     */
+    function arrayFrom($list)
+    {
+        $ret = array();
+        foreach($list as $k=> $l) {
+            $ret[$k] = $l;
+        }
+        return $ret;
+    }
+
+    /**
+     * Serialise the children of a node with saveXML().
+     *
+     * @param DOMNode $n
+     * @return string
+     */
+    function innerHTML($n)
+    {
+        $ret = "";
+        foreach($n->childNodes as $c) {
+            $ret .= $c->ownerDocument->saveXML($c);
+        }
+        return $ret;
+    }
+
+    /**
+     * Move all children of $from to the end of $to.
+     */
+    function copyInnerHTML($from, $to)
+    {
+        $ar = $this->arrayFrom($from->childNodes);
+        foreach($ar as $n) {
+            $from->removeChild($n);
+            $to->appendChild($n);
+        }
+    }
+
+    /**
+     * Parse the style attribute of a node into a property => value array.
+     * Declarations without a ':' are ignored.
+     *
+     * @param DOMElement $node
+     * @param bool       $lower lower-case the property names
+     * @return array
+     */
+    function styleToObject($node, $lower = false)
+    {
+        $styles = explode(';',$node->hasAttribute("style") ? $node->getAttribute("style") : '');
+        $ret = array();
+        foreach($styles as $s) {
+            if (strpos($s, ':') === false) {
+                continue; // skip this declaration only - the others are still wanted.
+            }
+            $kv = explode(':', $s, 2);
+            $key = trim($kv[0]);
+            // what ever is left... we allow.
+            $ret[$lower ? strtolower($key) : $key] = trim($kv[1]);
+        }
+        return $ret;
+    }
+
+    /**
+     * Write a property => value array back as the style attribute;
+     * the attribute is removed when there is nothing left to write.
+     *
+     * @param DOMElement $node
+     * @param array      $style
+     */
+    function nodeSetStyle($node, $style)
+    {
+        $str = array();
+        foreach($style as $k=>$v) {
+            $str[] = "$k:$v";
+        }
+        if (!$str) {
+            $node->removeAttribute('style');
+            return;
+        }
+        $node->setAttribute('style', implode(";", $str));
+    }
+
+}
--- /dev/null
+<?php
+
+
+
+/**
+ * replaces bad attributes... and attribute values
+ *
+ * done by walking all elements
+ *
+ *
+ *
+ */
+
+require_once 'Filter.php';
+
+class HTML_Clean_FilterAttribute extends HTML_Clean_Filter
+{
+
+    var $tag = true; // all tags
+
+    var $attrib_black = array(); // attribute names that are always removed
+    var $attrib_clean = array(); // attribute names whose value must look like a safe url
+    var $attrib_white = array(); // when not empty: the only attribute names that are kept
+
+    var $style_black = array(); // style properties that are always removed
+    var $style_white = array(); // when not empty: the only style properties that are kept
+
+    /**
+     * Apply the configuration and clean every element below $cfg['node'].
+     *
+     * @param array $cfg 'node' plus any of the list properties above
+     */
+    function __construct($cfg)
+    {
+        parent::__construct($cfg);
+        $this->walk($cfg['node']);
+    }
+
+    /**
+     * Clean the attributes of one element.
+     *
+     * @param DOMElement $node
+     * @return bool always true - children are cleaned as well
+     */
+    function replaceTag ($node)
+    {
+        if (!$node->hasAttributes()) {
+            return true; // do children.
+        }
+        // snapshot - attributes are removed while looping.
+        $ats = $this->arrayFrom($node->attributes);
+        foreach($ats as $a) {
+            $name = strtolower($a->name);
+
+            // with a white list, anything that is not on it goes.
+            if (count($this->attrib_white) && !in_array($name, $this->attrib_white, true)) {
+                $node->removeAttribute($a->name);
+                continue;
+            }
+
+            // always remove 'on'
+            if (substr($name, 0, 2) == 'on') {
+                $node->removeAttribute($a->name);
+                continue;
+            }
+
+            if (in_array($name, $this->attrib_black, true)) {
+                $node->removeAttribute($a->name);
+                continue;
+            }
+            if (in_array($name, $this->attrib_clean, true)) {
+                $this->cleanAttr($node,$a->name,$a->value); // fixme..
+                continue;
+            }
+
+            if ($a->name == 'style') {
+                $this->cleanStyle($node);
+                continue;
+            }
+            /// clean up MS crap..
+            // tecnically this should be a list of valid class'es..
+            if ($a->name == 'class') {
+                if (preg_match('/^Mso/', $a->value) || preg_match('/^body$/', $a->value)) {
+                    $node->removeAttribute('class');
+                    continue;
+                }
+            }
+
+            // style cleanup!?
+            // class cleanup?
+        }
+        return true; // clean children
+    }
+
+    /**
+     * Remove a url attribute unless its value is relative, a fragment, a template
+     * placeholder, or starts with one of the allowed schemes.
+     *
+     * @param DOMElement $node
+     * @param string     $n attribute name
+     * @param string     $v attribute value
+     */
+    function cleanAttr($node, $n,$v)
+    {
+        $v = trim($v);
+        // starts with 'dot' or 'slash', 'hash' or '{' << template
+        if (preg_match('/^(\.|\/|#|\{)/' , $v)) {
+            return;
+        }
+        // standard stuff? - should we allow data?
+        // anchored, so that eg. "javascript:alert('http:')" does not slip through.
+        if (preg_match('/^(https?|mailto|ftp|data):/i' , $v)) {
+            return;
+        }
+
+        $node->removeAttribute($n);
+    }
+
+    /**
+     * Filter the style attribute of an element through the style black / white lists.
+     *
+     * @param DOMElement $node
+     */
+    function cleanStyle ($node)
+    {
+        if (preg_match('/expression/', $node->getAttribute('style'))) { //XSS?? should we even bother..
+            $node->removeAttribute('style');
+            return;
+        }
+        $style = $this->styleToObject($node);
+        if (!is_array($style)) {
+            return;
+        }
+        $update = false;
+        foreach($style as $k=>$v) {
+            $prop = strtolower(trim($k));
+
+            $drop = in_array($prop, $this->style_black, true);
+            // only allow whitelisted properties - but an empty white list means 'no white list'.
+            if (!$drop && count($this->style_white) && !in_array($prop, $this->style_white, true)) {
+                $drop = true;
+            }
+            if ($drop) {
+                unset($style[$k]);
+                $update = true;
+            }
+        }
+        if (!$update) {
+            return;
+        }
+        if (!count($style)) {
+            $node->removeAttribute('style');
+            return;
+        }
+        $this->nodeSetStyle($node, $style);
+    }
+}
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ *
+ * black list removes all nodes which match and their children.
+ *
+ * if it's doesnt need to use ':', then we can just do a search.
+ *
+ */
+
+
+require_once 'Filter.php';
+
+class HTML_Clean_FilterBlack extends HTML_Clean_Filter
+{
+
+    /**
+     * Remove every element below $cfg['node'] whose tag is listed in $cfg['tag'].
+     * Plain tag names are looked up directly; the tree is only walked when the
+     * ':' (namespaced tags) selector is used.
+     *
+     * @param array $cfg 'node' and 'tag' (string or list of strings)
+     */
+    function __construct($cfg)
+    {
+        parent::__construct($cfg);
+        if (is_array($this->tag) && !in_array(':', $this->tag, true)) {
+            $this->simpleReplace();
+            return;
+        }
+        if (is_string($this->tag) && ':' != $this->tag) {
+            $this->simpleReplace();
+            return;
+        }
+        $this->walk($cfg['node']);
+    }
+
+    /**
+     * Remove a matching element together with everything inside it.
+     *
+     * @param DOMNode $n
+     * @return bool false - the children are gone, no need to walk them
+     */
+    function replaceTag ($n)
+    {
+        $n->parentNode->removeChild($n);
+        return false; // don't both with children..
+    }
+
+    /**
+     * Remove the listed elements using getElementsByTagName().
+     */
+    function simpleReplace()
+    {
+        foreach(is_array($this->tag) ? $this->tag : array($this->tag) as $t) {
+            // the lookup is case sensitive and parsed HTML is lower case, while the
+            // lists are written in upper case - so try both spellings.
+            foreach (array_unique(array($t, strtolower($t))) as $name) {
+                $ar = $this->arrayFrom($this->node->getElementsByTagName($name));
+                foreach($ar as $k) {
+                    if ($k->parentNode) {
+                        $k->parentNode->removeChild($k);
+                    }
+                }
+            }
+        }
+    }
+
+}
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ *
+ * if the node matches, it will replace the child with children.
+ * done for weird namespaced nodes, and stuff like font.
+ *
+ * js one extends black?
+ *
+ */
+
+
+require_once 'Filter.php';
+
+class HTML_Clean_FilterKeepChildren extends HTML_Clean_Filter
+{
+
+    /**
+     * Replace every element below $cfg['node'] that matches $cfg['tag'] by its children.
+     *
+     * @param array $cfg 'node' and 'tag'
+     */
+    function __construct($cfg)
+    {
+        parent::__construct($cfg);
+        if ($this->tag === false) { //not sure why.
+            return;
+        }
+
+        $this->walk($cfg['node']);
+    }
+
+    /**
+     * Unwrap one matching element: its children take its place in the parent.
+     *
+     * @param DOMNode $n the matching element
+     * @return bool false - the children are handled here, walk() must not descend
+     */
+    function replaceTag ($n)
+    {
+        if (!$n->parentNode) {
+            return true; // detached / top-level node - nothing to unwrap into.
+        }
+
+        //remove first.. - otherwise due to our walking method - the parent will not look at them.
+        foreach ($this->arrayFrom($n->childNodes) as $t) {
+            if (!$this->isTagMatch($t)) {
+                continue;
+            }
+            $this->replaceTag($t); // this effetively walks all the children.
+        }
+
+        foreach ($this->arrayFrom($n->childNodes) as $t) {
+            $n->removeChild($t);
+            $n->parentNode->insertBefore($t, $n);
+            // walk() will not visit the moved nodes again, so look inside them here.
+            if ($t->hasChildNodes()) {
+                $this->walk($t);
+            }
+        }
+        $n->parentNode->removeChild($n);
+        return false; // don't walk children
+    }
+
+}
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ *
+ * br br br >>> BR ?
+ *
+ *
+ */
+
+
+require_once 'Filter.php';
+
+class HTML_Clean_FilterLongBR extends HTML_Clean_Filter
+{
+
+    /**
+     * Look at every <br> below $cfg['node'] and drop the superfluous ones.
+     *
+     * @param array $cfg 'node'
+     */
+    function __construct($cfg)
+    {
+        parent::__construct($cfg);
+        $pp = $this->arrayFrom($this->node->getElementsByTagName('br'));
+        foreach($pp as $p) {
+            if (!$p->parentNode) { // should not happen as we only walk forwards.
+                continue;
+            }
+            $this->replaceIt($p);
+        }
+    }
+
+    /**
+     * Step over whitespace-only text nodes.
+     *
+     * @param DOMNode|null $n         node to start from
+     * @param string       $direction 'nextSibling' or 'previousSibling'
+     * @return DOMNode|null first node that is not blank text
+     */
+    function skipBlankText($n, $direction)
+    {
+        while ($n && $n->nodeType == XML_TEXT_NODE && strlen(trim($n->nodeValue)) < 1) {
+            $n = $n->$direction;
+        }
+        return $n;
+    }
+
+    /**
+     * Remove a <br> when it is
+     *  - the last thing inside a cell, list item or heading, or
+     *  - followed by a <br> and preceded by a <br> or heading (so at most two stay in a row).
+     *
+     * @param DOMElement $node the <br>
+     * @return bool always false
+     */
+    function replaceIt($node)
+    {
+        if (!$node->previousSibling) { // not hing before us...
+            return false;
+        }
+
+        // find the nex sibling that is a node,
+        $ps = $this->skipBlankText($node->nextSibling, 'nextSibling');
+
+        // we have no next sibling, and are inside one of these tags
+        $containers = array( 'TD', 'TH', 'LI', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' );
+        if (!$ps && in_array(strtoupper($node->parentNode->nodeName), $containers, true)) {
+            $node->parentNode->removeChild($node); // remove last BR inside one fo these tags
+            return false;
+        }
+
+        // next node not a BR. (parsed HTML has lower-case tag names)
+        if (!$ps || $ps->nodeType != XML_ELEMENT_NODE || strtoupper($ps->tagName) != 'BR') {
+            return false;
+        }
+
+        $ps = $this->skipBlankText($node->previousSibling, 'previousSibling');
+        if (!$ps || $ps->nodeType != XML_ELEMENT_NODE) {
+            return false;
+        }
+        // if header or BR before.. then it's a candidate for removal.. - as we only want '2' of these..
+        if (!in_array(strtoupper($ps->tagName), array( 'BR', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true)) {
+            return false;
+        }
+
+        $node->parentNode->removeChild($node); // remove me...
+        return false;
+    }
+}
+
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ *
+ * replaces each <p> by its children followed by two <br> tags;
+ * a <p> that only holds whitespace is replaced by a single <br>.
+ *
+ * js one extends black?
+ *
+ */
+
+
+require_once 'Filter.php';
+
+class HTML_Clean_FilterParagraph extends HTML_Clean_Filter
+{
+
+    /**
+     * Replace every <p> below $cfg['node'].
+     *
+     * @param array $cfg 'node'
+     */
+    function __construct($cfg)
+    {
+        parent::__construct($cfg);
+        // snapshot - the list returned by getElementsByTagName() changes as we remove nodes.
+        $pp = $this->arrayFrom($this->node->getElementsByTagName('p'));
+        foreach ($pp as $p) {
+            if (!$p->parentNode) {
+                continue;
+            }
+            $this->replaceIt($p);
+        }
+    }
+
+    /**
+     * Replace one <p>: a whitespace-only paragraph becomes a single <br>,
+     * anything else becomes its children followed by two <br>.
+     *
+     * @param DOMElement $node the <p>
+     * @return bool always false
+     */
+    function replaceIt($node)
+    {
+        $doc = $node->ownerDocument;
+        $parent = $node->parentNode;
+
+        $only = $node->childNodes->length == 1 ? $node->childNodes->item(0) : null;
+        // non-breaking spaces count as blank too (this covers <p>&nbsp;</p>)
+        if ($only && $only->nodeType == XML_TEXT_NODE
+            && preg_match('/^[\s\x{00A0}]*$/u', $only->textContent)) {
+            // remove and replace with '<BR>';
+            $parent->replaceChild($doc->createElement('br'), $node);
+            return false; // the paragraph is gone - nothing more to do.
+        }
+
+        foreach ($this->arrayFrom($node->childNodes) as $a) {
+            $node->removeChild($a);
+            // what if we need to walk these???
+            $parent->insertBefore($a, $node);
+        }
+
+        // double BR.
+        $parent->insertBefore($doc->createElement('br'), $node);
+        $parent->insertBefore($doc->createElement('br'), $node);
+
+        $parent->removeChild($node);
+        return false;
+    }
+}
+
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ *
+ * remove spans without attributes.
+ *
+ *
+ */
+
+
+require_once 'Filter.php';
+
+class HTML_Clean_FilterSpan extends HTML_Clean_Filter
+{
+
+    /**
+     * Replace every attribute-less <span> below $cfg['node'] by its children.
+     *
+     * @param array $cfg 'node'
+     */
+    function __construct($cfg)
+    {
+        parent::__construct($cfg);
+        // snapshot - spans are removed while looping.
+        $ar = $this->arrayFrom($this->node->getElementsByTagName('span'));
+        foreach($ar as $a) {
+            if ($a->hasAttributes() || !$a->parentNode) {
+                continue;
+            }
+            $this->removeNodeKeepChildren($a);
+        }
+    }
+
+}
+
\ No newline at end of file
--- /dev/null
+<?php
+
+
+
+/**
+ * replaces styles with HTML
+ *
+ *
+ *
+ *
+ */
+
+require_once 'Filter.php';
+
+class HTML_Clean_FilterStyleToTag extends HTML_Clean_Filter
+{
+
+    var $tag = true;
+
+    // what we are going to change.. tag name => ( style property => value )
+    var $tags = array(
+        'B' => array( 'font-weight' => 'bold' ),
+        'I' => array( 'font-style' => 'italic' ),
+
+        // h1.. h6 ?? font-size?
+        'SUP' => array( 'vertical-align' => 'super'),
+        'SUB' => array( 'vertical-align' => 'sub' )
+    );
+
+    /**
+     * Convert the styles listed in $tags into elements for everything below $cfg['node'].
+     *
+     * @param array $cfg 'node'
+     */
+    function __construct($cfg)
+    {
+        parent::__construct($cfg);
+        $this->walk($cfg['node']);
+    }
+
+    /**
+     * If the element carries one of the listed styles, drop that style and wrap the
+     * element's children in the matching tag(s) instead.
+     *
+     * @param DOMElement $node
+     * @return bool always true - the children are walked as well
+     */
+    function replaceTag($node)
+    {
+        if (!$node->hasAttribute("style")) {
+            return true;
+        }
+        $style = $this->styleToObject($node, true);
+        if (!is_array($style)) {
+            return true;
+        }
+        $inject = array();
+        foreach ($this->tags as $tn => $kv) {
+            // each entry is a property => value map, every pair has to match.
+            foreach ($kv as $k => $v) {
+                if (!isset($style[$k]) || strtolower($style[$k]) != $v) {
+                    continue 2;
+                }
+            }
+            foreach ($kv as $k => $v) {
+                unset($style[$k]);
+            }
+            $inject[] = $tn;
+        }
+        if (!count($inject)) {
+            return true;
+        }
+        if (count($style)) {
+            $this->nodeSetStyle($node, $style);
+        } else {
+            $node->removeAttribute('style');
+        }
+
+        // take the children now, before the wrappers are appended.
+        $cn = $this->arrayFrom($node->childNodes);
+        $nn = $node;
+        foreach($inject as $t) {
+            // lower case, like the element names of the parsed document.
+            $nc = $node->ownerDocument->createElement(strtolower($t));
+            $nn->appendChild($nc);
+            $nn = $nc;
+        }
+        foreach($cn as $n) {
+            $node->removeChild($n);
+            $nn->appendChild($n);
+        }
+
+        return true; /// iterate thru
+    }
+
+}
\ No newline at end of file
--- /dev/null
+<?php
+
+/**
+ * Does some very specific Word paste filtering - document bullets and <a name=...> tags.
+ *
+ * doesnt really use filter parent much
+ *
+ */
+
+require_once 'Filter.php';
+
+class HTML_Clean_FilterWord extends HTML_Clean_Filter
+{
+
+ var $tag = true;
+
+ /**
+  * Run the Word specific clean-up on $cfg['node'].
+  *
+  * @param array $cfg 'node' - the element to clean
+  */
+ function __construct($cfg)
+ {
+     parent::__construct($cfg);
+     // $cfg is an array (see the base class constructor), not an object.
+     $this->replaceDocBullets($cfg['node']);
+     $this->replaceAname($cfg['node']);
+ }
+
+
+ /**
+  * Strip the name attribute from every <a>, and unwrap anchors that have no href
+  * (their children take their place).
+  *
+  * @param DOMNode $doc node to search in
+  */
+ function replaceAname ($doc)
+ {
+     $anchors = $this->arrayFrom($doc->getElementsByTagName('a'));
+     $total = count($anchors);
+
+     $idx = 0;
+     while ($idx < $total) {
+         $anchor = $anchors[$idx];
+         $idx++;
+
+         if ($anchor->hasAttribute("name")) {
+             $anchor->removeAttribute("name");
+         }
+         if (!$anchor->hasAttribute("href")) {
+             // reparent children.
+             $this->removeNodeKeepChildren($anchor);
+         }
+     }
+ }
+ function replaceClassList($list)
+ {
+ foreach($this->arrayFrom($list) as $l) {
+ $l->setAttribute('class', "MsoListParagraph");
+ }
+ }
+
+
+ function replaceDocBullets ($doc)
+ {
+ // this is a bit odd - but it appears some indents use ql-indent-1
+ //Roo.log(doc.innerHTML);
+
+ $this->replaceClassList($doc->getElementsByClassName('MsoListParagraphCxSpFirst'));
+ $this->replaceClassList($doc->getElementsByClassName('MsoListParagraphCxSpMiddle'));
+ $this->replaceClassList($doc->getElementsByClassName('MsoListParagraphCxSpLast'));
+ $this->replaceClassList($doc->getElementsByClassName('ql-indent-1'));
+
+
+ // this is a bit hacky - we had one word document where h2 had a miso-list attribute.
+ $htwo = $this->arrayFrom($doc->getElementsByTagName('h2'));
+ foreach($htow as $a) {
+ if ($a->hasAttribute('style') && preg_match('/mso-list:/', $a->getAttribute('style'))) {
+ $a->setAttribute('class', "MsoListParagraph");
+ }
+ }
+ $ar = $this->arrayFrom($doc->getElementsByClassName('MsoNormal'));
+ foreach($ar as $a) {
+ if ($a->hasAttribute('style') && preg_match('/mso-list:/', $a->getAttribute('style'))) {
+ $a->setAttribute('class', "MsoListParagraph");
+ } else {
+ $a->setAttribute('class', "MsoNormalx");
+ }
+ }
+
+ $listpara = $doc->getElementsByClassName('MsoListParagraph');
+ // Roo.log(doc.innerHTML);
+
+
+
+ while($listpara->length) {
+
+ $this->replaceDocBullet($listpara->item(0));
+ }
+
+ }
+
+
+
+ function replaceDocBullet ($p)
+ {
+ // gather all the siblings.
+ $ns = $p;
+ $parent = $p->parentNode;
+ $doc = $parent->ownerDocument;
+ $items = array();;
+
+ $listtype = 'ul';
+ while ($ns) {
+ if ($ns->nodeType != 1) {
+ $ns = $ns->nextSibling;
+ continue;
+ }
+ $cln = $ns->hasAttribute('class') ? $ns->getAttribute('class') : '';
+ if (preg_match('/(MsoListParagraph|ql-indent-1)/i', $cln)) {
+ break;
+ }
+ $spans = $ns->getElementsByTagName('span');
+ if ($ns->hasAttribute('style') && preg_match('/mso-list/', $ns->getAttribute('style'))) {
+ $items[] = $ns;
+ $ns = $ns->nextSibling;
+ $has_list = true;
+ if ($spans->length && spans->item(0).hasAttribute('style')) {
+ $style = $this->styleToObject($spans->item(0), true);
+ if (!empty($style['font-family']) && !preg_match('/Symbol/', $style['font-family'])) {
+ $listtype = 'ol';
+ }
+ }
+
+ continue;
+ }
+
+ $spans = $ns->getElementsByTagName('span');
+ if (!$spans->length) {
+ break;
+ }
+ $has_list = false;
+ foreach($spasn as $s) {
+ if ($s->hasAttribute('style') && preg_match('/mso-list/', $s->getAttribute('style'))) {
+ $has_list = true;
+ break;
+ }
+ }
+ if (!$has_list) {
+ break;
+ }
+ $items[] = $ns;
+ $ns = $ns->nextSibling;
+
+
+ }
+ if (!count($items)) {
+ $ns->setAttribute('class', '');
+ return;
+ }
+
+ $ul = $parent->ownerDocument->createElement($listtype); // what about number lists...
+ $parent->insertBefore($ul, $p);
+ $lvl = 0;
+ $stack = array ( $ul );
+ $last_li = false;
+
+ $margin_to_depth = array();
+ $max_margins = -1;
+
+ foreach($items as $ipos => $n)
+ {
+
+ //Roo.log("got innertHMLT=" + n.innerHTML);
+
+ $spans = $this->arrayFrom($n->getElementsByTagName('span'));
+ if (!count($spans)) {
+ //Roo.log("No spans found");
+
+ $parent->removeChild($n);
+
+
+ continue; // skip it...
+ }
+
+
+ $num = 1;
+ $style = array();
+ foreach($spans as $i => $span) {
+
+ $style = $this->styleToObject($span, true);
+ if (empty($style['mso-list']) ) {
+ continue;
+ }
+ if ($listtype == 'ol') {
+ $num = preg_replace('/[^0-9]+]/g', '', $span->textContent) * 1;
+ }
+ $span->parentNode->removeChild($span); // remove the fake bullet.
+ break;
+ }
+ //Roo.log("NOW GOT innertHMLT=" + n.innerHTML);
+ $style = $this->styleToObject($n, true); // mo-list is from the parent node.
+ if (empty($style['mso-list'])) {
+
+ $parent->removeChild($n);
+
+ continue;
+ }
+
+ $margin = $style['margin-left'];
+ if (empty($margin_to_depth[$margin]) ) {
+ $max_margins++;
+ $margin_to_depth[$margin] = $max_margins;
+ }
+ $nlvl = $margin_to_depth[$margin] ;
+
+ if ($nlvl > $lvl) {
+ //new indent
+ $nul = $doc->createElement($listtype); // what about number lists...
+ if (!$last_li) {
+ $last_li = $doc->createElement('li');
+ $stack[$lvl]->appendChild($last_li);
+ }
+ $last_li->appendChild($nul);
+ $stack[$nlvl] = $nul;
+
+ }
+ $lvl = $nlvl;
+
+ // not starting at 1..
+ if (!$stack[$nlvl]->hasAttribute("start") && $listtype == "ol") {
+ $stack[$nlvl]->setAttribute("start", $num);
+ }
+
+ $nli = $stack[$nlvl]->appendChild($doc->createElement('li'));
+ $last_li = $nli;
+ $this->copyInnerHtml($n, $nli);
+ //$nli->innerHTML = $n->innerHTML;
+ //Roo.log("innerHTML = " + n.innerHTML);
+ $parent->removeChild($n);
+
+
+ }
+
+
+
+
+ }
+
+
+
+}