4 * This is a PHP implementation of the Roo HTMLEditorCore onPaste method, which cleans up HTML
5 * and replaces things like tables, etc.
/**
 * Build an HTML_Clean instance from a raw HTML string.
 *
 * The string is first passed through self::cleanWordChars(), a DOMDocument
 * (version '1.0', encoding 'utf8') is created, and a new HTML_Clean built
 * from $opts is returned.
 *
 * NOTE(review): the lines that load $str into $dom and hand $dom to the
 * constructor via $opts are not visible in this excerpt - confirm against
 * the full file.
 *
 * @param string $str   HTML markup to clean
 * @param array  $opts  options forwarded to the HTML_Clean constructor
 * @return HTML_Clean
 */
10 static function fromHTML($str, $opts = array())
// Swap out the characters listed in cleanWordChars() before any parsing happens.
12 $str= self::cleanWordChars($str);
13 $dom = new DOMDocument('1.0', 'utf8');
// The constructor runs the whole cleaning pipeline.
16 return new HTML_Clean($opts);
/**
 * Replace individual characters in $str according to the $swapCodes map.
 *
 * Each key of $swapCodes is a code point (turned into a character with
 * mb_chr()) and each value is the text that replaces it.
 *
 * NOTE(review): $swapCodes is defined on lines not visible in this excerpt,
 * and so is the return statement - presumably the modified string is
 * returned, since fromHTML() assigns the result back to $str; confirm.
 *
 * @param string $str  text to clean
 */
18 static function cleanWordChars($str)
30 foreach($swapCodes as $k=>$v) {
// mb_chr() is called without an explicit encoding, so it uses the mbstring
// internal encoding - presumably UTF-8 here; TODO confirm.
31 $str = str_replace(mb_chr($k), $v, $str);
38 var $dom; // DOMDocument being cleaned; read as $this->dom by the constructor.
// Upper-case tag names that the constructor passes to the 'Black' filter as $this->black.
// NOTE(review): the line that opens this array is not visible in this excerpt.
41 'BASE', 'BASEFONT', 'BGSOUND', 'BLINK', 'BODY',
42 'FRAME', 'FRAMESET', 'HEAD', 'HTML', 'ILAYER',
43 'IFRAME', 'LAYER', 'LINK', 'META', 'OBJECT',
44 'SCRIPT', 'STYLE' ,'TITLE', 'XML',
45 //'FONT' // CLEAN LATER..
46 'COLGROUP', 'COL' // messy tables.
47 ); // blacklist of elements.
/**
 * Run the cleaning pipeline over the document held in $this->dom.
 *
 * Iterates over $opts, then applies a fixed sequence of filters to the
 * document element, processes every <img> element, and finally calls
 * HTML_Clean_Block::initAll() on the document element.
 *
 * NOTE(review): the body of the $opts loop is not visible in this excerpt -
 * presumably each entry is copied onto $this, which would be how $this->dom
 * gets set; confirm against the full file.
 *
 * @param array $opts  options for this instance
 */
49 function __construct($opts)
51 foreach($opts as $k=>$v) {
// Every filter below is started from the document's root element.
54 $d = $this->dom->documentElement;
55 $this->filter('Word',array( 'node' => $d ));
57 $this->filter('StyleToTag', array(
58 'node' => $d // this could add nodes to tree, so not very good to nest the walk.
62 $this->filter('Attributes',array( // does walk as well.
64 'attrib_white' => array('href', 'src', 'name', 'align', 'colspan', 'rowspan', 'data-display', 'data-width', 'start'),
65 'attrib_clean' => array('href', 'src' ),
67 'replaceComment' => true // this is sneaked in here - as walk will get rid of comments at the same time.
// Hand the blacklist of tag names ($this->black) to the 'Black' filter.
70 $this->filter('Black', array( 'node' => $d, 'tag' => $this->black ));
71 // we don't use the whitelist?
// 'FONT' and ':' are passed as the tag list for KeepChildren.
// NOTE(review): ':' presumably matches namespaced tag names such as o:p - confirm in the filter.
75 $this->filter('KeepChildren',array( 'node' => $d, 'tag' => array( 'FONT', ':' )) );
76 $this->filter('Paragraph',array( 'node' => $d ));
77 $this->filter('Span',array( 'node' => $d ));
78 $this->filter('LongBr',array( 'node' => $d ));
// Copy the img node list through arrayFrom() before looping over it.
// NOTE(review): presumably a snapshot so that DOM changes made in the loop
// cannot disturb the iteration - confirm.
80 $ar = $this->arrayFrom($d->getElementsByTagName('img'));
81 foreach($ar as $img) {
// NOTE(review): the body of this check is not visible - presumably images
// that already sit inside a <figure> are skipped; confirm.
82 if ($this->findParent($img, 'figure')) {
85 require_once 'HTML/Clean/BlockFigure.php';
// Build a figure block from the image's src and apply it to the <img> element.
86 $fig = new HTML_Clean_BlockFigure(array(
87 'image_src' => $img->getAttribute('src')
89 $fig->updateElement($img);
95 require_once 'HTML/Clean/Block.php';
96 HTML_Clean_Block::initAll($d);
/**
 * Load the filter class that corresponds to $type.
 *
 * Requires 'HTML/Clean/Filter<type>.php' and builds the class name
 * 'HTML_Clean_Filter<type>'.
 *
 * NOTE(review): the line that instantiates the class with $args is not
 * visible in this excerpt - confirm against the full file.
 * NOTE(review): $type is concatenated into a require_once path. Every caller
 * visible here passes a string literal; never pass untrusted input.
 *
 * @param string $type  filter name suffix, e.g. 'Word' or 'Black'
 * @param array  $args  filter options; visible callers typically include a 'node' entry
 */
100 function filter($type, $args)
102 require_once 'HTML/Clean/Filter'. $type .'.php';
103 $cls = 'HTML_Clean_Filter'. $type;
// NOTE(review): the string returned by saveHTML() is discarded on this line.
// The enclosing function header is not visible in this excerpt; if this is
// meant to hand the cleaned markup back to the caller, a `return` appears to
// be missing - confirm against the full file.
109 $this->dom->saveHTML();