2 * This is based loosely on tinymce
3 * @class Roo.htmleditor.TidyEntities
5 * https://github.com/thorn0/tinymce.html/blob/master/tinymce.html.js
8 Roo.htmleditor.TidyEntities = {
16 // Decodes text by using the browser
18 // Build a two way lookup table for the entities
20 // Unpack entities lookup where the numbers are in radix 32 to reduce the size
23 this.namedEntities = buildEntitiesLookup(this.namedEntitiesData, 32);
28 buildEntitiesLookup: function(items, radix) {
29 var i, chr, entity, lookup = {};
31 items = items.split(',');
33 // Build entities lookup table
34 for (i = 0; i < items.length; i += 2) {
35 chr = String.fromCharCode(parseInt(items[i], radix));
36 // Only add non base entities
37 if (!baseEntities[chr]) {
38 entity = '&' + items[i + 1] + ';';
80 // Needs to be escaped since the YUI compressor would otherwise break the code
87 // Reverse lookup table for raw entities
96 attrsCharsRegExp : /[&<>\"\u0060\u007E-\uD7FF\uE000-\uFFEF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
97 textCharsRegExp : /[<>&\u007E-\uD7FF\uE000-\uFFEF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
98 rawCharsRegExp : /[<>&\"\']/g,
99 entityRegExp : /&#([a-z0-9]+);?|&([a-z0-9]+);/gi,
100 namedEntities : false,
101 namedEntitiesData : [
602 * Encodes the specified string using raw entities. This means only the required XML base entities will be encoded.
605 * @param {String} text Text to encode.
606 * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
607 * @return {String} Entity encoded text.
609 encodeRaw: function(text, attr) {
610 return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
611 return baseEntities[chr] || chr;
615 * Encodes the specified text with both the attributes and text entities. This function will produce larger text contents
616 * since it doesn't know if the context is within an attribute or text node. This was added for compatibility
617 * and is exposed as the DOMUtils.encode function.
619 * @method encodeAllRaw
620 * @param {String} text Text to encode.
621 * @return {String} Entity encoded text.
623 encodeAllRaw: function(text) {
624 return ('' + text).replace(rawCharsRegExp, function(chr) {
625 return baseEntities[chr] || chr;
629 * Encodes the specified string using numeric entities. The core entities will be
630 * encoded as named ones but all non lower ascii characters will be encoded into numeric entities.
632 * @method encodeNumeric
633 * @param {String} text Text to encode.
634 * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
635 * @return {String} Entity encoded text.
637 encodeNumeric: function(text, attr) {
638 return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
639 // Multi-unit sequence (UTF-16 surrogate pair): convert it to a single numeric entity
640 if (chr.length > 1) {
641 return '&#' + (1024 * (chr.charCodeAt(0) - 55296) + (chr.charCodeAt(1) - 56320) + 65536) + ';';
643 return baseEntities[chr] || '&#' + chr.charCodeAt(0) + ';';
647 * Encodes the specified string using named entities. The core entities will be encoded
648 * as named ones but all non lower ascii characters will be encoded into named entities.
650 * @method encodeNamed
651 * @param {String} text Text to encode.
652 * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
653 * @param {Object} entities Optional parameter with entities to use.
654 * @return {String} Entity encoded text.
656 encodeNamed: function(text, attr, entities) {
657 entities = entities || namedEntities;
658 return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
659 return baseEntities[chr] || entities[chr] || chr;
663 * Returns an encode function based on the name(s) and its optional entities.
665 * @method getEncodeFunc
666 * @param {String} name Comma separated list of encoders for example named,numeric.
667 * @param {String} entities Optional parameter with entities to use instead of the built in set.
668 * @return {function} Encode function to be used.
670 getEncodeFunc: function(name, entities) {
671 entities = buildEntitiesLookup(entities) || namedEntities;
673 function encodeNamedAndNumeric(text, attr) {
674 return text.replace(attr ? attrsCharsRegExp : textCharsRegExp, function(chr) {
675 return baseEntities[chr] || entities[chr] || '&#' + chr.charCodeAt(0) + ';' || chr;
679 function encodeCustomNamed(text, attr) {
680 return Entities.encodeNamed(text, attr, entities);
682 // Replace + with , to be compatible with previous TinyMCE versions
683 name = makeMap(name.replace(/\+/g, ','));
684 // Named and numeric encoder
685 if (name.named && name.numeric) {
686 return encodeNamedAndNumeric;
692 return encodeCustomNamed;
694 return Entities.encodeNamed;
698 return Entities.encodeNumeric;
701 return Entities.encodeRaw;
704 * Decodes the specified string, this will replace entities with raw UTF characters.
707 * @param {String} text Text to entity decode.
708 * @return {String} Entity decoded string.
710 decode: function(text) {
711 return text.replace(entityRegExp, function(all, numeric) {
713 numeric = 'x' === numeric.charAt(0).toLowerCase() ? parseInt(numeric.substr(1), 16) : parseInt(numeric, 10);
715 if (numeric > 65535) {
717 return String.fromCharCode(55296 + (numeric >> 10), 56320 + (1023 & numeric));
719 return asciiMap[numeric] || String.fromCharCode(numeric);
721 return reverseEntities[all] || namedEntities[all] || nativeDecode(all);
724 function nativeDecode(text) {
731 Roo.htmleditor.TidyEntities.init();