/***
* Loosely based on the entity encoding/decoding helper from TinyMCE (source linked below).
* @class Roo.htmleditor.TidyEntities
* @static
* https://github.com/thorn0/tinymce.html/blob/master/tinymce.html.js
*
* Not 100% sure this is actually used or needed.
*/
Roo.htmleditor.TidyEntities = {
/**
* initialize data..
*/
init : function (){
this.namedEntities = this.buildEntitiesLookup(this.namedEntitiesData, 32);
},
buildEntitiesLookup: function(items, radix) {
var i, chr, entity, lookup = {};
if (!items) {
return {};
}
items = typeof(items) == 'string' ? items.split(',') : items;
radix = radix || 10;
// Build entities lookup table
for (i = 0; i < items.length; i += 2) {
chr = String.fromCharCode(parseInt(items[i], radix));
// Only add non base entities
if (!this.baseEntities[chr]) {
entity = '&' + items[i + 1] + ';';
lookup[chr] = entity;
lookup[entity] = chr;
}
}
return lookup;
},
asciiMap : {
128: '€',
130: '‚',
131: 'ƒ',
132: '„',
133: '…',
134: '†',
135: '‡',
136: 'ˆ',
137: '‰',
138: 'Š',
139: '‹',
140: 'Œ',
142: 'Ž',
145: '‘',
146: '’',
147: '“',
148: '”',
149: '•',
150: '–',
151: '—',
152: '˜',
153: '™',
154: 'š',
155: '›',
156: 'œ',
158: 'ž',
159: 'Ÿ'
},
// Raw entities
baseEntities : {
'"': '"',
// Needs to be escaped since the YUI compressor would otherwise break the code
'\'': ''',
'<': '<',
'>': '>',
'&': '&',
'`': '`'
},
// Reverse lookup table for raw entities
reverseEntities : {
'<': '<',
'>': '>',
'&': '&',
'"': '"',
''': '\''
},
// Characters encoded inside attribute values: & < > " and the backtick (\u0060),
// every code unit in \u007E-\uD7FF and \uE000-\uFFEF, or a surrogate pair
// (matched as one two-unit string).
attrsCharsRegExp : /[&<>\"\u0060\u007E-\uD7FF\uE000-\uFFEF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
// Characters encoded inside text content: same as above minus the double quote and backtick.
textCharsRegExp : /[<>&\u007E-\uD7FF\uE000-\uFFEF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
// Characters handled by encodeAllRaw: < > & " and '.
rawCharsRegExp : /[<>&\"\']/g,
// Entity references handled by decode(): capture group 1 is the body of a numeric
// reference ("&#...", trailing ';' optional), group 2 the name of a named reference
// ("&name;", ';' required). Case-insensitive.
entityRegExp : /&#([a-z0-9]+);?|&([a-z0-9]+);/gi,
// Two-way named entity lookup; stays false until init() replaces it with the
// table built from namedEntitiesData.
namedEntities : false,
namedEntitiesData : [
'50',
'nbsp',
'51',
'iexcl',
'52',
'cent',
'53',
'pound',
'54',
'curren',
'55',
'yen',
'56',
'brvbar',
'57',
'sect',
'58',
'uml',
'59',
'copy',
'5a',
'ordf',
'5b',
'laquo',
'5c',
'not',
'5d',
'shy',
'5e',
'reg',
'5f',
'macr',
'5g',
'deg',
'5h',
'plusmn',
'5i',
'sup2',
'5j',
'sup3',
'5k',
'acute',
'5l',
'micro',
'5m',
'para',
'5n',
'middot',
'5o',
'cedil',
'5p',
'sup1',
'5q',
'ordm',
'5r',
'raquo',
'5s',
'frac14',
'5t',
'frac12',
'5u',
'frac34',
'5v',
'iquest',
'60',
'Agrave',
'61',
'Aacute',
'62',
'Acirc',
'63',
'Atilde',
'64',
'Auml',
'65',
'Aring',
'66',
'AElig',
'67',
'Ccedil',
'68',
'Egrave',
'69',
'Eacute',
'6a',
'Ecirc',
'6b',
'Euml',
'6c',
'Igrave',
'6d',
'Iacute',
'6e',
'Icirc',
'6f',
'Iuml',
'6g',
'ETH',
'6h',
'Ntilde',
'6i',
'Ograve',
'6j',
'Oacute',
'6k',
'Ocirc',
'6l',
'Otilde',
'6m',
'Ouml',
'6n',
'times',
'6o',
'Oslash',
'6p',
'Ugrave',
'6q',
'Uacute',
'6r',
'Ucirc',
'6s',
'Uuml',
'6t',
'Yacute',
'6u',
'THORN',
'6v',
'szlig',
'70',
'agrave',
'71',
'aacute',
'72',
'acirc',
'73',
'atilde',
'74',
'auml',
'75',
'aring',
'76',
'aelig',
'77',
'ccedil',
'78',
'egrave',
'79',
'eacute',
'7a',
'ecirc',
'7b',
'euml',
'7c',
'igrave',
'7d',
'iacute',
'7e',
'icirc',
'7f',
'iuml',
'7g',
'eth',
'7h',
'ntilde',
'7i',
'ograve',
'7j',
'oacute',
'7k',
'ocirc',
'7l',
'otilde',
'7m',
'ouml',
'7n',
'divide',
'7o',
'oslash',
'7p',
'ugrave',
'7q',
'uacute',
'7r',
'ucirc',
'7s',
'uuml',
'7t',
'yacute',
'7u',
'thorn',
'7v',
'yuml',
'ci',
'fnof',
'sh',
'Alpha',
'si',
'Beta',
'sj',
'Gamma',
'sk',
'Delta',
'sl',
'Epsilon',
'sm',
'Zeta',
'sn',
'Eta',
'so',
'Theta',
'sp',
'Iota',
'sq',
'Kappa',
'sr',
'Lambda',
'ss',
'Mu',
'st',
'Nu',
'su',
'Xi',
'sv',
'Omicron',
't0',
'Pi',
't1',
'Rho',
't3',
'Sigma',
't4',
'Tau',
't5',
'Upsilon',
't6',
'Phi',
't7',
'Chi',
't8',
'Psi',
't9',
'Omega',
'th',
'alpha',
'ti',
'beta',
'tj',
'gamma',
'tk',
'delta',
'tl',
'epsilon',
'tm',
'zeta',
'tn',
'eta',
'to',
'theta',
'tp',
'iota',
'tq',
'kappa',
'tr',
'lambda',
'ts',
'mu',
'tt',
'nu',
'tu',
'xi',
'tv',
'omicron',
'u0',
'pi',
'u1',
'rho',
'u2',
'sigmaf',
'u3',
'sigma',
'u4',
'tau',
'u5',
'upsilon',
'u6',
'phi',
'u7',
'chi',
'u8',
'psi',
'u9',
'omega',
'uh',
'thetasym',
'ui',
'upsih',
'um',
'piv',
'812',
'bull',
'816',
'hellip',
'81i',
'prime',
'81j',
'Prime',
'81u',
'oline',
'824',
'frasl',
'88o',
'weierp',
'88h',
'image',
'88s',
'real',
'892',
'trade',
'89l',
'alefsym',
'8cg',
'larr',
'8ch',
'uarr',
'8ci',
'rarr',
'8cj',
'darr',
'8ck',
'harr',
'8dl',
'crarr',
'8eg',
'lArr',
'8eh',
'uArr',
'8ei',
'rArr',
'8ej',
'dArr',
'8ek',
'hArr',
'8g0',
'forall',
'8g2',
'part',
'8g3',
'exist',
'8g5',
'empty',
'8g7',
'nabla',
'8g8',
'isin',
'8g9',
'notin',
'8gb',
'ni',
'8gf',
'prod',
'8gh',
'sum',
'8gi',
'minus',
'8gn',
'lowast',
'8gq',
'radic',
'8gt',
'prop',
'8gu',
'infin',
'8h0',
'ang',
'8h7',
'and',
'8h8',
'or',
'8h9',
'cap',
'8ha',
'cup',
'8hb',
'int',
'8hk',
'there4',
'8hs',
'sim',
'8i5',
'cong',
'8i8',
'asymp',
'8j0',
'ne',
'8j1',
'equiv',
'8j4',
'le',
'8j5',
'ge',
'8k2',
'sub',
'8k3',
'sup',
'8k4',
'nsub',
'8k6',
'sube',
'8k7',
'supe',
'8kl',
'oplus',
'8kn',
'otimes',
'8l5',
'perp',
'8m5',
'sdot',
'8o8',
'lceil',
'8o9',
'rceil',
'8oa',
'lfloor',
'8ob',
'rfloor',
'8p9',
'lang',
'8pa',
'rang',
'9ea',
'loz',
'9j0',
'spades',
'9j3',
'clubs',
'9j5',
'hearts',
'9j6',
'diams',
'ai',
'OElig',
'aj',
'oelig',
'b0',
'Scaron',
'b1',
'scaron',
'bo',
'Yuml',
'm6',
'circ',
'ms',
'tilde',
'802',
'ensp',
'803',
'emsp',
'809',
'thinsp',
'80c',
'zwnj',
'80d',
'zwj',
'80e',
'lrm',
'80f',
'rlm',
'80j',
'ndash',
'80k',
'mdash',
'80o',
'lsquo',
'80p',
'rsquo',
'80q',
'sbquo',
'80s',
'ldquo',
'80t',
'rdquo',
'80u',
'bdquo',
'810',
'dagger',
'811',
'Dagger',
'81g',
'permil',
'81p',
'lsaquo',
'81q',
'rsaquo',
'85c',
'euro'
],
/**
* Encodes the specified string using raw entities. This means only the required XML base entities will be encoded.
*
* @method encodeRaw
* @param {String} text Text to encode.
* @param {Boolean} attr Optional flag to specify if the text is attribute contents.
* @return {String} Entity encoded text.
*/
encodeRaw: function(text, attr)
{
var t = this;
return text.replace(attr ? this.attrsCharsRegExp : this.textCharsRegExp, function(chr) {
return t.baseEntities[chr] || chr;
});
},
/**
* Encoded the specified text with both the attributes and text entities. This function will produce larger text contents
* since it doesn't know if the context is within a attribute or text node. This was added for compatibility
* and is exposed as the DOMUtils.encode function.
*
* @method encodeAllRaw
* @param {String} text Text to encode.
* @return {String} Entity encoded text.
*/
encodeAllRaw: function(text) {
var t = this;
return ('' + text).replace(this.rawCharsRegExp, function(chr) {
return t.baseEntities[chr] || chr;
});
},
/**
* Encodes the specified string using numeric entities. The core entities will be
* encoded as named ones but all non lower ascii characters will be encoded into numeric entities.
*
* @method encodeNumeric
* @param {String} text Text to encode.
* @param {Boolean} attr Optional flag to specify if the text is attribute contents.
* @return {String} Entity encoded text.
*/
encodeNumeric: function(text, attr) {
var t = this;
return text.replace(attr ? this.attrsCharsRegExp : this.textCharsRegExp, function(chr) {
// Multi byte sequence convert it to a single entity
if (chr.length > 1) {
return '&#' + (1024 * (chr.charCodeAt(0) - 55296) + (chr.charCodeAt(1) - 56320) + 65536) + ';';
}
return t.baseEntities[chr] || '&#' + chr.charCodeAt(0) + ';';
});
},
/**
* Encodes the specified string using named entities. The core entities will be encoded
* as named ones but all non lower ascii characters will be encoded into named entities.
*
* @method encodeNamed
* @param {String} text Text to encode.
* @param {Boolean} attr Optional flag to specify if the text is attribute contents.
* @param {Object} entities Optional parameter with entities to use.
* @return {String} Entity encoded text.
*/
encodeNamed: function(text, attr, entities) {
var t = this;
entities = entities || this.namedEntities;
return text.replace(attr ? this.attrsCharsRegExp : this.textCharsRegExp, function(chr) {
return t.baseEntities[chr] || entities[chr] || chr;
});
},
/**
* Returns an encode function based on the name(s) and it's optional entities.
*
* @method getEncodeFunc
* @param {String} name Comma separated list of encoders for example named,numeric.
* @param {String} entities Optional parameter with entities to use instead of the built in set.
* @return {function} Encode function to be used.
*/
getEncodeFunc: function(name, entities) {
entities = this.buildEntitiesLookup(entities) || this.namedEntities;
var t = this;
function encodeNamedAndNumeric(text, attr) {
return text.replace(attr ? t.attrsCharsRegExp : t.textCharsRegExp, function(chr) {
return t.baseEntities[chr] || entities[chr] || '&#' + chr.charCodeAt(0) + ';' || chr;
});
}
function encodeCustomNamed(text, attr) {
return t.encodeNamed(text, attr, entities);
}
// Replace + with , to be compatible with previous TinyMCE versions
name = this.makeMap(name.replace(/\+/g, ','));
// Named and numeric encoder
if (name.named && name.numeric) {
return this.encodeNamedAndNumeric;
}
// Named encoder
if (name.named) {
// Custom names
if (entities) {
return encodeCustomNamed;
}
return this.encodeNamed;
}
// Numeric
if (name.numeric) {
return this.encodeNumeric;
}
// Raw encoder
return this.encodeRaw;
},
/**
* Decodes the specified string, this will replace entities with raw UTF characters.
*
* @method decode
* @param {String} text Text to entity decode.
* @return {String} Entity decoded string.
*/
decode: function(text)
{
var t = this;
return text.replace(this.entityRegExp, function(all, numeric) {
if (numeric) {
numeric = 'x' === numeric.charAt(0).toLowerCase() ? parseInt(numeric.substr(1), 16) : parseInt(numeric, 10);
// Support upper UTF
if (numeric > 65535) {
numeric -= 65536;
return String.fromCharCode(55296 + (numeric >> 10), 56320 + (1023 & numeric));
}
return t.asciiMap[numeric] || String.fromCharCode(numeric);
}
return t.reverseEntities[all] || t.namedEntities[all] || t.nativeDecode(all);
});
},
nativeDecode : function (text) {
return text;
},
makeMap : function (items, delim, map) {
var i;
items = items || [];
delim = delim || ',';
if (typeof items == "string") {
items = items.split(delim);
}
map = map || {};
i = items.length;
while (i--) {
map[items[i]] = {};
}
return map;
}
};
// Build the namedEntities lookup once, as soon as this file is loaded.
Roo.htmleditor.TidyEntities.init();