Roo/htmleditor/TidyEntities.js

/***
 * Loosely based on TinyMCE's HTML entity encoding/decoding code (see the link below).
 * @class Roo.htmleditor.TidyEntities
 * @static
 * https://github.com/thorn0/tinymce.html/blob/master/tinymce.html.js
 *
 * Not 100% sure this is actually used or needed.
 */

Roo.htmleditor.TidyEntities = {

    /**
     * initialize data..
     */
    init : function (){

        this.namedEntities = this.buildEntitiesLookup(this.namedEntitiesData, 32);

    },


    buildEntitiesLookup: function(items, radix) {
        var i, chr, entity, lookup = {};
        if (!items) {
            return {};
        }
        items = typeof(items) == 'string' ? items.split(',') : items;
        radix = radix || 10;
        // Build entities lookup table
        for (i = 0; i < items.length; i += 2) {
            chr = String.fromCharCode(parseInt(items[i], radix));
            // Only add non base entities
            if (!this.baseEntities[chr]) {
                entity = '&' + items[i + 1] + ';';
                lookup[chr] = entity;
                lookup[entity] = chr;
            }
        }
        return lookup;

    },

    // Overrides for numeric character references in the 128-159 range.
    // decode() consults this table first and only falls back to
    // String.fromCharCode() when a code has no entry here.
    // NOTE(review): the values look like the Windows-1252 interpretation of
    // these codes - confirm.
    asciiMap : {
            128: '€',
            130: '‚',
            131: 'ƒ',
            132: '„',
            133: '…',
            134: '†',
            135: '‡',
            136: 'ˆ',
            137: '‰',
            138: 'Š',
            139: '‹',
            140: 'Œ',
            142: 'Ž',
            145: '‘',
            146: '’',
            147: '“',
            148: '”',
            149: '•',
            150: '–',
            151: '—',
            152: '˜',
            153: '™',
            154: 'š',
            155: '›',
            156: 'œ',
            158: 'ž',
            159: 'Ÿ'
    },
    // Raw entities: character -> entity. Every encode* method checks this
    // table first, and buildEntitiesLookup() skips any character listed here.
    baseEntities : {
        '"': '&quot;',
        // Needs to be escaped since the YUI compressor would otherwise break the code
        '\'': '&#39;',
        '<': '&lt;',
        '>': '&gt;',
        '&': '&amp;',
        '`': '&#96;'
    },
    // Reverse lookup table for raw entities: entity -> character.
    // decode() checks this before the named-entity table. Note that it is not
    // an exact mirror of baseEntities: it accepts '&apos;', while the numeric
    // forms produced by baseEntities ('&#39;', '&#96;') are resolved by the
    // numeric branch of decode() instead.
    reverseEntities : {
        '&lt;': '<',
        '&gt;': '>',
        '&amp;': '&',
        '&quot;': '"',
        '&apos;': '\''
    },

    // Characters considered for encoding inside attribute values: & < > " `,
    // everything from U+007E to U+D7FF and U+E000 to U+FFEF, or a complete
    // surrogate pair (matched as one two-unit string).
    attrsCharsRegExp : /[&<>\"\u0060\u007E-\uD7FF\uE000-\uFFEF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
    // Same as above for text nodes, but without the double quote and backtick.
    textCharsRegExp : /[<>&\u007E-\uD7FF\uE000-\uFFEF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/g,
    // Only the characters < > & " ' - used by encodeAllRaw().
    rawCharsRegExp : /[<>&\"\']/g,
    // Matches entities for decode(): group 1 captures the body of a numeric
    // reference ('&#...' with an optional ';'), group 2 the name of '&name;'.
    entityRegExp : /&#([a-z0-9]+);?|&([a-z0-9]+);/gi,
    // Placeholder; replaced by the lookup table built in init().
    namedEntities  : false,
    // Source data for the named-entity lookup, stored as a flat list of
    // alternating items: a character code written in base 32, followed by the
    // entity name without the surrounding '&' and ';'.
    // init() passes this list to buildEntitiesLookup() with radix 32.
    namedEntitiesData : [
        '50',
        'nbsp',
        '51',
        'iexcl',
        '52',
        'cent',
        '53',
        'pound',
        '54',
        'curren',
        '55',
        'yen',
        '56',
        'brvbar',
        '57',
        'sect',
        '58',
        'uml',
        '59',
        'copy',
        '5a',
        'ordf',
        '5b',
        'laquo',
        '5c',
        'not',
        '5d',
        'shy',
        '5e',
        'reg',
        '5f',
        'macr',
        '5g',
        'deg',
        '5h',
        'plusmn',
        '5i',
        'sup2',
        '5j',
        'sup3',
        '5k',
        'acute',
        '5l',
        'micro',
        '5m',
        'para',
        '5n',
        'middot',
        '5o',
        'cedil',
        '5p',
        'sup1',
        '5q',
        'ordm',
        '5r',
        'raquo',
        '5s',
        'frac14',
        '5t',
        'frac12',
        '5u',
        'frac34',
        '5v',
        'iquest',
        '60',
        'Agrave',
        '61',
        'Aacute',
        '62',
        'Acirc',
        '63',
        'Atilde',
        '64',
        'Auml',
        '65',
        'Aring',
        '66',
        'AElig',
        '67',
        'Ccedil',
        '68',
        'Egrave',
        '69',
        'Eacute',
        '6a',
        'Ecirc',
        '6b',
        'Euml',
        '6c',
        'Igrave',
        '6d',
        'Iacute',
        '6e',
        'Icirc',
        '6f',
        'Iuml',
        '6g',
        'ETH',
        '6h',
        'Ntilde',
        '6i',
        'Ograve',
        '6j',
        'Oacute',
        '6k',
        'Ocirc',
        '6l',
        'Otilde',
        '6m',
        'Ouml',
        '6n',
        'times',
        '6o',
        'Oslash',
        '6p',
        'Ugrave',
        '6q',
        'Uacute',
        '6r',
        'Ucirc',
        '6s',
        'Uuml',
        '6t',
        'Yacute',
        '6u',
        'THORN',
        '6v',
        'szlig',
        '70',
        'agrave',
        '71',
        'aacute',
        '72',
        'acirc',
        '73',
        'atilde',
        '74',
        'auml',
        '75',
        'aring',
        '76',
        'aelig',
        '77',
        'ccedil',
        '78',
        'egrave',
        '79',
        'eacute',
        '7a',
        'ecirc',
        '7b',
        'euml',
        '7c',
        'igrave',
        '7d',
        'iacute',
        '7e',
        'icirc',
        '7f',
        'iuml',
        '7g',
        'eth',
        '7h',
        'ntilde',
        '7i',
        'ograve',
        '7j',
        'oacute',
        '7k',
        'ocirc',
        '7l',
        'otilde',
        '7m',
        'ouml',
        '7n',
        'divide',
        '7o',
        'oslash',
        '7p',
        'ugrave',
        '7q',
        'uacute',
        '7r',
        'ucirc',
        '7s',
        'uuml',
        '7t',
        'yacute',
        '7u',
        'thorn',
        '7v',
        'yuml',
        'ci',
        'fnof',
        'sh',
        'Alpha',
        'si',
        'Beta',
        'sj',
        'Gamma',
        'sk',
        'Delta',
        'sl',
        'Epsilon',
        'sm',
        'Zeta',
        'sn',
        'Eta',
        'so',
        'Theta',
        'sp',
        'Iota',
        'sq',
        'Kappa',
        'sr',
        'Lambda',
        'ss',
        'Mu',
        'st',
        'Nu',
        'su',
        'Xi',
        'sv',
        'Omicron',
        't0',
        'Pi',
        't1',
        'Rho',
        't3',
        'Sigma',
        't4',
        'Tau',
        't5',
        'Upsilon',
        't6',
        'Phi',
        't7',
        'Chi',
        't8',
        'Psi',
        't9',
        'Omega',
        'th',
        'alpha',
        'ti',
        'beta',
        'tj',
        'gamma',
        'tk',
        'delta',
        'tl',
        'epsilon',
        'tm',
        'zeta',
        'tn',
        'eta',
        'to',
        'theta',
        'tp',
        'iota',
        'tq',
        'kappa',
        'tr',
        'lambda',
        'ts',
        'mu',
        'tt',
        'nu',
        'tu',
        'xi',
        'tv',
        'omicron',
        'u0',
        'pi',
        'u1',
        'rho',
        'u2',
        'sigmaf',
        'u3',
        'sigma',
        'u4',
        'tau',
        'u5',
        'upsilon',
        'u6',
        'phi',
        'u7',
        'chi',
        'u8',
        'psi',
        'u9',
        'omega',
        'uh',
        'thetasym',
        'ui',
        'upsih',
        'um',
        'piv',
        '812',
        'bull',
        '816',
        'hellip',
        '81i',
        'prime',
        '81j',
        'Prime',
        '81u',
        'oline',
        '824',
        'frasl',
        '88o',
        'weierp',
        '88h',
        'image',
        '88s',
        'real',
        '892',
        'trade',
        '89l',
        'alefsym',
        '8cg',
        'larr',
        '8ch',
        'uarr',
        '8ci',
        'rarr',
        '8cj',
        'darr',
        '8ck',
        'harr',
        '8dl',
        'crarr',
        '8eg',
        'lArr',
        '8eh',
        'uArr',
        '8ei',
        'rArr',
        '8ej',
        'dArr',
        '8ek',
        'hArr',
        '8g0',
        'forall',
        '8g2',
        'part',
        '8g3',
        'exist',
        '8g5',
        'empty',
        '8g7',
        'nabla',
        '8g8',
        'isin',
        '8g9',
        'notin',
        '8gb',
        'ni',
        '8gf',
        'prod',
        '8gh',
        'sum',
        '8gi',
        'minus',
        '8gn',
        'lowast',
        '8gq',
        'radic',
        '8gt',
        'prop',
        '8gu',
        'infin',
        '8h0',
        'ang',
        '8h7',
        'and',
        '8h8',
        'or',
        '8h9',
        'cap',
        '8ha',
        'cup',
        '8hb',
        'int',
        '8hk',
        'there4',
        '8hs',
        'sim',
        '8i5',
        'cong',
        '8i8',
        'asymp',
        '8j0',
        'ne',
        '8j1',
        'equiv',
        '8j4',
        'le',
        '8j5',
        'ge',
        '8k2',
        'sub',
        '8k3',
        'sup',
        '8k4',
        'nsub',
        '8k6',
        'sube',
        '8k7',
        'supe',
        '8kl',
        'oplus',
        '8kn',
        'otimes',
        '8l5',
        'perp',
        '8m5',
        'sdot',
        '8o8',
        'lceil',
        '8o9',
        'rceil',
        '8oa',
        'lfloor',
        '8ob',
        'rfloor',
        '8p9',
        'lang',
        '8pa',
        'rang',
        '9ea',
        'loz',
        '9j0',
        'spades',
        '9j3',
        'clubs',
        '9j5',
        'hearts',
        '9j6',
        'diams',
        'ai',
        'OElig',
        'aj',
        'oelig',
        'b0',
        'Scaron',
        'b1',
        'scaron',
        'bo',
        'Yuml',
        'm6',
        'circ',
        'ms',
        'tilde',
        '802',
        'ensp',
        '803',
        'emsp',
        '809',
        'thinsp',
        '80c',
        'zwnj',
        '80d',
        'zwj',
        '80e',
        'lrm',
        '80f',
        'rlm',
        '80j',
        'ndash',
        '80k',
        'mdash',
        '80o',
        'lsquo',
        '80p',
        'rsquo',
        '80q',
        'sbquo',
        '80s',
        'ldquo',
        '80t',
        'rdquo',
        '80u',
        'bdquo',
        '810',
        'dagger',
        '811',
        'Dagger',
        '81g',
        'permil',
        '81p',
        'lsaquo',
        '81q',
        'rsaquo',
        '85c',
        'euro'
    ],


    /**
     * Encodes the specified string using raw entities. This means only the required XML base entities will be encoded.
     *
     * @method encodeRaw
     * @param {String} text Text to encode.
     * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
     * @return {String} Entity encoded text.
     */
    encodeRaw: function(text, attr)
    {
        var t = this;
        return text.replace(attr ? this.attrsCharsRegExp : this.textCharsRegExp, function(chr) {
            return t.baseEntities[chr] || chr;
        });
    },
    /**
     * Encoded the specified text with both the attributes and text entities. This function will produce larger text contents
     * since it doesn't know if the context is within a attribute or text node. This was added for compatibility
     * and is exposed as the DOMUtils.encode function.
     *
     * @method encodeAllRaw
     * @param {String} text Text to encode.
     * @return {String} Entity encoded text.
     */
    encodeAllRaw: function(text) {
        var t = this;
        return ('' + text).replace(this.rawCharsRegExp, function(chr) {
            return t.baseEntities[chr] || chr;
        });
    },
    /**
     * Encodes the specified string using numeric entities. The core entities will be
     * encoded as named ones but all non lower ascii characters will be encoded into numeric entities.
     *
     * @method encodeNumeric
     * @param {String} text Text to encode.
     * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
     * @return {String} Entity encoded text.
     */
    encodeNumeric: function(text, attr) {
        var t = this;
        return text.replace(attr ? this.attrsCharsRegExp : this.textCharsRegExp, function(chr) {
            // Multi byte sequence convert it to a single entity
            if (chr.length > 1) {
                return '&#' + (1024 * (chr.charCodeAt(0) - 55296) + (chr.charCodeAt(1) - 56320) + 65536) + ';';
            }
            return t.baseEntities[chr] || '&#' + chr.charCodeAt(0) + ';';
        });
    },
    /**
     * Encodes the specified string using named entities. The core entities will be encoded
     * as named ones but all non lower ascii characters will be encoded into named entities.
     *
     * @method encodeNamed
     * @param {String} text Text to encode.
     * @param {Boolean} attr Optional flag to specify if the text is attribute contents.
     * @param {Object} entities Optional parameter with entities to use.
     * @return {String} Entity encoded text.
     */
    encodeNamed: function(text, attr, entities) {
        var t = this;
        entities = entities || this.namedEntities;
        return text.replace(attr ? this.attrsCharsRegExp : this.textCharsRegExp, function(chr) {
            return t.baseEntities[chr] || entities[chr] || chr;
        });
    },
    /**
     * Returns an encode function based on the name(s) and it's optional entities.
     *
     * @method getEncodeFunc
     * @param {String} name Comma separated list of encoders for example named,numeric.
     * @param {String} entities Optional parameter with entities to use instead of the built in set.
     * @return {function} Encode function to be used.
     */
    getEncodeFunc: function(name, entities) {
        entities = this.buildEntitiesLookup(entities) || this.namedEntities;
        var t = this;
        function encodeNamedAndNumeric(text, attr) {
            return text.replace(attr ? t.attrsCharsRegExp : t.textCharsRegExp, function(chr) {
                return t.baseEntities[chr] || entities[chr] || '&#' + chr.charCodeAt(0) + ';' || chr;
            });
        }

        function encodeCustomNamed(text, attr) {
            return t.encodeNamed(text, attr, entities);
        }
        // Replace + with , to be compatible with previous TinyMCE versions
        name = this.makeMap(name.replace(/\+/g, ','));
        // Named and numeric encoder
        if (name.named && name.numeric) {
            return this.encodeNamedAndNumeric;
        }
        // Named encoder
        if (name.named) {
            // Custom names
            if (entities) {
                return encodeCustomNamed;
            }
            return this.encodeNamed;
        }
        // Numeric
        if (name.numeric) {
            return this.encodeNumeric;
        }
        // Raw encoder
        return this.encodeRaw;
    },
    /**
     * Decodes the specified string, this will replace entities with raw UTF characters.
     *
     * @method decode
     * @param {String} text Text to entity decode.
     * @return {String} Entity decoded string.
     */
    decode: function(text)
    {
        var  t = this;
        return text.replace(this.entityRegExp, function(all, numeric) {
            if (numeric) {
                numeric = 'x' === numeric.charAt(0).toLowerCase() ? parseInt(numeric.substr(1), 16) : parseInt(numeric, 10);
                // Support upper UTF
                if (numeric > 65535) {
                    numeric -= 65536;
                    return String.fromCharCode(55296 + (numeric >> 10), 56320 + (1023 & numeric));
                }
                return t.asciiMap[numeric] || String.fromCharCode(numeric);
            }
            return t.reverseEntities[all] || t.namedEntities[all] || t.nativeDecode(all);
        });
    },
    /**
     * Last-resort fallback used by decode() for a named entity that is found in
     * neither `reverseEntities` nor `namedEntities`. This implementation returns
     * its argument unchanged, so such entities stay as-is in the decoded output.
     *
     * @param {String} text The entity text exactly as it was matched.
     * @return {String} The same text.
     */
    nativeDecode : function (text) {
        return text;
    },
    makeMap : function (items, delim, map) {
		var i;
		items = items || [];
		delim = delim || ',';
		if (typeof items == "string") {
			items = items.split(delim);
		}
		map = map || {};
		i = items.length;
		while (i--) {
			map[items[i]] = {};
		}
		return map;
	}
};



// Build the named-entity lookup (namedEntities) once, as soon as the object is defined.
Roo.htmleditor.TidyEntities.init();