From 2cfff3331c5b70c183d0f75dfc62e47498ba3c9f Mon Sep 17 00:00:00 2001
From: Alan Knowles <alan@roojs.com>
Date: Wed, 21 Aug 2019 18:29:09 +0800
Subject: [PATCH] Changed Roo/HtmlEditorCore.js

---
 Roo/HtmlEditorCore.js | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/Roo/HtmlEditorCore.js b/Roo/HtmlEditorCore.js
index 3e30137d18..46ea4fc2e4 100644
--- a/Roo/HtmlEditorCore.js
+++ b/Roo/HtmlEditorCore.js
@@ -335,17 +335,38 @@ Roo.extend(Roo.HtmlEditorCore, Roo.Component,  {
             html = this.cleanHtml(html);
             // fix up the special chars.. normaly like back quotes in word...
             // however we do not want to do this with chinese..
-            html = html.replace(/([\x80-\uffff])/g, function (a, b) {
-                var cc = b.charCodeAt();
-                if (
+            html = html.replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0000-\u001f\u0080-\uFFFF]/g, function(match) {
+                var high, low, charValue, rep
+                var cc = match.charCodeAt();
+
+                // Get the character value, handling surrogate pairs
+                if (match.length == 2) {
+                    // It's a surrogate pair, calculate the Unicode code point
+                    high = match.charCodeAt(0) - 0xD800;
+                    low  = match.charCodeAt(1) - 0xDC00;
+                    charValue = (high * 0x400) + low + 0x10000;
+                }  else if (
                     (cc >= 0x4E00 && cc < 0xA000 ) ||
                     (cc >= 0x3400 && cc < 0x4E00 ) ||
                     (cc >= 0xf900 && cc < 0xfb00 )
                 ) {
-                        return b;
+                        return match;
+                } else {
+                    // Not a surrogate pair, the value *is* the Unicode code point
+                    charValue = match.charCodeAt(0);
                 }
-                return "&#"+cc+";" 
+    
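+                // No entity-name mapping is consulted here: use a numeric entity.
+                // We brazenly (and possibly mistakenly) assume that every character
+                // reaching this point can be represented as a decimal numeric entity.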
+                    rep = "&#" + charValue + ";";
+                
+                // Return replacement
+                return rep;
             });
+            
+            
+             
             if(this.owner.fireEvent('beforesync', this, html) !== false){
                 this.el.dom.value = html;
                 this.owner.fireEvent('sync', this, html);
-- 
2.39.2