在javascript中编码html实体

时间:2013-09-11 19:24:50

标签: javascript html

我在CMS中工作,允许用户输入内容。问题是,当他们添加符号®时,它可能无法在所有浏览器中良好显示。我想设置一个必须搜索的符号列表,然后转换为相应的html实体。例如

® => &reg;
& => &amp;
© => &copy;
™ => &trade;

转换后,需要将其换成<sup>标记,结果如下:

® => <sup>&reg;</sup>

因为需要特定的字体大小和填充样式:

sup { font-size: 0.6em; padding-top: 0.2em; }

JavaScript会是这样的吗?

var regs = document.querySelectorAll('®');
  for ( var i = 0, l = imgs.length; i < l; ++i ) {
  var [?] = regs[i];
  var [?] = document.createElement('sup');
  img.parentNode.insertBefore([?]);
  div.appendChild([?]);
}

“[?]”表示有些东西我不确定。

其他详细信息:

  • 我想用纯 JavaScript 来实现,不依赖 jQuery 之类的库,谢谢。
  • Backend是Ruby
  • 使用使用Ruby on Rails构建的RefineryCMS

18 个答案:

答案 0 :(得分:124)

您可以使用正则表达式替换给定unicode范围内的任何字符,其等效的html实体。代码看起来像这样:

// Swap every character in U+00A0–U+9999, plus "<", ">" and "&", for its
// decimal numeric character reference (&#nnn;).
var encodedStr = rawStr.replace(/[\u00A0-\u9999<>\&]/gim, function (ch) {
   var codeUnit = ch.charCodeAt(0);
   return '&#' + codeUnit + ';';
});

此代码会把给定范围内的所有字符(Unicode 00A0 - 9999,以及 & 符号、大于号和小于号)替换为对应的 HTML 实体,即 &#nnn; 的形式,其中 nnn 是通过 charCodeAt 得到的 Unicode 值。

在此处查看它:http://jsfiddle.net/E3EqX/13/(此示例使用jQuery作为示例中使用的元素选择器。上面的基本代码本身不使用jQuery)

进行这些转换并不能解决所有问题 - 确保使用UTF8字符编码,确保数据库以UTF8格式存储字符串。您仍然可能会看到字符无法正确显示的情况,具体取决于系统字体配置和您无法控制的其他问题。

<强>文档

答案 1 :(得分:45)

The currently accepted answer有几个问题。这篇文章解释了它们,并提供了更强大的解决方案。答案中建议的解决方案是:

// Verbatim quote of the accepted answer's snippet, reproduced so that its
// regex flags and character range can be critiqued below.
var encodedStr = rawStr.replace(/[\u00A0-\u9999<>\&]/gim, function(i) {
  return '&#' + i.charCodeAt(0) + ';';
});

i 标志是多余的,因为 U+00A0 到 U+9999 范围内的 Unicode 符号,没有哪个的大写/小写变体落在该范围之外。

m 标志是多余的,因为正则表达式中没有使用 ^ 或 $。

为什么范围U + 00A0到U + 9999?这似乎是随意的。

无论如何,如果想要一个能正确编码输入中除安全且可打印的 ASCII 符号之外的所有字符(包括星体平面(astral)符号!),并实现所有命名字符引用(不仅仅是 HTML4 中的那些)的解决方案,请使用 he 库(免责声明:这个库是我写的)。以下摘自其自述文件:

  

he(代表“HTML entities”)是一个用 JavaScript 编写的健壮的 HTML 实体编码器/解码器。它支持 HTML 标准中所有标准化的命名字符引用,像浏览器一样处理有歧义的 & 符号及其他边缘情况,拥有全面的测试套件,并且——与许多其他 JavaScript 解决方案不同——能够正确处理星体平面(astral)Unicode 符号。另有在线演示可用。

另见this relevant Stack Overflow answer

答案 2 :(得分:23)

我遇到了同样的问题,于是创建了两个函数,分别用于生成实体和把实体转换回普通字符。下面的方法挂在 String 原型上,可以把任意字符串转换为 HTML 实体,也可以再转换回来。

/**
 * Convert a string to HTML entities.
 *
 * Every code point of the receiver, including line terminators and symbols
 * outside the Basic Multilingual Plane, is replaced by its decimal numeric
 * character reference (&#nnn;).
 *
 * @returns {string} The fully entity-encoded string.
 */
String.prototype.toHtmlEntities = function() {
    // [\s\S] also matches line terminators (which "." skips). The "u" flag
    // makes the regex step through whole code points, so an astral symbol
    // yields one valid reference instead of two surrogate references.
    return this.replace(/[\s\S]/gu, function(s) {
        return "&#" + s.codePointAt(0) + ";";
    });
};

/**
 * Create string from HTML entities.
 *
 * Decodes every decimal numeric character reference (&#nnn;) in the input.
 * Named and hexadecimal references are left untouched.
 *
 * @param {*} string - Value to decode; it is coerced to a string first.
 * @returns {string} The decoded string.
 */
String.fromHtmlEntities = function(string) {
    return (string+"").replace(/&#\d+;/g, function(s) {
        var codePoint = parseInt(s.slice(2, -1), 10);
        // fromCodePoint handles astral symbols (fromCharCode truncates to 16
        // bits) but throws above U+10FFFF, so such references are kept as-is.
        return codePoint > 0x10FFFF ? s : String.fromCodePoint(codePoint);
    });
};

然后您可以按照以下方式使用它:

// Sample text mixing ASCII with symbols such as ®, ©, ™, π and a non-breaking
// space (written as \u00A0 so it stays visible in the source).
var str = "Dit is e´†®¥¨©˙∫ø…ˆƒ∆÷∑™ƒ∆æøπ£¨\u00A0ƒ™en t£eést".toHtmlEntities();
console.log("Entities:", str);
console.log("String:", String.fromHtmlEntities(str));

控制台输出:

Entities: &#68;&#105;&#116;&#32;&#105;&#115;&#32;&#101;&#180;&#8224;&#174;&#165;&#168;&#169;&#729;&#8747;&#248;&#8230;&#710;&#402;&#8710;&#247;&#8721;&#8482;&#402;&#8710;&#230;&#248;&#960;&#163;&#168;&#160;&#402;&#8482;&#101;&#110;&#32;&#116;&#163;&#101;&#233;&#115;&#116;
String: Dit is e´†®¥¨©˙∫ø…ˆƒ∆÷∑™ƒ∆æøπ£¨ ƒ™en t£eést 

答案 3 :(得分:13)

不使用任何库,如果你不需要支持 IE < 9,可以创建一个 html 元素,并使用 Node.textContent 设置其内容:

// Let the browser do the escaping: assigning to textContent stores the string
// as plain text, and reading innerHTML then returns that text serialized as
// HTML, with markup-significant characters written as entities.
var str = "<this is not a tag>";
var p = document.createElement("p");
p.textContent = str;
var converted = p.innerHTML;

以下是一个示例:https://jsfiddle.net/1erdhehv/

答案 4 :(得分:11)

你可以使用它。

// Lookup table: literal character -> entity name (emitted between "&" and ";").
var escapeChars = {
  '¢' : 'cent',
  '£' : 'pound',
  '¥' : 'yen',
  '€': 'euro',
  '©' :'copy',
  '®' : 'reg',
  '<' : 'lt',
  '>' : 'gt',
  '"' : 'quot',
  '&' : 'amp',
  '\'' : '#39'
};

// Character class made of every key in the table, in declaration order.
var regexString = '[' + Object.keys(escapeChars).join('') + ']';

var regex = new RegExp(regexString, 'g');

/**
 * Replace each character listed in `escapeChars` with its entity.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The text with the listed characters written as entities.
 */
function escapeHTML(str) {
  return str.replace(regex, function(ch) {
    var entityName = escapeChars[ch];
    return '&' + entityName + ';';
  });
}

https://github.com/epeli/underscore.string/blob/master/escapeHTML.js

// Lookup table: entity name (without "&" and ";") -> literal character.
var htmlEntities = {
    nbsp: ' ',
    cent: '¢',
    pound: '£',
    yen: '¥',
    euro: '€',
    copy: '©',
    reg: '®',
    lt: '<',
    gt: '>',
    quot: '"',
    amp: '&',
    apos: '\''
};

/**
 * Decode the named entities listed in `htmlEntities`, plus decimal (&#nnn;)
 * and hexadecimal (&#xhh;) numeric character references.
 *
 * Anything that is not recognised is returned unchanged.
 *
 * @param {string} str - Text containing entities.
 * @returns {string} The decoded text.
 */
function unescapeHTML(str) {
    return str.replace(/\&([^;]+);/g, function (entity, entityCode) {
        // Own-property check: the `in` operator would also accept inherited
        // names such as "toString" and return a function instead of a string.
        if (Object.prototype.hasOwnProperty.call(htmlEntities, entityCode)) {
            return htmlEntities[entityCode];
        }

        var hex = /^#x([\da-fA-F]+)$/.exec(entityCode);
        var dec = hex ? null : /^#(\d+)$/.exec(entityCode);
        if (!hex && !dec) {
            return entity;
        }

        var codePoint = hex ? parseInt(hex[1], 16) : parseInt(dec[1], 10);
        // fromCodePoint handles astral symbols (fromCharCode truncates to 16
        // bits) but throws above U+10FFFF, so such references are kept as-is.
        return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : entity;
    });
}

答案 5 :(得分:3)

如果您想避免对 HTML 实体进行重复编码

/**
 * Encode "<", ">", "&" and every character in U+00A0–U+9999 as a decimal
 * numeric character reference, without re-encoding references that are
 * already present (so calling it twice gives the same result as once).
 *
 * @param {string} str - Text to encode.
 * @returns {string} The encoded text.
 */
function encodeHTML(str){
    // Only "&" needs the "already encoded?" lookahead, and it must see a
    // complete "#digits;" reference. Applying a bare (?!#) to every character
    // left "<#" and a stray "&#" unencoded.
    return str.replace(/[\u00A0-\u9999<>]|&(?!#\d+;)/gi, function(ch) {
      return '&#' + ch.charCodeAt(0) + ';';
    });
}

/**
 * Decode decimal numeric character references (&#nnn;) back to characters.
 *
 * @param {string} str - Text containing decimal references.
 * @returns {string} The decoded text.
 */
function decodeHTML(str){
    // Accept any number of digits: limiting to three could not decode
    // references such as &#8364; that the matching encoder produces.
    return str.replace(/&#(\d+);/g, function(match, num) {
        var codePoint = parseInt(num, 10);
        // fromCodePoint throws above U+10FFFF; keep such references as-is.
        return codePoint <= 0x10FFFF ? String.fromCodePoint(codePoint) : match;
    });
}

实施例

var text = "<a>Content</a>";

text = encodeHTML(text);
console.log("Encode 1 times: " + text);

// &#60;a&#62;Content&#60;/a&#62;

text = encodeHTML(text);
console.log("Encode 2 times: " + text);

// &#60;a&#62;Content&#60;/a&#62;

text = decodeHTML(text);
console.log("Decoded: " + text);

// <a>Content</a>

答案 6 :(得分:2)

HTML 特殊字符及其转义码(ESCAPE CODES)

保留字符必须在 HTML 中转义:我们可以使用字符转义,仅用 ASCII 字符就能在 HTML、XHTML 或 XML 中表示任何 Unicode 字符[例如:& 即 U+00026]。数字字符引用(Numeric character references)[例如:& 符号 - &#38;]和命名字符引用(Named character references)[例如:&amp;]都属于标记中使用的字符转义(character escape used in markup)。

Predefined Entities

  

原始字符 XML实体替换 XML数字替换
  <  &lt;  &#60;
  >  &gt;  &#62;
  "  &quot;  &#34;
  &  &amp;  &#38;
  '  &apos;  &#39;

要在网页中把 HTML 标记按原样显示出来,可以使用 <pre><code> 标记,也可以对其进行转义。转义字符串的做法是:把出现的每个 "&" 字符替换为字符串 "&amp;",把出现的每个 ">" 字符替换为字符串 "&gt;"。例如:stackoverflow post

/**
 * Build the table of the five predefined XML entities.
 *
 * @returns {Object<string, string>} A new object mapping each reserved
 *   character to its named entity.
 */
function escapeCharEntities() {
    return {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "\"": "&quot;",
        "'": "&apos;"
    };
}

// Regex sources for encoding and decoding. They start out empty and are
// filled in as a side effect of swapJsonKeyValues(), which runs while
// `html.decodeMap` below is being initialised.
var mapkeys = '', mapvalues = '';
// Encoder/decoder for the characters listed by escapeCharEntities().
var html = {
    // Built on every call, so it sees `mapkeys` after it has been populated.
    encodeRex : function () {
        return  new RegExp(mapkeys, 'gm');
    }, 
    // Built on every call, so it sees `mapvalues` after it has been populated.
    decodeRex : function () {
        return  new RegExp(mapvalues, 'gm');
    },
    // character -> entity
    encodeMap : JSON.parse( JSON.stringify( escapeCharEntities () ) ),
    // entity -> character; evaluating this also sets mapkeys and mapvalues.
    decodeMap : JSON.parse( JSON.stringify( swapJsonKeyValues( escapeCharEntities () ) ) ),
    // Replace each character matched by encodeRex() with its entity.
    encode : function ( str ) {
        return str.replace(html.encodeRex(), function(m) { return html.encodeMap[m]; });
    },
    // Replace each entity matched by decodeRex() with its character.
    decode : function ( str ) {
        return str.replace(html.decodeRex(), function(m) { return html.decodeMap[m]; });
    }
};

/**
 * Invert an object (values become keys, keys become values).
 *
 * Side effects: logs the two generated patterns and stores them in the outer
 * variables `mapkeys` (a character class of all keys) and `mapvalues` (an
 * alternation group of all values).
 *
 * @param {Object<string, string>} json - Table to invert.
 * @returns {Object<string, string>} The inverted table.
 */
function swapJsonKeyValues ( json ) {
    var ownKeys = Object.keys( json );
    var inverted = {};
    var alternatives = [];

    ownKeys.forEach(function ( name ) {
        inverted[ json[ name ] ] = name;
        alternatives.push( '' + json[ name ] );
    });

    var keys = '[' + ownKeys.join( '' ) + ']';
    var val = '(' + alternatives.join( '|' ) + ')';

    console.log( keys, ' == ', val);
    mapkeys = keys;
    mapvalues = val;
    return inverted;
}

console.log('Encode: ', html.encode('<input type="password" name="password" value=""/>') ); 
console.log('Decode: ', html.decode(html.encode('<input type="password" name="password" value=""/>')) );

O/P:
Encode:  &lt;input type=&quot;password&quot; name=&quot;password&quot; value=&quot;&quot;/&gt;
Decode:  <input type="password" name="password" value=""/>

答案 7 :(得分:1)

如果您已经在使用jQuery,请尝试html()

$('<div>').text('<script>alert("gotcha!")</script>').html()
// "&lt;script&gt;alert("gotcha!")&lt;/script&gt;"

实例化内存中文本节点,并在其上调用html()

它很丑,也浪费了一些内存,我不知道它是否像 he 库一样彻底,但如果你已经在使用 jQuery,也许这是一个可选方案。

取自FelixGeisendörfer的博客文章Encode HTML entities with jQuery

答案 8 :(得分:1)

有时候你只想对每个字符都进行编码……这个函数在正则表达式中使用 [^](即“除了‘无’以外的一切”,可匹配任意字符)来替换所有字符。

// Minified form: encodes every code point of `e` as a decimal reference. The "u" flag and codePointAt keep astral symbols as one reference.
function encode(e){return e.replace(/[^]/gu,function(e){return"&#"+e.codePointAt(0)+";"})}

/**
 * Encode every code point of a string as a decimal numeric character
 * reference (&#nnn;).
 *
 * @param {string} w - Text to encode.
 * @returns {string} The fully encoded text.
 */
function encode(w) {
  // [^] matches any character, line terminators included. With the "u" flag
  // the match is a whole code point, so an astral symbol becomes one valid
  // reference rather than two surrogate references.
  return w.replace(/[^]/gu, function(w) {
    return "&#" + w.codePointAt(0) + ";";
  });
}

test.value=encode(document.body.innerHTML.trim());
<textarea id=test rows=11 cols=55>www.WHAK.com</textarea>

答案 9 :(得分:1)

// Ordered list of replacements, applied one after another by the loop below.
// "&" comes first so that the ampersands introduced by the later entities are
// not themselves re-encoded.
var htmlEntities = [
            {regex:/&/g,entity:'&amp;'},
            {regex:/>/g,entity:'&gt;'},
            {regex:/</g,entity:'&lt;'},
            {regex:/"/g,entity:'&quot;'},
            {regex:/á/g,entity:'&aacute;'},
            {regex:/é/g,entity:'&eacute;'},
            {regex:/í/g,entity:'&iacute;'},
            {regex:/ó/g,entity:'&oacute;'},
            {regex:/ú/g,entity:'&uacute;'}
        ];

total = <some string value>

// Apply each replacement in array order. for...of visits the array elements
// themselves and declares its own loop variable; the previous for...in walked
// keys and leaked `v` as an implicit global (an error in strict mode).
for (const entry of htmlEntities) {
    total = total.replace(entry.regex, entry.entity);
}

阵列解决方案

答案 10 :(得分:1)

从Ourcodeworld查看教程 Ourcodeworld - encode and decode html entities with javascript

最重要的是he library示例

he.encode('foo © bar ≠ baz 𝌆 qux');
// → 'foo &#xA9; bar &#x2260; baz &#x1D306; qux'

// Passing an `options` object to `encode`, to explicitly encode all symbols:
he.encode('foo © bar ≠ baz 𝌆 qux', {
 'encodeEverything': true
});

he.decode('foo &copy; bar &ne; baz &#x1D306; qux');
// → 'foo © bar ≠ baz 𝌆 qux'

此库可能会使您的编码更容易且管理更好。它很受欢迎,会定期更新并遵循HTML规范。它本身没有依赖性,如package.json

所示

答案 11 :(得分:0)

/**
 * Decode a fixed set of named HTML entities.
 *
 * All entities are decoded in a single pass, so text produced by decoding
 * "&amp;" is never decoded a second time ("&amp;quot;" becomes "&quot;").
 *
 * @param {string} encoded_text - Text containing named entities.
 * @returns {string} The text with the listed entities decoded.
 */
function htmlEntityReplacer(encoded_text) {
    const all_entities = [{ /* source: https://www.w3schools.com/html/html_entities.asp */
        encoded: `&nbsp;`,
        decoded: ` `
    }, {
        encoded: `&lt;`,
        decoded: `<`
    }, {
        encoded: `&gt;`,
        decoded: `>`
    }, {
        encoded: `&amp;`,
        decoded: `&`
    }, {
        encoded: `&quot;`,
        decoded: `"`
    }, {
        encoded: `&apos;`,
        decoded: `'`
    }, {
        encoded: `&cent;`,
        decoded: `¢`
    }, {
        encoded: `&pound;`,
        decoded: `£`
    }, {
        encoded: `&yen;`,
        decoded: `¥`
    }, {
        encoded: `&euro;`,
        decoded: `€`
    }, {
        encoded: `&copy;`,
        decoded: `©`
    }, {
        encoded: `&reg;`,
        decoded: `®`
    }];

    const decoded_by_entity = new Map(
        all_entities.map(({ encoded, decoded }) => [encoded, decoded])
    );
    // One alternation of every known entity; none of them contains a regex
    // metacharacter, so they can be joined without escaping.
    const any_entity = new RegExp(
        all_entities.map(({ encoded }) => encoded).join('|'),
        'g'
    );

    return encoded_text.replace(any_entity, (entity) => decoded_by_entity.get(entity));
}

// 对于 node 或 vanilla

答案 12 :(得分:0)

htmlentities() 转换 HTML 实体

因此,我们构建了一个常量,其中将包含我们要转换的 html 标签。

// Ordered character <-> entity pairs shared by the encode and decode helpers.
// When encoding, `regex` is the RegExp source and `entity` the replacement;
// when decoding, the two roles are swapped.
const htmlEntities = [ 
    {regex:'&',entity:'&amp;'},
    {regex:'>',entity:'&gt;'},
    {regex:'<',entity:'&lt;'} 
  ];

我们构建了一个函数,将所有对应的 html 字符转换为字符串:Html ==> String

 /**
  * Encode the characters listed in `htmlEntities` as named entities.
  *
  * @param {string} s - Text to encode.
  * @returns {string} The encoded text.
  */
 function htmlentities (s){
    var encoded = s;
    // Index loop with a declared counter: the previous `for (v in ...)` walked
    // array keys and assigned to an undeclared `v`, which throws in strict
    // mode and in ES modules.
    for (var i = 0; i < htmlEntities.length; i++) {
      var reg = new RegExp(htmlEntities[i].regex, 'g');
      encoded = encoded.replace(reg, htmlEntities[i].entity);
    }
    return encoded;
  }

为了解码,我们构建了一个反向函数,将所有字符串转换为其等效的 html 。字符串 ==> html

 /**
  * Decode the entities listed in `htmlEntities` back to their characters.
  *
  * All entities are decoded in one pass. Decoding them one after another,
  * with "&amp;" first, turned "&amp;lt;" into "&lt;" and then into "<".
  *
  * @param {string} s - Text containing entities.
  * @returns {string} The decoded text.
  */
 function  html_entities_decode (s){
    var charByEntity = {};
    var alternatives = [];
    for (var i = 0; i < htmlEntities.length; i++) {
      charByEntity[htmlEntities[i].entity] = htmlEntities[i].regex;
      alternatives.push(htmlEntities[i].entity);
    }
    // An empty alternation would match the empty string everywhere.
    if (alternatives.length === 0) {
      return s;
    }
    var reg = new RegExp(alternatives.join('|'), 'g');
    return s.replace(reg, function (entity) {
      return charByEntity[entity];
    });
   }

之后,我们可以使用 encodeURIComponent() 对所有其他特殊字符 (é è ...) 进行编码

用例

 var s  = '<div> God bless you guy   </div> '
 var h = encodeURIComponent(htmlentities(s));         /** To encode */
 h =  html_entities_decode(decodeURIComponent(h));     /** To decode */

答案 13 :(得分:0)

编码或解码 HTML 实体的简便方法之一
只用一个参数调用一个函数...

解码 HTML 实体

/**
 * Decode HTML entities by letting a detached <textarea> parse the text.
 *
 * @param {string} text - Text containing entities.
 * @returns {string} The textarea's resulting value.
 */
function decodeHTMLEntities(text) {
  var scratch = document.createElement('textarea');
  scratch.innerHTML = text;
  var decoded = scratch.value;
  return decoded;
}

解码 HTML 实体 (JQuery)

/**
 * jQuery variant: set the text as the HTML of a detached <textarea> and read
 * it back as plain text.
 *
 * @param {string} text - Text containing entities.
 * @returns {string} The decoded text.
 */
function decodeHTMLEntities(text) {
  var $scratch = $("<textarea/>");
  var $filled = $scratch.html(text);
  return $filled.text();
}

编码 HTML 实体

/**
 * Encode markup-significant characters by storing the text in a detached
 * <textarea> and reading back its HTML serialization.
 *
 * @param {string} text - Plain text to encode.
 * @returns {string} The text as serialized by the element's innerHTML.
 */
function encodeHTMLEntities(text) {
  var textArea = document.createElement('textarea');
  // textContent stores the string verbatim. The innerText setter converts
  // line breaks into <br> elements, which would leak real markup into the
  // result for multi-line input.
  textArea.textContent = text;
  return textArea.innerHTML;
}

编码 HTML 实体 (JQuery)

/**
 * jQuery variant: store the text in a detached <textarea> as plain text and
 * read it back as HTML.
 *
 * @param {string} text - Plain text to encode.
 * @returns {string} The encoded text.
 */
function encodeHTMLEntities(text) {
  var $scratch = $("<textarea/>");
  var $filled = $scratch.text(text);
  return $filled.html();
}

答案 14 :(得分:0)

这是我实现编码的方式。我从上面给出的答案中得到了启发。

/**
 * Encode HTML-significant characters and every character in U+00A0–U+9999.
 *
 * Characters listed in the table get their named entity; every other matched
 * character gets a decimal numeric character reference.
 *
 * @param {string} str - Text to encode.
 * @returns {string} The encoded text.
 */
function encodeHTML(str) {
  const code = {
      '\u00A0' : '&nbsp;',
      '¢' : '&cent;',
      '£' : '&pound;',
      '¥' : '&yen;',
      '€' : '&euro;', 
      '©' : '&copy;',
      '®' : '&reg;',
      '<' : '&lt;', 
      '>' : '&gt;',  
      '"' : '&quot;', 
      '&' : '&amp;',
      '\'' : '&apos;'
  };
  // The character class is far wider than the table, so unmapped matches
  // fall back to a numeric reference. Previously they were replaced by the
  // literal text "undefined".
  return str.replace(/[\u00A0-\u9999<>&'"]/g, (ch) =>
      Object.prototype.hasOwnProperty.call(code, ch)
          ? code[ch]
          : '&#' + ch.charCodeAt(0) + ';');
}

// TEST
console.log(encodeHTML("Dolce & Gabbana"));
console.log(encodeHTML("Hamburgers < Pizza < Tacos"));
console.log(encodeHTML("Sixty > twelve"));
console.log(encodeHTML('Stuff in "quotation marks"'));
console.log(encodeHTML("Schindler's List"));
console.log(encodeHTML("<>"));

答案 15 :(得分:0)

replaceHtmlEntities(text) {
  var tagsToReplace = {
    '&amp;': '&',
    '&lt;': '<',
    '&gt;': '>',
  };
  var newtext = text;
  for (var tag in tagsToReplace) {
    if (Reflect.apply({}.hasOwnProperty, this, [tagsToReplace, tag])) {
      var regex = new RegExp(tag, 'g');
      newtext = newtext.replace(regex, tagsToReplace[tag]);
    }
  }
  return newtext;
}

答案 16 :(得分:-1)

<!DOCTYPE html>
<html>
<style>
/* Large grey button that triggers the demo. */
button {
background: #ccc;
padding: 14px;
width: 400px;
font-size: 32px;
}
/* Output paragraph that receives the decoded text. */
#demo {
font-size: 20px;
font-family: Arial;
font-weight: bold;
}
</style>
<body>

<p>Click the button to decode.</p>

<button onclick="entitycode()">Html Code</button>

<p id="demo"></p>


<script>
/**
 * Render a list of HTML entity references next to their descriptions.
 *
 * The list is written into the #demo element through innerHTML, so the
 * browser decodes each reference into the character it stands for.
 */
function entitycode() {
  // One "reference = description" item per line, joined with " __ ". A string
  // literal may not contain a raw line break, so the list is kept as an array.
  var uri = [
    "&quot; = quotation mark",
    "&apos; = apostrophe",
    "&amp; = ampersand",
    "&lt; = less-than",
    "&gt; = greater-than",
    "&nbsp; = non-breaking space",
    "&iexcl; = inverted exclamation mark",
    "&cent; = cent",
    "&pound; = pound",
    "&curren; = currency",
    "&yen; = yen",
    "&brvbar; = broken vertical bar",
    "&sect; = section",
    "&uml; = spacing diaeresis",
    "&copy; = copyright",
    "&ordf; = feminine ordinal indicator",
    "&laquo; = angle quotation mark (left)",
    "&not; = negation",
    "&shy; = soft hyphen",
    "&reg; = registered trademark",
    "&macr; = spacing macron",
    "&deg; = degree",
    "&plusmn; = plus-or-minus",
    "&sup2; = superscript 2",
    "&sup3; = superscript 3",
    "&acute; = spacing acute",
    "&micro; = micro",
    "&para; = paragraph",
    "&middot; = middle dot",
    "&cedil; = spacing cedilla",
    "&sup1; = superscript 1",
    "&ordm; = masculine ordinal indicator",
    "&raquo; = angle quotation mark (right)",
    "&frac14; = fraction 1/4",
    "&frac12; = fraction 1/2",
    "&frac34; = fraction 3/4",
    "&iquest; = inverted question mark",
    "&times; = multiplication",
    "&divide; = division",
    "&Agrave; = capital a, grave accent",
    "&Aacute; = capital a, acute accent",
    "&Acirc; = capital a, circumflex accent",
    "&Atilde; = capital a, tilde",
    "&Auml; = capital a, umlaut mark",
    "&Aring; = capital a, ring",
    "&AElig; = capital ae",
    "&Ccedil; = capital c, cedilla",
    "&Egrave; = capital e, grave accent",
    "&Eacute; = capital e, acute accent",
    "&Ecirc; = capital e, circumflex accent",
    "&Euml; = capital e, umlaut mark",
    "&Igrave; = capital i, grave accent",
    "&Iacute; = capital i, acute accent",
    "&Icirc; = capital i, circumflex accent",
    "&Iuml; = capital i, umlaut mark",
    "&ETH; = capital eth, Icelandic",
    "&Ntilde; = capital n, tilde",
    "&Ograve; = capital o, grave accent",
    "&Oacute; = capital o, acute accent",
    "&Ocirc; = capital o, circumflex accent",
    "&Otilde; = capital o, tilde",
    "&Ouml; = capital o, umlaut mark",
    "&Oslash; = capital o, slash",
    "&Ugrave; = capital u, grave accent",
    "&Uacute; = capital u, acute accent",
    "&Ucirc; = capital u, circumflex accent",
    "&Uuml; = capital u, umlaut mark",
    "&Yacute; = capital y, acute accent",
    "&THORN; = capital THORN, Icelandic",
    "&szlig; = small sharp s, German",
    "&agrave; = small a, grave accent",
    "&aacute; = small a, acute accent",
    "&acirc; = small a, circumflex accent",
    "&atilde; = small a, tilde",
    "&auml; = small a, umlaut mark",
    "&aring; = small a, ring",
    "&aelig; = small ae",
    "&ccedil; = small c, cedilla",
    "&egrave; = small e, grave accent",
    "&eacute; = small e, acute accent",
    "&ecirc; = small e, circumflex accent",
    "&euml; = small e, umlaut mark",
    "&igrave; = small i, grave accent",
    "&iacute; = small i, acute accent",
    "&icirc; = small i, circumflex accent",
    "&iuml; = small i, umlaut mark",
    "&eth; = small eth, Icelandic",
    "&ntilde; = small n, tilde",
    "&ograve; = small o, grave accent",
    "&oacute; = small o, acute accent",
    "&ocirc; = small o, circumflex accent",
    "&otilde; = small o, tilde",
    "&ouml; = small o, umlaut mark",
    "&oslash; = small o, slash",
    "&ugrave; = small u, grave accent",
    "&uacute; = small u, acute accent",
    "&ucirc; = small u, circumflex accent",
    "&uuml; = small u, umlaut mark",
    "&yacute; = small y, acute accent",
    "&thorn; = small thorn, Icelandic",
    "&yuml; = small y, umlaut mark"
  ].join(" __ ");
  // The URI round trip returns the same string; the entities are decoded by
  // the innerHTML assignment below, not by decodeURI.
  var enc = encodeURI(uri);
  var dec = decodeURI(enc);
  var res = dec;
  document.getElementById("demo").innerHTML = res;
}
</script>

</body>
</html>

答案 17 :(得分:-2)

您可以使用charCodeAt()方法检查指定字符的值是否大于127,并使用toString(16)将其转换为数字字符引用。