有人可以解释为什么System.Xml.XmlTextWriter在我认为有效的unicode字符引用之前转义&符号:
[TestMethod]
public void TestEncoding()
{
using (StringWriter sw = new StringWriter())
{
XmlWriter writer = new XmlTextWriter(sw);
const string testChar = "–";
writer.WriteString(testChar);
Assert.AreEqual(testChar, sw.ToString());
writer.Close();
}
}
失败并显示消息
Assert.AreEqual failed. Expected:<–>. Actual:<&#8211;>.
有没有办法摆脱这种行为,同时正确地保持隔离或无效&符号转换为&amp; amp; ?
html = Regex.Replace(html, @"&#?x?[a-zA-Z0-9]{2,7};", ReplaceCharEntities);
...
private static string ReplaceCharEntities(Match m)
{
string s= m.ToString();
s = s.Substring(1,s.Length-2);
if (s[0] == '#')
{
if (s[1]=='x')
{
return Char.ConvertFromUtf32(Int32.Parse(s.Substring(2), System.Globalization.NumberStyles.HexNumber));
}
return Char.ConvertFromUtf32(Int32.Parse(s.Substring(1)));
}
switch (s)
{
case "nbsp": return "\u00a0";
case "iexcl": return "\u00a1";
case "cent": return "\u00a2";
case "pound": return "\u00a3";
case "curren": return "\u00a4";
case "yen": return "\u00a5";
case "brvbar": return "\u00a6";
case "sect": return "\u00a7";
case "uml": return "\u00a8";
case "copy": return "\u00a9";
case "ordf": return "\u00aa";
case "laquo": return "\u00ab";
case "not": return "\u00ac";
case "shy": return "\u00ad";
case "reg": return "\u00ae";
case "macr": return "\u00af";
case "deg": return "\u00b0";
case "plusmn": return "\u00b1";
case "sup2": return "\u00b2";
case "sup3": return "\u00b3";
case "acute": return "\u00b4";
case "micro": return "\u00b5";
case "para": return "\u00b6";
case "middot": return "\u00b7";
case "cedil": return "\u00b8";
case "sup1": return "\u00b9";
case "ordm": return "\u00ba";
case "raquo": return "\u00bb";
case "frac14": return "\u00bc";
case "frac12": return "\u00bd";
case "frac34": return "\u00be";
case "iquest": return "\u00bf";
case "Agrave": return "\u00c0";
case "Aacute": return "\u00c1";
case "Acirc": return "\u00c2";
case "Atilde": return "\u00c3";
case "Auml": return "\u00c4";
case "Aring": return "\u00c5";
case "AElig": return "\u00c6";
case "Ccedil": return "\u00c7";
case "Egrave": return "\u00c8";
case "Eacute": return "\u00c9";
case "Ecirc": return "\u00ca";
case "Euml": return "\u00cb";
case "Igrave": return "\u00cc";
case "Iacute": return "\u00cd";
case "Icirc": return "\u00ce";
case "Iuml": return "\u00cf";
case "ETH": return "\u00d0";
case "Ntilde": return "\u00d1";
case "Ograve": return "\u00d2";
case "Oacute": return "\u00d3";
case "Ocirc": return "\u00d4";
case "Otilde": return "\u00d5";
case "Ouml": return "\u00d6";
case "times": return "\u00d7";
case "Oslash": return "\u00d8";
case "Ugrave": return "\u00d9";
case "Uacute": return "\u00da";
case "Ucirc": return "\u00db";
case "Uuml": return "\u00dc";
case "Yacute": return "\u00dd";
case "THORN": return "\u00de";
case "szlig": return "\u00df";
case "agrave": return "\u00e0";
case "aacute": return "\u00e1";
case "acirc": return "\u00e2";
case "atilde": return "\u00e3";
case "auml": return "\u00e4";
case "aring": return "\u00e5";
case "aelig": return "\u00e6";
case "ccedil": return "\u00e7";
case "egrave": return "\u00e8";
case "eacute": return "\u00e9";
case "ecirc": return "\u00ea";
case "euml": return "\u00eb";
case "igrave": return "\u00ec";
case "iacute": return "\u00ed";
case "icirc": return "\u00ee";
case "iuml": return "\u00ef";
case "eth": return "\u00f0";
case "ntilde": return "\u00f1";
case "ograve": return "\u00f2";
case "oacute": return "\u00f3";
case "ocirc": return "\u00f4";
case "otilde": return "\u00f5";
case "ouml": return "\u00f6";
case "divide": return "\u00f7";
case "oslash": return "\u00f8";
case "ugrave": return "\u00f9";
case "uacute": return "\u00fa";
case "ucirc": return "\u00fb";
case "uuml": return "\u00fc";
case "yacute": return "\u00fd";
case "thorn": return "\u00fe";
case "yuml": return "\u00ff";
case "Alpha": return "\u0391";
case "Beta": return "\u0392";
case "Gamma": return "\u0393";
case "Delta": return "\u0394";
case "Epsilon": return "\u0395";
case "Zeta": return "\u0396";
case "Eta": return "\u0397";
case "Theta": return "\u0398";
case "Iota": return "\u0399";
case "Kappa": return "\u039a";
case "Lambda": return "\u039b";
case "Mu": return "\u039c";
case "Nu": return "\u039d";
case "Xi": return "\u039e";
case "Omicron": return "\u039f";
case "Pi": return "\u03a0";
case "Rho": return "\u03a1";
case "Sigma": return "\u03a3";
case "Tau": return "\u03a4";
case "Upsilon": return "\u03a5";
case "Phi": return "\u03a6";
case "Chi": return "\u03a7";
case "Psi": return "\u03a8";
case "Omega": return "\u03a9";
case "alpha": return "\u03b1";
case "beta": return "\u03b2";
case "gamma": return "\u03b3";
case "delta": return "\u03b4";
case "epsilon": return "\u03b5";
case "zeta": return "\u03b6";
case "eta": return "\u03b7";
case "theta": return "\u03b8";
case "iota": return "\u03b9";
case "kappa": return "\u03ba";
case "lambda": return "\u03bb";
case "mu": return "\u03bc";
case "nu": return "\u03bd";
case "xi": return "\u03be";
case "omicron": return "\u03bf";
case "pi": return "\u03c0";
case "rho": return "\u03c1";
case "sigmaf": return "\u03c2";
case "sigma": return "\u03c3";
case "tau": return "\u03c4";
case "upsilon": return "\u03c5";
case "phi": return "\u03c6";
case "chi": return "\u03c7";
case "psi": return "\u03c8";
case "omega": return "\u03c9";
case "ϑ": return "\u03d1";
case "upsih": return "\u03d2";
case "piv": return "\u03d6";
case "bull": return "\u2022";
case "hellip": return "\u2026";
case "prime": return "\u2032";
case "Prime": return "\u2033";
case "oline": return "\u203e";
case "frasl": return "\u2044";
case "weierp": return "\u2118";
case "image": return "\u2111";
case "real": return "\u211c";
case "trade": return "\u2122";
case "alefsym": return "\u2135";
case "larr": return "\u2190";
case "uarr": return "\u2191";
case "rarr": return "\u2192";
case "darr": return "\u2193";
case "harr": return "\u2194";
case "crarr": return "\u21b5";
case "lArr": return "\u21d0";
case "uArr": return "\u21d1";
case "rArr": return "\u21d2";
case "dArr": return "\u21d3";
case "hArr": return "\u21d4";
case "forall": return "\u2200";
case "part": return "\u2202";
case "exist": return "\u2203";
case "empty": return "\u2205";
case "nabla": return "\u2207";
case "isin": return "\u2208";
case "notin": return "\u2209";
case "ni": return "\u220b";
case "prod": return "\u220f";
case "sum": return "\u2211";
case "minus": return "\u2212";
case "lowast": return "\u2217";
case "radic": return "\u221a";
case "prop": return "\u221d";
case "infin": return "\u221e";
case "ang": return "\u2220";
case "and": return "\u2227";
case "or": return "\u2228";
case "cap": return "\u2229";
case "cup": return "\u222a";
case "int": return "\u222b";
case "there4": return "\u2234";
case "sim": return "\u223c";
case "cong": return "\u2245";
case "asymp": return "\u2248";
case "ne": return "\u2260";
case "equiv": return "\u2261";
case "le": return "\u2264";
case "ge": return "\u2265";
case "sub": return "\u2282";
case "sup": return "\u2283";
case "nsub": return "\u2284";
case "sube": return "\u2286";
case "supe": return "\u2287";
case "oplus": return "\u2295";
case "otimes": return "\u2297";
case "perp": return "\u22a5";
case "sdot": return "\u22c5";
case "lceil": return "\u2308";
case "rceil": return "\u2309";
case "lfloor": return "\u230a";
case "rfloor": return "\u230b";
case "lang": return "\u2329";
case "rang": return "\u232a";
case "loz": return "\u25ca";
case "spades": return "\u2660";
case "clubs": return "\u2663";
case "hearts": return "\u2665";
case "diams": return "\u2666";
case "OElig": return "\u0152";
case "oelig": return "\u0153";
case "Scaron": return "\u0160";
case "scaron": return "\u0161";
case "Yuml": return "\u0178";
case "circ": return "\u02c6";
case "tilde": return "\u02dc";
case "ensp": return "\u2002";
case "emsp": return "\u2003";
case "thinsp": return "\u2009";
case "zwnj": return "\u200c";
case "zwj": return "\u200d";
case "lrm": return "\u200e";
case "rlm": return "\u200f";
case "ndash": return "\u2013";
case "mdash": return "\u2014";
case "lsquo": return "\u2018";
case "rsquo": return "\u2019";
case "sbquo": return "\u201a";
case "ldquo": return "\u201c";
case "rdquo": return "\u201d";
case "bdquo": return "\u201e";
case "dagger": return "\u2020";
case "Dagger": return "\u2021";
case "permil": return "\u2030";
case "lsaquo": return "\u2039";
case "rsaquo": return "\u203a";
case "euro": return "\u20ac";
default: return s;
}
}
答案 0 :(得分:2)
testchar
只包含字符串&#34;&amp;#8211;&#34;,XmlTextWriter
正确转义为&#8211;
。您需要使用C#转义语法设置testChar
转义unicode char,如下所示:
[TestMethod]
public void TestEncoding()
{
using (StringWriter sw = new StringWriter())
{
XmlWriter writer = new XmlTextWriter(sw);
const string testChar = "\u8211";
writer.WriteString(testChar);
Assert.AreEqual(testChar, sw.ToString());
writer.Close();
}
}