我正在尝试使用HtmlAgilityPack从输入中删除任何脚本。
我的输入:
<div>If the amount<500 show results. Else do not show results.<mytag1>This is an xml element</mytag1></div><script>alert("welcome");</script>
预期结果:
<div>If the amount<500 show results. Else do not show results.<mytag1>This is an xml element</mytag1></div>
实际结果:
<div>If the amount<500 show="" results.="" else="" do="" not="" /><mytag1>This is an xml element</mytag1></div>
这是我的代码
/// <summary>
/// Names that <c>SanitizeHtmlNode</c> looks up against <c>node.Name</c>;
/// an element node whose name is in this set is removed from the document.
/// Lookups use the default (case-sensitive) string comparer.
/// </summary>
public HashSet<string> BlackList = new HashSet<string>
{
    "script",
    "iframe",
    "form",
    "head",
    "meta",
    "comment"
};
/// <summary>
/// HTML-decodes <paramref name="sInputString"/>, parses it with HtmlAgilityPack,
/// runs <c>HtmlSanitizer.SanitizeHtmlNode</c> over the document and serializes
/// the result through an <see cref="XmlTextWriter"/>, with everything up to and
/// including the first <c>?&gt;</c> stripped from the serialized text.
/// </summary>
/// <param name="sInputString">The (possibly HTML-encoded) markup to sanitize.</param>
/// <returns>
/// The sanitized markup, or an empty string when the input is null or empty.
/// </returns>
public static string GetSafeHtmlString(string sInputString)
{
    // Nothing to sanitize; avoids passing null into LoadHtml.
    if (string.IsNullOrEmpty(sInputString))
    {
        return string.Empty;
    }

    HtmlDocument doc = new HtmlDocument();
    doc.OptionFixNestedTags = true;
    doc.OptionDefaultStreamEncoding = System.Text.Encoding.UTF8;

    // NOTE(review): decoding first turns "&lt;" into "<", and a stray "<"
    // (e.g. "amount<500") appears to be parsed as the start of a tag.
    // Confirm whether such characters should be re-escaped before LoadHtml.
    doc.LoadHtml(HttpUtility.HtmlDecode(sInputString));

    HtmlSanitizer sanitizer = new HtmlSanitizer();
    sanitizer.SanitizeHtmlNode(doc.DocumentNode);

    string output;
    using (StringWriter sw = new StringWriter())
    using (XmlWriter writer = new XmlTextWriter(sw))
    {
        doc.DocumentNode.WriteTo(writer);

        // Flush before reading so no pending writer state is lost.
        writer.Flush();
        output = sw.ToString();
    }

    if (!string.IsNullOrEmpty(output))
    {
        // Strip the leading "<?xml ... ?>" declaration. Only cut when the
        // terminator is actually present: with -1 the original code would
        // silently drop the first character of the output.
        int at = output.IndexOf("?>", System.StringComparison.Ordinal);
        if (at >= 0)
        {
            output = output.Substring(at + 2);
        }
    }

    return output;
}
/// <summary>
/// Recursively walks the subtree rooted at <paramref name="node"/> and removes
/// every node that is blacklisted: element nodes whose name is in
/// <c>BlackList</c>, and comment nodes when <c>BlackList</c> contains "comment".
/// A removed node is not descended into.
/// </summary>
/// <param name="node">Root of the subtree to sanitize; null is ignored.</param>
private void SanitizeHtmlNode(HtmlNode node)
{
    if (node == null)
    {
        return;
    }

    bool isBlacklistedElement =
        node.NodeType == HtmlNodeType.Element && BlackList.Contains(node.Name);

    // Comment nodes are not Element nodes, so the "comment" blacklist entry
    // could never match through the element check alone.
    bool isBlacklistedComment =
        node.NodeType == HtmlNodeType.Comment && BlackList.Contains("comment");

    if (isBlacklistedElement || isBlacklistedComment)
    {
        node.Remove();
        return;
    }

    if (!node.HasChildNodes)
    {
        return;
    }

    // Iterate backwards so removing a child does not shift the indices
    // of the children that are still to be visited.
    for (int i = node.ChildNodes.Count - 1; i >= 0; i--)
    {
        SanitizeHtmlNode(node.ChildNodes[i]);
    }
}
如何获得预期结果？HTML 解析器把 "<" 当作新 HTML 标签的开始。如何让解析器把输入中的 "<"（小于号）视为普通字符，而不是 HTML 标签的开头？