我想知道如何使用 HtmlAgilityPack 删除如下所示的 HTML 标签?
// Load the HTML held in Description into an HtmlAgilityPack document.
HtmlDocument htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(Description);

// Markups to be removed.
List<string> markups = new List<string>
{
    "br",
    "ol",
    "ul",
    "li"
};
感谢
答案 0 :(得分:0)
你可以使用下面这个方法:
/// <summary>
/// Strips HTML tags from <paramref name="content"/> and collapses every run of
/// whitespace into a single space.
/// </summary>
/// <param name="content">The HTML (or plain text) to clean. May be null or empty.</param>
/// <returns>
/// The text with all tags removed, or an empty string when
/// <paramref name="content"/> is null or empty.
/// </returns>
/// <remarks>
/// This is a purely textual, regex-based clean-up: the markup is not parsed,
/// HTML entities are not decoded, and leading/trailing whitespace is collapsed
/// to one space rather than trimmed.
/// </remarks>
public static string RemoveHTMLTags(string content)
{
    // Null/empty input yields an empty result. This is handled explicitly
    // instead of relying on a catch-all around the ArgumentNullException
    // that Regex.Replace throws for null input.
    if (string.IsNullOrEmpty(content))
    {
        return string.Empty;
    }

    // Matches "<" up to the next ">" or, failing that, the end of the input,
    // so an unterminated trailing tag is removed as well. The static
    // Regex.Replace overload reuses the framework's pattern cache, so no
    // Regex instance is rebuilt on every call.
    string withoutTags = Regex.Replace(content, @"<[^>]*(>|$)", string.Empty);

    // Collapse whitespace runs (spaces, tabs, line breaks) to a single space.
    return Regex.Replace(withoutTags, @"[\s\r\n]+", " ");
}
答案 1 :(得分:0)
// Markups to be removed.
var markups = new List<string>
{
    "br",
    "ol",
    "ul",
    "li"
};

// One "//tag" selector per markup, joined into a single union XPath
// of the form "//br | //ol | //ul | //li".
var selectors = from tag in markups
                select "//" + tag;
var xpath = string.Join(" | ", selectors);

// The null check covers the case where the query produces no collection.
var nodes = htmlDoc.DocumentNode.SelectNodes(xpath);
if (nodes != null)
{
    foreach (var match in nodes)
    {
        match.Remove();
    }
}