嗨,我有以下HTML列表:
<ol><li>klndzldgnldzl</li>
<li>zdkgnlkzdngzlkdgn</li>
<li>dzlknglkdznglkzdn</li></ol>
当前,我正在使用 HtmlAgilityPack 将该列表转换为以下纯文本格式:
* klndzldgnldzl
* zdkgnlkzdngzlkdgn
* dzlknglkdznglkzdn
以下是我用来格式化列表的方法:
/// <summary>
/// Converts an HTML fragment to plain text by parsing it with HtmlAgilityPack
/// and walking the resulting node tree with <c>ConvertTo</c>.
/// </summary>
/// <param name="html">The HTML markup to convert.</param>
/// <returns>The text written by the node walk.</returns>
public static string ConvertToPlainText(string html)
{
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    // StringWriter is IDisposable; scope it so it is always released,
    // even if the node walk throws.
    using (StringWriter sw = new StringWriter())
    {
        ConvertTo(doc.DocumentNode, sw);

        // No Flush() needed: StringWriter writes straight into its in-memory buffer.
        return sw.ToString();
    }
}
/// <summary>
/// Counts the whitespace-separated words in a plain-text string.
/// The content has to be converted to plain text first (using ConvertToPlainText).
/// </summary>
/// <param name="plainText">The plain text; may be null or empty.</param>
/// <returns>
/// The number of non-empty tokens separated by spaces, tabs, carriage returns or
/// line feeds; 0 when <paramref name="plainText"/> is null or empty.
/// </returns>
public static int CountWords(string plainText)
{
    if (String.IsNullOrEmpty(plainText))
    {
        return 0;
    }

    // Include '\r' and '\t' so "\r\n" line breaks do not stick to words, and drop
    // empty entries so repeated or leading/trailing whitespace is not counted as words.
    char[] separators = { ' ', '\t', '\r', '\n' };
    return plainText.Split(separators, StringSplitOptions.RemoveEmptyEntries).Length;
}
/// <summary>
/// Shortens <paramref name="text"/> so that it is at most <paramref name="length"/>
/// characters long, ending the shortened text with " ..." when there is room for it.
/// </summary>
/// <param name="text">The text to shorten; null or empty is returned unchanged.</param>
/// <param name="length">The maximum length of the result, in characters.</param>
/// <returns>
/// <paramref name="text"/> itself when it already fits; otherwise the truncated text.
/// When <paramref name="length"/> is smaller than the 4-character ellipsis, the text
/// is cut to exactly <paramref name="length"/> characters without an ellipsis.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> is negative and <paramref name="text"/> is not empty.
/// </exception>
public static string Cut(string text, int length)
{
    const string Ellipsis = " ...";

    if (String.IsNullOrEmpty(text) || text.Length <= length)
    {
        return text;
    }

    if (length < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(length), "Length must not be negative.");
    }

    // Not enough room for the ellipsis: (length - 4) would be negative and
    // Substring would throw, so fall back to a hard cut.
    if (length < Ellipsis.Length)
    {
        return text.Substring(0, length);
    }

    return text.Substring(0, length - Ellipsis.Length) + Ellipsis;
}
/// <summary>
/// Converts every direct child of <paramref name="node"/>, in document order,
/// by handing each one to <c>ConvertTo</c>.
/// </summary>
/// <param name="node">The node whose children are converted.</param>
/// <param name="outText">The writer that receives the converted text.</param>
private static void ConvertContentTo(HtmlNode node, TextWriter outText)
{
    foreach (var child in node.ChildNodes)
    {
        ConvertTo(child, outText);
    }
}
/// <summary>
/// Writes the plain-text rendering of <paramref name="node"/> to
/// <paramref name="outText"/>, recursing into child nodes via <c>ConvertContentTo</c>.
/// </summary>
/// <remarks>
/// Comments produce no output. Text nodes are written after being passed through
/// <c>HtmlEntity.DeEntitize</c>, unless they sit inside script/style or are
/// whitespace only. Each "li" element is prefixed with a line break and "* ";
/// "p" and "br" elements are prefixed with a line break.
/// </remarks>
/// <param name="node">The node to convert.</param>
/// <param name="outText">The writer that receives the converted text.</param>
private static void ConvertTo(HtmlNode node, TextWriter outText)
{
    switch (node.NodeType)
    {
        case HtmlNodeType.Comment:
            // Comments never reach the output.
            return;

        case HtmlNodeType.Document:
            ConvertContentTo(node, outText);
            return;

        case HtmlNodeType.Text:
        {
            // The content of script and style elements must not be output.
            string parentTag = node.ParentNode.Name;
            if (parentTag == "script" || parentTag == "style")
            {
                return;
            }

            string raw = ((HtmlTextNode)node).Text;

            // Skip a special closing node that was output as text.
            if (HtmlNode.IsOverlappedClosingElement(raw))
            {
                return;
            }

            // Skip text that is nothing but whitespace.
            if (raw.Trim().Length == 0)
            {
                return;
            }

            outText.Write(HtmlEntity.DeEntitize(raw));
            return;
        }

        case HtmlNodeType.Element:
        {
            string tag = node.Name;
            if (tag == "li")
            {
                // Each list item starts on its own line with a bullet.
                outText.Write("\r\n* ");
            }
            else if (tag == "p" || tag == "br")
            {
                // Paragraphs and explicit breaks become a CRLF.
                outText.Write("\r\n");
            }

            if (node.HasChildNodes)
            {
                ConvertContentTo(node, outText);
            }
            return;
        }
    }
}
// Sample input: a three-item ordered list on a single line.
string s = "<ol><li>klndzldgnldzl</li><li>zdkgnlkzdngzlkdgn</li><li>dzlknglkdznglkzdn</li></ol>";
// Plain-text rendering of the list.
var lala = ConvertToPlainText(s); // Formatted text
我的问题是有没有办法以以下格式输出列表:
我猜需要在某个地方用 for 循环遍历列表项,但我对 HtmlAgilityPack 的了解还不够深入,不知道该如何实现。