我想在ff中创建一个表用于显示数据,但我遇到了一些问题。
/// <summary>
/// One parsed book entry from the page.
/// </summary>
public class Book
{
    /// <summary>The <c>href</c> attribute of the book's link.</summary>
    public HtmlAttribute Href { get; set; }

    /// <summary>Text of the entry's <c>h3</c> heading.</summary>
    public string Title { get; set; }

    /// <summary>Text of the entry's <c>div</c>; the parsing code stores the description here.</summary>
    public string Author { get; set; }

    /// <summary>Text of the entry's <c>ul</c> character list, when the entry has one.</summary>
    public string Characters { get; set; }
}
这是我要解析的页面。我需要 href 值、链接文本、描述和角色列表(有时没有角色列表):
<div id=title>
<li>
<h3><a href="www.harrypotter.com">Harry Potter</a></h3>
<div>Harry James Potter is the title character of J. K. Rowling's Harry Potter series. </div>
<ul>
<li>Harry Potter</li>
<li>Hermione Granger</li>
<li>Ron Weasley</li>
</ul>
</li>
<li>
<h3><a href="www.littleprince.com">Little Prince</a></h3>
<div>A little girl lives in a very grown-up world with her mother, who tries to prepare her for it. </div>
</li>
</div>
这是我解析它并将其放入列表的代码:
List<Book> BookList= new List<Book>();
// Collect every <h3>, <div> and <ul> that sits under an <li> inside the element with id="title".
// The three queries run independently of each other, so their results are not guaranteed to
// line up one-to-one (the second <li> in the sample markup has no <ul>).
// NOTE(review): HtmlAgilityPack's SelectNodes presumably returns null when nothing matches —
// confirm; if so, the Select calls below would throw NullReferenceException.
var titleNode = doc.DocumentNode.SelectNodes("//*[@id=\"title\"]//li//h3");
var descNode = doc.DocumentNode.SelectNodes("//*[@id=\"title\"]//li//div");
// Despite its name, authorNode holds the <ul> (character list) nodes.
var authorNode = doc.DocumentNode.SelectNodes("//*[@id=\"title\"]//li//ul");
// Flatten each node set to the text content of its nodes.
var title = titleNode.Select(node => node.InnerText).ToList();
var desc = descNode.Select(node => node.InnerText).ToList();
var characters= authorNode.Select(node => node.InnerText).ToList();
// NOTE(review): `Title` is not declared in this snippet; the list above is named `title` —
// presumably a typo, confirm.
for (int i = 0; i < Title.Count(); ++i)
{
var list= new Book();
list.Title= title[i];
// The description text is stored in the Author property.
list.Author= desc[i];
// Indexes the <ul> texts by book position; `characters` can be shorter than `title`
// when a book has no <ul>, in which case this index is out of range.
list.Characters = characters[i];
// Href is never assigned here.
BookList.Add(list);
}
我的问题是:1)我如何获得 href 值并将其添加到列表中? 2)有些条目在 HTML 中没有角色列表(<ul>)标签,如何在不出现 NullReferenceException 错误的情况下获取列表?注意:我无法对 HTML 进行任何更改。
答案 0 :(得分:0)
我在不使用 HtmlAgilityPack 的情况下解决了您的问题,这里使用的是 System.Xml。
注意:您应该添加某个唯一值来标识主 li 元素,在这里我添加了 class 属性,其值为 'Main'。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
namespace Test
{
/// <summary>
/// One parsed book entry; every field is kept as plain text.
/// </summary>
public class Book
{
    /// <summary>Value of the <c>href</c> attribute on the entry's link.</summary>
    public string Href { get; set; }

    /// <summary>Text of the entry's <c>h3</c> heading.</summary>
    public string Title { get; set; }

    /// <summary>Text of the entry's <c>div</c>; the parser stores the description here.</summary>
    public string Author { get; set; }

    /// <summary>Text taken from the entry's <c>ul</c> character list.</summary>
    public string Characters { get; set; }
}
class Program
{
    /// <summary>
    /// Entry point: builds the sample markup, loads it as XML and collects one
    /// <see cref="Book"/> per <c>li class='Main'</c> element.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string str="<div id='title'><li class='Main'><h3><a href='www.harrypotter.com'>Harry Potter</a></h3><div>Harry James Potter is the title character of J. K. Rowling's Harry Potter series. </div>";
        str += "<ul><li>Harry Potter</li><li>Hermione Granger</li><li>Ron Weasley</li></ul></li><li class='Main'><h3><a href='www.littleprince.com'>Little Prince</a></h3><div>A little girl lives in a very grown-up world with her mother, who tries to prepare her for it. </div></li></div>";

        XmlDocument doc = new XmlDocument();
        // LoadXml only accepts well-formed XML, so the markup must be XML-clean.
        doc.LoadXml(str);

        List<Book> BookList = ParseBooks(doc);
    }

    /// <summary>
    /// Creates one <see cref="Book"/> for every <c>li</c> with <c>class="Main"</c> under the
    /// element whose <c>id</c> is <c>title</c>. Missing parts become empty strings.
    /// </summary>
    /// <param name="doc">The loaded document to query.</param>
    /// <returns>The parsed books, in document order.</returns>
    private static List<Book> ParseBooks(XmlDocument doc)
    {
        List<Book> books = new List<Book>();
        XmlNodeList entries = doc.SelectNodes("//*[@id=\"title\"]//li[@class=\"Main\"]");

        foreach (XmlNode entry in entries)
        {
            Book book = new Book();
            book.Title = TextOrEmpty(entry.SelectSingleNode("h3"));
            book.Author = TextOrEmpty(entry.SelectSingleNode("div"));
            book.Characters = JoinCharacters(entry.SelectSingleNode("ul"));

            // Address the attribute directly instead of taking the heading's first child:
            // a first child that is a text node has a null Attributes collection.
            XmlNode hrefAttribute = entry.SelectSingleNode("h3/a/@href");
            book.Href = hrefAttribute == null ? string.Empty : hrefAttribute.Value;

            books.Add(book);
        }

        return books;
    }

    /// <summary>Returns the node's text, or an empty string when the node is absent.</summary>
    private static string TextOrEmpty(XmlNode node)
    {
        return node == null ? string.Empty : node.InnerText;
    }

    /// <summary>
    /// Joins the text of each <c>li</c> child with ", " so the names stay distinguishable;
    /// returns an empty string when the entry has no list.
    /// </summary>
    private static string JoinCharacters(XmlNode listNode)
    {
        if (listNode == null)
        {
            return string.Empty;
        }

        string[] names = listNode.SelectNodes("li")
                                 .Cast<XmlNode>()
                                 .Select(item => item.InnerText)
                                 .ToArray();
        return string.Join(", ", names);
    }
}
}
答案 1 :(得分:0)
我会这样做。如果有问题请告诉我,我可以提供帮助。
// Walk each top-level <li> (a direct child of the element whose id is "title") and read its
// title, link target, description and optional character list without dereferencing a
// missing node.
var lis = doc.DocumentNode.Descendants("li").Where(_ => _.ParentNode.Id.Equals("title"));
foreach (var li in lis)
{
    // get title and href; the <a> is nested inside the <h3>, so search the heading's
    // descendants rather than filtering the <h3> nodes themselves by name
    var heading = li.Descendants("h3").FirstOrDefault();
    var title = heading?.InnerText ?? string.Empty;
    var href = heading?.Descendants("a").FirstOrDefault()?.Attributes["href"]?.Value ?? string.Empty;
    var desc = li.Descendants("div").FirstOrDefault()?.InnerHtml ?? string.Empty;
    // Some entries have no <ul>; skip the character loop for them instead of enumerating null.
    var characterList = li.Descendants("ul").FirstOrDefault();
    if (characterList != null)
    {
        foreach (var character in characterList.Descendants("li"))
        {
            var val = character.InnerText;
        }
    }
}