我正在使用HTML敏捷包,并试图将网站中的某些数据解析为数据表。我已经成功地生成了第一个节点,但是在获取下一个玩家信息时遇到了一些麻烦。
DataTable dt5 = new DataTable();
dt5.Columns.Add("Team");
dt5.Columns.Add("Name");
dt5.Columns.Add("Position");
dt5.Columns.Add("Injury");
dt5.Columns.Add("Status");
var doc = new HtmlWeb().Load("https://www.cbssports.com/nba/injuries/daily/");
DataRow row;
foreach (HtmlAgilityPack.HtmlNode node in doc.DocumentNode.SelectNodes("//*[@id='TableBase']"))
{
row = dt5.NewRow();
//TEAM
foreach (HtmlAgilityPack.HtmlNode node2 in node.SelectNodes(".//tr[1]//td[1]"))
{
row["Team"] = Regex.Replace(node2.InnerText, @"\r\n?|\n| ", "") ;
}
//PLAYER NAME
foreach (HtmlAgilityPack.HtmlNode node3 in node.SelectNodes(".//tr[1]//td[2]//span"))
{
row["Name"] = Regex.Replace(node3.InnerText, @"\r\n?|\n| ", "");
}
//PLAYER POSITION
foreach (HtmlAgilityPack.HtmlNode node4 in node.SelectNodes(".//tr[1]//td[3]"))
{
row["Position"] = Regex.Replace(node4.InnerText, @"\r\n?|\n| ", "");
}
//PLAYER INJURY
foreach (HtmlAgilityPack.HtmlNode node5 in node.SelectNodes(".//tr[1]//td[4]"))
{
row["Injury"] = Regex.Replace(node5.InnerText, @"\r\n?|\n| ", "");
}
//PLAYER STATUS
foreach (HtmlAgilityPack.HtmlNode node6 in node.SelectNodes(".//tr[1]//td[5]"))
{
row["Status"] = Regex.Replace(node6.InnerText, @"\r\n?|\n| ", "");
}
dt5.Rows.Add(row);
}
dataGridView3.DataSource = dt5;
答案 0 :(得分:1)
尝试一下。它对我有用,我只是在控制台应用程序上对其进行了测试。
public static void Process() {
DataTable dt5 = new DataTable();
dt5.Columns.Add("Team");
dt5.Columns.Add("Name");
dt5.Columns.Add("Position");
dt5.Columns.Add("Injury");
dt5.Columns.Add("Status");
var doc = new HtmlWeb().Load("https://www.cbssports.com/nba/injuries/daily/");
DataRow row;
var tables = doc.DocumentNode.SelectNodes("//*[@id='TableBase']/div/div/table");
foreach (HtmlAgilityPack.HtmlNode node in tables) {
var rows = node.SelectNodes("//tr");
foreach (HtmlAgilityPack.HtmlNode node2 in rows) {
row = dt5.NewRow();
var tdNodes = node2.DescendantNodes().Where(o => o.Name == "td");
if (tdNodes.Any()) {
row["Team"] = Regex.Replace(tdNodes.ElementAt(0).InnerText, @"\r\n?|\n| ", "");
row["Name"] = Regex.Replace(tdNodes.ElementAt(1).InnerText, @"\r\n?|\n| ", "");
row["Position"] = Regex.Replace(tdNodes.ElementAt(2).InnerText, @"\r\n?|\n| ", "");
row["Injury"] = Regex.Replace(tdNodes.ElementAt(3).InnerText, @"\r\n?|\n| ", "");
row["Status"] = Regex.Replace(tdNodes.ElementAt(4).InnerText, @"\r\n?|\n| ", "");
dt5.Rows.Add(row);
}
}
}
foreach(DataRow item in dt5.Rows) {
Console.WriteLine(item.ItemArray[0] + " " + item.ItemArray[1]);
}
}
答案 1 :(得分:0)
尤里卡!谢谢大家的帮助,这是我的最终解决方案:
private void button7_Click(object sender, EventArgs e)
{
DataTable dt5 = new DataTable();
dt5.Columns.Add("Team");
dt5.Columns.Add("Name");
dt5.Columns.Add("Position");
dt5.Columns.Add("Injury");
dt5.Columns.Add("Status");
var doc = new HtmlWeb().Load("https://www.cbssports.com/nba/injuries/");
var products = doc.DocumentNode.SelectNodes("//*[@id='TableBase']");
foreach (HtmlNode product in products)
{
DataRow row = dt5.NewRow();
var teamName = product.SelectNodes(".//span[@class='TeamLogoNameLockup-name']");
var playerName = product.SelectNodes(".//span[@class='CellPlayerName--long']");
foreach (HtmlNode T in teamName)
{
row["Team"] = (Regex.Replace(T.InnerText, @"\r\n", "").Replace(" ",""));
dt5.Rows.Add(row);
}
foreach(HtmlNode P in playerName)
{
row["Name"] = (Regex.Replace(P.InnerText, @"\r\n?|\n| ", ""));
Console.WriteLine(Regex.Replace(P.InnerText, @"\r\n?|\n| ", ""));
}
var position = product.SelectNodes(".//td[2][contains(@class, 'TableBase-bodyTd')]");
foreach (HtmlNode Pos in position)
{
row["Position"] = (Regex.Replace(Pos.InnerText, @"\r\n?|\n| ", ""));
}
var injury = product.SelectNodes(".//td[4][contains(@class, 'TableBase-bodyTd')]");
foreach (HtmlNode inj in injury)
{
row["Injury"] = (Regex.Replace(inj.InnerText, @"\r\n?|\n| ", ""));
}
var status = product.SelectNodes(".//td[5][contains(@class, 'TableBase-bodyTd')]");
foreach (HtmlNode stat in status)
{
row["Status"] = (Regex.Replace(stat.InnerText, @"\r\n?|\n| ", ""));
}
}
dataGridView3.DataSource = dt5;
}