C#仅在foreach循环中获得第一个html节点

时间:2019-03-12 00:20:13

标签: c# html-agility-pack

我正在使用 HTML Agility Pack,并试图将网站中的某些数据解析到 DataTable 中。我已经成功取得了第一个节点(第一名球员)的数据,但在获取后续球员的信息时遇到了一些麻烦。

enter image description here

  // Builds a five-column DataTable, fills it from the CBS Sports daily injuries
  // page and binds it to dataGridView3.
  // As written, this produces exactly ONE DataRow per element matched by the
  // outer XPath, because NewRow()/Rows.Add() are called once per outer iteration.
  DataTable dt5 = new DataTable();
            dt5.Columns.Add("Team");
            dt5.Columns.Add("Name");
            dt5.Columns.Add("Position");
            dt5.Columns.Add("Injury");
            dt5.Columns.Add("Status");

            var doc = new HtmlWeb().Load("https://www.cbssports.com/nba/injuries/daily/");
            DataRow row;




            // Iterates over every element whose id attribute equals 'TableBase'.
            // NOTE(review): HtmlAgilityPack's SelectNodes presumably returns null when
            // nothing matches, which would make this (and the inner loops) throw - confirm.
            foreach (HtmlAgilityPack.HtmlNode node in doc.DocumentNode.SelectNodes("//*[@id='TableBase']"))
            {

                row = dt5.NewRow();
                //TEAM
                // "tr[1]" only matches a <tr> that is the first <tr> child of its parent,
                // so rows after the first one are never visited by any of the loops below.
                // If several nodes match, each assignment overwrites the previous one.
                foreach (HtmlAgilityPack.HtmlNode node2 in node.SelectNodes(".//tr[1]//td[1]"))
                {

                    // The pattern removes CR, CRLF, LF and space characters from the cell text.
                    row["Team"] = Regex.Replace(node2.InnerText, @"\r\n?|\n| ", "") ;

                }
                //PLAYER NAME
                // Taken from <span> elements inside the second cell of the first row.
                foreach (HtmlAgilityPack.HtmlNode node3 in node.SelectNodes(".//tr[1]//td[2]//span"))
                {
                    row["Name"] = Regex.Replace(node3.InnerText, @"\r\n?|\n| ", "");
                }
                //PLAYER POSITION
                // Third cell of the first row.
                foreach (HtmlAgilityPack.HtmlNode node4 in node.SelectNodes(".//tr[1]//td[3]"))
                {
                    row["Position"] = Regex.Replace(node4.InnerText, @"\r\n?|\n| ", "");
                }
                //PLAYER INJURY
                // Fourth cell of the first row.
                foreach (HtmlAgilityPack.HtmlNode node5 in node.SelectNodes(".//tr[1]//td[4]"))
                {
                    row["Injury"] = Regex.Replace(node5.InnerText, @"\r\n?|\n| ", "");
                }
                //PLAYER STATUS
                // Fifth cell of the first row.
                foreach (HtmlAgilityPack.HtmlNode node6 in node.SelectNodes(".//tr[1]//td[5]"))
                {
                    row["Status"] = Regex.Replace(node6.InnerText, @"\r\n?|\n| ", "");
                }


                // The single row built above is the only one added for this 'TableBase' element.
                dt5.Rows.Add(row);

            }


            dataGridView3.DataSource = dt5;

2 个答案:

答案 0 :(得分:1)

试试下面的代码。它在我这里可以正常工作,不过我只在控制台应用程序中测试过。

 /// <summary>
 /// Downloads the CBS Sports NBA daily injuries page, copies every table row that
 /// has at least five cells into a DataTable (Team, Name, Position, Injury, Status)
 /// and prints the Team and Name of each collected row to the console.
 /// </summary>
 public static void Process() {
     string[] columns = { "Team", "Name", "Position", "Injury", "Status" };

     DataTable dt5 = new DataTable();
     foreach (string column in columns) {
         dt5.Columns.Add(column);
     }

     // Removes CR, CRLF, LF and space characters from the cell text.
     Regex whitespace = new Regex(@"\r\n?|\n| ");

     var doc = new HtmlWeb().Load("https://www.cbssports.com/nba/injuries/daily/");

     // SelectNodes returns null (not an empty collection) when nothing matches,
     // so every result is checked before it is enumerated.
     var tables = doc.DocumentNode.SelectNodes("//*[@id='TableBase']/div/div/table");
     if (tables != null) {
         foreach (HtmlAgilityPack.HtmlNode table in tables) {
             // The leading '.' keeps the query relative to the current table.
             // A bare "//tr" is evaluated from the document root and would return
             // the rows of every table on each iteration, duplicating the data.
             var rows = table.SelectNodes(".//tr");
             if (rows == null) {
                 continue;
             }

             foreach (HtmlAgilityPack.HtmlNode tr in rows) {
                 // Materialize once: the cells are indexed several times below.
                 var cells = tr.DescendantNodes().Where(o => o.Name == "td").ToList();

                 // Header rows (no <td>) and short rows cannot fill all columns.
                 if (cells.Count < columns.Length) {
                     continue;
                 }

                 DataRow row = dt5.NewRow();
                 for (int i = 0; i < columns.Length; i++) {
                     row[columns[i]] = whitespace.Replace(cells[i].InnerText, "");
                 }
                 dt5.Rows.Add(row);
             }
         }
     }

     foreach (DataRow item in dt5.Rows) {
         Console.WriteLine(item.ItemArray[0] + " " + item.ItemArray[1]);
     }
 }

答案 1 :(得分:0)

尤里卡!谢谢大家的帮助,这是我的最终解决方案:

    /// <summary>
    /// Click handler that downloads the CBS Sports NBA injuries page, builds a
    /// DataTable with one row per player row found inside each 'TableBase' element
    /// (Team, Name, Position, Injury, Status) and binds it to dataGridView3.
    /// </summary>
    /// <param name="sender">The control that raised the event (unused).</param>
    /// <param name="e">Event data (unused).</param>
    private void button7_Click(object sender, EventArgs e)
    {
        DataTable dt5 = new DataTable();
        dt5.Columns.Add("Team");
        dt5.Columns.Add("Name");
        dt5.Columns.Add("Position");
        dt5.Columns.Add("Injury");
        dt5.Columns.Add("Status");

        // Removes CR, CRLF, LF and space characters; a missing node yields an empty string.
        Regex whitespace = new Regex(@"\r\n?|\n| ");
        Func<HtmlNode, string> clean = n => n == null ? string.Empty : whitespace.Replace(n.InnerText, string.Empty);

        var doc = new HtmlWeb().Load("https://www.cbssports.com/nba/injuries/");

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var products = doc.DocumentNode.SelectNodes("//*[@id='TableBase']");
        if (products != null)
        {
            foreach (HtmlNode product in products)
            {
                // The team name is looked up once per 'TableBase' element and
                // repeated on every player row that element contains.
                string team = clean(product.SelectSingleNode(".//span[@class='TeamLogoNameLockup-name']"));

                // Only rows that contain <td> cells; header rows are skipped.
                var playerRows = product.SelectNodes(".//tr[td]");
                if (playerRows == null)
                {
                    continue;
                }

                foreach (HtmlNode playerRow in playerRows)
                {
                    HtmlNode playerName = playerRow.SelectSingleNode(".//span[@class='CellPlayerName--long']");
                    if (playerName == null)
                    {
                        // Not a player row.
                        continue;
                    }

                    // A fresh DataRow per player: reusing one row for the whole table
                    // kept only the last player, and adding the same row twice throws.
                    DataRow row = dt5.NewRow();
                    row["Team"] = team;
                    row["Name"] = clean(playerName);
                    row["Position"] = clean(playerRow.SelectSingleNode("./td[2][contains(@class, 'TableBase-bodyTd')]"));
                    row["Injury"] = clean(playerRow.SelectSingleNode("./td[4][contains(@class, 'TableBase-bodyTd')]"));
                    row["Status"] = clean(playerRow.SelectSingleNode("./td[5][contains(@class, 'TableBase-bodyTd')]"));
                    dt5.Rows.Add(row);
                }
            }
        }

        dataGridView3.DataSource = dt5;
    }