使用C# - HtmlAgilityPack从HTML表中提取特定内容

时间:2015-12-26 15:05:41

标签: c#


C# - HtmlAgilityPack
  - 我想使用HtmlAgilityPack从HTML表中提取特定内容(而不是提取表的全部内容)(见图片),并将结果插入dataGridView1(见C#代码)  
+
  - 交换Column2和Column3的顺序(见图片)

= + = + = + = +
显示图片 这就是我想要的东西 => Click show pic

= + = + = + = + = + = + = + = +
我的测试代码(不理想的代码):此代码会提取表中的所有内容
= + = + = + = +

// Loads test.html, copies the whole target table (every column, in document
// order) into a DataTable and binds it to dataGridView1.

// Clear Datagridview
dataGridView1.DataSource = null;
HtmlAgilityPack.HtmlDocument html = new HtmlAgilityPack.HtmlDocument();
// Load a file (verbatim string literal; the path is relative to the working directory)
html.Load(@"test.html");
// HtmlAgilityPack's SelectNodes returns null, not an empty collection, when
// the XPath matches nothing, so both results are checked before use.
var headers = html.DocumentNode.SelectNodes("//p[4]/table[1]/tr[1]/th");
// Select rows with td elements
var rows = html.DocumentNode.SelectNodes("//p[4]/table[1]/tr[td]");
DataTable table = new DataTable();
if (headers != null)
{
    // Create columns from th
    foreach (HtmlNode header in headers)
    {
        table.Columns.Add(header.InnerText);
    }
    // Rows are only added when columns exist: DataRowCollection.Add rejects
    // a value array longer than the column count.
    if (rows != null)
    {
        foreach (var row in rows)
        {
            // The tr[td] predicate guarantees at least one td per row.
            table.Rows.Add(row.SelectNodes("td").Select(td => td.InnerText).ToArray());
        }
    }
}
// Show Result
dataGridView1.DataSource = table;


= + = + = + = + = + = + = + = +
HTML页面代码
= + = + = + = +



<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">

<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />

</head>

<body>

<center><font size="+2">Title_Test Title_Test</font></center>
<p><font size="+1"><b><span class="yyyyyy">2_Title_test&nbsp;</span><font color="#000000">NAME 
PROJET</font></b></font></p>
<p>
<table>
	<tr>
		<td><b>Testtable</b></td>
		<td>:</td>
		<td>oo</td>
	</tr>
	<tr>
		<td><b>Testtable2TesttableTesttable</b></td>
		<td>:</td>
		<td>uu</td>
	</tr>
	<tr>
		<td><b>Testtable3</b></td>
		<td>:</td>
		<td>iii</td>
	</tr>
</table>
</p>
<p><font size="+1"><b><a name="GGGGGGGGG"></a>InfoTest_InfoTest_InfoTest</b></font></p>
<p><b>testtesttesttesttesttesttesttesttesttest </b>.<br />
<table border="1" bordercolor="#808080" cellpadding="2">
	<tr valign="center">
		<th align="middle">Column0</th>
		<th align="middle">Column1</th>
		<th align="middle">Column2</th>
		<th align="middle">Column3</th>
		<th align="middle">Column4</th>
		<th align="middle">Column5</th>
		<th align="middle">Column6</th>
		<th align="middle">Column7</th>
	</tr>
	<tr valign="center">
		<td align="left">pola</td>
		<td align="right">111</td>
		<td align="right">po111</td>
		<td align="right">1111</td>
		<td align="right">po1111</td>
		<td align="right">NN</td>
		<td align="right">VV</td>
		<td align="right">NV</td>
	</tr>
	<tr valign="center">
		<td align="left">yato</td>
		<td align="right">222</td>
		<td align="right">ya222</td>
		<td align="right">2222</td>
		<td align="right">ya2222</td>
		<td align="right">NN</td>
		<td align="right">VV</td>
		<td align="right">NV</td>
	</tr>
	<tr valign="center">
		<td align="left">romaz</td>
		<td align="right">333</td>
		<td align="right">ro333</td>
		<td align="right">3333</td>
		<td align="right">ro3333</td>
		<td align="right">NN</td>
		<td align="right">VV</td>
		<td align="right"></td>
	</tr>
	<tr valign="center">
		<td align="left">anik</td>
		<td align="right">444</td>
		<td align="right">an444</td>
		<td align="right">4444</td>
		<td align="right">an4444</td>
		<td align="right">NN</td>
		<td align="right">VV</td>
		<td align="right"></td>
	</tr>
	<tr valign="center">
		<td align="left">kilwa</td>
		<td align="right">555</td>
		<td align="right">ki555</td>
		<td align="right">5555</td>
		<td align="right">ki5555</td>
		<td align="right">NN</td>
		<td align="right">VV</td>
		<td align="right"></td>
	</tr>
	<tr valign="center">
		<td align="left">sekil</td>
		<td align="right">666</td>
		<td align="right">se666</td>
		<td align="right">5555</td>
		<td align="right">se6666</td>
		<td align="right">NN</td>
		<td align="right">VV</td>
		<td align="right"></td>
	</tr>
	<tr valign="center">
		<td align="left">janit</td>
		<td align="right">777</td>
		<td align="right">ja777</td>
		<td align="right">7777</td>
		<td align="right">ja7777</td>
		<td align="right">NN</td>
		<td align="right">VV</td>
		<td align="right"></td>
	</tr>
	
</table>
</p>

</body>

</html>
&#13;
&#13;
&#13;

= + = + = + = + = + = + = + = +
谢谢,期待您的回答。

1 个答案:

答案 0 :(得分:0)

也许先尝试提取表格?

        // Extracts the first five columns of the bordered table into a DataTable,
        // with Column2 and Column3 swapped (output order: 0, 1, 3, 2, 4).
        HtmlAgilityPack.HtmlDocument html = new HtmlAgilityPack.HtmlDocument();
        // Load a file
        html.Load(@"c:\OneDrive\Work\MS Projects\text.html");
        // Only the data table carries border="1", so that attribute selects it.
        HtmlNode table = html.DocumentNode.SelectSingleNode("//table[@border='1']");
        DataTable dt = new DataTable();

        // Source cell index for each output column; one mapping shared by the
        // header row and the data rows so the two cannot drift apart.
        int[] columnOrder = { 0, 1, 3, 2, 4 };
        // Highest source index + 1: rows with fewer cells cannot be mapped.
        int minCellCount = 5;

        // SelectSingleNode / SelectNodes return null when nothing matches, so
        // each lookup is checked; dt simply stays empty if the table is missing.
        var rows = table != null ? table.SelectNodes("tr") : null;
        var headerCells = (rows != null && rows.Count > 0) ? rows[0].SelectNodes("th") : null;

        if (headerCells != null && headerCells.Count >= minCellCount)
        {
            // Row 0 holds the headers (th cells).
            foreach (int source in columnOrder)
            {
                dt.Columns.Add(new DataColumn(headerCells[source].InnerText));
            }

            // Rows after the first hold data (td cells).
            for (int i = 1; i < rows.Count; ++i)
            {
                var cells = rows[i].SelectNodes("td");
                // Skip rows without td cells or with too few cells to map.
                if (cells == null || cells.Count < minCellCount)
                {
                    continue;
                }

                DataRow dr = dt.NewRow();
                for (int target = 0; target < columnOrder.Length; ++target)
                {
                    dr[target] = cells[columnOrder[target]].InnerText;
                }
                dt.Rows.Add(dr);
            }
        }