在C#中从字符串(包含HTML标签)创建字典或列表

时间:2010-03-07 19:45:52

标签: c# html-parsing html-table

我有这样的字符串:

// Sample input: an HTML fragment with three <tr> rows, each holding two numeric <td> cells.
string s = @"
    <tr>
    <td>11</td><td>12</td>
    </tr>
    <tr>
    <td>21</td><td>22</td>
    </tr>
    <tr>
    <td>31</td><td>32</td>
    </tr>";

如何从字符串 s 填充字典 Dictionary<int, int> d = new Dictionary<int, int>(),使其结果与下面的代码相同:

// Desired content: one entry per row, first cell as the key and second cell as the value.
d.Add(11, 12);
d.Add(21, 22);
d.Add(31, 32);

6 个答案:

答案 0 :(得分:11)

您应该使用HTML Agility Pack

例如:(已测试)

// Let HTML Agility Pack parse the fragment held in s.
var doc = new HtmlDocument();
doc.LoadHtml(s);

// Project every <tr> to its <td> descendants, then build the dictionary from
// the first cell (key) and the last cell (value) of each row.
var dict = doc.DocumentNode
    .Descendants("tr")
    .Select(row => row.Descendants("td"))
    .ToDictionary(
        cells => int.Parse(cells.First().InnerText),
        cells => int.Parse(cells.Last().InnerText));

如果HTML总是格式正确,您可以使用LINQ-to-XML;代码几乎完全相同。

答案 1 :(得分:3)

代码

using RE=System.Text.RegularExpressions;

....

/// <summary>
/// Reads each pair of adjacent numeric cells from a fixed HTML fragment into a
/// dictionary (first cell is the key, second cell is the value) and prints every entry.
/// </summary>
public void Run()
{
    string s=@"
<tr>
<td>11</td><td>12</td>
</tr>
<tr>
<td>21</td><td>22</td>
</tr>
<tr>
<td>31</td><td>32</td>
</tr>";

    var d = new Dictionary<int, int>();

    // Group 1 and group 2 capture the digits of two back-to-back <td> cells.
    foreach (RE.Match cellPair in RE.Regex.Matches(s, "<td>(\\d+)</td><td>(\\d+)</td>"))
    {
        int first = Int32.Parse(cellPair.Groups[1].Value);
        int second = Int32.Parse(cellPair.Groups[2].Value);
        d.Add(first, second);
    }

    // Print one "key=value" line per entry.
    foreach (KeyValuePair<int, int> entry in d)
    {
        System.Console.WriteLine("  {0}={1}", entry.Key, entry.Value);
    }
}

答案 2 :(得分:1)

string s =
@"<tr> 
<td>11</td><td>12</td> 
</tr> 
<tr> 
<td>21</td><td>22</td> 
</tr> 
<tr> 
<td>31</td><td>32</td> 
</tr>";

// Fragment conformance lets the reader accept several top-level <tr> elements,
// since the markup has no single root element.
var settings = new XmlReaderSettings
{
    ConformanceLevel = ConformanceLevel.Fragment,
    IgnoreWhitespace = true
};

// Both readers are IDisposable; release them once the document has been built from them.
XPathDocument doc;
using (var text = new StringReader(s))
using (var reader = XmlReader.Create(text, settings))
{
    doc = new XPathDocument(reader);
}

// One dictionary entry per <tr>: first <td> is the key, second <td> is the value.
Dictionary<int, int> dict = doc.CreateNavigator()
   .Select("tr")
   .Cast<XPathNavigator>()
   .ToDictionary(
      r => r.SelectSingleNode("td[1]").ValueAsInt,
      r => r.SelectSingleNode("td[2]").ValueAsInt
   );

答案 3 :(得分:0)

如果您不想使用 HTML Agility Pack,可以尝试类似下面的代码:

// Remove every opening <tr>, then cut at each complete closing </tr> so that one
// array entry holds the cells of one row. The string[] overload of Split is used
// because it also compiles on .NET Framework, which lacks the single-string overload.
var arr = s.Replace("<tr>", "").Split(new[] { "</tr>" }, StringSplitOptions.RemoveEmptyEntries);

var d = new Dictionary<int, int>();
foreach (var row in arr) {
  // Same approach per row: strip <td>, split on </td>.
  var itm = row.Replace("<td>", "").Split(new[] { "</td>" }, StringSplitOptions.RemoveEmptyEntries);
  // Text left over after the last </tr> (e.g. a trailing newline) yields fewer than two pieces; skip it.
  if (itm.Length < 2) continue;
  // int.Parse tolerates the whitespace/newlines that surround the digits.
  d.Add(int.Parse(itm[0]), int.Parse(itm[1]));
}

(未测试)

答案 4 :(得分:0)

var s = "<tr><td>11</td><td>12</td></tr><tr><td>21</td><td>22</td></tr><tr><td>31</td><td>32</td></tr>";

var results = new Dictionary<int, int>();
var rowEnd = new[] { "</tr>" };
var cellEnd = new[] { "</td>" };

// Cut the markup at every closing row tag, then every row at its closing cell tags.
foreach ( var rowText in s.Split( rowEnd, StringSplitOptions.None ) )
{
    var numbers = new List<int>();

    foreach ( var cellText in rowText.Split( cellEnd, StringSplitOptions.None ) )
    {
        // Drop the opening tags so that only the cell content is left.
        var content = cellText.Replace( "<td>", string.Empty ).Replace( "<tr>", string.Empty );

        // Pieces that are not integers (e.g. the empty tail of a row) are ignored.
        int number;
        if ( !int.TryParse( content, out number ) )
        {
            continue;
        }

        numbers.Add( number );
    }

    // Only rows with exactly two numeric cells become an entry.
    if ( numbers.Count != 2 )
    {
        continue;
    }

    results.Add( numbers[0], numbers[1] );
}

答案 5 :(得分:0)

using RE = System.Text.RegularExpressions;

....

/// <summary>
/// Reads each pair of adjacent numeric cells from a fixed HTML fragment into a
/// dictionary (first cell is the key, second cell is the value) and prints every entry.
/// </summary>
public void Run()
{
    // The row/cell markup must be present in the literal: the regex below only
    // matches digits that are wrapped in <td>...</td>.
    string s = @"
<tr>
<td>11</td><td>12</td>
</tr>
<tr>
<td>21</td><td>22</td>
</tr>
<tr>
<td>31</td><td>32</td>
</tr>";

    // Group 1 and group 2 capture the digits of two back-to-back <td> cells.
    var mcol = RE.Regex.Matches(s, "<td>(\\d+)</td><td>(\\d+)</td>");
    var d = new Dictionary<int, int>();

    foreach (RE.Match match in mcol)
    {
        d.Add(Int32.Parse(match.Groups[1].Value),
              Int32.Parse(match.Groups[2].Value));
    }

    // Print one "key=value" line per entry.
    foreach (var key in d.Keys)
    {
        System.Console.WriteLine("  {0}={1}", key, d[key]);
    }
}