
时间:2015-01-14 06:32:33

标签: c# parsing html-agility-pack


<div class="srg">
<li class="g">...</li>
<li class="g">...</li>
<li class="g">...</li>
<li class="g">...</li>
<li class="g">...</li>
<li class="g">...</li>
</div>
<li class="g">...</li>


// Download a page and list the text of every <li class="g"> element in it.
HtmlAgilityPack.HtmlDocument doc1 = new HtmlAgilityPack.HtmlDocument();

// NOTE(review): everything after '#' in a URL is a fragment and is not sent to the server,
// so the "q=..." part of this address probably never reaches Google - confirm the URL returns the expected markup.
// Dispose the response and the reader so the underlying connection is released.
using (WebResponse response = WebRequest.Create("http://www.google.com/?gws_rd=ssl#q=(404)8271500").GetResponse())
using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.Default)) //put your encoding
{
    // Without this call the document stays empty and the XPath query below matches nothing.
    doc1.Load(reader);
}

var selectNodes = doc1.DocumentNode.SelectNodes("//li[@class='g']");

// SelectNodes returns null (not an empty collection) when nothing matches.
if (selectNodes != null)
{
    foreach (var node in selectNodes)
    {
        //node.InnerText will give you the text content of the li tags ...
    }
}

2 个答案:

答案 0 :(得分:1)


        // Sample markup to parse; the verbatim string doubles every quote ("").
        string result = @"<div class=""srg"">
                        <li class=""g"">...</li>
                        <li class=""g"">...</li>
                        <li class=""g"">...</li>
                        <li class=""g"">...</li>
                        <li class=""g"">...</li>
                        <li class=""g"">...</li>
                        </div>";

        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

        // The markup has to be loaded into the document before it can be queried.
        doc.LoadHtml(result);

        var selectNodes = doc.DocumentNode.SelectNodes("//li[@class='g']");

        // SelectNodes returns null (not an empty collection) when nothing matches.
        if (selectNodes != null)
        {
            foreach (var node in selectNodes)
            {
                //node.InnerText will give you the text content of the li tags ...
            }
        }

答案 1 :(得分:0)


string query = "(404)8271500";
string json;

// Get the JSON from the API. The enclosing method must be declared async because of the await below.
// You need HttpClient https://www.nuget.org/packages/Microsoft.Net.Http
// NOTE(review): this Google AJAX search endpoint may no longer be available - confirm it still responds.
using (var client = new HttpClient())
{
    // Escape the query so characters such as '&', '#', '+' or spaces cannot alter the request URL.
    json = await client.GetStringAsync("http://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=large&start=0&q=" + Uri.EscapeDataString(query));
}

// Parse the JSON string into the GoogleObject graph.
// You need Json.NET https://www.nuget.org/packages/Newtonsoft.Json/
GoogleObject googleObject = JsonConvert.DeserializeObject<GoogleObject>(json);

// Guard against a response without a result list before iterating.
if (googleObject != null && googleObject.responseData != null && googleObject.responseData.results != null)
{
    foreach (var item in googleObject.responseData.results)
    {
        // Both lines belong to the loop body; the braces keep 'item' in scope for the second one.
        Console.WriteLine(item.title); // title
        Console.WriteLine(item.content); // description
    }
}


/// <summary>
/// Root object that the search response JSON is deserialized into.
/// Property names are kept in lower camel case, presumably to mirror the JSON keys.
/// </summary>
public class GoogleObject
{
    /// <summary>Payload of the response; holds the result list and the cursor.</summary>
    public Responsedata responseData { get; set; }

    /// <summary>Untyped details field of the response.</summary>
    public object responseDetails { get; set; }

    /// <summary>Numeric status field of the response.</summary>
    public int responseStatus { get; set; }
}

/// <summary>
/// Payload section of the deserialized response: the results plus the cursor.
/// </summary>
public class Responsedata
{
    /// <summary>Search results contained in the response.</summary>
    public Result[] results { get; set; }

    /// <summary>Cursor object delivered alongside the results.</summary>
    public Cursor cursor { get; set; }
}

/// <summary>
/// Cursor section of the deserialized response.
/// The properties are plain data holders; presumably they describe paging of the result set - confirm against the API.
/// </summary>
public class Cursor
{
    public string resultCount { get; set; }
    public Page[] pages { get; set; }
    public string estimatedResultCount { get; set; }
    public int currentPageIndex { get; set; }
    public string moreResultsUrl { get; set; }
    public string searchResultTime { get; set; }
}

/// <summary>
/// One entry of the cursor's page list.
/// </summary>
public class Page
{
    /// <summary>The "start" value of the page, kept as a string.</summary>
    public string start { get; set; }

    /// <summary>The numeric label of the page.</summary>
    public int label { get; set; }
}

/// <summary>
/// A single search result of the deserialized response.
/// All members are plain string holders.
/// </summary>
public class Result
{
    public string GsearchResultClass { get; set; }
    public string unescapedUrl { get; set; }
    public string url { get; set; }
    public string visibleUrl { get; set; }
    public string cacheUrl { get; set; }

    /// <summary>Title of the result.</summary>
    public string title { get; set; }

    public string titleNoFormatting { get; set; }

    /// <summary>Description text of the result.</summary>
    public string content { get; set; }
}

