如何使用HTMLAgilityPack提取该字符串?

时间:2014-01-13 20:23:16

标签: c# html-agility-pack

我想从HTML源中提取content值:

<span itemprop="price" content="164,06"></span>

我使用的是 C# 和 HtmlAgilityPack。我的代码总是返回 "nothing found"。有什么想法吗?

/// <summary>
/// Downloads the sample page and returns the value of the <c>content</c>
/// attribute of the first <c>span</c> element that has one.
/// </summary>
/// <returns>
/// The attribute value, or "nothing found" when no matching span exists.
/// </returns>
private string getTextfrom()
{
    var webGet = new HtmlWeb();
    // NOTE(review): HtmlWeb.Load may leave the document empty when the server
    // does not report an HTML content type - confirm for this URL.
    var doc = webGet.Load("http://pastebin.com/raw.php?i=zE31NWtU");

    HtmlNode priceNode = doc.DocumentNode.SelectSingleNode("//span[@content]");
    if (priceNode == null)
    {
        return "nothing found";
    }

    // The wanted value is stored in the attribute; the span element itself is
    // empty, so InnerText would only ever yield an empty string.
    return priceNode.GetAttributeValue("content", "nothing found");
}

谢谢!

1 个答案:

答案 0 :(得分:1)

你可以这样写:

/// <summary>
/// Fetches the sample page as raw text, parses it as HTML and returns the
/// value of the <c>content</c> attribute of the first <c>span</c> that has one.
/// </summary>
/// <returns>
/// The attribute value, or "nothing found" when no matching span exists.
/// </returns>
private string getTextfrom()
{
    string htmlContent = GetPageContent("http://pastebin.com/raw.php?i=zE31NWtU");

    HtmlDocument doc = new HtmlDocument();
    // For a quick offline check, a literal can be parsed instead, e.g.
    // doc.LoadHtml("<span itemprop=\"price\" content=\"164,06\"></span>");
    doc.LoadHtml(htmlContent);

    // Select only spans that carry the attribute. The XPath query yields null
    // (not an empty result) when nothing matches, so it must be checked before use.
    HtmlNode priceNode = doc.DocumentNode.SelectSingleNode("//span[@content]");
    if (priceNode == null)
    {
        return "nothing found";
    }

    return priceNode.GetAttributeValue("content", "nothing found");
}

/// <summary>
/// Issues an HTTP GET request for the given URL and returns the response
/// body decoded as UTF-8 text.
/// </summary>
/// <param name="Url">Address of the page to download.</param>
/// <returns>The complete response body as a string.</returns>
public static String GetPageContent(string Url)
{
    HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(Url);
    myRequest.Method = "GET";

    // The using statements dispose the response and the reader even when
    // reading throws, so the underlying connection is not leaked.
    using (WebResponse myResponse = myRequest.GetResponse())
    using (StreamReader sr = new StreamReader(myResponse.GetResponseStream(), System.Text.Encoding.UTF8))
    {
        return sr.ReadToEnd();
    }
}