单击选项时使用 Selenium 抓取 HTML

时间:2017-07-30 20:42:56

标签: excel-vba selenium html-table selenium-chromedriver vba

我有一个脚本,用于从网站上抓取数据。

    ' Opens the Optimum Nutrition listing on bodybuildingwarehouse.co.uk in Chrome
    ' and writes one row per "grid-info" element: the product name in column 1 and
    ' the regular or special price in column 2, starting at row 1.
    ' Output goes through unqualified Cells (the active sheet when this macro
    ' lives in a standard module).
    Sub Body_Building()
        ' Matches either the regular price or the discounted ("special") price.
        Const PRICE_XPATH As String = ".//span[@class='regular-price']//span[@class='price']|.//p[@class='special-price']//span[@class='price']"

        Dim driver As New WebDriver
        Dim post As Object
        Dim i As Long   ' output row counter; declared so the macro compiles under Option Explicit

        With driver
            .Start "chrome", "http://www.bodybuildingwarehouse.co.uk"
            .Get "/optimum-nutrition?limit=all"
        End With

        For Each post In driver.FindElementsByClass("grid-info")
            i = i + 1

            ' Best effort per product: a tile without a name or price leaves that
            ' cell untouched instead of aborting the run. Suppression is limited
            ' to these two lookups so that other failures are still reported.
            On Error Resume Next
            Cells(i, 1) = post.FindElementByClass("product-name").Text
            Cells(i, 2) = post.FindElementByXPath(PRICE_XPATH).Text
            On Error GoTo 0
        Next post
    End Sub

是否可以使用相同或类似的技术从该网站(this website)抓取数据,使结果如下图所示?

enter image description here

下面是能够得到预期结果的可用 VBA 代码。谢谢 SMth80。

' Scrapes the "Gold Standard 100% Whey" product page on optigura.com.
' Clicks each size thumbnail in turn and writes one fully populated row
' (Name, Flavor, Size, Price) per flavour label shown for that size.
' Row 1 receives the header; data starts at row 2.
Sub optigura_scraper_v2()
    Const SIZE_IMG_XPATH As String = "//span[@class='img']/img"

    Dim driver As New ChromeDriver
    Dim elems As Object, post As Object
    Dim i As Long, n As Long            ' declared so the macro compiles under Option Explicit
    Dim productName As String, priceText As String
    Dim altText As String, sizeText As String
    Dim altParts() As String

    driver.Get "https://www.optigura.com/uk/product/gold-standard-100-whey/"
    [A1:D1].Value = [{"Name","Flavor","Size","Price"}]

    ' elems is used only for its count; the thumbnails are looked up again
    ' before every click (presumably because the page re-renders after a
    ' click - verify before reusing these references).
    Set elems = driver.FindElementsByXPath(SIZE_IMG_XPATH)
    i = 2

    For n = 1 To elems.Count
        driver.FindElementsByXPath(SIZE_IMG_XPATH)(n).Click
        driver.Wait 1000

        ' Nothing inside the flavour loop changes the page, so these values are
        ' read once per size instead of once per flavour row.
        productName = driver.FindElementByXPath("//h1[@itemprop='name']").Text
        priceText = driver.FindElementByXPath("//span[@class='price']").Text
        altText = driver.FindElementByXPath("//li[@class='active']//span[@class='img']/img").Attribute("alt")

        ' The size is the second "-"-separated part of the alt text. Fall back
        ' to the whole alt text when there is no "-" rather than failing with
        ' "Subscript out of range".
        altParts = Split(altText, "-")
        If UBound(altParts) >= 1 Then
            sizeText = altParts(1)
        Else
            sizeText = altText
        End If

        For Each post In driver.FindElementsByXPath("//div[@class='colright']//ul[@class='opt2']//label")
            Cells(i, 1) = productName
            Cells(i, 2) = post.Text
            Cells(i, 3) = sizeText
            Cells(i, 4) = priceText
            i = i + 1
        Next post
    Next n
End Sub

1 个答案:

答案 0 :(得分:2)

试试这个。这肯定不是最好的方法,但可以满足您的需求。顺便说一句,该抓取程序会完全按照数据在页面上的显示方式进行解析。

' Scrapes the "Gold Standard 100% Whey" product page on optigura.com, mirroring
' the grouped layout of the page: for each size thumbnail the name, price and
' size are written once on the first row of the group, and every flavour label
' for that size is listed in column 4 on consecutive rows.
' Row 1 receives the header; data starts at row 2.
Sub optigura_scraper()
    Const SIZE_IMG_XPATH As String = "//span[@class='img']/img"

    Dim driver As New ChromeDriver
    Dim elems As Object, post As Object
    Dim i As Long, N As Long            ' declared so the macro compiles under Option Explicit
    Dim groupStart As Long
    Dim altText As String
    Dim altParts() As String

    driver.Get "https://www.optigura.com/uk/product/gold-standard-100-whey/"
    [A1:D1].Value = [{"Name","Price","Size","Flavor"}]

    ' elems is used only for its count; the thumbnails are looked up again
    ' before every click (presumably because the page re-renders after a
    ' click - verify before reusing these references).
    Set elems = driver.FindElementsByXPath(SIZE_IMG_XPATH)
    i = 2

    For N = 1 To elems.Count
        driver.FindElementsByXPath(SIZE_IMG_XPATH)(N).Click
        driver.Wait 1000

        groupStart = i
        Cells(i, 1) = driver.FindElementByXPath("//h1[@itemprop='name']").Text
        Cells(i, 2) = driver.FindElementByXPath("//span[@class='price']").Text

        ' The size is the second "-"-separated part of the alt text. Fall back
        ' to the whole alt text when there is no "-" rather than failing with
        ' "Subscript out of range".
        altText = driver.FindElementByXPath("//li[@class='active']//span[@class='img']/img").Attribute("alt")
        altParts = Split(altText, "-")
        If UBound(altParts) >= 1 Then
            Cells(i, 3) = altParts(1)
        Else
            Cells(i, 3) = altText
        End If

        For Each post In driver.FindElementsByXPath("//div[@class='colright']//ul[@class='opt2']//label")
            Cells(i, 4) = post.Text
            i = i + 1
        Next post

        ' A size with no flavour labels still occupies its own row; without
        ' this the next size would overwrite the row just written.
        If i = groupStart Then i = i + 1
    Next N
End Sub