按 ClassName 收集元素数据

时间:2019-02-22 08:05:47

标签: html excel vba web-scraping

我正在尝试从网站上提取数据。我想把单元尺寸“10' x 5' unit”(类名为“unit_size medium”)复制到第1列,这部分数据我已经可以成功复制;但我还想把促销信息“第一个月免费!”(类名为“promo_offers”)复制到第2列。问题在于只有部分单元提供此促销,因此得到的数据会错位并产生误导:我在第4个单元格中得到了促销,随后就出现错误。我只想为提供了促销信息的单元复制促销,否则对应的单元格应该留空,或者设置为其他任意值。下面是代码……

请建议如何构建代码。

Sub GetClassNames()
    ' Opens the storage-unit listing page in Internet Explorer and appends one row
    ' per unit to Sheet2: column 1 = unit size, column 2 = promotion text.
    ' Units without a promotion get an empty string in column 2.
    '
    ' Requires references to "Microsoft Internet Controls" and
    ' "Microsoft HTML Object Library".

    Dim html As HTMLDocument
    Dim objIE As InternetExplorer
    Dim element As IHTMLElement
    Dim elements As IHTMLElementCollection
    Dim container As Object     ' late bound so getElementsByClassName can be called on an element
    Dim promos As Object
    Dim promoText As String
    Dim erow As Long

    ' Start a new Internet Explorer instance
    Set objIE = New InternetExplorer

    ' Make the browser visible (False would let IE run in the background)
    objIE.Visible = True

    ' Load the listing page
    objIE.navigate "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

    ' Wait until the browser reports that the page has finished loading
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    Set html = objIE.document
    Set elements = html.getElementsByClassName("unit_size medium")

    For Each element In elements
        If element.className = "unit_size medium" Then
            ' Next free row in column 1; read and write on the same sheet
            erow = Sheet2.Cells(Sheet2.Rows.Count, 1).End(xlUp).Offset(1, 0).Row
            Sheet2.Cells(erow, 1).Value = element.innerText

            ' Look for the promotion inside this unit's own listing item rather than
            ' by a page-wide index: not every unit has a promotion, so a page-wide
            ' index drifts out of step and finally runs past the end of the collection.
            ' NOTE(review): assumes each unit sits inside its own <li> - confirm against the page markup.
            Set container = element
            Do
                Set container = container.parentElement
                If container Is Nothing Then Exit Do
            Loop Until UCase$(container.tagName) = "LI"

            promoText = vbNullString
            If Not container Is Nothing Then
                Set promos = container.getElementsByClassName("promo_offers")
                If promos.Length > 0 Then promoText = promos(0).innerText
            End If
            Sheet2.Cells(erow, 2).Value = promoText
        End If
    Next element

    ' Close the browser so repeated runs do not pile up IE processes
    objIE.Quit
End Sub

1 个答案:

答案 0 :(得分:2)

我会直接把访问这些元素的代码包装在 On Error Resume Next 中。输出数组中已经为每个字段预留了位置,因此如果某个元素不存在,对应的位置就保持空白。

Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
    ' Scrapes the <li> listings inside the "small_units_accordion_panel" element and
    ' writes them to Sheet1: a header row in row 1, then one row per listing with
    ' size, features, promo, in-store price and web price.
    ' A field that a listing does not contain is left blank.
    Dim ie As New InternetExplorer, ws As Worksheet
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    With ie
        .Visible = True
        .Navigate2 "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

        While .Busy Or .readyState < 4: DoEvents: Wend

        Dim panel As Object, listings As Object, listing As Object
        Dim headers(), results(), r As Long, fieldCount As Long
        headers = Array("size", "features", "promo", "in store", "web")
        fieldCount = UBound(headers) - LBound(headers) + 1
        ws.Cells(1, 1).Resize(1, fieldCount).Value = headers

        ' Bail out cleanly (closing the browser) when the panel is missing or empty;
        ' otherwise the calls below would raise a run-time error and leave IE running.
        Set panel = .document.getElementById("small_units_accordion_panel")
        If panel Is Nothing Then
            .Quit
            Exit Sub
        End If
        Set listings = panel.getElementsByTagName("li")
        If listings.Length = 0 Then
            .Quit
            Exit Sub
        End If
        '.unit_size medium, .features, .promo_offers, .board_rate_wrapper p, .board_rate

        ReDim results(1 To listings.Length, 1 To fieldCount)
        For Each listing In listings
            r = r + 1
            ' Each lookup fails when the listing lacks that element; skipping the
            ' failed assignment leaves the pre-allocated array slot empty.
            On Error Resume Next
            results(r, 1) = listing.getElementsByClassName("unit_size medium")(0).innerText
            results(r, 2) = listing.getElementsByClassName("features")(0).innerText
            results(r, 3) = listing.getElementsByClassName("promo_offers")(0).innerText
            results(r, 4) = listing.getElementsByClassName("board_rate")(0).innerText
            results(r, 5) = listing.getElementsByClassName("price")(0).innerText
            On Error GoTo 0
        Next
        ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)).Value = results
        .Quit
    End With
End Sub

抓取页面上所有方框(而不仅仅是第一个面板)的版本:

Option Explicit

'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetData()
    ' Scrapes every <li> listing inside every element of class "main_unit" and
    ' writes them to Sheet1: a header row in row 1, then one row per listing with
    ' size, features, promo, in-store price and web price.
    ' A field that a listing does not contain is left blank.
    Dim ie As New InternetExplorer, ws As Worksheet
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    With ie
        .Visible = True
        .Navigate2 "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

        While .Busy Or .readyState < 4: DoEvents: Wend

        Dim listing As Object, headers(), results()
        Dim r As Long, list As Object, item As Object
        Dim rowCount As Long, fieldCount As Long
        headers = Array("size", "features", "promo", "in store", "web")
        fieldCount = UBound(headers) - LBound(headers) + 1
        ws.Cells(1, 1).Resize(1, fieldCount).Value = headers

        Set list = .document.getElementsByClassName("main_unit")
        '.unit_size medium, .features, .promo_offers, .board_rate_wrapper p, .board_rate

        ' Count rows with the same traversal that fills them, so the array size
        ' always matches the number of items the loops below visit.
        For Each listing In list
            rowCount = rowCount + listing.getElementsByTagName("li").Length
        Next

        ' Nothing to output: close the browser instead of failing on ReDim (1 To 0).
        If rowCount = 0 Then
            .Quit
            Exit Sub
        End If

        ReDim results(1 To rowCount, 1 To fieldCount)
        For Each listing In list
            For Each item In listing.getElementsByTagName("li")
                r = r + 1
                ' Each lookup fails when the item lacks that element; skipping the
                ' failed assignment leaves the pre-allocated array slot empty.
                On Error Resume Next
                results(r, 1) = item.getElementsByClassName("unit_size medium")(0).innerText
                results(r, 2) = item.getElementsByClassName("features")(0).innerText
                results(r, 3) = item.getElementsByClassName("promo_offers")(0).innerText
                results(r, 4) = item.getElementsByClassName("board_rate")(0).innerText
                results(r, 5) = item.getElementsByClassName("price")(0).innerText
                On Error GoTo 0
            Next
        Next
        ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)).Value = results
        .Quit
    End With
End Sub