通过ClassName进行网页爬取

时间:2019-04-09 12:12:35

标签: html excel vba web-scraping

我不是开发人员,而是从事一个项目,其中多个网站涉及不同的格式。我正在尝试仅按ClassName复制数据,因为某些网站不存在TagName。

我尝试了下面的代码,但是没有用。请有人帮助我编写代码。

Dim html As HTMLDocument
Dim objIE As Object
Dim element As IHTMLElement

Dim elements As IHTMLElementCollection
Dim result As String 'string variable that will hold our result link

Dim count As Long
Dim erow As Long

Dim ie As New InternetExplorer, ws As Worksheet



Set ws = ThisWorkbook.Worksheets("Unit Data")

Set objIE = New InternetExplorer

'make IE browser visible (False would allow IE to run in the background)
objIE.Visible = True

'navigate IE to this web page (a pretty neat search engine really)
objIE.navigate "https://libertystorage.com/locations/1"

'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
count = 0

Set html = objIE.document
Set elements = html.getElementsByClassName("title")

For Each element In elements
    If element.className = "title" Then
        erow = Sheet2.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
        Cells(erow, 1) = html.getElementsByClassName("title")(count).innerText 'Size
        Cells(erow, 1) = html.getElementsByClassName("price")(count).innerText 'price1
        Cells(erow, 1) = html.getElementsByClassName("sale -Price")(count).innerText 'price2
        Cells(erow, 1) = html.getElementsByClassName("Text -cell")(count).innerText 'price2

        count = count + 1
    End If
Next element
End Sub

1 个答案:

答案 0 :(得分:0)

执行以下脚本以获取所需的输出。如果您要添加任何内容,可以按照我在此处应用的逻辑自行完成。

Sub FetchData()
    Const URL = "https://www.storagefront.com/storage-units/california/corona/storage-direct-corona-80807/"
    Dim s$, r&, elem As Object, post As HTMLDivElement

    With New XMLHTTP
        .Open "GET", URL, False
        .send
        s = .responseText
    End With


    With New HTMLDocument
        .body.innerHTML = s
        Set elem = .getElementsByClassName("unit_detail")
        If elem.Length > 1 Then
            [A1:E1] = [{"size","feature","offer","rate","price"}]

            For Each post In elem
                With post.querySelectorAll(".unit_size")
                    If .Length Then r = r + 1: Cells(r + 1, 1) = .item(0).innerText
                End With

                With post.querySelectorAll(".features")
                    If .Length Then Cells(r + 1, 2) = .item(0).innerText
                End With

                With post.querySelectorAll(".promo_offers > span")
                    If .Length Then Cells(r + 1, 3) = .item(0).innerText
                End With

                With post.querySelectorAll(".board_rate")
                    If .Length Then Cells(r + 1, 4) = .item(0).innerText
                End With

                With post.querySelectorAll(".price")
                    If .Length Then Cells(r + 1, 5) = .item(0).innerText
                End With
            Next post
        End If
    End With
End Sub

确保在执行之前添加引用:

1. Microsoft HTML Object Library
2. Microsoft XML, v6.0