如何在网页抓取时提取元素的属性值(data-promo-name)

时间:2019-11-25 10:07:05

标签: html excel vba web-scraping

我想抓取的是元素的属性值(而不是该元素的文本内容)。具体来说,我想提取 data-promo-name 属性,对于下面的 HTML,结果应为 "$1 first month rent"。

<div class="ps-properties-property__units__prices col-4 col-md-3" data-promo-id="132" data-promo-name="$1 first month rent">

网站:https://www.publicstorage.com/self-storage-mi-ann-arbor/1760?sp=1760|1|Ann%20Arbor|42.28083|-83.74303|0|1|1

请看下面的代码:

Sub Element_Name()
' Asker's attempt: opens the Public Storage listing page in Internet Explorer,
' reads one row of values per unit listing, and writes a header row plus the
' collected values to the "Unit Data" worksheet of this workbook.
' NOTE(review): the snippet as posted has no closing "End Sub".

Dim ie As New InternetExplorer, ws As Worksheet
' NOTE(review): "element" is declared but never used in the visible code.
Dim element As IHTMLElement

Set ws = ThisWorkbook.Worksheets("Unit Data")

With ie

    .Visible = True

    .Navigate2 "https://www.publicstorage.com/self-storage-mi-ann-arbor/1760?sp=1760|1|Ann%20Arbor|42.28083|-83.74303|0|1|1"

    ' Spin until the browser reports it is no longer busy and readyState has reached 4.
    While .Busy Or .readyState < 4: DoEvents: Wend

    ' NOTE(review): "c" and "item" are declared but never used in the visible code.
    Dim listings As Object, listing As Object, headers(), results(), r As Long, c As Long, item As Object

    ' Column titles written to row 1; results() gets one column per title.
    headers = Array("Width", "Length", "Hight/Space Type", "promo", "Reguler Price", "Online Price", "Listing Active", "features", "features1", "features2", "features3", "features4", "features5", "features6")

    Set listings = .document.getElementsByClassName("row ps-properties-property__units__row ps-properties-property__units__row__desktop")

    ' One row per matched listing.
    ' NOTE(review): if nothing matches, the upper bound is 0 and this ReDim fails.
    ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)

    For Each listing In listings

        r = r + 1

        ' Any lookup below that fails is skipped silently, leaving that cell empty.
        On Error Resume Next

        results(r, 1) = listing.getElementsByClassName("ps-properties-property__units__header")(0).innerText 'Size

        ' Placeholder for the value the question asks about (the promo name).
        'results(r, 4) = listing.getElementsByClassName(Need a code here) 'Promo

        results(r, 5) = listing.getElementsByClassName("ps-properties-property__units__prices__old-price")(0).innerText 'Regular price

        results(r, 6) = listing.getElementsByClassName("ps-properties-property__units__prices__price")(0).innerText 'Online Price

        ' NOTE(review): a class list is passed to getElementsByTagName here; getElementsByClassName
        ' was presumably intended. Any resulting error is hidden by On Error Resume Next.
        results(r, 7) = listing.getElementsByTagName("ps-properties-property__units__prices col-1 col-md-3")(0).innerText 'Listing Active

        results(r, 8) = listing.getElementsByClassName("ps-properties-property__units__feature")(0).innerText 'Features

        On Error GoTo 0

    Next

    ' Header row in row 1, collected values from row 2 downward.
    ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results

    .Quit

End With

2 个答案:

答案 0 :(得分:2)

您需要先定位到目标节点,然后调用 getAttribute 方法读取该属性:

node.getAttribute("data-promo-name")

我这边看到的页面 HTML 似乎与您给出的有所不同。

使用单行的示例是

ie.document.querySelector(".ps-properties-property__units__prices.col-4.col-md-3").getAttribute("data-promo-name")

完整代码:

Option Explicit

Public Sub ElementName()
    ' Scrapes the unit listings from the Public Storage property page and writes
    ' them to the "Unit Data" worksheet: a header row in row 1 and one row per
    ' listing from row 2 downward.
    '
    ' Column 4 ("promo") is read from the data-promo-name ATTRIBUTE of the prices
    ' node (via getAttribute), not from the node's text.
    '
    ' Requires references to "Microsoft Internet Controls" and
    ' "Microsoft HTML Object Library" (InternetExplorer / HTMLDocument are early bound).
    '
    ' Errors are re-raised to the caller after the browser instance has been closed.

    Const PAGE_URL As String = "https://www.publicstorage.com/self-storage-mi-ann-arbor/1760?sp=1760|1|Ann%20Arbor|42.28083|-83.74303|0|1|1"

    Dim ie As New InternetExplorer, ws As Worksheet
    Dim listings As Object, listing As Object, promoNode As Object
    Dim headers(), results(), r As Long
    Dim html2 As HTMLDocument
    Dim errNum As Long, errDesc As String

    Set ws = ThisWorkbook.Worksheets("Unit Data")

    headers = Array("Width", "Length", "Hight/Space Type", "promo", "Reguler Price", "Online Price", "Listing Active", "features", "features1", "features2", "features3", "features4", "features5", "features6")

    ' From here on, any failure must still close the browser (see Done/Fail below).
    On Error GoTo Fail

    With ie

        .Visible = True

        .Navigate2 PAGE_URL

        ' Wait until the page has finished loading.
        While .Busy Or .readyState < 4: DoEvents: Wend

        Set listings = .document.getElementsByClassName("row ps-properties-property__units__row ")

        ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

        ' ReDim with an upper bound of 0 would raise, so only build the
        ' results table when at least one listing was found.
        If listings.Length > 0 Then

            ' Scratch document: each listing's HTML is loaded into it so that
            ' querySelector can be used on that listing alone.
            Set html2 = New HTMLDocument

            ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)

            For Each listing In listings

                r = r + 1

                results(r, 1) = FirstInnerText(listing, "ps-properties-property__units__header") 'Size

                html2.body.innerHTML = listing.outerHTML

                ' Promo: attribute value, not element text. Listings without a
                ' prices node simply leave the cell empty.
                Set promoNode = html2.querySelector(".ps-properties-property__units__prices")
                If Not promoNode Is Nothing Then
                    results(r, 4) = promoNode.getAttribute("data-promo-name")
                End If

                results(r, 5) = FirstInnerText(listing, "ps-properties-property__units__prices__old-price") 'Regular price

                results(r, 6) = FirstInnerText(listing, "ps-properties-property__units__prices__price") 'Online price

                ' The original passed this class list to getElementsByTagName, which can
                ' never match; it is a class lookup.
                ' NOTE(review): confirm these class names against the live page.
                results(r, 7) = FirstInnerText(listing, "ps-properties-property__units__prices col-1 col-md-3") 'Listing active

                results(r, 8) = FirstInnerText(listing, "ps-properties-property__units__feature") 'Features

                html2.body.innerHTML = vbNullString
            Next

            ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results

        End If

    End With

Done:
    ' Always close the browser, even after a failure; ignore errors from Quit itself.
    On Error Resume Next
    ie.Quit
    On Error GoTo 0
    If errNum <> 0 Then Err.Raise errNum, "ElementName", errDesc
    Exit Sub

Fail:
    ' Remember the error, then leave the handler so the cleanup code can run.
    errNum = Err.Number
    errDesc = Err.Description
    Resume Done
End Sub

Private Function FirstInnerText(ByVal parent As Object, ByVal classNames As String) As Variant
    ' Returns the innerText of the first descendant of parent that carries all of
    ' the given (space-separated) class names, or Empty when there is no match.
    Dim nodes As Object

    Set nodes = parent.getElementsByClassName(classNames)
    If nodes.Length > 0 Then FirstInnerText = nodes(0).innerText
End Function

答案 1 :(得分:1)

findElement(By.xpath("//div[@class='ps-properties-property__units__prices col-4 col-md-3']")).getAttribute("data-promo-name");