网络抓取的基础

时间:2019-05-03 11:45:32

标签: html excel vba web-scraping

我想通过网页抓取获取 Maruti Alto 所有型号的价格。我尝试用下面的代码抓取数据,但始终取不到任何结果。

Sub Basics_Of_Web_Macro()
    ' Loads the Maruti Alto price-list page in a hidden Internet Explorer
    ' instance, then writes the page title to A1 and the text of the first
    ' element carrying both the "priceInfo" and "clearfix" classes to B1 of
    ' the active sheet. B1 is left untouched when no such element exists.
    Const READYSTATE_COMPLETE As Long = 4

    Dim myIE As Object
    Dim myIEDoc As Object
    Dim priceNode As Object

    'Start Internet Explorer (late bound, so no library reference is needed)
    Set myIE = CreateObject("InternetExplorer.Application")

    'If you want to see the window set this to True
    myIE.Visible = False

    'Open the page we'd like to use as a source for information
    myIE.navigate "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra"

    'Wait until the browser is idle AND the document is fully loaded;
    'checking Busy alone can fall through before the document is ready
    Do While myIE.Busy Or myIE.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    'Read the HTML content of the page
    Set myIEDoc = myIE.document

    'Grab the information we want
    Range("A1").Value = myIEDoc.Title

    'Then get something from the inner page content by its CSS classes.
    'The document object has no "Class" method; querySelector takes a CSS
    'selector and returns the first matching element (or Nothing).
    Set priceNode = myIEDoc.querySelector(".priceInfo.clearfix")
    If Not priceNode Is Nothing Then
        Range("B1").Value = priceNode.innerText
    End If

    'Close the hidden browser so no orphaned iexplore.exe process is left
    myIE.Quit
    Set myIE = Nothing
End Sub

1 个答案:

答案 0 :(得分:0)

XHR:

您可以使用 XMLHTTP 请求,从而无需打开浏览器。遍历按类名 cols 选取后返回的 nodeList;每到第 5 个元素(即每 4 个元素之后)就另起一行,并将列号重置为 1 再输出。这样就把 nodeList 的列表形式转换成了表格形式输出。

需要添加的引用:VBE > 工具 > 引用 > Microsoft HTML Object Library

Option Explicit
Public Sub GetPrices()
    ' Downloads the Alto price-list page with a synchronous XMLHTTP request
    ' (no browser), collects the text of every ".cols" element except the
    ' last one, and writes it to the active sheet as a 4-column table that
    ' starts at A1.
    '
    ' Requires a reference to: Microsoft HTML Object Library
    Const PAGE_URL As String = "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra"
    Const COLS_PER_ROW As Long = 4

    Dim html As HTMLDocument
    Set html = New HTMLDocument
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", PAGE_URL, False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send
        html.body.innerHTML = .responseText
    End With

    Dim listings As Object, cellCount As Long
    Set listings = html.querySelectorAll(".cols")

    ' The last matched node is deliberately left out, as in the original
    ' loop bound (Length - 2); presumably it is not part of the price
    ' table - TODO confirm against the live page.
    cellCount = listings.Length - 1

    ' Nothing usable matched: leave the sheet untouched instead of failing
    ' on an invalid ReDim bound.
    If cellCount < 1 Then Exit Sub

    Dim results(), i As Long
    ' Integer ceiling division, so a partially filled last row always has
    ' room regardless of how many nodes were matched.
    ReDim results(1 To (cellCount + COLS_PER_ROW - 1) \ COLS_PER_ROW, 1 To COLS_PER_ROW)

    For i = 0 To cellCount - 1
        ' Map the flat node index onto a (row, column) pair of the table
        results(i \ COLS_PER_ROW + 1, i Mod COLS_PER_ROW + 1) = listings.item(i).innerText
    Next

    ActiveSheet.Cells(1, 1).Resize(UBound(results, 1), UBound(results, 2)).Value = results
End Sub

Internet Explorer:

Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetPrices()
    ' Opens the Alto price-list page in a visible Internet Explorer window,
    ' collects the text of every ".cols" element except the last one, and
    ' writes it to the active sheet as a 4-column table that starts at A1.
    ' The browser is closed before the sheet is written.
    '
    ' Requires a reference to: Microsoft Internet Controls
    Const PAGE_URL As String = "https://www.marutisuzuki.com/channels/arena/price-list/alto-price-in-mumbai-in-maharashtra"
    Const COLS_PER_ROW As Long = 4
    Const READYSTATE_COMPLETE As Long = 4

    Dim ie As New InternetExplorer
    Dim listings As Object, cellCount As Long
    Dim results(), i As Long

    With ie
        .Visible = True
        .Navigate2 PAGE_URL

        ' Wait until the browser is idle and the document is fully loaded
        Do While .Busy Or .readyState < READYSTATE_COMPLETE
            DoEvents
        Loop

        Set listings = .document.querySelectorAll(".cols")

        ' The last matched node is deliberately left out, as in the original
        ' loop bound (Length - 2); presumably it is not part of the price
        ' table - TODO confirm against the live page.
        cellCount = listings.Length - 1

        If cellCount >= 1 Then
            ' Integer ceiling division, so a partially filled last row
            ' always has room regardless of how many nodes were matched.
            ReDim results(1 To (cellCount + COLS_PER_ROW - 1) \ COLS_PER_ROW, 1 To COLS_PER_ROW)
            For i = 0 To cellCount - 1
                ' Map the flat node index onto a (row, column) pair
                results(i \ COLS_PER_ROW + 1, i Mod COLS_PER_ROW + 1) = listings.item(i).innerText
            Next
        End If

        ' Always close the browser, even when nothing was found
        .Quit
    End With

    ' Nothing usable matched: leave the sheet untouched
    If cellCount < 1 Then Exit Sub

    ActiveSheet.Cells(1, 1).Resize(UBound(results, 1), UBound(results, 2)).Value = results
End Sub