刮擦页面而不是下一页

时间:2015-02-26 10:59:51

标签: vba excel-vba web-scraping excel

我正在尝试废弃一个网站,但是当我点击第二页并尝试废弃相同的代码时显示错误。

  1. 报废第一页的数据。
  2. 点击2页链接。
  3. 显示以下错误。
  4.   

    对象变量或未设置块

    使用以下内容启动网站并在第一页中抓取数据。

    Dim Shell As Object
    Dim IE As Object
    Dim linkelement As Object
    Dim link As HTMLLinkElement
    
    Set Shell = CreateObject("Shell.Application")
    
    For Each IE In Shell.Windows
        If TypeName(IE.document) = "HTMLDocument" Then
            IE.Quit
        End If
    Next
    
    Const READYSTATE_COMPLETE As Integer = 4
    
        Dim IntExpl As Object
        Dim HTMLdoc  As HTMLDocument
    
    
        Set IntExpl = CreateObject("InternetExplorer.Application")
    
        With IntExpl
            .navigate "http://www.ikea.com/us/en/search/?query=chair"
            .Visible = True
    Q:
    
        Do Until IntExpl.ReadyState = 4
    
        Loop
    
    
    
        For i = 1 To 25
    
        Cells(i + 1, 1) = .document.getElementById("txtNameProduct" & i).innerText
        Cells(i + 1, 2) = .document.getElementById("txtPriceProduct" & i).innerText
        Cells(i + 1, 3) = .document.getElementById("txtDescrProduct" & i).innerText
    
        Next
    
        End With
    
        Set HTMLdoc = IntExpl.document
    
        While j < HTMLdoc.Links.Length And link Is Nothing
            If HTMLdoc.Links(j).innerText = "2" Then Set link = HTMLdoc.Links(j)
            j = j + 1
        Wend
    

    这将转到网站的下一页

        If Not link Is Nothing Then
            link.Focus
            link.Click
        End If
    
    GoTo Q
    

1 个答案:

答案 0 :(得分:0)

不要使用Html Link,而是模拟寻呼机地址并让IE导航到新的URL。这里的例子。在我的电脑上,宏需要大约一分钟来浏览所有38页并获取数据。

注意:检查页数(现在我看到了38页,但它可能随时间而变化)。 HTH。

Option Explicit

Sub GetIkeaChairs(): On Error GoTo ErrGetIkeaChairs
    Const READYSTATE_COMPLETE As Integer = 4
    Dim IE As Object
    Dim linkelement As Object
    Dim link As HTMLLinkElement
    Dim IntExpl As Object
    Dim HTMLdoc  As HTMLDocument
    Dim item, page, pageParameter, baseUrl

    baseUrl = "http://www.ikea.com/us/en/search/?query=chair"
    Set IntExpl = CreateObject("InternetExplorer.Application")

    Application.ScreenUpdating = False

    For page = 1 To 38 ' <<< Here change the number of pages
        If page > 1 Then _
            pageParameter = "&pageNumber=" & page

        IntExpl.navigate baseUrl & pageParameter
        Do Until IntExpl.ReadyState = 4: Loop

        Set HTMLdoc = IntExpl.document

        For item = 1 To 25 ' <<< Here change the number of item per page
           WriteToSheet HTMLdoc, page, item
        Next item
    Next page

    MsgBox "I love IKEA :-)", vbInformation

ErrGetIkeaChairs:
    Application.ScreenUpdating = True
    IntExpl.Visible = True

    If Err.Number <> 0 Then _
        MsgBox Err.Description
End Sub

Private Sub WriteToSheet(HTMLdoc, page, item)
    Dim row, nameProduct, priceProduct, descrProduct
    row = (page - 1) * 25 + item

    Set nameProduct = HTMLdoc.getElementById("txtNameProduct" & item)
    If Not nameProduct Is Nothing Then
        Cells(row, 1) = nameProduct.innerText
    End If

    Set priceProduct = HTMLdoc.getElementById("txtPriceProduct" & item)
    If Not priceProduct Is Nothing Then
        Cells(row, 2) = priceProduct.innerText
    End If

    Set descrProduct = HTMLdoc.getElementById("txtDescrProduct" & item)
    If Not descrProduct Is Nothing Then
        Cells(row, 3) = descrProduct.innerText
    End If
End Sub