在Excel VBA代码中

时间:2018-07-14 14:12:36

标签: excel vba excel-vba web-scraping

我正在研究网站数据提取器。我有两个工作表,一个用于输入,另一个用于输出,看起来像这样。

enter image description here

在第一页中,单元格包含提取数据所需的URL。我正在尝试此URL

https://www.propertyguru.com.sg/singapore-property-listing/property-for-sale?limit=30&market=residential&property_type_code%5B%5D=4S&property_type=H&freetext=Yishun

我已经写了这个宏。

Sub extractTablesData()
    Dim IE As Object, obj As Object
    Dim str, e As String
    Dim pgf, pgt, pg As Integer
    Dim ele, Results As Object
    Dim add, size, cno, price, inurl, sp, sp1 As String
    Dim isheet, rts As Worksheet
    Dim LastRow As Long
    Dim pgno As Variant

    Set IE = CreateObject("InternetExplorer.Application")
    Set isheet = Worksheets("InputSheet")
    Set rts = Worksheets("Results")

    URL = isheet.Cells(3, 2)

    RowCount = 1
        rts.Range("A" & RowCount) = "Address"
        rts.Range("B" & RowCount) = "Size"
        rts.Range("C" & RowCount) = "Contact Number"
        rts.Range("D" & RowCount) = "Price"
        rts.Range("E" & RowCount) = "Url"
    LastRow = rts.Cells(Rows.Count, 2).End(xlUp).Row
      'RowCount = LastRow

     With IE
        .Visible = True
        .navigate (URL)

        DoEvents
          Do While IE.Busy Or IE.readyState <> 4
        Loop

    'Application.Wait (Now + #12:00:05 AM#)

    For Each Results In .document.all
        Select Case Results.className
            Case "title search-title"
                str = Results.innerText
                str1 = Split(str, " ")
                str = CInt(str1(0))
        End Select
        If Results.className = "btn btn-main-inverted dropdown-toggle" And InStr(1, Results.Title, " page") > 2 Then
            str2 = Results.Title
            str1 = Split(str2, " ")
            str2 = CInt(str1(0))
        End If
    Next

    If str2 = 0 Then
        pgno = CVErr(xlErrDiv0)
    Else
        pgno = WorksheetFunction.RoundUp(str / str2, 0)
    End If
    End With
    IE.Quit

    Set IE = Nothing
    UrlS = Split(URL, "?")
    Url1 = UrlS(0)
    Url2 = "?" & UrlS(1)

    For i = 1 To pgno
      Set IE = CreateObject("InternetExplorer.Application")
      URL = Url1 & "/" & i & Url2
      With IE
        .Visible = True
        .navigate (URL)

        DoEvents
          Do While IE.Busy Or IE.readyState <> 4
        Loop

        'Application.Wait (Now + #12:00:08 AM#)
        For Each ele In .document.all        
          Select Case ele.className
            Case "listing-img-a"
                inurl = ele.href
                rts.Cells(LastRow + 1, 5) = inurl

            Case "listing-location"
                LastRow = LastRow + 1
                add = ele.innerText
                rts.Cells(LastRow, 1) = add

            Case "lst-sizes"
                sp = Split(ele.innerText, " ·")

                size = sp(0)
                rts.Cells(LastRow, 2) = size

            Case "pgicon pgicon-phone js-agent-phone-number"      ' btn-tools" 'pgicon pgicon-phone js-agent-phone-number" 'agent-phone-number"
                rts.Cells(LastRow, 3) = ele.innerText

            Case "listing-price"
                price = ele.innerText
                rts.Cells(LastRow, 4) = price
         End Select        
        Next
    LastRow = rts.Cells(Rows.Count, 2).End(xlUp).Row
    rts.Activate
    rts.Range("A" & LastRow).Select

    End With
    IE.Quit
    Set IE = Nothing
    Application.Wait (Now + #12:00:04 AM#)
    Next i

    MsgBox "Success"
End Sub

运行此宏时出现错误

  

输入未匹配项

我调试时突出显示代码

For i = 1 To pgno 
  Set IE = CreateObject("InternetExplorer.Application") URL = Url1 & "/" & i & Url2 
  With IE .Visible = True .navigate (URL) 

我已经尽力弄清楚了,但是不明白问题出在哪里。请帮助我进行更正。.

它也没有在链接上获取全部记录。该链接包含200条记录,每页30条记录。

1 个答案:

答案 0 :(得分:0)

您可以依靠隐式转换并使用以下内容。假设所有页面都有编号。您可能需要改善错误处理。如果倒数第二个li CSS选择器失败,则默认为页码= 1,否则它将尝试获取">"

之前的最后一个页码。

请参考我的prior answer与您相关的问题,该问题向您展示了如何更有效地从页面上抓取信息。


显示正在使用的功能的示例代码:

Option Explicit
Public Sub GetListings()
    Dim IE As New InternetExplorer, pgno As Long
    With IE
        .Visible = True
        .navigate "https://www.propertyguru.com.sg/singapore-property-listing/property-for-sale?limit=30&market=residential&property_type_code%5B%5D=4S&property_type=H&freetext=Yishun", False
        While .Busy Or .readyState < 4: DoEvents: Wend
           pgno  = GetNumberOfPages(.document)
    End With
End Sub

Public Function GetNumberOfPages(ByVal doc As HTMLDocument) As Long
    On Error GoTo errhand:
    GetNumberOfPages = doc.querySelector(".listing-pagination li:nth-last-child(2)").innerText
    Exit Function
errhand:
   If Err.Number <> 0 Then GetNumberOfPages = 1
End Function