我正在研究网站数据提取器。我有两个工作表,一个用于输入,另一个用于输出,看起来像这样。
在第一页中,单元格包含提取数据所需的URL。我正在尝试此URL
我已经写了这个宏。
Sub extractTablesData()
Dim IE As Object, obj As Object
Dim str, e As String
Dim pgf, pgt, pg As Integer
Dim ele, Results As Object
Dim add, size, cno, price, inurl, sp, sp1 As String
Dim isheet, rts As Worksheet
Dim LastRow As Long
Dim pgno As Variant
Set IE = CreateObject("InternetExplorer.Application")
Set isheet = Worksheets("InputSheet")
Set rts = Worksheets("Results")
URL = isheet.Cells(3, 2)
RowCount = 1
rts.Range("A" & RowCount) = "Address"
rts.Range("B" & RowCount) = "Size"
rts.Range("C" & RowCount) = "Contact Number"
rts.Range("D" & RowCount) = "Price"
rts.Range("E" & RowCount) = "Url"
LastRow = rts.Cells(Rows.Count, 2).End(xlUp).Row
'RowCount = LastRow
With IE
.Visible = True
.navigate (URL)
DoEvents
Do While IE.Busy Or IE.readyState <> 4
Loop
'Application.Wait (Now + #12:00:05 AM#)
For Each Results In .document.all
Select Case Results.className
Case "title search-title"
str = Results.innerText
str1 = Split(str, " ")
str = CInt(str1(0))
End Select
If Results.className = "btn btn-main-inverted dropdown-toggle" And InStr(1, Results.Title, " page") > 2 Then
str2 = Results.Title
str1 = Split(str2, " ")
str2 = CInt(str1(0))
End If
Next
If str2 = 0 Then
pgno = CVErr(xlErrDiv0)
Else
pgno = WorksheetFunction.RoundUp(str / str2, 0)
End If
End With
IE.Quit
Set IE = Nothing
UrlS = Split(URL, "?")
Url1 = UrlS(0)
Url2 = "?" & UrlS(1)
For i = 1 To pgno
Set IE = CreateObject("InternetExplorer.Application")
URL = Url1 & "/" & i & Url2
With IE
.Visible = True
.navigate (URL)
DoEvents
Do While IE.Busy Or IE.readyState <> 4
Loop
'Application.Wait (Now + #12:00:08 AM#)
For Each ele In .document.all
Select Case ele.className
Case "listing-img-a"
inurl = ele.href
rts.Cells(LastRow + 1, 5) = inurl
Case "listing-location"
LastRow = LastRow + 1
add = ele.innerText
rts.Cells(LastRow, 1) = add
Case "lst-sizes"
sp = Split(ele.innerText, " ·")
size = sp(0)
rts.Cells(LastRow, 2) = size
Case "pgicon pgicon-phone js-agent-phone-number" ' btn-tools" 'pgicon pgicon-phone js-agent-phone-number" 'agent-phone-number"
rts.Cells(LastRow, 3) = ele.innerText
Case "listing-price"
price = ele.innerText
rts.Cells(LastRow, 4) = price
End Select
Next
LastRow = rts.Cells(Rows.Count, 2).End(xlUp).Row
rts.Activate
rts.Range("A" & LastRow).Select
End With
IE.Quit
Set IE = Nothing
Application.Wait (Now + #12:00:04 AM#)
Next i
MsgBox "Success"
End Sub
运行此宏时出现错误
输入未匹配项
我调试时突出显示代码
For i = 1 To pgno
Set IE = CreateObject("InternetExplorer.Application") URL = Url1 & "/" & i & Url2
With IE .Visible = True .navigate (URL)
我已经尽力弄清楚了,但是不明白问题出在哪里。请帮助我进行更正。.
它也没有在链接上获取全部记录。该链接包含200条记录,每页30条记录。
答案 0 :(得分:0)
您可以依靠隐式转换并使用以下内容。假设所有页面都有编号。您可能需要改善错误处理。如果倒数第二个li CSS选择器失败,则默认为页码= 1,否则它将尝试获取">"
请参考我的prior answer与您相关的问题,该问题向您展示了如何更有效地从页面上抓取信息。
显示正在使用的功能的示例代码:
Option Explicit
Public Sub GetListings()
Dim IE As New InternetExplorer, pgno As Long
With IE
.Visible = True
.navigate "https://www.propertyguru.com.sg/singapore-property-listing/property-for-sale?limit=30&market=residential&property_type_code%5B%5D=4S&property_type=H&freetext=Yishun", False
While .Busy Or .readyState < 4: DoEvents: Wend
pgno = GetNumberOfPages(.document)
End With
End Sub
Public Function GetNumberOfPages(ByVal doc As HTMLDocument) As Long
On Error GoTo errhand:
GetNumberOfPages = doc.querySelector(".listing-pagination li:nth-last-child(2)").innerText
Exit Function
errhand:
If Err.Number <> 0 Then GetNumberOfPages = 1
End Function