专家,我是新来的人,我的Excel VBA代码遇到了问题,该代码用于提取网站上的数据。我有两个工作表,名称分别为“输入”和“输出”,看起来像这样。...
第一张纸将获得一个URL作为输入,然后运行下面编写的代码...
Sub extractTablesData()
Dim IE As Object, obj As Object
Dim str, e As String
Dim pgf, pgt, pg As Integer
Dim ele, Results As Object
Dim add, size, cno, price, inurl, sp, sp1 As String
Dim isheet, rts As Worksheet
Dim LastRow As Long
Set IE = CreateObject("InternetExplorer.Application")
Set isheet = Worksheets("InputSheet")
Set rts = Worksheets("Results")
url = isheet.Cells(3, 2)
RowCount = 1
rts.Range("A" & RowCount) = "Address"
rts.Range("B" & RowCount) = "Size"
rts.Range("C" & RowCount) = "Contact Number"
rts.Range("D" & RowCount) = "Price"
rts.Range("E" & RowCount) = "Url"
LastRow = rts.Cells(Rows.Count, 2).End(xlUp).Row
'RowCount = LastRow
With IE
.Visible = True
.Navigate (url)
DoEvents
Do While IE.busy Or IE.readystate <> 4
Loop
'Application.Wait (Now + #12:00:05 AM#)
For Each Results In .Document.all
Select Case Results.className
Case "title search-title"
str = Results.innerText
str1 = Split(str, " ")
str = CInt(str1(0))
End Select
If Results.className = "btn btn-main-inverted dropdown-toggle" And InStr(1, Results.Title, " page") > 2 Then
str2 = Results.Title
str1 = Split(str2, " ")
str2 = CInt(str1(0))
End If
Next
pgno = WorksheetFunction.RoundUp(str / str2, 0)
End With
IE.Quit
Set IE = Nothing
UrlS = Split(url, "?")
Url1 = UrlS(0)
Url2 = "?" & UrlS(1)
For i = 1 To pgno
Set IE = CreateObject("InternetExplorer.Application")
url = Url1 & "/" & i & Url2
With IE
.Visible = True
.Navigate (url)
DoEvents
Do While IE.busy Or IE.readystate <> 4
Loop
'Application.Wait (Now + #12:00:08 AM#)
For Each ele In .Document.all
Select Case ele.className
Case "listing-img-a"
inurl = ele.href
rts.Cells(LastRow + 1, 5) = inurl
Case "listing-location"
LastRow = LastRow + 1
add = ele.innerText
rts.Cells(LastRow, 1) = add
Case "lst-sizes"
sp = Split(ele.innerText, " ·")
size = sp(0)
rts.Cells(LastRow, 2) = size
Case "pgicon pgicon-phone js-agent-phone-number" ' btn-tools" 'pgicon pgicon-phone js-agent-phone-number" 'agent-phone-number"
rts.Cells(LastRow, 3) = ele.innerText
Case "listing-price"
price = ele.innerText
rts.Cells(LastRow, 4) = price
End Select
Next
LastRow = rts.Cells(Rows.Count, 2).End(xlUp).Row
rts.Activate
rts.Range("A" & LastRow).Select
End With
IE.Quit
Set IE = Nothing
Application.Wait (Now + #12:00:04 AM#)
Next i
MsgBox "Success"
End Sub
执行此代码后,出现此错误。...
Error Message after code execution
在调试之后,我将该字段突出显示。 Debug Message
请检查并让我纠正出现错误的位置...此代码将在成功运行后提取数据,最后它将运行消息框,消息为“成功” ...
答案 0 :(得分:1)
您可以尝试以下使用CSS选择器的方法。
在父元素之前,"."
表示类," a"
表示a
标签。
示例:因此CSS模式.listing-info a
将是具有a
的父元素中的class = listing-info
标签。
querySelectorAll
将找到所有具有此CSS模式的匹配元素,并返回nodeList
。
Option Explicit
Public Sub GetListings()
Dim IE As New InternetExplorer
Application.ScreenUpdating = False
With IE
.Visible = True
.navigate "https://www.propertyguru.com.sg/singapore-property-listing/property-for-sale?limit=30&market=residential&property_type_code%5B%5D=4S&property_type=H&freetext=Yishun", False
While .Busy Or .readyState < 4: DoEvents: Wend
Dim addresses As Object, address As Object, sizes As Object, prices As Object, _
listingIds As Object, i As Long, urls As Object
With .document
Set addresses = .querySelectorAll(".listing-location")
Set listingIds = .querySelectorAll(".listing-item")
Set sizes = .querySelectorAll(".lst-sizes")
Set prices = .querySelectorAll(".price")
Set urls = .querySelectorAll(".listing-info a")
End With
Dim headers()
headers = Array("Address", "Size", "ListingId", "Price", "Url")
With ActiveSheet
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For i = 0 To addresses.Length - 1
.Cells(i + 2, 1) = addresses.item(i).innerText
.Cells(i + 2, 2) = Split(sizes.item(i).innerText, "S$")(0)
.Cells(i + 2, 3) = Split(Split(listingIds.item(i).outerHTML, "listing-id-")(1), Chr$(32))(0)
.Cells(i + 2, 4) = "S$" & prices.item(i).innerText
.Cells(i + 2, 5) = "https://www.propertyguru.com.sg/" & urls.item(i).getAttribute("href")
Next i
End With
.Quit
End With
Application.ScreenUpdating = True
End Sub
您可以使用一个函数以更可靠的方式获取页数。然后,您可以修改上面的代码以非常容易地从1循环到pgno。
Sub Main
Dim pgno As Long
'your other code
pgno = GetNumberOfPages(.document)
'other code
End Sub
Public Function GetNumberOfPages(ByVal doc As HTMLDocument) As Long
On Error GoTo errhand:
GetNumberOfPages = doc.querySelector(".listing-pagination li:nth-last-child(2)").innerText
Exit Function
errhand:
If Err.Number <> 0 Then GetNumberOfPages = 1
End Function
我会继续上面写的内容,并修改为一个循环,但这是我对您的代码的观察:
0)主除以0错误
您需要处理str2 = 0
的零除错误。例如:
您可以将pgno
声明为Variant
并拥有
If str2 = 0 Then
pgNo = CVErr(xlErrDiv0)
Else
pgno = WorksheetFunction.RoundUp(str / str2, 0)
End If
1)另外,请注意,当您在同一行上有多个声明并且仅声明一个的类型时,所有隐式未声明的类型都是变量。
例如
Dim add, size, cno, price, inurl, sp, sp1 As String
只有sp1
是一个字符串。其他都是变种。
如果所有字符串都声明为:
Dim add As String, size As String, cno As String, price As String, inurl As String, sp1 As String
我排除sp As String
是因为我认为应该是sp() As String
。
并且由于add
和size
是VBA中的方法,因此我会避免将它们用作变量名,而应使用iAdd
或iSize
,或者更具描述性和有用的,不能被认为是模棱两可的。
2)您也不必使用匈牙利/伪匈牙利表示法,例如str
。
3)使用Integer
而不是Long
4)使用Option Explicit
并检查您的数据类型。例如,如评论中所提到的,您的意思是说str1是在除法中使用的字符串吗?您要依靠隐式转换吗?别。声明为预期的类型。
例如:Dim str1() As String, str2 As String, pgno As Double
这还将突出显示您缺少变量声明,例如RowCount
。