Is it possible to make a crawler recursive in VBA? I tried the code below, but as soon as execution reaches the line inside the dashed markers it throws the error "Wrong number of arguments or invalid property assignment". I am not a VBA expert, so I could not get it working, but I suspect there must be some way to do it.
Sub NEWAPPS()
    Dim http As New MSXML2.XMLHTTP60, html As New HTMLDocument
    Dim Items As Object, Item As Object, Newitem As Object, elem As Object
    Dim Z As String
    With http
        .Open "GET", "https://itunes.apple.com/us/app/candy-crush-saga/id553834731?mt=8", False
        .send
        html.body.innerHTML = .responseText
    End With
    Set Items = html.getElementsByClassName("left")
    Set Newitem = html.getElementsByClassName("name")
    For Each Item In Items
        x = x + 1
        If Item.getElementsByTagName("h1").Length Then _
            Cells(x, 1) = Item.getElementsByTagName("h1")(0).innerText
        If Item.getElementsByTagName("h2").Length Then _
            Cells(x, 2) = Item.getElementsByTagName("h2")(0).innerText
    Next Item
    For Each elem In Newitem
        Z = elem.href
        '---------------------
        NEWAPPS (Z)
        '---------------------
    Next elem
End Sub
Answer 0 (score: 1)
You can create a recursive sub and call it from another sub. The "Wrong number of arguments or invalid property assignment" error appears because your Sub NEWAPPS() is declared with no parameter, yet you call it as NEWAPPS (Z); giving the sub a String parameter fixes that. Be aware, though, that you are crawling the iTunes app pages, which is a huge source, so it will take a very long time.
To skip visiting the same URLs and avoid an endless loop, I used a Dictionary; the values you are looking for are also written to Excel cells.
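One Scripting.Dictionary detail worth knowing here: assigning to dict(key) for a key that does not yet exist adds the key implicitly, so no explicit .Add call is needed. A tiny standalone sketch of that behaviour (DictDemo is just an illustrative name):
Sub DictDemo()
    Dim d As Object
    Set d = CreateObject("Scripting.Dictionary")
    Debug.Print d.Exists("a")          'False - the key is not there yet
    d("a") = "first value"             'assigning to a missing key adds it implicitly
    Debug.Print d.Exists("a")          'True
    d("a") = "second value"            'assigning again simply overwrites the value
End Sub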
Here is working code to get you started. You will probably want to change it depending on how or when you want the crawl to stop; one possible way of doing that is sketched after the code.
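'Requires references to "Microsoft XML, v6.0" and "Microsoft HTML Object Library" (Tools > References) for the early-bound types below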
Public dict As Object
Sub NEWAPPS(Z As String)
    Dim http As New MSXML2.XMLHTTP60, html As New HTMLDocument
    Dim Items As Object, Item As Object, Newitem As Object, elem As Object
    With http
        .Open "GET", Z, False
        .send
        html.body.innerHTML = .responseText
    End With
    Set Newitem = html.getElementsByClassName("name")
    Set Items = html.getElementsByClassName("left")
    If Not dict.Exists(Z) Then
        dict(Z) = Items(1).innerText 'key is url and value is app name and developer
        Cells(Rows.Count, 1).End(xlUp).Offset(1, 0) = Z 'url
        Cells(Rows.Count, 2).End(xlUp).Offset(1, 0) = Split(Items(1).innerText, vbLf)(0) 'app name
        Cells(Rows.Count, 3).End(xlUp).Offset(1, 0) = Split(Items(1).innerText, vbLf)(1) 'developer
    End If
    For Each elem In Newitem
        If Not dict.Exists(elem.href) Then 'skip visiting same urls and avoid vicious circle
            NEWAPPS (elem.href)
        End If
    Next elem
End Sub
Sub RecursiveCrawler()
    Set dict = CreateObject("Scripting.Dictionary")
    NEWAPPS ("https://itunes.apple.com/us/app/toy-blast/id890378044?mt=8")
    '### You can get the stored keys and values once the scraping is finished. If it ever finishes :) ###
    'Dim key As Variant
    'For Each key In dict.Keys
    '    Debug.Print key 'url
    '    Debug.Print Split(dict(key), vbLf)(0) 'app name
    '    Debug.Print Split(dict(key), vbLf)(1) 'developer
    'Next key
End Sub
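One possible way to cap the crawl, as mentioned above, is to pass a depth counter through the recursion and stop descending once a limit is reached. A minimal sketch of that idea (NEWAPPSLimited, Depth and MaxDepth are illustrative names; it reuses the Public dict declared above):
Sub NEWAPPSLimited(Z As String, Optional Depth As Long = 0)
    Const MaxDepth As Long = 2                'assumed cut-off; raise or lower it as needed
    If Depth > MaxDepth Then Exit Sub         'stop recursing once the depth limit is reached
    Dim http As New MSXML2.XMLHTTP60, html As New HTMLDocument
    Dim Items As Object, elem As Object
    With http
        .Open "GET", Z, False
        .send
        html.body.innerHTML = .responseText
    End With
    Set Items = html.getElementsByClassName("left")
    If Not dict.Exists(Z) Then dict(Z) = Items(1).innerText 'record each page only once
    For Each elem In html.getElementsByClassName("name")
        If Not dict.Exists(elem.href) Then NEWAPPSLimited elem.href, Depth + 1
    Next elem
End Sub
'After Set dict = CreateObject("Scripting.Dictionary"), kick it off the same way, e.g.:
'NEWAPPSLimited "https://itunes.apple.com/us/app/toy-blast/id890378044?mt=8"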