我用excel制作了一个vba宏来从网页(www.bfound.io)中删除公司信息。一切都很好,除了我还没有找到一种方法来加载整个页面与所有公司可见。问题在于,此页面无法正确加载所有信息。它只在您向下滚动或按下按钮时加载它。如果你按下这个按钮,它将加载100多家公司到现场。
我试图谷歌这件事并没有找到任何有效的东西,甚至没有滚动这个页面。有没有办法绕过这个问题,或者该网站是否不可能有效地刮掉?我对编程和vba非常陌生,所以非常感谢帮助。谢谢你提前!
编辑:这是我的代码。它的编码可能非常糟糕,但它正在做得很好。如果您想知道我在加载首页时手动点击右侧搜索页面上的浏览器。
Sub Test()
Dim i As Long
Dim tagPosition As Integer
Dim browserIE As Object
Dim ws As Worksheet
Dim companyNameLocation As Integer
Dim emailLocation As Integer
Dim phoneNumberLocation As Integer
Dim webSiteLocation As Integer
Dim kategoryLocation As Integer
Dim companyName As String
Dim Email As String
Dim phoneNumber As String
Dim webSite As String
Dim kategory As String
Set browserIE = CreateObject("InternetExplorer.Application")
browserIE.Top = 0
browserIE.Left = 800
browserIE.Width = 800
browserIE.Height = 1200
browserIE.Visible = True
Set ws = ThisWorkbook.Worksheets("Taul1")
i = 2
companyNameLocation = 2
emailLocation = 1
phoneNumberLocation = 5
webSiteLocation = 4
kategoryLocation = 3
browserIE.navigate ("http://www.bfound.io/")
Do While browserIE.readyState <> 4 And browserIE.Busy: DoEvents: Loop
Application.Wait (Now + TimeValue("0:00:03"))
Do While i <= 1000
Debug.Print "TÄÄ ON i: " & i
'browserIE.document.getElementById("companysearchform_query_companySearchTypecompanycode").Click
Do While browserIE.readyState <> 4 And browserIE.Busy: DoEvents: Loop Application.Wait (Now + TimeValue("0:00:03"))
companyName = browserIE.document.body.getElementsByClassName("bf-company-name")((i - 1)).getElementsByTagName("h2")(0).innerText
Debug.Print companyName
Cells(i, companyNameLocation).Value = companyName
browserIE.document.body.getElementsByClassName("bf-company-name")((i - 1)).getElementsByTagName("a")(0).Click
Do While browserIE.readyState <> 4 And browserIE.Busy: DoEvents: Loop
Application.Wait (Now + TimeValue("0:00:2"))
tagPosition = 0
Do While tagPosition <= 40
On Error Resume Next
If InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, "@") <> 0 Then
Email = browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText
Debug.Print Email
Cells(i, emailLocation).Value = Email
Exit Do
End If
tagPosition = tagPosition + 1
Loop
On Error GoTo 0
tagPosition = 0
Do While tagPosition <= 40
On Error Resume Next
If (InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, "www.") <> 0 Or InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, ".com") <> 0 Or InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, ".net") <> 0) And InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, "@") = 0 Then
webSite = browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText
Debug.Print webSite
Cells(i, nettiSivuLocation).Value = webSite
Exit Do
End If
tagPosition = tagPosition + 1
Loop
On Error GoTo 0
tagPosition = 0
Do While tagPosition <= 40
On Error Resume Next
If InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, "+") <> 0 Or InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, "0") <> 0 Then
phoneNumber = browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText
Debug.Print phoneNumber
Cells(i, puhelinNumeroLocation).Value = phoneNumber
Exit Do
End If
tagPosition = tagPosition + 1
Loop
On Error GoTo 0
tagPosition = 0
Do While tagPosition <= 40
On Error Resume Next
If InStr(browserIE.document.body.getElementsByClassName("info-box info")(0).getElementsByTagName("h4")(tagPosition).innerText, "Category") <> 0 Then
kategory = browserIE.document.body.getElementsByClassName("info-box info")(0).getElementsByTagName("p")(tagPosition).innerText
Debug.Print kategory
Cells(i, kategoriaLocation).Value = kategory
Exit Do
End If
tagPosition = tagPosition + 1
Loop
On Error Resume Next
browserIE.document.body.getElementsByClassName("bf-back-to-search-results")(0).getElementsByTagName("button")(0).Click
i = i + 1
Do While browserIE.readyState <> 4 And browserIE.Busy: DoEvents: Loop
Application.Wait (Now + TimeValue("0:00:2"))
Loop
End Sub