如何使用vba立即抓取没有加载内容的网页?

时间:2016-06-15 20:24:14

标签: vba excel-vba excel

我用excel制作了一个vba宏来从网页(www.bfound.io)中删除公司信息。一切都很好,除了我还没有找到一种方法来加载整个页面与所有公司可见。问题在于,此页面无法正确加载所有信息。它只在您向下滚动或按下按钮时加载它。如果你按下这个按钮,它将加载100多家公司到现场。

我试图谷歌这件事并没有找到任何有效的东西,甚至没有滚动这个页面。有没有办法绕过这个问题,或者该网站是否不可能有效地刮掉?我对编程和vba非常陌生,所以非常感谢帮助。谢谢你提前!

编辑:这是我的代码。它的编码可能非常糟糕,但它正在做得很好。如果您想知道我在加载首页时手动点击右侧搜索页面上的浏览器。

Sub Test()
Dim i As Long
Dim tagPosition As Integer
Dim browserIE As Object
Dim ws As Worksheet
Dim companyNameLocation As Integer
Dim emailLocation As Integer
Dim phoneNumberLocation As Integer
Dim webSiteLocation As Integer
Dim kategoryLocation As Integer
Dim companyName As String
Dim Email As String
Dim phoneNumber As String
Dim webSite As String
Dim kategory As String



Set browserIE = CreateObject("InternetExplorer.Application")
browserIE.Top = 0
browserIE.Left = 800
browserIE.Width = 800
browserIE.Height = 1200
browserIE.Visible = True

Set ws = ThisWorkbook.Worksheets("Taul1")
i = 2
companyNameLocation = 2
emailLocation = 1
phoneNumberLocation = 5
webSiteLocation = 4
kategoryLocation = 3


browserIE.navigate ("http://www.bfound.io/")
Do While browserIE.readyState <> 4 And browserIE.Busy: DoEvents: Loop
Application.Wait (Now + TimeValue("0:00:03"))

Do While i <= 1000
    Debug.Print "TÄÄ ON i: " & i
     'browserIE.document.getElementById("companysearchform_query_companySearchTypecompanycode").Click
    Do While browserIE.readyState <> 4 And browserIE.Busy: DoEvents: Loop Application.Wait (Now + TimeValue("0:00:03"))


 companyName = browserIE.document.body.getElementsByClassName("bf-company-name")((i - 1)).getElementsByTagName("h2")(0).innerText
 Debug.Print companyName
 Cells(i, companyNameLocation).Value = companyName
 browserIE.document.body.getElementsByClassName("bf-company-name")((i - 1)).getElementsByTagName("a")(0).Click
 Do While browserIE.readyState <> 4 And browserIE.Busy: DoEvents: Loop
 Application.Wait (Now + TimeValue("0:00:2"))

 tagPosition = 0
 Do While tagPosition <= 40
 On Error Resume Next
    If InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, "@") <> 0 Then
    Email = browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText
        Debug.Print Email
        Cells(i, emailLocation).Value = Email
        Exit Do
    End If
    tagPosition = tagPosition + 1
    Loop
    On Error GoTo 0

tagPosition = 0
Do While tagPosition <= 40
On Error Resume Next
  If (InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, "www.") <> 0 Or InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, ".com") <> 0 Or InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, ".net") <> 0) And InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, "@") = 0 Then
      webSite = browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText
      Debug.Print webSite
      Cells(i, nettiSivuLocation).Value = webSite
      Exit Do
    End If
    tagPosition = tagPosition + 1
    Loop
    On Error GoTo 0

tagPosition = 0
Do While tagPosition <= 40
On Error Resume Next
    If InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, "+") <> 0 Or InStr(browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText, "0") <> 0 Then
        phoneNumber = browserIE.document.body.getElementsByClassName("info-box contact")(0).getElementsByTagName("a")(tagPosition).innerText
        Debug.Print phoneNumber
        Cells(i, puhelinNumeroLocation).Value = phoneNumber
        Exit Do
    End If
    tagPosition = tagPosition + 1
    Loop
    On Error GoTo 0

tagPosition = 0
Do While tagPosition <= 40
On Error Resume Next
    If InStr(browserIE.document.body.getElementsByClassName("info-box info")(0).getElementsByTagName("h4")(tagPosition).innerText, "Category") <> 0 Then
        kategory = browserIE.document.body.getElementsByClassName("info-box info")(0).getElementsByTagName("p")(tagPosition).innerText
        Debug.Print kategory
        Cells(i, kategoriaLocation).Value = kategory
        Exit Do
    End If
    tagPosition = tagPosition + 1
    Loop
    On Error Resume Next

browserIE.document.body.getElementsByClassName("bf-back-to-search-results")(0).getElementsByTagName("button")(0).Click
i = i + 1
Do While browserIE.readyState <> 4 And browserIE.Busy: DoEvents: Loop
Application.Wait (Now + TimeValue("0:00:2"))

Loop

End Sub

0 个答案:

没有答案