无法弄清楚如何从我的代码中使用的页面获取所有公司链接。运行我的脚本我只得到20个链接。页面有懒惰加载方法,这就是为什么我无法获得所有这些方法。对此的任何意见将受到高度赞赏。我到目前为止尝试过:
Sub Company_links()
Const lnk = "http://fortune.com"
Dim http As New XMLHTTP60, html As New HTMLDocument
Dim topic As Object
With http
.Open "GET", "http://fortune.com/fortune500/list/", False
.send
html.body.innerHTML = .responseText
End With
For Each topic In html.getElementsByClassName("small-12 column row")
x = x + 1
With topic.getElementsByTagName("a")
If .Length Then Cells(x, 1) = lnk & Split(.item(0).href, "about:")(1)
End With
Next topic
Set html = Nothing: Set topics = Nothing
End Sub
答案 0 :(得分:1)
在新工作簿中运行以下代码。它会将结果输出到Sheet1而不管它们是否为空,所以如果你有数据则要小心。您可以稍后根据需要更改此部分代码。
首先,您需要在VBA编辑器中从Microsoft HTML Object Library
激活Microsoft Internet Controls
和Tools -> References
。然后运行以下代码,高枕无忧,直到看到“All Done!”消息:
Sub Company_links()
Dim i As Long
Dim aIE As InternetExplorer
Dim Rank As IHTMLElement, Company As IHTMLElement, Revenues As IHTMLElement
Set aIE = New InternetExplorer
With aIE
.navigate "http://fortune.com/fortune500/list/"
.Visible = True
End With
Do While (aIE.Busy Or aIE.ReadyState <> READYSTATE_COMPLETE)
DoEvents
Loop
For i = 1 To 50
On Error Resume Next
Set Rank = aIE.document.getElementsByClassName("column small-2 company-rank")(999)
If Rank Is Nothing Then
GoTo Skip
End If
Exit For
Skip:
SendKeys "{end}"
Application.Wait (Now() + TimeValue("00:00:005"))
Next i
With Sheet1
.Range("A1") = "RANK"
.Range("B1") = "COMPANY"
.Range("C1") = "REVENUE"
For i = 0 To 999
Set Rank = aIE.document.getElementsByClassName("column small-2 company-rank")(i)
Set Company = aIE.document.getElementsByClassName("column small-5 company-title")(i)
Set Revenues = aIE.document.getElementsByClassName("column small-5 company-revenue")(i)
.Range("A" & i + 2) = Rank.innerText
.Range("B" & i + 2) = Company.innerText
.Range("C" & i + 2) = Revenues.innerText
Next i
End With
SendKeys "%{F4}"
Set aIE = Nothing
Set Rank = Nothing
Set Company = Nothing
Set Revenues= Nothing
MsgBox "All Done!"
End Sub
答案 1 :(得分:0)
如果该网站使用ajax加载其余链接。您需要先使页面加载剩余的链接。我的建议是使用selenium加载页面,然后使用你的代码来获取链接。
答案 2 :(得分:0)
我会这样做。
Option Explicit
Sub Sample()
Dim ie As Object
Dim links As Variant, lnk As Variant
Dim rowcount As Long
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
ie.navigate "http://fortune.com"
'Wait for site to fully load
'ie.Navigate2 URL
Do While ie.Busy = True
DoEvents
Loop
Set links = ie.document.getElementsByTagName("a")
rowcount = 1
With Sheets("Sheet1")
For Each lnk In links
'Debug.Print lnk.innerText
'If lnk.classname Like "*Real Statistics Examples Part 1*" Then
.Range("A" & rowcount) = lnk.innerText
rowcount = rowcount + 1
'Exit For
'End If
Next
End With
End Sub