我使用以下代码从体育网站中提取数据。我的问题是我无法在此网站上找到弹出窗口的URL - 因此,我不确定如何从此窗口中提取数据。单击播放器名称旁边的蓝色图标可以访问弹出窗口,我需要的数据位于弹出窗口的第二个选项卡上。
Sub Extract_goals()
Dim url As String, links_count As Integer
Dim i As Integer, j As Integer, row As Integer
Dim XMLHTTP As Object, html As Object
Dim tr_coll As Object, tr As Object
Dim td_coll As Object, td As Object
links_count = 40
For i = 1 To links_count
url = "http://fantasy.premierleague.com/stats/elements/?stat_filter=goals_scored&element_filter=0&page=" & i & ""
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.ResponseText
Set tbl = html.getelementsbytagname("Table")
Set tr_coll = tbl(0).getelementsbytagname("TR")
For Each tr In tr_coll
j = 1
Set td_col = tr.getelementsbytagname("TD")
For Each td In td_col
Cells(row + 1, j).Value = td.innerText
j = j + 1
Next
row = row + 1
Next
Next
End Sub
对此有任何帮助表示赞赏。
谢谢, 沙希德
答案 0 :(得分:0)
我使用了另一种方法来实现这一点(通过创建Internet Explorer
对象),因为我无法使用MSXML2.XMLHTTP
对象以完全相同的方式工作。
我找到了弹出窗口的URL,但尚未发现如何从该窗口中提取数据。如果我有更多的时间,我会玩更多,但也许这会让你超越驼峰,你可以弄清楚其余的。
Sub Extract_goals2()
Dim ie As Object
Dim doc As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = True
links_count = 40
For i = 1 To links_count
.navigate "http://fantasy.premierleague.com/stats/elements/?stat_filter=goals_scored&element_filter=0&page=" & i & ""
Do
DoEvents
Loop Until Not .busy Or .readyState <> 4
Set doc = .document
Dim tbl As Object
Set tbl = doc.getelementsbytagname("Table")
Dim tr_coll As Object
Set tr_coll = tbl(0).getelementsbytagname("TR")
For Each tr In tr_coll
j = 1
Set td_col = tr.getelementsbytagname("TD")
For Each td In td_col
If j = 2 Then 'only do this on 2nd table column
Set td_a = td.getelementsbytagname("a")
Debug.Print td_a(o).href 'this will provide the exact URL
td_a(o).Click 'this will actually open the pop-up box
'my thoughts were then to work with the elements in this URL to extract what you need
Else
Cells(row + 1, j).Value = td.innerText
End If
j = j + 1
Next
row = row + 1
Next
Next
End With
End Sub