我正在尝试将表格下载到Excel工作表中,然后循环到下一个表格。循环工作(虽然很慢)但我只是在页面顶部(前5行狗名教练)名称等)和主表没有出现。我也得到了Cookie消息。 欢迎任何建议:
Option Explicit
Sub Macro1()
Sheets("Sheet1").Select
Range("A1").Select
Dim i As Integer
Dim e As integer
Dim myurl As String, shorturl As String
Sheets("Sheet1").Select
i = 1
Do While i < 3
myurl = "URL;http://www.racingpost.com/greyhounds/dog_home.sd?dog_id=" & i & ""
With ActiveSheet.QueryTables.Add(Connection:=myurl, Destination:=Range("$A$1"))
.Name = shorturl
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingNone
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
Columns("A:J").Select
Selection.Copy
Range("K1").Select
Selection.PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
:=False, Transpose:=False
Columns("A:J").Select
Range("J1").Activate
Application.CutCopyMode = False
Selection.Delete Shift:=xlToLeft
Columns("A:J").Select
Selection.ColumnWidth = 20.01
Columns("B:B").Select
Selection.ColumnWidth = 20.01
Rows("1:9").Select
Selection.Insert Shift:=xlDown, CopyOrigin:=xlFormatFromLeftOrAbove
i = i + 1
Loop
End Sub
答案 0 :(得分:2)
在初始页面加载后,通过ajax
请求加载表数据。
如果您查看chrome中的页面并打开developer tools (F12) -> Network Tab
。您会看到以下url的其他请求:
http://www.racingpost.com/greyhounds/dog_form.sd?dog_id=
用于检索数据的方法很慢。加快速度的一种方法是通过xmlhttprequest
请求网址并解析您自己需要的相应数据。
以下是xmlhttprequest
的示例(请注意,返回的数据是您可以解析的源代码字符串):
Function XmlHttpRequest(url As String) As String
Dim xml As Object
Set xml = CreateObject("MSXML2.XMLHTTP")
xml.Open "GET", url, False
xml.send
XmlHttpRequest = xml.responseText
End Function
因此,通过此方法请求数据看起来像这样:
response = XmlHttpRequest("http://www.somesite.com")
这可能是我知道从网站检索数据的最快方法,因为它并不涉及实际渲染任何内容。
然后,要解析任何给定的数据,您需要查找源中一致的数据前面或后面的内容。 (通常div具有特定的类名或类似的东西)。通用解析可能如下所示:
loc1 = instr(response,"MyClassName")
loc1 = instr(loc1, response, ">") + 1 'the exact beginning of the data i'd like
loc2 = instr(loc1, response, "</td>")' the end of the data i'd like
data = trim(mid(response,loc1,loc2-loc1))
最后,您可以粘贴所有可以启动和运行的方法。我不确定你到底是哪些字段所以我只是从每个页面解析了一些例子:
Option Explicit
Sub GetTrackData()
Dim response As String
Dim dogHomeUrl As String
Dim dogFormUrl As String
Dim i As Integer
Dim x As Integer
Dim dogName As String
Dim dogDate As String
Dim trainer As String
Dim breeding As String
Dim loc1 As Long, loc2 As Long
dogHomeUrl = "http://www.racingpost.com/greyhounds/dog_home.sd?dog_id="
dogFormUrl = "http://www.racingpost.com/greyhounds/dog_form.sd?dog_id="
x = 2
For i = 1 To 10
response = XmlHttpRequest(dogHomeUrl & i)
Debug.Print (response)
'parse the overall info
'this is the basic of parsing the web page
'just find the start of the data you want with instr
'then find the end of the data with instr
'and use mid to pull out the data we want
'rinse and repeat this method for every line of data we'd like
loc1 = InStr(response, "popUpHead")
loc1 = InStr(loc1, response, "<h1>") + 4
loc2 = InStr(loc1, response, "</h1>")
dogName = Trim(Mid(response, loc1, loc2 - loc1))
'apparantly if dog name is blank there is data to report on the web site
If dogName <> "" Then
'now lets get the dogDate
loc1 = InStr(loc2, response, "<li>")
loc1 = InStr(loc1, response, "(") + 1
loc2 = InStr(loc1, response, ")")
dogDate = Trim(Mid(response, loc1, loc2 - loc1))
'now the trainer
loc1 = InStr(loc2, response, "<strong>Trainer</strong>") + 24
loc2 = InStr(loc1, response, "</li>")
trainer = Trim(Mid(response, loc1, loc2 - loc1))
response = XmlHttpRequest(dogFormUrl & i)
'now we need to loop through the form table and parse out the values we care about
loc1 = InStr(response, "Full Results")
Do While (loc1 <> 0)
Dim raceDate As String
Dim raceTrack As String
Dim dis As String
loc1 = InStr(loc1, response, ">") + 1
loc2 = InStr(loc1, response, "</a>")
raceDate = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<td>") + 4
loc2 = InStr(loc1, response, "</td>")
raceTrack = Trim(Mid(response, loc1, loc2 - loc1))
Range("A" & x).Value = dogName
Range("B" & x).Value = dogDate
Range("C" & x).Value = trainer
Range("D" & x).Value = raceDate
Range("E" & x).Value = raceTrack
loc1 = InStr(loc2, response, "Full Results")
x = x + 1
Loop
Debug.Print (response)
End If
'parse the form table
Next i
End Sub
Function XmlHttpRequest(url As String) As String
Dim xml As Object
Set xml = CreateObject("MSXML2.XMLHTTP")
xml.Open "GET", url, False
xml.send
XmlHttpRequest = xml.responseText
End Function
修改1
我们正在进行的数据是错误的,显然第一列并不总是一个链接。这是一个修改过的示例,其中包含更多字段的解析。如果您有任何问题,请与我联系:
Option Explicit
Sub GetTrackData()
Dim response As String
Dim dogHomeUrl As String
Dim dogFormUrl As String
Dim i As Integer
Dim x As Integer
Dim dogName As String
Dim dogDate As String
Dim trainer As String
Dim breeding As String
Dim loc1 As Long, loc2 As Long
Dim qt As String
qt = """"
dogHomeUrl = "http://www.racingpost.com/greyhounds/dog_home.sd?dog_id="
dogFormUrl = "http://www.racingpost.com/greyhounds/dog_form.sd?dog_id="
x = 2
For i = 1 To 10
response = XmlHttpRequest(dogHomeUrl & i)
Debug.Print (response)
'parse the overall info
'this is the basic of parsing the web page
'just find the start of the data you want with instr
'then find the end of the data with instr
'and use mid to pull out the data we want
'rinse and repeat this method for every line of data we'd like
loc1 = InStr(response, "popUpHead")
loc1 = InStr(loc1, response, "<h1>") + 4
loc2 = InStr(loc1, response, "</h1>")
dogName = Trim(Mid(response, loc1, loc2 - loc1))
'apparantly if dog name is blank there is data to report on the web site
If dogName <> "" Then
'now lets get the dogDate
loc1 = InStr(loc2, response, "<li>")
loc1 = InStr(loc1, response, "(") + 1
loc2 = InStr(loc1, response, ")")
dogDate = Trim(Mid(response, loc1, loc2 - loc1))
'now the trainer
loc1 = InStr(loc2, response, "<strong>Trainer</strong>") + 24
loc2 = InStr(loc1, response, "</li>")
trainer = Trim(Mid(response, loc1, loc2 - loc1))
response = XmlHttpRequest(dogFormUrl & i)
'now we need to loop through the form table and parse out the values we care about
loc1 = InStr(response, "<td class=" & qt & "first" & qt) + 17
Do While (loc1 > 17)
Dim raceDate As String
Dim raceTrack As String
Dim dis As String
Dim trp As String
Dim splt As String
Dim pos As String
Dim fin As String
Dim by As String
Dim winSec As String
Dim remarks As String
Dim time As String
Dim going As String
Dim price As String
Dim grd As String
Dim calc As String
loc1 = InStr(loc1, response, ">") + 1
loc2 = InStr(loc1, response, "</td>")
raceDate = Trim(Mid(response, loc1, loc2 - loc1))
If InStr(raceDate, "<a href") > 0 Then 'we have a link so parse out the date from the link
Dim tem1 As Long
Dim tem2 As Long
tem1 = InStr(raceDate, ">") + 1
tem2 = InStr(tem1, raceDate, "</a>")
raceDate = Trim(Mid(raceDate, tem1, tem2 - tem1))
End If
loc1 = InStr(loc2, response, "<td>") + 4
loc2 = InStr(loc1, response, "</td>")
raceTrack = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<td><span class=") + 16
loc1 = InStr(loc1, response, ">") + 1
loc2 = InStr(loc1, response, "</span>")
dis = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<td class=")
loc1 = InStr(loc1, response, ">") + 1
loc2 = InStr(loc1, response, "</td>")
trp = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<td>") + 4
loc2 = InStr(loc1, response, "</td>")
splt = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<td>") + 4
loc2 = InStr(loc1, response, "</td>")
pos = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<span class= " & qt & "black" & qt & ">") + 21
loc2 = InStr(loc1, response, "</span>")
fin = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<td>") + 4
loc2 = InStr(loc1, response, "</td>")
by = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<a href=") + 8
loc1 = InStr(loc1, response, ">") + 1
loc2 = InStr(loc1, response, "</a>")
winSec = Trim(Mid(response, loc1, loc2 - loc1))
'<td><i>
loc1 = InStr(loc2, response, "<td><i>") + 7
loc2 = InStr(loc1, response, "</i>")
remarks = Trim(Mid(response, loc1, loc2 - loc1))
'<span class="black">
loc1 = InStr(loc2, response, "<span class=" & qt & "black" & qt & ">") + 21
loc2 = InStr(loc1, response, "</span>")
time = Trim(Mid(response, loc1, loc2 - loc1))
'<td class="center">
loc1 = InStr(loc2, response, "<td class=" & qt & "center" & qt & ">") + 19
loc2 = InStr(loc1, response, "</td>")
going = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<td class=" & qt & "center" & qt & ">") + 19
loc2 = InStr(loc1, response, "</td>")
price = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<td class=" & qt & "center" & qt & ">") + 19
loc2 = InStr(loc1, response, "</td>")
grd = Trim(Mid(response, loc1, loc2 - loc1))
Range("A" & x).Value = dogName
Range("B" & x).Value = dogDate
Range("C" & x).Value = trainer
Range("D" & x).Value = raceDate
Range("E" & x).Value = raceTrack
Range("F" & x).Value = dis
Range("G" & x).Value = trp
Range("H" & x).Value = splt
Range("I" & x).Value = pos
Range("J" & x).Value = fin
Range("K" & x).Value = by
Range("L" & x).Value = winSec
Range("M" & x).Value = remarks
Range("N" & x).Value = time
Range("O" & x).Value = going
Range("P" & x).Value = price
Range("Q" & x).Value = grd
loc1 = InStr(loc2, response, "<td class=" & qt & "first" & qt) + 17
x = x + 1
Loop
Debug.Print (response)
End If
'parse the form table
Next i
End Sub
Function XmlHttpRequest(url As String) As String
Dim xml As Object
Set xml = CreateObject("MSXML2.XMLHTTP")
xml.Open "GET", url & "&cache_buster=" & GenerateRandom, False
xml.send
XmlHttpRequest = xml.responseText
End Function
Function GenerateRandom() As String
GenerateRandom = Int(Rnd * 1000)
End Function
答案 1 :(得分:1)
感谢那个伟大而详细的回复。我对此比较陌生,我一直在处理你的代码,我在从源代码解析时遇到了一些问题。我设法获得表字段的距离,位置,分裂时间和结束罚款,但其余我不能得到。我提取了每个项目的来源,这里是一个列表,其中相应的操作对于右边的那些: 最佳 狗的名字
date of birth an sexb
<li>
(3 Aug 2013)
bk d </li>
trainer
<li><strong>Trainer</strong> J B Thompson</li>
breeding
<li><strong>Breeding</strong> Head Bound — Inshaarla</li>
FORM
DATE
>18Mar15</a> </td> ' devweeks dev code works fine]
TRACK
<td>BVue</td> ' devweeks dev code works fine]
DIS
<td><span class="black">470m</span></td> [here I did +25 and - 7 and it works]
TRP
<td class="center">[2]</td>
SPLIT [+4 works with this]
<td>4.51</td>
POS
<td>5555</td> [+4 works with this]
FIN
<td><span class="black">5th</span></td> [ 25 - 7 works with this]
BY
<td>12</td>
WIN/SEC
">Ballymac Fleetie</a> </td>
REMARKS
<td><i>Crd&CkdW&StruckInto1/4</i></td>
TIME
<td><span class="black">28.67</span></td>
GOING
<td class="center"> N</td>
PRICE
<td class="center">4/1</td>
GRADE
<td class="center">A5</td>
CALC
<td class="last right"><span>29.63</span></td>
</tr>
对于其余的字段,我遇到不同的响应。例如字段POS(位置)和SPLIT的代码是+ 4.但是当我将它应用于raceBY时,它是与POS和SPLIT相同的源我得到了以下结果... td>13¾。 还有其他领域我得到...(href =“http://www.racingpost.com/greyhounds/dog_home.sd?dog_id=72970”onclick =“return Html.popup(this,{width:800,height: 480})“title =”点击狗形态...“&gt;风暴力...... 请看我的代码。 我已尝试过所有其他领域,并获得上述两条消息的变体。我知道我必须在这里做错事,并且非常感谢任何建议。
Option Explicit
Sub GetTrackData()
Dim response As String
Dim dogHomeUrl As String
Dim dogFormUrl As String
Dim i As Long
Dim x As Long
Dim dogName As String
Dim dogDate As String
Dim trainer As String
Dim breeding As String
Dim loc1 As Long, loc2 As Long
dogHomeUrl = "http://www.racingpost.com/greyhounds/dog_home.sd?dog_id="
dogFormUrl = "http://www.racingpost.com/greyhounds/dog_form.sd?dog_id="
x = 2
For i = 1 To 10
response = XmlHttpRequest(dogHomeUrl & i)
Debug.Print (response)
'parse the overall info
'this is the basic of parsing the web page
'just find the start of the data you want with instr
'then find the end of the data with instr
'and use mid to pull out the data we want
'rinse and repeat this method for every line of data we'd like
loc1 = InStr(response, "popUpHead")
loc1 = InStr(loc1, response, "<h1>") + 4
loc2 = InStr(loc1, response, "</h1>")
dogName = Trim(Mid(response, loc1, loc2 - loc1))
'apparantly if dog name is blank there is data to report on the web site
If dogName <> "" Then
'now lets get the dogDate
loc1 = InStr(loc2, response, "<li>")
loc1 = InStr(loc1, response, "(") + 1
loc2 = InStr(loc1, response, ")")
dogDate = Trim(Mid(response, loc1, loc2 - loc1))
'now the trainer
loc1 = InStr(loc2, response, "<strong>Trainer</strong>") + 24
loc2 = InStr(loc1, response, "</li>")
trainer = Trim(Mid(response, loc1, loc2 - loc1))
response = XmlHttpRequest(dogFormUrl & i)
'now we need to loop through the form table and parse out the values we care about
loc1 = InStr(response, "Full Results")
Do While (loc1 <> 0)
Dim raceDate As String
Dim raceTrack As String
Dim raceDis As String
Dim racePos As String
Dim raceSplit As String
Dim raceFin As String
Dim raceBy As String
Dim raceTrp As String
Dim raceRemarks As String
Dim raceWinSec As String
Dim raceTime As String
Dim raceGoing As String
Dim racePrice As String
Dim raceGrd As String
Dim raceCalc As String
' Dim raceBy As String
'Dim raceBy As String
'Dim raceWinSec As String
loc1 = InStr(loc1, response, ">") + 1
loc2 = InStr(loc1, response, "</a>")
raceDate = Trim(Mid(response, loc1, loc2 - loc1)) ' weeksdevcode works fine
loc1 = InStr(loc2, response, "<td>") + 4
loc2 = InStr(loc1, response, "</td>")
raceTrack = Trim(Mid(response, loc1, loc2 - loc1)) 'weeksdevcode works fine
'ABOVE TWO WEEKSDEV CODE
'..............................................................
'BELOW ARE MINE
loc1 = InStr(loc2, response, "<td>") + 25 ' column F = DISTANCE/works
loc2 = InStr(loc1, response, "</td>") - 7
raceDis = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<td>") + 4 'column G = POSITION/works
loc2 = InStr(loc1, response, "</td>")
racePos = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<td>") + 4 'column H = SPLIT/works
loc2 = InStr(loc1, response, "</td>")
raceSplit = Trim(Mid(response, loc1, loc2 - loc1))
loc1 = InStr(loc2, response, "<td>") + 25 'column I =FINISH/works
loc2 = InStr(loc1, response, "</td>") - 7
raceFin = Trim(Mid(response, loc1, loc2 - loc1))
'BELOW IS THE CODE FOR raceBy and raceRemarks
'and by the side is the results.
' loc1 = InStr(loc2, response, "<td>") + 4 '( here I get..13¾ and similar;)
' loc2 = InStr(loc1, response, "</td>")
' raceBy = Trim(Mid(response, loc1, loc2 - loc1))
' loc1 = InStr(loc2, response, "<td>") + 7 '(Here I Geta href="http://www.racingpost.com/greyhounds/dog_home.sd?dog_id=72970" onclick="return Html.popup(this, {width:800, height:480})" title="Click for Dog Form...">STORM FORCE</a>
'loc2 = InStr(loc1, response, "</td>")
' raceRemarks = Trim(Mid(response, loc1, loc2 - loc1))
' BELOW ARE 3 variations I tried on raceTrp
'loc1 = InStr(loc2, response, "<td>") + 18 'Here I Get.. <a href=""http://www.racingpost.com/greyhounds/dog_home.sd?dog_id=72970"" onclick=""return Html.popup(this, {width:800, height:480})"" title=""Click for Dog Form..."">STORM FORCE</a>"
'loc2 = InStr(loc1, response, "</td>")
'raceTrp = Trim(Mid(response, loc1, loc2 - loc1))
' loc1 = InStr(loc2, response, "<td>") + 18 ' Here I Get.. <a href=""http://www.racingpost.com/greyhounds/dog_home.sd?dog_id=72970"" onclick=""return Html.popup(this, {width:800, height:480})"" title=""Click for Dog Form..."">STORM FORCE</a>"
'loc2 = InStr(loc1, response, "</td>") - 6
'raceTrp = Trim(Mid(response, loc1, loc2 - loc1))
' loc1 = InStr(loc2, response, "class=center") 'same as above
' loc1 = InStr(loc2, response, ">") + 19
'loc2 = InStr(loc1, response, "</td>")
' raceTrp = Trim(Mid(response, loc1, loc2 - loc1))
Range("A" & x).Value = dogName
Range("B" & x).Value = dogDate
Range("C" & x).Value = trainer
Range("D" & x).Value = raceDate
Range("E" & x).Value = raceTrack
' above weeksdev all works fine
Range("F" & x).Value = raceDis
Range("G" & x).Value = racePos
Range("H" & x).Value = raceSplit
Range("I" & x).Value = raceFin
Range("J" & x).Value = raceBy
Range("K" & x).Value = raceTrp
Range("L" & x).Value = raceRemarks
Range("M" & x).Value = raceWinSec
Range("N" & x).Value = raceTime
Range("O" & x).Value = raceGoing
Range("M" & x).Value = racePrice
Range("N" & x).Value = raceGrd
Range("O" & x).Value = raceCalc
loc1 = InStr(loc2, response, "Full Results")
x = x + 1
Loop
Debug.Print (response)
End If
'parse the form table
Next i
End Sub
Function XmlHttpRequest(url As String) As String
Dim xml As Object
Set xml = CreateObject("MSXML2.XMLHTTP")
xml.Open "GET", url, False
xml.send
XmlHttpRequest = xml.responseText
End Function
亲切的问候 山口