VBA Web数据未显示整个表

时间:2015-03-28 18:19:17

标签: excel vba

我正在尝试将表格下载到Excel工作表中,然后循环到下一个表格。循环工作(虽然很慢)但我只是在页面顶部(前5行狗名教练)名称等)和主表没有出现。我也得到了Cookie消息。 欢迎任何建议:

Option Explicit

Sub Macro1()


 Sheets("Sheet1").Select
  Range("A1").Select

    Dim i As Integer
    Dim e As integer
    Dim myurl As String, shorturl As String
    Sheets("Sheet1").Select

i = 1
Do While i < 3


 myurl = "URL;http://www.racingpost.com/greyhounds/dog_home.sd?dog_id=" & i & ""


  With ActiveSheet.QueryTables.Add(Connection:=myurl,   Destination:=Range("$A$1"))


  .Name = shorturl
  .FieldNames = True
  .RowNumbers = False
  .FillAdjacentFormulas = False
  .PreserveFormatting = True
  .RefreshOnFileOpen = False
  .BackgroundQuery = True
  .RefreshStyle = xlInsertDeleteCells
  .SavePassword = False
  .SaveData = True
  .AdjustColumnWidth = True
  .RefreshPeriod = 0
  .WebSelectionType = xlEntirePage
  .WebFormatting = xlWebFormattingNone
  .WebPreFormattedTextToColumns = True
  .WebConsecutiveDelimitersAsOne = True
  .WebSingleBlockTextImport = False
  .WebDisableDateRecognition = False
  .WebDisableRedirections = False
  .Refresh BackgroundQuery:=False
  .WebDisableDateRecognition = False
  .WebDisableRedirections = False
  .Refresh BackgroundQuery:=False
  End With

  Columns("A:J").Select
  Selection.Copy
  Range("K1").Select
  Selection.PasteSpecial Paste:=xlPasteValues, Operation:=xlNone,   SkipBlanks _
  :=False, Transpose:=False
  Columns("A:J").Select
  Range("J1").Activate
  Application.CutCopyMode = False
  Selection.Delete Shift:=xlToLeft
  Columns("A:J").Select
  Selection.ColumnWidth = 20.01
  Columns("B:B").Select
  Selection.ColumnWidth = 20.01
  Rows("1:9").Select
  Selection.Insert Shift:=xlDown, CopyOrigin:=xlFormatFromLeftOrAbove




  i = i + 1

 Loop

End Sub

2 个答案:

答案 0 :(得分:2)

在初始页面加载后,通过ajax请求加载表数据。

如果您查看chrome中的页面并打开developer tools (F12) -> Network Tab。您会看到以下url的其他请求: http://www.racingpost.com/greyhounds/dog_form.sd?dog_id=

用于检索数据的方法很慢。加快速度的一种方法是通过xmlhttprequest请求网址并解析您自己需要的相应数据。

以下是xmlhttprequest的示例(请注意,返回的数据是您可以解析的源代码字符串):

Function XmlHttpRequest(url As String) As String
    Dim xml As Object
    Set xml = CreateObject("MSXML2.XMLHTTP")
    xml.Open "GET", url, False
    xml.send
    XmlHttpRequest = xml.responseText
End Function

因此,通过此方法请求数据看起来像这样:

response = XmlHttpRequest("http://www.somesite.com")

这可能是我知道从网站检索数据的最快方法,因为它并不涉及实际渲染任何内容。

然后,要解析任何给定的数据,您需要查找源中一致的数据前面或后面的内容。 (通常div具有特定的类名或类似的东西)。通用解析可能如下所示:

loc1 = instr(response,"MyClassName")
loc1 = instr(loc1, response, ">") + 1 'the exact beginning of the data i'd like
loc2 = instr(loc1, response, "</td>")' the end of the data i'd like
data = trim(mid(response,loc1,loc2-loc1))

最后,您可以粘贴所有可以启动和运行的方法。我不确定你到底是哪些字段所以我只是从每个页面解析了一些例子:

Option Explicit
Sub GetTrackData()
    Dim response As String
    Dim dogHomeUrl As String
    Dim dogFormUrl As String
    Dim i As Integer
    Dim x As Integer
    Dim dogName As String
    Dim dogDate As String
    Dim trainer As String
    Dim breeding As String
    Dim loc1 As Long, loc2 As Long

    dogHomeUrl = "http://www.racingpost.com/greyhounds/dog_home.sd?dog_id="
    dogFormUrl = "http://www.racingpost.com/greyhounds/dog_form.sd?dog_id="
    x = 2
    For i = 1 To 10
        response = XmlHttpRequest(dogHomeUrl & i)
        Debug.Print (response)
        'parse the overall info

        'this is the basic of parsing the web page
        'just find the start of the data you want with instr
        'then find the end of the data with instr
        'and use mid to pull out the data we want
        'rinse and repeat this method for every line of data we'd like
        loc1 = InStr(response, "popUpHead")
        loc1 = InStr(loc1, response, "<h1>") + 4
        loc2 = InStr(loc1, response, "</h1>")
        dogName = Trim(Mid(response, loc1, loc2 - loc1))
        'apparantly if dog name is blank there is data to report on the web site
        If dogName <> "" Then
            'now lets get the dogDate
            loc1 = InStr(loc2, response, "<li>")
            loc1 = InStr(loc1, response, "(") + 1
            loc2 = InStr(loc1, response, ")")
            dogDate = Trim(Mid(response, loc1, loc2 - loc1))
            'now the trainer
            loc1 = InStr(loc2, response, "<strong>Trainer</strong>") + 24
            loc2 = InStr(loc1, response, "</li>")
            trainer = Trim(Mid(response, loc1, loc2 - loc1))

            response = XmlHttpRequest(dogFormUrl & i)
            'now we need to loop through the form table and parse out the values we care about
            loc1 = InStr(response, "Full Results")
            Do While (loc1 <> 0)
                Dim raceDate As String
                Dim raceTrack As String
                Dim dis As String

                loc1 = InStr(loc1, response, ">") + 1
                loc2 = InStr(loc1, response, "</a>")
                raceDate = Trim(Mid(response, loc1, loc2 - loc1))
                loc1 = InStr(loc2, response, "<td>") + 4
                loc2 = InStr(loc1, response, "</td>")
                raceTrack = Trim(Mid(response, loc1, loc2 - loc1))

                Range("A" & x).Value = dogName
                Range("B" & x).Value = dogDate
                Range("C" & x).Value = trainer
                Range("D" & x).Value = raceDate
                Range("E" & x).Value = raceTrack

                loc1 = InStr(loc2, response, "Full Results")
                x = x + 1
            Loop
            Debug.Print (response)
        End If
        'parse the form table

    Next i
End Sub
Function XmlHttpRequest(url As String) As String
    Dim xml As Object
    Set xml = CreateObject("MSXML2.XMLHTTP")
    xml.Open "GET", url, False
    xml.send
    XmlHttpRequest = xml.responseText
End Function

修改1

我们正在进行的数据是错误的,显然第一列并不总是一个链接。这是一个修改过的示例,其中包含更多字段的解析。如果您有任何问题,请与我联系:

Option Explicit
Sub GetTrackData()
    Dim response As String
    Dim dogHomeUrl As String
    Dim dogFormUrl As String
    Dim i As Integer
    Dim x As Integer
    Dim dogName As String
    Dim dogDate As String
    Dim trainer As String
    Dim breeding As String
    Dim loc1 As Long, loc2 As Long
    Dim qt As String
    qt = """"


    dogHomeUrl = "http://www.racingpost.com/greyhounds/dog_home.sd?dog_id="
    dogFormUrl = "http://www.racingpost.com/greyhounds/dog_form.sd?dog_id="
    x = 2
    For i = 1 To 10
        response = XmlHttpRequest(dogHomeUrl & i)
        Debug.Print (response)
        'parse the overall info

        'this is the basic of parsing the web page
        'just find the start of the data you want with instr
        'then find the end of the data with instr
        'and use mid to pull out the data we want
        'rinse and repeat this method for every line of data we'd like
        loc1 = InStr(response, "popUpHead")
        loc1 = InStr(loc1, response, "<h1>") + 4
        loc2 = InStr(loc1, response, "</h1>")
        dogName = Trim(Mid(response, loc1, loc2 - loc1))
        'apparantly if dog name is blank there is data to report on the web site
        If dogName <> "" Then
            'now lets get the dogDate
            loc1 = InStr(loc2, response, "<li>")
            loc1 = InStr(loc1, response, "(") + 1
            loc2 = InStr(loc1, response, ")")
            dogDate = Trim(Mid(response, loc1, loc2 - loc1))
            'now the trainer
            loc1 = InStr(loc2, response, "<strong>Trainer</strong>") + 24
            loc2 = InStr(loc1, response, "</li>")
            trainer = Trim(Mid(response, loc1, loc2 - loc1))

            response = XmlHttpRequest(dogFormUrl & i)
            'now we need to loop through the form table and parse out the values we care about
            loc1 = InStr(response, "<td class=" & qt & "first" & qt) + 17
            Do While (loc1 > 17)
                Dim raceDate As String
                Dim raceTrack As String
                Dim dis As String
                Dim trp As String
                Dim splt As String
                Dim pos As String
                Dim fin As String
                Dim by As String
                Dim winSec As String
                Dim remarks As String
                Dim time As String
                Dim going As String
                Dim price As String
                Dim grd As String
                Dim calc As String

                loc1 = InStr(loc1, response, ">") + 1
                loc2 = InStr(loc1, response, "</td>")
                raceDate = Trim(Mid(response, loc1, loc2 - loc1))
                If InStr(raceDate, "<a href") > 0 Then 'we have a link so parse out the date from the link
                    Dim tem1 As Long
                    Dim tem2 As Long
                    tem1 = InStr(raceDate, ">") + 1
                    tem2 = InStr(tem1, raceDate, "</a>")
                    raceDate = Trim(Mid(raceDate, tem1, tem2 - tem1))
                End If
                loc1 = InStr(loc2, response, "<td>") + 4
                loc2 = InStr(loc1, response, "</td>")
                raceTrack = Trim(Mid(response, loc1, loc2 - loc1))
                loc1 = InStr(loc2, response, "<td><span class=") + 16
                loc1 = InStr(loc1, response, ">") + 1
                loc2 = InStr(loc1, response, "</span>")
                dis = Trim(Mid(response, loc1, loc2 - loc1))
                loc1 = InStr(loc2, response, "<td class=")
                loc1 = InStr(loc1, response, ">") + 1
                loc2 = InStr(loc1, response, "</td>")
                trp = Trim(Mid(response, loc1, loc2 - loc1))
                loc1 = InStr(loc2, response, "<td>") + 4
                loc2 = InStr(loc1, response, "</td>")
                splt = Trim(Mid(response, loc1, loc2 - loc1))
                loc1 = InStr(loc2, response, "<td>") + 4
                loc2 = InStr(loc1, response, "</td>")
                pos = Trim(Mid(response, loc1, loc2 - loc1))
                loc1 = InStr(loc2, response, "<span class= " & qt & "black" & qt & ">") + 21
                loc2 = InStr(loc1, response, "</span>")
                fin = Trim(Mid(response, loc1, loc2 - loc1))
                loc1 = InStr(loc2, response, "<td>") + 4
                loc2 = InStr(loc1, response, "</td>")
                by = Trim(Mid(response, loc1, loc2 - loc1))
                loc1 = InStr(loc2, response, "<a href=") + 8
                loc1 = InStr(loc1, response, ">") + 1
                loc2 = InStr(loc1, response, "</a>")
                winSec = Trim(Mid(response, loc1, loc2 - loc1))
                '<td><i>
                loc1 = InStr(loc2, response, "<td><i>") + 7
                loc2 = InStr(loc1, response, "</i>")
                remarks = Trim(Mid(response, loc1, loc2 - loc1))
                '<span class="black">
                loc1 = InStr(loc2, response, "<span class=" & qt & "black" & qt & ">") + 21
                loc2 = InStr(loc1, response, "</span>")
                time = Trim(Mid(response, loc1, loc2 - loc1))
                '<td class="center">
                loc1 = InStr(loc2, response, "<td class=" & qt & "center" & qt & ">") + 19
                loc2 = InStr(loc1, response, "</td>")
                going = Trim(Mid(response, loc1, loc2 - loc1))
                loc1 = InStr(loc2, response, "<td class=" & qt & "center" & qt & ">") + 19
                loc2 = InStr(loc1, response, "</td>")
                price = Trim(Mid(response, loc1, loc2 - loc1))
                loc1 = InStr(loc2, response, "<td class=" & qt & "center" & qt & ">") + 19
                loc2 = InStr(loc1, response, "</td>")
                grd = Trim(Mid(response, loc1, loc2 - loc1))

                Range("A" & x).Value = dogName
                Range("B" & x).Value = dogDate
                Range("C" & x).Value = trainer
                Range("D" & x).Value = raceDate
                Range("E" & x).Value = raceTrack
                Range("F" & x).Value = dis
                Range("G" & x).Value = trp
                Range("H" & x).Value = splt
                Range("I" & x).Value = pos
                Range("J" & x).Value = fin
                Range("K" & x).Value = by
                Range("L" & x).Value = winSec
                Range("M" & x).Value = remarks
                Range("N" & x).Value = time
                Range("O" & x).Value = going
                Range("P" & x).Value = price
                Range("Q" & x).Value = grd

                loc1 = InStr(loc2, response, "<td class=" & qt & "first" & qt) + 17
                x = x + 1
            Loop
            Debug.Print (response)
        End If
        'parse the form table

    Next i
End Sub
Function XmlHttpRequest(url As String) As String
    Dim xml As Object
    Set xml = CreateObject("MSXML2.XMLHTTP")
    xml.Open "GET", url & "&cache_buster=" & GenerateRandom, False
    xml.send
    XmlHttpRequest = xml.responseText
End Function
Function GenerateRandom() As String
    GenerateRandom = Int(Rnd * 1000)
End Function

答案 1 :(得分:1)

感谢那个伟大而详细的回复。我对此比较陌生,我一直在处理你的代码,我在从源代码解析时遇到了一些问题。我设法获得表字段的距离,位置,分裂时间和结束罚款,但其余我不能得到。我提取了每个项目的来源,这里是一个列表,其中相应的操作对于右边的那些: 最佳     狗的名字         

安格尔西飞行

date of birth an sexb
<li>
 (3 Aug 2013)
 bk d </li>

trainer
<li><strong>Trainer</strong> J B Thompson</li>

breeding
<li><strong>Breeding</strong> Head Bound — Inshaarla</li>

FORM
DATE
>18Mar15</a> </td>            ' devweeks dev code works fine] 

TRACK
<td>BVue</td>                 '   devweeks dev code works fine] 



DIS
<td><span class="black">470m</span></td>  [here I did +25 and - 7 and     it     works] 

TRP
<td class="center">[2]</td>                   

SPLIT                                        [+4 works with this]
<td>4.51</td>

POS
<td>5555</td>                                [+4 works with this]

FIN
<td><span class="black">5th</span></td>      [ 25 - 7 works with this] 

BY
<td>12</td>

WIN/SEC
">Ballymac Fleetie</a> </td>

REMARKS
<td><i>Crd&amp;CkdW&amp;StruckInto1/4</i></td>

TIME
 <td><span class="black">28.67</span></td>
GOING
 <td class="center"> N</td>

PRICE
<td class="center">4/1</td>

GRADE
<td class="center">A5</td>

CALC
<td class="last right"><span>29.63</span></td>
 </tr>

对于其余的字段,我遇到不同的响应。例如字段POS(位置)和SPLIT的代码是+ 4.但是当我将它应用于raceBY时,它是与POS和SPLIT相同的源我得到了以下结果... td>13¾。 还有其他领域我得到...(href =“http://www.racingpost.com/greyhounds/dog_home.sd?dog_id=72970”onclick =“return Html.popup(this,{width:800,height: 480})“title =”点击狗形态...“&gt;风暴力...... 请看我的代码。 我已尝试过所有其他领域,并获得上述两条消息的变体。我知道我必须在这里做错事,并且非常感谢任何建议。

         Option Explicit
         Sub GetTrackData()
    Dim response As String
    Dim dogHomeUrl As String
    Dim dogFormUrl As String
    Dim i As Long
    Dim x As Long
    Dim dogName As String
    Dim dogDate As String
    Dim trainer As String
    Dim breeding As String

Dim loc1 As Long, loc2 As Long

dogHomeUrl = "http://www.racingpost.com/greyhounds/dog_home.sd?dog_id="
dogFormUrl = "http://www.racingpost.com/greyhounds/dog_form.sd?dog_id="
x = 2
For i = 1 To 10
    response = XmlHttpRequest(dogHomeUrl & i)
    Debug.Print (response)
    'parse the overall info

    'this is the basic of parsing the web page
    'just find the start of the data you want with instr
    'then find the end of the data with instr
    'and use mid to pull out the data we want
    'rinse and repeat this method for every line of data we'd like
    loc1 = InStr(response, "popUpHead")
    loc1 = InStr(loc1, response, "<h1>") + 4
    loc2 = InStr(loc1, response, "</h1>")


    dogName = Trim(Mid(response, loc1, loc2 - loc1))
    'apparantly if dog name is blank there is data to report on the web site
    If dogName <> "" Then
        'now lets get the dogDate
        loc1 = InStr(loc2, response, "<li>")
        loc1 = InStr(loc1, response, "(") + 1
        loc2 = InStr(loc1, response, ")")
        dogDate = Trim(Mid(response, loc1, loc2 - loc1))
        'now the trainer
        loc1 = InStr(loc2, response, "<strong>Trainer</strong>") + 24
        loc2 = InStr(loc1, response, "</li>")
        trainer = Trim(Mid(response, loc1, loc2 - loc1))
        response = XmlHttpRequest(dogFormUrl & i)



        'now we need to loop through the form table and parse out the values we care about
        loc1 = InStr(response, "Full Results")
        Do While (loc1 <> 0)
            Dim raceDate As String
            Dim raceTrack As String
            Dim raceDis As String
            Dim racePos As String
            Dim raceSplit As String
            Dim raceFin As String
            Dim raceBy As String
            Dim raceTrp As String
            Dim raceRemarks As String
            Dim raceWinSec As String
            Dim raceTime As String
            Dim raceGoing As String
            Dim racePrice As String
            Dim raceGrd As String
            Dim raceCalc As String







           ' Dim raceBy As String
            'Dim raceBy As String
            'Dim raceWinSec As String

            loc1 = InStr(loc1, response, ">") + 1
            loc2 = InStr(loc1, response, "</a>")
            raceDate = Trim(Mid(response, loc1, loc2 - loc1)) ' weeksdevcode  works fine

            loc1 = InStr(loc2, response, "<td>") + 4
            loc2 = InStr(loc1, response, "</td>")
            raceTrack = Trim(Mid(response, loc1, loc2 - loc1)) 'weeksdevcode  works fine
            'ABOVE TWO WEEKSDEV CODE
            '..............................................................
            'BELOW ARE MINE
            loc1 = InStr(loc2, response, "<td>") + 25       ' column F = DISTANCE/works
            loc2 = InStr(loc1, response, "</td>") - 7
            raceDis = Trim(Mid(response, loc1, loc2 - loc1))

           loc1 = InStr(loc2, response, "<td>") + 4         'column G = POSITION/works
           loc2 = InStr(loc1, response, "</td>")
           racePos = Trim(Mid(response, loc1, loc2 - loc1))

           loc1 = InStr(loc2, response, "<td>") + 4         'column H = SPLIT/works
           loc2 = InStr(loc1, response, "</td>")
           raceSplit = Trim(Mid(response, loc1, loc2 - loc1))


           loc1 = InStr(loc2, response, "<td>") + 25        'column I =FINISH/works
           loc2 = InStr(loc1, response, "</td>") - 7
           raceFin = Trim(Mid(response, loc1, loc2 - loc1))

          'BELOW IS THE CODE FOR raceBy and raceRemarks
          'and by the side is the results.

        '  loc1 = InStr(loc2, response, "<td>") + 4       '( here I get..13&frac34 and similar;)
         ' loc2 = InStr(loc1, response, "</td>")
         ' raceBy = Trim(Mid(response, loc1, loc2 - loc1))

         ' loc1 = InStr(loc2, response, "<td>") + 7       '(Here I Geta href="http://www.racingpost.com/greyhounds/dog_home.sd?dog_id=72970" onclick="return Html.popup(this, {width:800, height:480})" title="Click for Dog Form...">STORM FORCE</a>
          'loc2 = InStr(loc1, response, "</td>")
         ' raceRemarks = Trim(Mid(response, loc1, loc2 - loc1))

          ' BELOW ARE 3 variations I tried on raceTrp

          'loc1 = InStr(loc2, response, "<td>") + 18      'Here I Get.. <a href=""http://www.racingpost.com/greyhounds/dog_home.sd?dog_id=72970"" onclick=""return Html.popup(this, {width:800, height:480})"" title=""Click for Dog Form..."">STORM FORCE</a>"
          'loc2 = InStr(loc1, response, "</td>")
          'raceTrp = Trim(Mid(response, loc1, loc2 - loc1))

         ' loc1 = InStr(loc2, response, "<td>") + 18       ' Here I Get.. <a href=""http://www.racingpost.com/greyhounds/dog_home.sd?dog_id=72970"" onclick=""return Html.popup(this, {width:800, height:480})"" title=""Click for Dog Form..."">STORM FORCE</a>"
          'loc2 = InStr(loc1, response, "</td>") - 6
          'raceTrp = Trim(Mid(response, loc1, loc2 - loc1))

        '  loc1 = InStr(loc2, response, "class=center")      'same as above
         ' loc1 = InStr(loc2, response, ">") + 19
          'loc2 = InStr(loc1, response, "</td>")
         ' raceTrp = Trim(Mid(response, loc1, loc2 - loc1))




            Range("A" & x).Value = dogName
            Range("B" & x).Value = dogDate
            Range("C" & x).Value = trainer
            Range("D" & x).Value = raceDate
            Range("E" & x).Value = raceTrack
            ' above weeksdev all works fine
            Range("F" & x).Value = raceDis
            Range("G" & x).Value = racePos
            Range("H" & x).Value = raceSplit
            Range("I" & x).Value = raceFin
            Range("J" & x).Value = raceBy
            Range("K" & x).Value = raceTrp
            Range("L" & x).Value = raceRemarks
            Range("M" & x).Value = raceWinSec
            Range("N" & x).Value = raceTime
            Range("O" & x).Value = raceGoing
            Range("M" & x).Value = racePrice
            Range("N" & x).Value = raceGrd
            Range("O" & x).Value = raceCalc







            loc1 = InStr(loc2, response, "Full Results")
            x = x + 1
        Loop
        Debug.Print (response)
    End If
    'parse the form table

   Next i
End Sub
Function XmlHttpRequest(url As String) As String
    Dim xml As Object
    Set xml = CreateObject("MSXML2.XMLHTTP")
    xml.Open "GET", url, False
    xml.send
    XmlHttpRequest = xml.responseText
End Function

亲切的问候 山口