使用VBA单击html按钮,然后刮取刷新的数据

时间:2015-01-07 07:02:57

标签: vba excel-vba mshtml excel

我正在尝试编写一个在输入框中输入日期的程序

<input name="Mdate" type="text" id="Mdate" size="30" value="" /></td>

点击提交按钮

<input type="submit" name="button" id="button" value="Submit" />

然后刮擦显示在“a”标签中的结果数据。

<center>
<b>Tuesday, 6 January 2015</b><br />
<a href="/horse-racing-results/Ruakaka/2015-1-6" target="_blank">Ruakaka</a>

在输入提交按钮之前,此数据不可用。我的尝试在下面全部发布。我似乎遇到的问题是我无法访问修改后的html代码(通过单击提交进行修改)。任何人都可以提供任何建议吗?

'dimension variables
Dim ie As InternetExplorer
Dim htmldoc As MSHTML.IHTMLDocument                                                         'Document object
Dim inputs As MSHTML.IHTMLElementCollection                                                 'Element collection for "input" tags
Dim eles1, eles2 As MSHTML.IHTMLElementCollection                                            'Element collection for th tags
Dim element As MSHTML.IHTMLElement                                                          'input elements
Dim ele1, ele2 As MSHTML.IHTMLElement                                                       'Header elements

'Open InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False                                                                          'make IE invisible

'Navigate to webpage
Dim ieURL As String:    ieURL = "http://www.racenet.com.au/horse-racing-results/"           'set URL from which to retrieve racemeet and date data
ie.navigate ieURL                                                                           'navigate to URL
Do While ie.Busy Or ie.readyState <> 4                                                      'wait for page to load
    DoEvents
Loop

Set htmldoc = ie.document                                                                   'Document webpage
Set inputs = htmldoc.getElementsByTagName("input")                                          'Find all input tags

Dim dd, mm, yyyy As Integer
Dim startdate, enddate As Date
Dim i, j, k As Long
Dim raceMeet, raceURL As String
startdate = #1/1/2008#: enddate = Date - 1
Dim racemeetArr As Variant
ReDim racemeetArr(1 To 2, 1)

For i = startdate To enddate
    dd = Day(i): mm = Month(i): yyyy = Year(i)

    For Each element In inputs
        If element.Name = "Mdate" Then
            element.Value = yyyy & "-" & mm & "-" & dd
        Else
            If element.Name = "button" Then
                element.Click
                'insert scraper
                Set eles1 = htmldoc.getElementsByTagName("a")                                          'Find all centre tags
                    For Each ele1 In eles1
                        If InStr(ele1.href, "/horse-racing-results/") > 0 Then
                            raceMeet = ele1.innerText
                            raceURL = ele1.innerHTML
                            ReDim Preserve racemeetArr(1 To 2, UBound(racemeetArr, 2) + 1)
                            racemeetArr(1, UBound(racemeetArr, 2)) = raceMeet
                            racemeetArr(2, UBound(racemeetArr, 2)) = raceURL
                        End If
                    Next ele1
            Else
            End If
        End If

    Next element
Stop

Next i

ie.Quit

2 个答案:

答案 0 :(得分:1)

插入条件以在页面加载时等待。

以下重写成功从我的电脑上的目标页面获取数据:

Private Sub CommandButton1_Click()

    'dimension variables
    Dim ie As InternetExplorer
    Dim htmldoc As MSHTML.IHTMLDocument                                                         'Document object
    Dim inputs As MSHTML.IHTMLElementCollection                                                 'Element collection for "input" tags
    Dim eles1, eles2 As MSHTML.IHTMLElementCollection                                            'Element collection for th tags
    Dim element As MSHTML.IHTMLElement                                                          'input elements
    Dim ele1, ele2 As MSHTML.IHTMLElement                                                       'Header elements

    'Open InternetExplorer
    Set ie = CreateObject("InternetExplorer.Application")
    ie.Visible = True                                                                          'make IE invisible

    'Navigate to webpage
    Dim ieURL As String:    ieURL = "http://www.racenet.com.au/horse-racing-results/"           'set URL from which to retrieve racemeet and date data
    ie.navigate ieURL                                                                           'navigate to URL
    Do While ie.Busy Or ie.readyState <> 4                                                      'wait for page to load
        DoEvents
    Loop

    Set htmldoc = ie.document                                                                   'Document webpage
    Set inputs = htmldoc.getElementsByTagName("input")                                          'Find all input tags

    Dim dd, mm, yyyy As Integer
    Dim startdate, enddate As Date
    Dim i, j, k As Long
    Dim raceMeet, raceURL As String
    startdate = #1/1/2008#: enddate = Date - 1
    Dim racemeetArr As Variant
    ReDim racemeetArr(1 To 2, 1)

    For i = startdate To enddate
        dd = Day(i): mm = Month(i): yyyy = Year(i)


        For Each element In inputs
            If element.Name = "Mdate" Then
                element.Value = yyyy & "-" & mm & "-" & dd
            Else
                If element.Name = "button" Then
                    element.Click
                    Exit For
                End If
            End If

        Next element


        Do
        ' Wait until the Browser is loaded'
        Loop Until ie.readyState = READYSTATE_COMPLETE

        'insert scraper
        Set eles1 = htmldoc.getElementsByTagName("a")                                          'Find all centre tags
            For Each ele1 In eles1
                If InStr(ele1.href, "/horse-racing-results/") > 0 Then
                    raceMeet = ele1.innerText
                    raceURL = ele1.innerHTML
                    ReDim Preserve racemeetArr(1 To 2, UBound(racemeetArr, 2) + 1)
                    racemeetArr(1, UBound(racemeetArr, 2)) = raceMeet
                    racemeetArr(2, UBound(racemeetArr, 2)) = raceURL
                End If
            Next ele1

        Stop

    Next i

    ie.Quit

End Sub

修改

在分析了HTTP请求之后,我设法将代码缩减了一点(可以直接查询结果而无需填写表单并提交页面)

我不是昂贵的数组ReDims的忠实粉丝,所以我创建了一个类,并将结果保存在该类的集合中(随意使用或不使用它)。

添加一个新的类模块,将其命名为clRaceMeet并粘贴此代码:

Option Explicit

Private pMeet As String
Private pUrl As String


Public Property Let Meet(ByVal Val As String)
    pMeet = Val
End Property
Public Property Get Meet() As String
    Meet = pMeet
End Property

Public Property Let URL(ByVal Val As String)
    pUrl = Val
End Property
Public Property Get URL() As String
    URL = pUrl
End Property

然后,使用此修改后的代码版本来抓取数据并将其转储到调试窗口:

Option Explicit

Private Sub CommandButton1_Click()

    'dimension variables
    Dim ie As InternetExplorer
    Dim ieURL As String

    Dim dd As Integer
    Dim mm As Integer
    Dim yyyy As Integer
    Dim startDate As Date
    Dim endDate As Date
    Dim i As Long

    Dim htmlDoc As MSHTML.IHTMLDocument
    Dim colLeftEleColl As MSHTML.IHTMLElementCollection
    Dim colLeftEle As MSHTML.IHTMLElement
    Dim centerEleColl As MSHTML.IHTMLElementCollection
    Dim centerEle As MSHTML.IHTMLElement

    Dim raceMeet As String
    Dim raceURL As String
    Dim objRaceMeet As clRaceMeet
    Dim raceMeetColl As New Collection


    'Open InternetExplorer
    Set ie = CreateObject("InternetExplorer.Application")
    ie.Visible = True

    startDate = #1/1/2009#
    endDate = Date - 1

    For i = startDate To endDate
        dd = Day(i)
        mm = Month(i)
        yyyy = Year(i)

        ieURL = "http://www.racenet.com.au/horse-racing-results-search.asp?Mdate=" & yyyy & "-" & mm & "-" & dd
        ie.navigate ieURL

        Do
        ' Wait until the Browser is loaded'
        Loop Until ie.readyState = READYSTATE_COMPLETE

        Set htmlDoc = ie.document

        'insert scraper
        Set colLeftEleColl = htmlDoc.getElementById("ColLeft").all

        'Loop through elements of ColLeft div
        For Each colLeftEle In colLeftEleColl

            If colLeftEle.tagName = "CENTER" Then
                Set centerEleColl = colLeftEle.all

                'Loop through elements of <center> tag
                For Each centerEle In centerEleColl

                    If centerEle.tagName = "A" Then
                        If InStr(centerEle.href, "/horse-racing-results/") > 0 Then
                            raceMeet = centerEle.innerText
                            raceURL = centerEle.href

                            Set objRaceMeet = New clRaceMeet

                            objRaceMeet.Meet = raceMeet
                            objRaceMeet.URL = raceURL
                            raceMeetColl.Add objRaceMeet

                        End If
                    End If
                Next centerEle

                Exit For
            End If
        Next colLeftEle

        ' Dump results to immediate window:
        For Each objRaceMeet In raceMeetColl
            Debug.Print objRaceMeet.Meet & " - " & objRaceMeet.URL
        Next objRaceMeet

        'Stop

    Next i

    ie.Quit

End Sub

快乐投注! :)

答案 1 :(得分:0)

我玩弄了最后一个,而下一个循环中的每个循环都必须追随它。然后我也把它列入了sheet1并且它工作了。我做了一些小的调整,比如添加一个变量来增加细胞。

这段代码并没有产生网站的实际结果,不确定这是不是你的目标。