从该网站提取匹配数据“http://bet.hkjc.com/football/index.aspx?lang=en”

时间:2016-05-10 10:00:47

标签: vba web-scraping automation

我想使用以下代码从此网站“http://bet.hkjc.com/football/index.aspx?lang=en”中提取匹配数据:

Sub Macro4()
' Macro4 Macro
' steve lau 在 28/04/2016 錄製的巨集
baseURL = "http://www.hkjc.com/chinese/news/redirect_odds_ch_football.asp"
baseName = "summary"
With ActiveSheet.QueryTables.Add(Connection:= _
    "URL;" & baseURL _
    , Destination:=Range("A1"))
End With
With ActiveSheet.QueryTables.Add(Destination:=Range("A1"))
    .Name = baseName
    .FieldNames = True
    .RowNumbers = False
    .FillAdjacentFormulas = False
    .PreserveFormatting = True
    .RefreshOnFileOpen = False
    .BackgroundQuery = True
    .RefreshStyle = xlInsertDeleteCells
    .SavePassword = False
    .SaveData = True
    .AdjustColumnWidth = True
    .RefreshPeriod = 0
    .WebSelectionType = xlEntirePage
    .WebFormatting = xlWebFormattingNone
    .WebPreFormattedTextToColumns = True
    .WebConsecutiveDelimitersAsOne = True
    .WebSingleBlockTextImport = False
    .WebDisableDateRecognition = False
    .WebDisableRedirections = False
    .Refresh BackgroundQuery:=True
End With    

End Sub

但没有回复。我认为这可能是由于网页中的不同帧。任何人都可以帮助弄清楚如何提取匹配细节? 非常感谢。

1 个答案:

答案 0 :(得分:0)

您可以使用以下脚本,我使用

抓取表格
 .document.getElementById("footballmaincontent").getElementsByTagName("table")(2)

然后循环表中的行和列(行内的单元格)。

2014年6月14日页面上的示例结果

Sample results

匹配脚本的输出:

Output from script

代码:

Option Explicit
Public Sub GetInfo()
    Dim IE As New InternetExplorer, a As HTMLTable
    Const URL = "http://bet.hkjc.com/football/index.aspx?lang=en"
    Application.ScreenUpdating = True
    With IE
        .Visible = False
        .navigate URL

        While .Busy Or .readyState < 4: DoEvents: Wend

        Set a = .document.getElementById("footballmaincontent").getElementsByTagName("table")(2)

        Dim r As Long, c As Long, iRow As HTMLTableRow, iCell As HTMLTableCell

        With ActiveSheet
            For Each iRow In a.getElementsByTagName("tr")
                For Each iCell In iRow.getElementsByTagName("td")
                    Select Case iCell.innerText
                    Case "Home", "Draw", "Away"
                    Case Else
                        c = c + 1: .Cells(r + 1, c) = iCell.innerText
                    End Select
                Next iCell
                c = 0: r = r + 1
            Next iRow
        End With
        .Quit
    End With
Application.ScreenUpdating = True
End Sub

所需参考资料(VBE&gt;工具&gt;参考资料)

  1. HTML对象库
  2. Microsoft Internet Controls