Question

我正在尝试从此网站提取数据：http://securities.stanford.edu/filings.html?page=1

每个＆＃34;页面＆＃34;是一个包含21个项目的表。有97页我想从中提取数据，但我无法将其自动化，以便宏循环遍历所有97，并将结果放在每21行，从单元格A1开始。（序列：a1，a22，a43，等等......）

这是我得到的，但我不想编辑代码97时间来获取所有页面。知道如何自动完成任务吗？

Sub Macro1()
' Macro1 Macro
    With ActiveSheet.QueryTables.Add(Connection:= _
        "URL;http://securities.stanford.edu/filings.html?page=1", Destination:=Range( _
        "A1"))
        .Name = "filings.html?page=1"**
        .FieldNames = True
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .WebSelectionType = xlAllTables
        .WebFormatting = xlWebFormattingNone
        .WebPreFormattedTextToColumns = True
        .WebConsecutiveDelimitersAsOne = True
        .WebSingleBlockTextImport = False
        .WebDisableDateRecognition = False
        .WebDisableRedirections = False
        .Refresh BackgroundQuery:=False
    End With
end Sub

Answer 1

For x = 1 to 97
    With ActiveSheet.QueryTables.Add(Connection:= _
        "URL;http://securities.stanford.edu/filings.html?page=" & x, Destination:=Range( _
        "A" & (1 + ((x - 1) * 21)))
        .Name = "filings.html?page=" & x
    End With
Next

x包含页码，并且单元格很复杂，使其从A1而不是A21开始。

您可以将其设为0 to 96和单元格& (1 + (x + 21))以及名称和查询x + 1。

Answer 2

我会放弃来自Web Query＆＃39;方法并深入研究一些xmlHTTP。对于以下内容，您将使用VBE的工具►参考添加 Microsoft HTML对象库，Microsoft Internet Controls 和 Microsoft XML 6.0 。

Option Explicit

Sub mcr_Collect_Filings()
    Dim htmlBDY As HTMLDocument, xmlHTTP As New MSXML2.ServerXMLHTTP60
    Dim rw As Long, pg As Long, iTH As Long, iTD As Long, iTR As Long
    Dim eTBL As MSHTML.IHTMLElement

    For pg = 1 To 99    '<-set to something reasonable; routine will kick out whehn it cannot find anything more
        xmlHTTP.Open "GET", "http://securities.stanford.edu/filings.html?page=" & pg, False
        xmlHTTP.setRequestHeader "Content-Type", "text/xml"
        xmlHTTP.send

        If xmlHTTP.Status <> "200" Then GoTo bm_CleanUp

        Set htmlBDY = New HTMLDocument
        htmlBDY.body.innerHTML = xmlHTTP.responseText

        Set eTBL = htmlBDY.getElementById("records").getElementsByTagName("table")(0)
        If eTBL Is Nothing Then GoTo bm_CleanUp

        'skip the header row if on page 2 and above
        With Sheet1  '<-worksheet codename
            rw = .Cells(Rows.Count, 1).End(xlUp).Row
            For iTR = (1 + (pg = 1)) To (eTBL.getElementsByTagName("tr").Length - 1)
                For iTH = 0 To (eTBL.getElementsByTagName("tr")(iTR).getElementsByTagName("th").Length - 1)
                    .Cells(rw, 1).Offset(iTR, iTH) = _
                      eTBL.getElementsByTagName("tr")(iTR).getElementsByTagName("th")(iTH).innerText
                Next iTH
                For iTD = 0 To (eTBL.getElementsByTagName("tr")(iTR).getElementsByTagName("td").Length - 1)
                    .Cells(rw, 1).Offset(iTR, iTD) = _
                      eTBL.getElementsByTagName("tr")(iTR).getElementsByTagName("td")(iTD).innerText
                Next iTD
            Next iTR
        End With
    Next pg

bm_CleanUp:
    Set eTBL = Nothing
    Set htmlBDY = Nothing
    Set xmlHTTP = Nothing

End Sub

XMLHTTP是不可见的，所以你必须对页面有一些了解，以及在不同情况下你会收到的HTML代码形式。浏览器的 Inspect Element 命令可以解决这个问题。

这是迄今为止VBA中最快的方法。虽然您实际上要检索的行数超过99行，但这在56.3秒内达到了99页。您甚至可以通过关闭屏幕更新来加快速度。

在循环中使用From Web Query

2 个答案: