从多个页面从Web下载表格

时间:2018-11-28 12:50:39

标签: html excel vba excel-vba web-scraping

我尝试通过尝试以下代码从多个链接通过网络下载表格。

Sub test()
    cnt = 0

    For i = 2 To 5
        temp = Cells(i, 1)

        lnk = Right(temp, Len(temp) - WorksheetFunction.Find("?", temp))
        ActiveWorkbook.Queries.Add Name:="Table 0", Formula:= _
            "let" & Chr(13) & "" & Chr(10) & "    Source = Web.Page(Web.Contents(""https://www.nseindia.com/marketinfo/sym_map/symbolMapping.jsp?"" & lnk))," & Chr(13) & "" & Chr(10) & "    Data0 = Source{0}[Data]," & Chr(13) & "" & Chr(10) & "    #""Changed Type"" = Table.TransformColumnTypes(Data0,{{""CALLS Chart"", type text}, {""CALLS OI"", type text}, {""CALLS Chng in OI"", type text}, {""CALLS Volume"", t" & _
            "ype text}, {""CALLS IV"", type text}, {""CALLS LTP"", type text}, {""CALLS Net Chng"", type text}, {""CALLS Bid Qty"", type text}, {""CALLS Bid Price"", type text}, {""CALLS Ask Price"", type text}, {""CALLS Ask Qty"", type text}, {""Strike Price"", type number}, {""PUTS Bid Qty"", type text}, {""PUTS Bid Price"", type text}, {""PUTS Ask Price"", type text}, {""PUTS" & _
            " Ask Qty"", type text}, {""PUTS Net Chng"", type text}, {""PUTS LTP"", type text}, {""PUTS IV"", type text}, {""PUTS Volume"", type text}, {""PUTS Chng in OI"", type text}, {""PUTS OI"", type text}, {""PUTS Chart"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & "    #""Changed Type"""
        ActiveWorkbook.Worksheets.Add
        With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
            "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 0"";Extended Properties=""""" _
            , Destination:=Range("$A$1")).QueryTable
            .CommandType = xlCmdSql
            .CommandText = Array("SELECT * FROM [Table 0]")
            .RowNumbers = False
            .FillAdjacentFormulas = False
            .PreserveFormatting = True
            .RefreshOnFileOpen = False
            .BackgroundQuery = True
            .RefreshStyle = xlInsertDeleteCells
            .SavePassword = False
            .SaveData = True
            .AdjustColumnWidth = True
            .RefreshPeriod = 0
            .PreserveColumnInfo = True
            .ListObject.DisplayName = "Table" & cnt
            .Refresh BackgroundQuery:=False
            ActiveWorkbook.Queries("Table 0").Delete
            cnt = cnt + 1
        End With
    Next
End Sub

但是我遇到以下错误!

enter image description here

当我通过记录宏获得此代码时,我陷入了使网页链接动态化的困境。

ActiveWorkbook.Queries.Add Name:="Table 0", Formula:= _
        "let" & Chr(13) & "" & Chr(10) & "    Source = Web.Page(Web.Contents(""https://www.nseindia.com/marketinfo/sym_map/symbolMapping.jsp?symbol=BAJAJ-AUTO&instrument=OPTSTK&date=-&segmentLink=17""))," & Chr(13) & "" & Chr(10) & "    Data0 = Source{0}[Data]," & Chr(13) & "" & Chr(10) & "    #""Changed Type"" = Table.TransformColumnTypes(Data0,{{""CALLS Chart"", type text}, {""CALLS OI"", type text}, {""CALLS Chng in OI"", type text}, {""CALLS Volume"", t" & _
        "ype text}, {""CALLS IV"", type text}, {""CALLS LTP"", type text}, {""CALLS Net Chng"", type text}, {""CALLS Bid Qty"", type text}, {""CALLS Bid Price"", type text}, {""CALLS Ask Price"", type text}, {""CALLS Ask Qty"", type text}, {""Strike Price"", type number}, {""PUTS Bid Qty"", type text}, {""PUTS Bid Price"", type text}, {""PUTS Ask Price"", type text}, {""PUTS" & _
        " Ask Qty"", type text}, {""PUTS Net Chng"", type text}, {""PUTS LTP"", type text}, {""PUTS IV"", type text}, {""PUTS Volume"", type text}, {""PUTS Chng in OI"", type text}, {""PUTS OI"", type text}, {""PUTS Chart"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & "    #""Changed Type"""

链接FYR:

  1. https://www.nseindia.com/marketinfo/sym_map/symbolMapping.jsp?symbol=BAJAJ-AUTO&instrument=OPTSTK&date=-&segmentLink=17
  2. https://www.nseindia.com/marketinfo/sym_map/symbolMapping.jsp?symbol=BAJAJFINSV&instrument=OPTSTK&date=-&segmentLink=17
  3. https://www.nseindia.com/marketinfo/sym_map/symbolMapping.jsp?symbol=BAJFINANCE&instrument=OPTSTK&date=-&segmentLink=17

有人可以帮助我吗?

1 个答案:

答案 0 :(得分:3)

您可能会将XMLHTTP请求视为一种快速的检索方法。我假设链接位于名为Links的工作表的A列中,从第1行开始。

您将调整范围

Application.Transpose(ws.Range("A1:A3").Value)

确保包含所有链接。

我使用symbol确定要写入的工作表。我使用@Rory稍作修改的功能来测试工作表是否已经存在,如果不存在,则创建它。假设符号不会在URL中重复,否则您需要为工作表命名选择唯一的内容。

我使用#octable的css id选择器通过表的ID作为目标。

Option Explicit    
Public Sub Test()
    Dim sResponse As String, html As HTMLDocument, links(), hTable As HTMLTable
    Dim symbol As String, i As Long, ws As Worksheet, wsTemp As Worksheet
    Set ws = ThisWorkbook.Worksheets("Links")
    links = Application.Transpose(ws.Range("A1:A3").Value)

    With CreateObject("MSXML2.XMLHTTP")
        For i = LBound(links) To UBound(links)
            If InStr(links(i), "http") > 0 Then
                .Open "GET", links(i), False
                .send
                sResponse = StrConv(.responseBody, vbUnicode)

                Set html = New HTMLDocument
                With html
                    .body.innerHTML = sResponse
                    Set hTable = .querySelector("#octable")
                End With
                symbol = Split(Split(links(i), "symbol=")(1), "&")(0)
                If Not WorksheetExists(symbol) Then
                    Set wsTemp = ThisWorkbook.Worksheets.Add
                    wsTemp.NAME = symbol
                Else
                    Set wsTemp = ThisWorkbook.Worksheets(symbol)
                End If
                If Not hTable Is Nothing Then
                    wsTemp.UsedRange.ClearContents
                    wsTemp.Cells(1, 1) = "CALLS": wsTemp.Cells(1, 13) = "PUTS"
                    WriteTable hTable, 2, wsTemp
                End If
            End If
        Next
    End With
End Sub
Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)

    If ws Is Nothing Then Set ws = ActiveSheet

    Dim tSection As Object, tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, C As Long, tBody As Object
    r = startRow
    With ws
        Dim headers As Object, header As Object, columnCounter As Long
        Set headers = hTable.getElementsByTagName("th")
        For Each header In headers
            columnCounter = columnCounter + 1
            If columnCounter > 3 Then
            .Cells(startRow, columnCounter - 3) = header.innerText
            End If
        Next header
        startRow = startRow + 1
        Set tBody = hTable.getElementsByTagName("tbody")
        For Each tSection In tBody               'HTMLTableSection
            Set tRow = tSection.getElementsByTagName("tr") 'HTMLTableRow
            For Each tr In tRow
                r = r + 1
                Set tCell = tr.getElementsByTagName("td")
                C = 1
                For Each td In tCell             'DispHTMLElementCollection
                    .Cells(r, C).Value = td.innerText 'HTMLTableCell
                    C = C + 1
                Next td
            Next tr
        Next tSection
    End With
End Sub

Public Function WorksheetExists(ByVal sName As String) As Boolean  '<== @Rory
    WorksheetExists = Evaluate("ISREF('" & sName & "'!A1)")
End Function

参考(VBE>工具>参考):

  1. Microsoft HTML对象库