如何从网页上的表中获取数据

时间:2017-11-04 12:36:45

标签: excel-vba web-scraping vba excel

我正在尝试获取"完整的事务日志"下方那张表格中的数据,但不知道该如何实现。我尝试通过 ID 获取该表格的 innerText,但得到的只有表头(标题行)。有人能指点我一下吗?

这是我想要获取数据的表格:https://screenshots.firefox.com/VDJEsuPeQLIgRYWa/www.aastocks.com

' Downloads the transaction page with a synchronous HTTP GET, parses the response
' into an in-memory HTML document, and writes the text of the element with id
' "tradeLog1" into Sheet1!A1.
'
' NOTE(review): the asker reports that this yields only the table header. Presumably
' the data rows are filled in by script after a page interaction, which a raw GET
' parsed through "htmlFile" never performs -- confirm against the live page.
Sub test()

    Dim oDom As Object: Set oDom = CreateObject("htmlFile")
    Dim t As Object   ' was implicitly created; fails to compile under Option Explicit

    With CreateObject("msxml2.xmlhttp")
        .Open "GET", "http://www.aastocks.com/en/stocks/analysis/transaction.aspx?symbol=60001", False
        .Send
        ' Do not try to parse an error page as if it were the expected document.
        If .Status <> 200 Then
            MsgBox "Request failed: HTTP " & .Status & " " & .statusText, vbExclamation
            Exit Sub
        End If
        oDom.body.innerHtml = .responseText
    End With

    Set t = oDom.getElementById("tradeLog1")
    ' getElementById returns Nothing when the id is absent; guard before dereferencing.
    If t Is Nothing Then
        MsgBox "Element 'tradeLog1' was not found in the response.", vbExclamation
        Exit Sub
    End If

    Sheet1.Range("A1").Value = t.innerText
'
'    'loop through each row
'    For Each r In t.Rows
'        Debug.Print r.Cells(1).innertext
'        'loop through each column in the row
'        For Each c In r.Cells
'            Debug.Print c.innertext
'        Next
'    Next

End Sub

2 个答案:

答案 0 :(得分:2)

我已经找到了与该表格相关的元素。至于如何把每个元素放进工作表中,就留给您自行处理了。

这已经过测试并且有效。

Option Explicit

' Win32 Sleep: suspends the calling thread for dwMilliseconds milliseconds.
' The parameter is a 32-bit DWORD on both 32-bit and 64-bit Windows, so it is
' declared As Long in both branches. LongPtr is meant for pointers and handles only;
' the conditional block is still needed because PtrSafe does not exist before VBA7.
#If VBA7 Then
    Declare PtrSafe Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
#Else
    Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)
#End If

' Opens the transaction page in Internet Explorer, clicks the <input> whose value
' is "2" inside the "float_r pad5R pad5L" container of #mainForm, waits for the page
' to settle, and prints every non-empty cell of table "tradeLog1" to the Immediate
' window. The browser is closed on exit, whether the run succeeds or fails.
Sub test()

    '#############################################################
    '########### You will need to make references to:  ###########
    '###########   (1) Microsoft Internet Controls     ###########
    '###########   (2) Microsoft HTML Object Library   ###########
    '#############################################################

    Const Url As String = "http://www.aastocks.com/en/stocks/analysis/transaction.aspx?symbol=60001"

    ' Explicit instantiation instead of "As New": an auto-instancing variable would
    ' silently spawn a fresh browser if it were touched again after Quit.
    Dim IE As InternetExplorer
    Dim mainForm As Object
    Dim iElem As Object, iElems As Object
    Dim iElem2 As Object, iElems2 As Object, Btn As Object
    Dim ieDoc As HTMLDocument, myTbl As HTMLTable
    Dim tRow As HTMLTableRow, tCell As HTMLTableCell

    On Error GoTo Fail
    Set IE = New InternetExplorer

    With IE
        .Visible = True
        .navigate Url
        ' Wait for COMPLETE rather than LOADED so the DOM is fully built before querying it.
        Do While .Busy Or .readyState <> READYSTATE_COMPLETE
            DoEvents
        Loop

        Set mainForm = .Document.getElementById("mainForm")
        If mainForm Is Nothing Then
            Err.Raise vbObjectError + 513, "test", "Element 'mainForm' was not found."
        End If

        Set iElems = mainForm.getElementsByClassName("float_r pad5R pad5L")
        For Each iElem In iElems
            Set iElems2 = iElem.getElementsByTagName("input")
            For Each iElem2 In iElems2
                ' Compare as text: comparing a non-numeric or empty string Variant with
                ' the number 2 raises a Type mismatch. "& """" also neutralises Null.
                If (iElem2.Value & "") = "2" Then
                    Set Btn = iElem2
                    Exit For
                End If
            Next iElem2
            ' Exit For above only leaves the inner loop; stop at the first match.
            If Not Btn Is Nothing Then Exit For
        Next iElem

        If Btn Is Nothing Then
            Err.Raise vbObjectError + 514, "test", "No input with value 2 was found."
        End If

        Btn.Click
        Do While .Busy Or .readyState < READYSTATE_COMPLETE
            DoEvents
        Loop
        ' Short grace period for the page to finish filling the table after the click.
        Sleep 500
    End With

    Set ieDoc = IE.Document
    Set myTbl = ieDoc.getElementById("tradeLog1")
    If myTbl Is Nothing Then
        Err.Raise vbObjectError + 515, "test", "Table 'tradeLog1' was not found."
    End If

    ' I will leave it up to you to place these in your own manner, this just shows that you are able to grab the table now
    For Each tRow In myTbl.Rows
        For Each tCell In tRow.Cells
            If tCell.innerText <> "" Then Debug.Print tCell.innerText
        Next tCell
    Next tRow

CleanUp:
    ' Best-effort shutdown; never let cleanup itself raise.
    On Error Resume Next
    If Not IE Is Nothing Then IE.Quit
    Set IE = Nothing
    On Error GoTo 0
    Exit Sub

Fail:
    MsgBox "test failed: " & Err.Description, vbExclamation
    Resume CleanUp

End Sub

答案 1 :(得分:2)

试试下面这段代码。它会获取所需的数据。

' Opens the transaction page in Internet Explorer, clicks the first <input> inside
' the first "float_r pad5R pad5L" element, waits for the page to settle, then copies
' table "tradeLog1" cell by cell into the worksheet, starting at row 2, column 1.
' Screen updating is restored and the browser is closed even when an error occurs.
Sub TableData()
    Const PAGE_URL As String = "http://www.aastocks.com/en/stocks/analysis/transaction.aspx?symbol=60001"

    ' Explicit instantiation instead of "As New" so the cleanup code cannot
    ' accidentally re-create the browser by touching the variable.
    Dim IE As InternetExplorer, html As HTMLDocument
    Dim htmla As Object, tRow As Object, tCel As Object
    Dim containers As Object, inputs As Object
    Dim outRow As Long, outCol As Long

    On Error GoTo Fail
    Application.ScreenUpdating = False
    Set IE = New InternetExplorer

    With IE
        .Visible = True
        .navigate PAGE_URL
        ' DoEvents keeps Excel responsive; a bare spin loop freezes the host.
        Do While .Busy Or .readyState <> READYSTATE_COMPLETE: DoEvents: Loop
        Set html = .document
    End With

    Set containers = html.getElementsByClassName("float_r pad5R pad5L")
    If containers.Length = 0 Then
        Err.Raise vbObjectError + 513, "TableData", "Container 'float_r pad5R pad5L' was not found."
    End If
    Set inputs = containers(0).getElementsByTagName("input")
    If inputs.Length = 0 Then
        Err.Raise vbObjectError + 514, "TableData", "No input element was found in the container."
    End If

    inputs(0).Click
    Do While IE.Busy Or IE.readyState <> READYSTATE_COMPLETE: DoEvents: Loop

    ' Re-read the document in case the click caused a reload.
    Set html = IE.document
    Set htmla = html.getElementById("tradeLog1")
    If htmla Is Nothing Then
        Err.Raise vbObjectError + 515, "TableData", "Table 'tradeLog1' was not found."
    End If

    ' Output starts on row 2, as in the original (row 1 is left free).
    outRow = 2
    For Each tRow In htmla.Rows
        outCol = 0
        For Each tCel In tRow.Cells
            outCol = outCol + 1
            ' Unqualified Cells is kept on purpose so the target sheet is unchanged.
            Cells(outRow, outCol) = tCel.innerText
        Next tCel
        outRow = outRow + 1
    Next tRow

CleanUp:
    ' Best-effort shutdown; never let cleanup itself raise.
    On Error Resume Next
    Application.ScreenUpdating = True
    If Not IE Is Nothing Then IE.Quit
    Set IE = Nothing
    On Error GoTo 0
    Exit Sub

Fail:
    MsgBox "TableData failed: " & Err.Description, vbExclamation
    Resume CleanUp

End Sub

需要添加的引用:

1. Microsoft Internet Controls
2. Microsoft HTML Object Library