使用Excel vba在Investing.com中进行Web抓取

时间:2019-03-08 12:12:55

标签: excel vba web-scraping

我不了解vba。仅使用宏记录器。 我需要将数据从网页上下载到Excel电子表格,而据我所知,我无法使用vba。

特别是,我想做一个宏以将页面的数据表下载到Excel:https://www.investing.com/equities/cellnex-telecom-historical-data

必须根据时间,日期范围和顺序来配置此下载。

步骤如下: 1.-目的是将数据从“ CLNX历史数据”表复制到Excel电子表格。 2.-该下载应通过事先通过调用“术语”在下拉菜单中选择“每月”来完成。 3.-下载是通过预先选择过去2年的日期范围进行的。 4.-最后,按“最大”列的降序对表格进行排序。 5.-选择术语,日期范围和顺序后,将数据从“ CLNX历史数据”表复制到Excel电子表格。

我尝试使用宏记录器,但是无法配置术语,日期范围或顺序。

有人可以帮我吗?

感谢您的帮助。

代码:

Sub DataInvesting()

Dim IE As Object

Set IE = CreateObject("InternetExplorer.Application")

IE.navigate "https://www.investing.com/equities/cellnex-telecom-historical-data"

Do Until IE.readyState = 4

DoEvents

Loop

IE.Document.getElementsByClassName("newInput selectBox float_lang_base_1")(0).Value = "Monthly"

IE.Visible = True

Set IE = Nothing

Set appIE = Nothing

End Sub

3 个答案:

答案 0 :(得分:1)

尽管设置了免费帐户,但我一直在说密码错误,但我无法对此进行测试。受够了5个密码重置和相同的问题,并怀疑它想要我的社交媒体详细信息。

以下内容概述了我将考虑的步骤,尽管最可能需要一些定时的等待。

Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub Info()
    Dim ie As New InternetExplorer  
    Const URL As String  = ""https://www.investing.com/equities/cellnex-telecom-historical-data""
    With ie
        .Visible = True
        .Navigate2 URL

        While .Busy Or .readyState < 4: DoEvents: Wend

        .document.querySelector(".login").Click

        While .Busy Or .readyState < 4: DoEvents: Wend

        .Navigate2 URL

        While .Busy Or .readyState < 4: DoEvents: Wend


        With .document.querySelector("#loginFormUser_email")
            .Focus
            .Value = "Bob@gmail.com"
        End With
        With .document.querySelector("#loginForm_password")
            .Focus
            .Value = "systemSucksDoesn'tAcceptMyPassword"
        End With

        Application.Wait Now + TimeSerial(0, 0, 2)

        .document.querySelector("[onclick*=submitLogin]").Click

        While .Busy Or .readyState < 4: DoEvents: Wend

        .document.querySelector("#data_interval").Click
        .document.querySelector("[value=Monthly]").Click
        With .document.querySelector("#picker")
            .Focus
            .Value = "03/08/2017 - 03/08/2019"
            .FireEvent "onchange"
        End With

        'TODO Sorting column when clarified which column
        .document.querySelector("[title='Download Data']").Click

        Application.Wait Now + TimeSerial(0, 0, 10)

        Stop
        .Quit
    End With
End Sub

答案 1 :(得分:1)

我刚刚测试了以下代码,它可以工作,而不是每次需要运行此宏时都创建Internet Explorer实例,而是使用xmlhttp请求。只需复制整个代码并将其粘贴到vba中的模块即可。不要忘记将引用(工具/引用)添加到Microsoft HTML Object Library和Microsoft XML v6.0。

Option Explicit
Sub Export_Table()

'Html Objects---------------------------------------'
 Dim htmlDoc As MSHTML.HTMLDocument
 Dim htmlBody As MSHTML.htmlBody
 Dim ieTable As MSHTML.HTMLTable
 Dim Element As MSHTML.HTMLElementCollection


'Workbooks, Worksheets, Ranges, LastRow, Incrementers ----------------'
 Dim wb As Workbook
 Dim Table As Worksheet
 Dim i As Long

 Set wb = ThisWorkbook
 Set Table = wb.Worksheets("Sheet1")

 '-------------------------------------------'
 Dim xmlHttpRequest As New MSXML2.XMLHTTP60  '
 '-------------------------------------------'


 i = 2

'Web Request --------------------------------------------------------------------------'
 With xmlHttpRequest
 .Open "POST", "https://www.investing.com/instruments/HistoricalDataAjax", False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "X-Requested-With", "XMLHttpRequest"
.send "curr_id=951681&smlID=1695217&header=CLNX+Historical+Data&st_date=01%2F01%2F2017&end_date=03%2F01%2F2019&interval_sec=Monthly&sort_col=date&sort_ord=DESC&action=historical_data"


 If .Status = 200 Then

        Set htmlDoc = CreateHTMLDoc
        Set htmlBody = htmlDoc.body

        htmlBody.innerHTML = xmlHttpRequest.responseText

        Set ieTable = htmlDoc.getElementById("curr_table")

        For Each Element In ieTable.getElementsByTagName("tr")
            Table.Cells(i, 1) = Element.Children(0).innerText
            Table.Cells(i, 2) = Element.Children(1).innerText
            Table.Cells(i, 3) = Element.Children(2).innerText
            Table.Cells(i, 4) = Element.Children(3).innerText
            Table.Cells(i, 5) = Element.Children(4).innerText
            Table.Cells(i, 6) = Element.Children(5).innerText
            Table.Cells(i, 7) = Element.Children(6).innerText

            i = i + 1
        DoEvents: Next Element
 End If
End With


Set xmlHttpRequest = Nothing
Set htmlDoc = Nothing
Set htmlBody = Nothing
Set ieTable = Nothing
Set Element = Nothing

End Sub

Public Function CreateHTMLDoc() As MSHTML.HTMLDocument
    Set CreateHTMLDoc = CreateObject("htmlfile")
End Function

答案 2 :(得分:0)

尝试一下。

Sub Web_Table_Option()
    Dim HTMLDoc As New HTMLDocument
    Dim objTable As Object
    Dim lRow As Long
    Dim lngTable As Long
    Dim lngRow As Long
    Dim lngCol As Long
    Dim ActRw As Long
    Dim objIE As InternetExplorer
    Set objIE = New InternetExplorer
    objIE.Navigate "https://www.investing.com/equities/cellnex-telecom-historical-data"

    Do Until objIE.ReadyState = 4 And Not objIE.Busy
        DoEvents
    Loop
    Application.Wait (Now + TimeValue("0:00:03")) 'wait for java script to load
    HTMLDoc.body.innerHTML = objIE.Document.body.innerHTML
    With HTMLDoc.body
        Set objTable = .getElementsByTagName("curr_table")
        For lngTable = 0 To objTable.Length - 1
            For lngRow = 0 To objTable(lngTable).Rows.Length - 1
                For lngCol = 0 To objTable(lngTable).Rows(lngRow).Cells.Length - 1
                    ThisWorkbook.Sheets("Sheet1").Cells(ActRw + lngRow + 1, lngCol + 1) = objTable(lngTable).Rows(lngRow).Cells(lngCol).innerText
                Next lngCol
            Next lngRow
            ActRw = ActRw + objTable(lngTable).Rows.Length + 1
        Next lngTable
    End With
    objIE.Quit
End Sub