需要将单元格中的数据拆分为不同的列,并使用vba帮助将数据从网站复制到Excel

时间:2018-06-08 18:56:47

标签: excel vba excel-vba web-scraping

所以我正在复制this网站的流量数据。

到目前为止,我使用了以下代码:

Sub main()
Dim IE As InternetExplorer
Dim i
Set IE = New InternetExplorer

IE.Navigate "https://www.cp24.com/mobile/commuter-centre/traffic"

Do
DoEvents
Loop Until IE.ReadyState = ReadyState_Complete

Dim Doc As HTMLDocument
Set Doc = IE.Document

Dim AllRoute As String

 Set holdingsClass = 
 Doc.getElementsByClassName("trafficWidget")
 ActiveSheet.Range("A1").Value = holdingsClass(0).textContent
IE.Quit
End Sub

我面临两个问题

1)它将流量小部件类中的所有数据复制到一个单元格中,以便在单元格空间不足时删除数据

2)我想要一种分割数据的方法,所以现在一切都显示在一个单元格中

看起来应该是这样的

col.A          col.B            col.C         col.D
HighwayName    Current          Ideal         Delay

任何指导都会受到赞赏吗?

1 个答案:

答案 0 :(得分:0)

您可以使用CSS selectors来定位所需信息。

Option Explicit

Sub Getinfo()
    Dim http As New XMLHTTP60, html As New HTMLDocument '< XMLHTTP60 is for Excel 2016 so change according to your versione.g. XMLHTTP for 2013

    Const URL As String = "https://www.cp24.com/mobile/commuter-centre/traffic"
    Application.ScreenUpdating = False
    With http
        .Open "GET", URL, False
        .send
        html.body.innerHTML = .responseText
    End With

    Dim routeNodeList As Object, currentNodeList As Object, idealNodeList As Object, delayNodeList As Object

    With html
        Set routeNodeList = .querySelectorAll(".location")
        Set currentNodeList = .querySelectorAll(".current")
        Set idealNodeList = .querySelectorAll(".ideal")
        Set delayNodeList = .querySelectorAll(".delaymin")
    End With

    Dim i As Long
    For i = 0 To routeNodeList.Length - 1
        With ActiveSheet
            .Cells(i + 2, 1) = routeNodeList.item(i).innerText
            .Cells(i + 2, 2) = currentNodeList.item(i).innerText
            .Cells(i + 2, 3) = idealNodeList.item(i).innerText
            .Cells(i + 2, 4) = delayNodeList.item(i).innerText
        End With
    Next i

    Application.ScreenUpdating = True

End Sub

所需参考文献(VBE&gt;工具&gt;参考文献):

HTML Object library and MS XML < your version

示例输出:

Output

后期绑定版本:

Option Explicit

Public Sub Getinfo()
    Dim http As Object, html As Object, i As Long
    Const URL As String = "https://www.cp24.com/mobile/commuter-centre/traffic"
    Application.ScreenUpdating = False
    With CreateObject("MSXML2.serverXMLHTTP")
        .Open "GET", URL, False
        .send
        Set html = CreateObject("HTMLFile")
        html.body.innerHTML = .responseText
    End With
    Dim counter As Long: counter = 1
    With ActiveSheet
        For i = 0 To html.all.Length - 1
            Select Case html.all(i).className
            Case "location"
                counter = counter + 1
                .Cells(counter, 1).Value = html.all(i).innerText
            Case "current"
                .Cells(counter, 2).Value = html.all(i).innerText
            Case "ideal"
                .Cells(counter, 3).Value = html.all(i).innerText
            Case "delaymin"
                .Cells(counter, 4).Value = html.all(i).innerText
            End Select
        Next i
    End With

    Application.ScreenUpdating = True

End Sub