Excel VBA从企业网络中的网页获取数据

时间:2018-11-21 19:06:49

标签: html excel vba excel-vba web-scraping

我是vba的爱好者。 在我公司,我们通过代理.pac文件连接到Internet。 我需要从Accuweather获得天气。

借助我在网络上阅读的内容,我设法做了一些可以在公司以外进行的工作,例如在家里。 这是用户在其中选择城市,月份和年份的表单所调用的代码。

    Option Explicit

Sub GetExchangeRates(city As String, FromCurrency As String, Amount As String)

Dim XMLPage As New MSXML2.XMLHTTP60
Dim htmldoc As New MSHTML.HTMLDocument
Dim URL As String
Dim cn As String

Dim m As Long
Dim y As Date
Dim dfr As String

Dim d_until As Date
Dim MonthNm As String

MonthNm = FromCurrency
m = Application.Evaluate("=MONTH(1&" & Chr(34) & MonthNm & Chr(34) & ")")
y = Amount
dfr = Format(DateSerial(y, m, 1), "m/d/yyyy")

If city = "leiria" Then cn = "273891"
If city = "lisbon" Then cn = "274087"
If city = "porto" Then cn = "275317"
If city = "faro" Then cn = "273190"
If city = "coimbra" Then cn = "272818"

On Error GoTo Error_Handler

URL = "https://www.accuweather.com/pt/pt/" & city & "/" & cn & "/" & 
FromCurrency & "-weather/" & cn & "?monyr=" & dfr & "&view=table"

XMLPage.Open "GET", URL, False
XMLPage.send

htmldoc.body.innerHTML = XMLPage.responseText

ProcessHTMLPage htmldoc

Range("C1").Value = city
Range("D1").Value = Amount
Range("G2").Value = "Data"

Application.Run "FillRow"

Error_Handler_Exit:
On Error Resume Next
Exit Sub

Error_Handler:
MsgBox "Ocorreu um erro de rede." & vbCrLf & vbCrLf & _
       "Error Number: " & Err.Number & vbCrLf & _
       "Error Source: getOperatingSystem" & vbCrLf & _
       "Error Description: " & Err.Description, _
       vbCritical, "Internet access error!"
Resume Error_Handler_Exit

End Sub

Private Sub OpenRatesForm()
RatesForm.Show

End Sub


Sub ProcessHTMLPage(HTMLPage As MSHTML.HTMLDocument)
Dim HTMLTable As MSHTML.IHTMLElement
Dim HTMLTAbles As MSHTML.IHTMLElementCollection
Dim HTMLRow As MSHTML.IHTMLElement
Dim HTMLCell As New MSHTML.HTMLDocument
Dim RowNum As Long, ColNum As Integer

Set HTMLTAbles = HTMLPage.getElementsByTagName("table")

For Each HTMLTable In HTMLTAbles
Debug.Print HTMLTable.className

Worksheets.Add
Range("A1").Value = HTMLTable.className
Range("B1").Value = Now

RowNum = 2
For Each HTMLRow In HTMLTable.getElementsByTagName("tr")
    'Debug.Print vbTab & HTMLRow.innerText

    ColNum = 1
    For Each HTMLCell In HTMLRow.Children
        Cells(RowNum, ColNum) = HTMLCell.innerText
        ColNum = ColNum + 1
    Next HTMLCell

 ''''   For Each HTMLCell In HTMLTable.getElementsByTagName("td")

 RowNum = RowNum + 1

 Next HTMLRow

Next HTMLTable

Set HTMLTAbles = Nothing
End Sub

如果在公司中运行此程序,我会从拒绝访问其他人得到错误。 因此,我阅读更多内容,发现使用ie可以做到这一点。 它绕过了访问Internet的问题,但是问题是我无法使用该表单进行工作,有时ie即会打开AccuWeather页面,但不会在excel中显示数据。

    Sub Grabaccuwther()

Dim objIE As InternetExplorer
Dim ele As Object
Dim y As Integer
Dim url As String

'start a new browser instance
Set objIE = CreateObject("InternetExplorer.Application")
'make browser visible
objIE.Visible = True

 url = "https://www.accuweather.com/en/pt/lisbon/274087/november-weather/274087?monyr=11/1/2018&view=table"

'navigate to page with needed data
objIE.navigate url

'wait for page to load
 Application.StatusBar = "Loading Web page …"
' wait until the page loads before doing anything

Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

'we will output data to excel, starting on row 1
y = 1

For Each ele In objIE.Document.getElementsByTagName("tr")

    'each 'tr' (table row) element contains 4 children ('td') elements
    'put text of 1st 'td' in col A
    Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
    'put text of 2nd 'td' in col B
    Sheets("Sheet1").Range("B" & y).Value = ele.Children(1).textContent
    'put text of 3rd 'td' in col C
    Sheets("Sheet1").Range("B" & y).Value = ele.Children(2).textContent
    'put text of 4th 'td' in col D
    Sheets("Sheet1").Range("D" & y).Value = ele.Children(3).textContent
    'increment row counter by 1
    y = y + 1
Next

Application.StatusBar = "Web page Loaded!"
objIE.Quit

End Sub

我正在尝试大约一个月来执行此操作,但是我已经改变了一些想法,但是无法使其正常工作。 我非常感谢您完成这项工作。 谢谢。

1 个答案:

答案 0 :(得分:0)

这是基于您说IE通常可以访问该页面(也请确保该网站被列入白名单)。

整个表格:

Option Explicit
Public Sub GetTable()
    Dim IE As New InternetExplorer, hTable As HTMLTable
    Dim ws As Worksheet, t As Date
    Const MAX_WAIT_SEC As Long = 5
    Set ws = ThisWorkbook.Worksheets("Sheet1")

    With IE
        .Visible = True
        .navigate "https://www.accuweather.com/en/pt/lisbon/274087/november-weather/274087?monyr=11/1/2018&view=table"
        While .Busy Or .readyState < 4: DoEvents: Wend
        With .document
            t = Timer
            Do
                DoEvents
                On Error Resume Next
                Set hTable = .querySelector(".calendar-list")

                On Error GoTo 0
                If Timer - t > MAX_WAIT_SEC Then Exit Do
            Loop While hTable Is Nothing

            If Not hTable Is Nothing Then
                Dim clipboard As Object
                Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
                clipboard.SetText hTable.outerHTML
                clipboard.PutInClipboard
                ws.Cells(1, 1).PasteSpecial
            End If
        End With
        .Quit
    End With
End Sub

您在评论中提到的两列:

Option Explicit

Public Sub GetTable()
    Dim IE As New InternetExplorer, hTable As HTMLTable
    Dim ws As Worksheet, t As Date, r As Long, c As Long, i As Long, j As Long
    Const MAX_WAIT_SEC As Long = 5
    Set ws = ThisWorkbook.Worksheets("Sheet1")

    With IE
        .Visible = True
        .navigate "https://www.accuweather.com/en/pt/lisbon/274087/november-weather/274087?monyr=11/1/2018&view=table"

        While .Busy Or .readyState < 4: DoEvents: Wend
        With .document
            t = Timer
            Do
                DoEvents
                On Error Resume Next
                Set hTable = .querySelector(".calendar-list")

                On Error GoTo 0
                If Timer - t > MAX_WAIT_SEC Then Exit Do
            Loop While hTable Is Nothing

            If Not hTable Is Nothing Then
                For i = 1 To hTable.getElementsByTagName("tr").Length - 1
                    r = r + 1
                    ws.Cells(r, 1) = hTable.getElementsByTagName("th")(i).innerText
                    ws.Cells(r, 2) = hTable.getElementsByTagName("tr")(i).getElementsByTagName("td")(0).innerText
                Next
            End If
        End With
        .Quit
    End With
End Sub

参考(VBE>工具>参考):

  1. Microsoft Internet控件
  2. Microsoft HTML对象库