解析保存的HTML文件VBA

时间:2018-06-30 12:15:38

标签: html vba

我有一个HTML文件,该文件保存在本地桌面上,其中包含一个统计表,我需要从该表中提取特定数据,将其粘贴到excel工作簿表中,然后通过电子邮件发送。

我已经完成了其余的过程,我只是在努力弄清楚如何解析html文件,而我看到的所有其他示例都是在解析网站,而不是在本地保存html文件。

很抱歉,这是一个初学者的问题,但是我发现很难理解我看到的其他示例。

感谢您的协助。

1 个答案:

答案 0 :(得分:0)

感谢大家提供的示例,并指出正确的方向!下面发布的示例从存储在用户桌面上的HTML文件中复制数据,并将其粘贴到Excel中的新工作表中。

Option Explicit

Sub ParseHTML()

Dim URL           As String
Dim IE            As InternetExplorer
Dim htmldoc       As MSHTML.IHTMLDocument             'Document object
Dim eleColtr      As MSHTML.IHTMLElementCollection    'Element collection for tr tags
Dim eleColtd      As MSHTML.IHTMLElementCollection    'Element collection for td tags
Dim htmlTables    As MSHTML.IHTMLElementCollection    'Element collection for table tags
Dim eleRow        As MSHTML.IHTMLElement              'Row elements
Dim eleCol        As MSHTML.IHTMLElement              'Column elements
Dim wksOut        As Worksheet
Dim rngOut        As Range
Dim intTableIndex As Integer
Dim intRowIndex   As Integer
Dim intColIndex   As Integer

URL = Environ("userProfile") & "\desktop\FileName.HTML"

'Open InternetExplorer.
Set IE = New InternetExplorer

'Navigate to URL.
With IE
    .navigate URL
    .Visible = False


'Extract html information to objects.
Set htmldoc = IE.document
Set htmlTables = htmldoc.getElementsByTagName("table")
Set eleColtr = htmlTables(intTableIndex).getElementsByTagName("tr")

'Extract table to a new blank worksheet.
On Error Resume Next
Set wksOut = ThisWorkbook.Worksheets("WorksheetName")
If Err.Number <> 0 Then
    Set wksOut = ThisWorkbook.Worksheets.Add(After:=Worksheets(Worksheets.Count))
    wksOut.Name = "WorksheetName"
End If
With wksOut
    .Cells.Clear
    .Cells.NumberFormat = "General"
    .Cells.ColumnWidth = 2
End With
On Error GoTo 0

'This section populates Excel
intRowIndex = 0
For Each eleRow In eleColtr
    Set eleColtd = htmlTables(intTableIndex).getElementsByTagName("tr")(intRowIndex).getElementsByTagName("td") 'get all the td elements in that specific tr
    Set rngOut = wksOut.Range("A1000000").End(xlUp).Offset(1, 0)
    intColIndex = 0
    For Each eleCol In eleColtd
        rngOut.Offset(0, intColIndex) = eleCol.innerText
        intColIndex = intColIndex + 1
    Next eleCol
    intRowIndex = intRowIndex + 1
Next eleRow

wksOut.Cells.EntireColumn.AutoFit


'Cleanup
IE.Quit
Set IE = Nothing
Set htmldoc = Nothing
Set htmlTables = Nothing
Set eleColtr = Nothing
Set eleColtd = Nothing
Set wksOut = Nothing
Set rngOut = Nothing

End With

End Sub

请注意,excel可能会在运行时抛出运行时错误自动化错误:

Set IE = New InternetExplorer

如果发生这种情况,请尝试将InternetExplorer完整性设置为“中”:

Set IE = New InternetExplorerMedium

如果您需要有关InternetExplorer完整性的更多信息,请参阅 https://blogs.msdn.microsoft.com/ieinternals/2011/08/03/default-integrity-level-and-automation/

正如Tim所述,我可以在excel中打开文件,然后复制并粘贴运行速度更快的值:

Sub CopyHTML()

dim Wb as Workbook
dim Ws as Worksheet

Set Wb = ActiveWorkbook
Set Ws = Wb.Sheets("Sheet1")

'Opens html file and copies range
Workbooks.Open (Environ("userProfile") & "\desktop\FileName.html")
Range("A1:AJ21").Select
Selection.Copy

'pastes range in cell B5 on active workbook
Wb.Activate
Range("B5").Select
Ws.Paste

Application.CutCopyMode = False
Workbooks("FileName.html").Close

感谢提姆的建议!