有效地解析HTML表

时间:2015-09-10 20:03:50

标签: html vba ms-access

我有一个需要从HTML转换为Access的大表。由于身份验证限制,我无法使用内置的导入工具。但是,我可以使用Microsoft internet controls自动登录网站,然后使用HTML Object Library从html中获取表格。然后我可以运行原始HTML文本或对象并构建我的表。但它仍然需要很长时间。该文字大约有一百万个字符。也许没有更快的方法?

以下是我用来分解文本的内容。我假设直接使用HTML对象会更慢。

' CopyTable
'
' Walks the markup in Tbl.innerHTML tag by tag and copies the text that
' directly follows each <td>/<th> opening tag (up to the next "<") into a
' local 2-D Variant array, grid(row, column).
'
' Inputs : Tbl - an object defined outside this procedure that exposes an
'          innerHTML string property (presumably an HTML table element;
'          confirm against the declaring module).
' Outputs: none. grid is local to this procedure, so any code that needs the
'          parsed values has to consume grid before End Sub.
'
' Notes  : * A cell that contains nested markup only contributes the text
'            that precedes its first nested tag.
'          * Every InStr result is checked for 0 ("not found") before it is
'            used as a position, because Mid raises run-time error 5 when
'            its start argument is 0.
Sub CopyTable()
    Const INITIAL_MAX_COL As Long = 100

    Dim HTML As String
    Dim grid()
    Dim rowCount As Long    ' number of "<tr" occurrences = upper bound on rows
    Dim colMax As Long      ' current upper bound of grid's column dimension
    Dim htmlLen As Long
    Dim xline As Long       ' current row index, -1 until the first <tr> is seen
    Dim xitem As Long       ' next column index within the current row
    Dim poss As Long        ' 1-based scan position inside HTML
    Dim tagEnd As Long      ' position of the ">" that closes the current tag
    Dim isCell As Boolean

    HTML = Tbl.innerHTML
    htmlLen = Len(HTML)

    ' Split yields (occurrences + 1) pieces, so UBound is the occurrence count.
    ' With no rows there is nothing to parse (and ReDim 0 To -1 would fail).
    rowCount = UBound(Split(HTML, "<tr"))
    If rowCount < 1 Then Exit Sub

    colMax = INITIAL_MAX_COL
    ReDim grid(0 To rowCount - 1, 0 To colMax)

    poss = 1
    xline = -1
    xitem = 0

    Do While poss >= 1 And poss <= htmlLen
        isCell = False

        Select Case Mid$(HTML, poss, 3)
            Case "<td"
                isCell = True

            Case "<th"
                ' "<thead" (with or without attributes) is a section tag,
                ' not a header cell: just move on to the next tag.
                isCell = (Mid$(HTML, poss, 6) <> "<thead")
                If Not isCell Then poss = InStr(poss + 1, HTML, "<")

            Case "<tr"
                xitem = 0
                xline = xline + 1
                tagEnd = InStr(poss, HTML, ">")
                If tagEnd = 0 Then Exit Do          ' unterminated tag
                poss = InStr(tagEnd, HTML, "<")

            Case Else
                poss = InStr(poss + 1, HTML, "<")
        End Select

        If isCell Then
            tagEnd = InStr(poss, HTML, ">")
            If tagEnd = 0 Then Exit Do              ' unterminated tag

            ' One search serves both the end of the cell text and the next
            ' scan position.
            poss = InStr(tagEnd, HTML, "<")
            If poss = 0 Then poss = htmlLen + 1     ' text runs to end of string

            If xline >= 0 Then                      ' ignore cells outside any row
                If xitem > colMax Then
                    ' ReDim Preserve may only resize the last dimension,
                    ' which is the column dimension here.
                    colMax = colMax * 2
                    ReDim Preserve grid(0 To rowCount - 1, 0 To colMax)
                End If
                grid(xline, xitem) = Mid$(HTML, tagEnd + 1, poss - tagEnd - 1)
            End If
            xitem = xitem + 1
        End If

        ' InStr found no further tag: parsing is complete.
        If poss = 0 Then Exit Do

        ' Skip closing tags without another trip through the Select Case.
        If Mid$(HTML, poss, 2) = "</" Then poss = InStr(poss + 1, HTML, "<")
    Loop
End Sub

0 个答案:

没有答案