有人在这里帮助我说如何使用Excel VBA从第三方应用程序读取嵌套的html表值?作为一个例子,我粘贴了HTML源代码的一部分,我想要读取所有的值,并希望将它存储到excel中。但是这里所有都是他们使用的嵌套表,而且这些表也没有任何名称在我见过的html源代码中。
<td>
<table cellspacing="1" cellpadding="0" class="data">
<tr class="colhead">
<th colspan="3">Expression</th>
</tr>
<tr class="colhead">
<th>Field</th>
<th>Operator</th>
<th>Answer</th>
</tr>
<tr class="rowLight">
<td width="40%">
Location Attributes: LOC - Sub Commodity
</td>
<td width="20%">
=
</td>
<td width="40%">
Abrasives
</td>
</tr>
<tr class="rowDark">
<td width="40%">
Location Attributes: LOC - Commodity Tier1
</td>
<td width="20%">
=
</td>
<td width="40%">
Advertising, Sales & Promotion
</td>
</tr>
谢谢, 奥雅纳
答案 0 :(得分:3)
请找到以下代码:
Option Explicit
Sub TableExample()
Dim IE As Object
Dim doc As Object
Dim strURL As String
strURL = "[URL="http://example.comwww.dectech.org/football/index.php"]http://example.com[/URL]" ' replace with URL of your choice
Set IE = CreateObject("InternetExplorer.Application")
With IE
'.Visible = True
.navigate strURL
Do Until .ReadyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set doc = IE.Document
GetAllTables doc
.Quit
End With
End Sub
Sub GetAllTables(doc As Object)
' get all the tables from a webpage document, doc, and put them in a new worksheet
Dim ws As Worksheet
Dim rng As Range
Dim tbl As Object
Dim rw As Object
Dim cl As Object
Dim tabno As Long
Dim nextrow As Long
Dim I As Long
Set ws = Worksheets.Add
For Each tbl In doc.getElementsByTagName("TABLE")
tabno = tabno + 1
nextrow = nextrow + 1
Set rng = ws.Range("B" & nextrow)
rng.Offset(, -1) = "Table " & tabno
For Each rw In tbl.Rows
For Each cl In rw.Cells
rng.Value = cl.outerText
Set rng = rng.Offset(, 1)
I = I + 1
Next cl
nextrow = nextrow + 1
Set rng = rng.Offset(1, -I)
I = 0
Next rw
Next tbl
ws.Cells.ClearFormats
End Sub
答案 1 :(得分:1)
这就是我读HTML表的方式:
Sub ReadHTMLtable()
Dim htmldb As New ADODB.Connection
Dim htmlcmd As New ADODB.Command
Dim rs As New ADODB.Recordset
With htmldb
.Provider = "Microsoft.Jet.OLEDB.4.0"
.ConnectionString = "Data Source=Z:\filename.html;Extended Properties=""HTML Import;HDR=YES;IMEX=1"""
.Open
End With
Set htmlcmd.ActiveConnection = htmldb
htmlcmd.CommandType = adCmdText
htmlcmd.CommandText = "Select * from [table]"
rs.CursorLocation = adUseClient
rs.CursorType = adOpenDynamic
rs.LockType = adLockOptimistic
rs.Open htmlcmd
'process rs here
End Sub
这使用ADO,但对于DAO应该是相同的
答案 2 :(得分:0)
这需要将引用设置为Microsoft HTML Object Library
和Microsoft Internet Controls
Sub Extract_TD_text()
Dim URL As String
Dim IE As InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim TDelements As IHTMLElementCollection
Dim TDelement As HTMLTableCell
Dim r As Long
'Saved from www vbaexpress com/forum/forumdisplay.php?f=17
URL = "your file pathe/URL"
Set IE = New InternetExplorer
With IE
.navigate URL
.Visible = True
'Wait for page to load
While .Busy Or .readyState <> READYSTATE_COMPLETE: DoEvents: Wend
Set HTMLdoc = .document
End With
Set TDelements = HTMLdoc.getElementsByTagName("TD")
Sheet1.Cells.ClearContents
r = 1
For Each TDelement In TDelements
'Look for required TD elements - this check is specific to VBA Express forum - modify as required
If TDelement.ParentNode.ParentNode.ParentNode.Title = "table you want's title" Then
Sheet1.Range("A1").Offset(r, 0).Formula = "=" & Chr(34) & TDelement.innerText & Chr(34)
r = r + 1
End If
Next
IE.Quit
Set IE = Nothing
End Sub
我的基础是this page
答案 3 :(得分:0)
我全神贯注地寻找这个问题的答案。我终于找到了实际上通过录制宏的解决方案。我知道,你们都认为自己超越了这一点,但这实际上是最好的方式。请在此处查看完整帖子:http://automatic-office.com/?p=344 简而言之,您想要录制宏并转到数据 - &gt;从网站导航到您的网站并选择您想要的表格 我过去使用了上面的解决方案“get element by id”类型的东西,它对于一些元素来说很棒,但是如果你想要一个完整的表,并且你没有超级经验,只需记录一个宏。 不要告诉你的朋友,然后重新格式化它看起来像你自己的工作,所以没有人知道你使用宏工具;)
代码看起来像这样(包括默认属性的所有多余设置到它们默认值,记录宏为你做的...找出哪些是额外的并删除它们
Sub Macro1()
With ActiveSheet.QueryTables.Add(Connection:= _
“URL;http://w1.weather.gov/obhistory/KFRI.html”, Destination:=Range(“$D$4″))
.Name = “KFRI”
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlSpecifiedTables
.WebFormatting = xlWebFormattingNone
.WebTables = “4″
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
End Sub
享受
答案 4 :(得分:0)
这是另一种方式。棘手的是,如果你有一个嵌套的表,你必须 获取父容器是否为div,表。在IE或Chrome中使用F12开发人员工具,让您顺利完成工作。在下面的示例中,有一个包含表的div。在问题中给出的示例中,有一个包含表的表,因此您必须找到该父表并使用与此类似的代码来获取子表。希望有所帮助。
stabledata = ""
Set oTbl = odiv.getElementsByTagName("TABLE").Item(0)
Set oThead = odiv.getElementsByTagName("THEAD").Item(0)
Set oTRows = oThead.getElementsByTagName("TR").Item(0)
Set oTds = oTRows.getElementsByTagName("TH")
For Each oTd In oTds
DoEvents
stabledata = stabledata & oTd.innertext & Chr(9)
Next oTd
stabledata = stabledata & vbCrLf
Set oTBody = odiv.getElementsByTagName("TBODY").Item(0)
Set oTRows = oTBody.getElementsByTagName("TR")
For Each oTRow In oTRows
DoEvents
Set oTds = oTRow.getElementsByTagName("TD")
For Each oTd In oTds
DoEvents
stabledata = stabledata & oTd.innertext & Chr(9)
Next oTd
stabledata = stabledata & vbCrLf
Next oTRow