我有以下代码,它是根据我之前提出的问题、由 Tim Williams 回答后得到的。然而,在它正常运行几分钟后,我发现了一些原问题中没有提到的新需求。鉴于下面的代码和以下HTML结构,我该如何修改代码,以便从第二个甚至第三个使用相同 "right-header" 类的DIV中提取数据?子DIV没有类或ID,它们只是包装器。
这是HTML:
<div class="right-header">
<div>Entry 1</div>
<div>Entry 2</div>
<div>Entry 3</div>
<div>Entry 4</div>
<div>Entry 5</div>
<div>Entry 6</div>
</div>
<div class="right-header">
<div>Entry 1</div>
<div>Entry 2</div>
<div>Entry 3</div>
<div>Entry 4</div>
<div>Entry 5</div>
<div>Entry 6</div>
</div>
<div class="right-header">
<div>Entry 1</div>
<div>Entry 2</div>
<div>Entry 3</div>
<div>Entry 4</div>
<div>Entry 5</div>
<div>Entry 6</div>
</div>
以下是Tim Williams修改后的VBA:
Sub extract()
    ' Opens a local HTML file in a hidden Internet Explorer instance, reads the
    ' text of every <div> nested in the FIRST element with class "right-header",
    ' and writes it to the active sheet starting at row 2:
    '   - text ending in "%" or "0" goes to column C
    '   - any other non-empty text goes to column A
    ' Finally the combined text of the top-level div is written to Sheet3!A1.
    Const READY_COMPLETE As Long = 4   ' value of READYSTATE_COMPLETE

    Dim IE As InternetExplorer
    Dim topDivs As Object, topDiv As Object
    Dim childDivs As Object, div As Object
    Dim tc As String
    Dim cntr As Long

    Set IE = New InternetExplorerMedium
    IE.Visible = False
    IE.Navigate2 "C:\Users\john\Documents\Test.html"

    ' Wait while IE is loading. Busy alone can already be False while the
    ' document is still incomplete, so the ready state is checked as well.
    Do While IE.Busy Or IE.ReadyState <> READY_COMPLETE
        Application.Wait DateAdd("s", 1, Now)
    Loop

    Set topDivs = IE.document.getElementsByClassName("right-header")

    ' Without a matching element there is nothing to read; shut the hidden
    ' browser down before reporting the problem so it is not left running.
    If topDivs.Length = 0 Then
        IE.Quit
        Set IE = Nothing
        Err.Raise vbObjectError + 513, "extract", _
                  "No element with class ""right-header"" was found."
    End If

    Set topDiv = topDivs(0)
    Set childDivs = topDiv.getElementsByTagName("div")

    cntr = 2
    For Each div In childDivs
        tc = Trim(div.textContent)
        If tc <> "" Then
            ' The former Case "<div>" branch was removed: Right(tc, 1) is a
            ' single character and can never equal a five-character string.
            Select Case Right(tc, 1)
                Case "%"
                    Range("C" & cntr).Value = tc
                    ' Together with the increment after End If, a "%" entry
                    ' advances the output row by two.
                    cntr = cntr + 1
                Case "0"
                    ' Compared as text ("0" instead of 0) so that a
                    ' non-numeric last character cannot cause a type mismatch.
                    Range("C" & cntr).Value = tc
                Case Else
                    Range("A" & cntr).Value = tc
            End Select
        End If
        ' The row advances for every child div, including empty ones.
        cntr = cntr + 1
    Next div

    Sheets("Sheet3").Range("A1").Value = topDiv.textContent

    'Cleanup
    IE.Quit
    Set IE = Nothing
End Sub
谢谢大家,也很抱歉提出了一个与我之前的问题如此相似的问题。
答案 0 :(得分:1)
如果已知div的数量,你可以把它放在一个循环中
Sub extract()
    ' Opens a local HTML file in a hidden Internet Explorer instance and, for
    ' every element with class "right-header", writes the text of its nested
    ' <div> elements to the active sheet. Each group starts again at row 2 and
    ' is shifted i columns to the right, where i is the zero-based group index:
    '   - text ending in "%" or "0" goes to column C + i
    '   - any other non-empty text goes to column A + i
    Const READY_COMPLETE As Long = 4   ' value of READYSTATE_COMPLETE

    Dim IE As InternetExplorer
    Dim topDivs As Object, topDiv As Object
    Dim childDivs As Object, div As Object
    Dim tc As String
    Dim cntr As Long
    Dim i As Long

    Set IE = New InternetExplorerMedium
    IE.Visible = False
    IE.Navigate2 "C:\Nitesh\test.html"

    ' Wait while IE is loading. Busy alone can already be False while the
    ' document is still incomplete, so the ready state is checked as well.
    Do While IE.Busy Or IE.ReadyState <> READY_COMPLETE
        Application.Wait DateAdd("s", 1, Now)
    Loop

    ' Fetch the collection once and loop over however many groups exist
    ' instead of assuming exactly three.
    Set topDivs = IE.document.getElementsByClassName("right-header")

    For i = 0 To topDivs.Length - 1
        Set topDiv = topDivs(i)
        Set childDivs = topDiv.getElementsByTagName("div")

        cntr = 2
        For Each div In childDivs
            tc = Trim(div.textContent)
            If tc <> "" Then
                ' The former Case "<div>" branch was removed: Right(tc, 1) is
                ' a single character and can never equal "<div>".
                ' NOTE(review): with a shift of i, column A + i of a later
                ' group can land on column C + i of an earlier one when both
                ' kinds of entry occur - confirm the intended layout.
                Select Case Right(tc, 1)
                    Case "%"
                        Range("C" & cntr).Offset(0, i).Value = tc
                        ' Together with the increment after End If, a "%"
                        ' entry advances the output row by two.
                        cntr = cntr + 1
                    Case "0"
                        ' Compared as text ("0" instead of 0) so that a
                        ' non-numeric last character cannot cause a type
                        ' mismatch.
                        Range("C" & cntr).Offset(0, i).Value = tc
                    Case Else
                        Range("A" & cntr).Offset(0, i).Value = tc
                End Select
            End If
            ' The row advances for every child div, including empty ones.
            cntr = cntr + 1
        Next div
    Next i

    ' Cleanup: the browser is invisible, so it must be closed explicitly or
    ' the process stays alive after the macro ends.
    IE.Quit
    Set IE = Nothing
End Sub
并将所有输出按 i 进行列偏移,
以便把每个DIV的结果写入新的一列。
答案 1 :(得分:1)
Sub extract()
    ' Opens a local HTML file in a hidden Internet Explorer instance and, for
    ' every element with class "right-header", writes the text of its nested
    ' <div> elements to the active sheet. The row counter starts at 2 and is
    ' shared by all groups, so later groups continue below earlier ones:
    '   - text ending in "%" or "0" goes to column C
    '   - any other non-empty text goes to column A
    Const READY_COMPLETE As Long = 4   ' value of READYSTATE_COMPLETE

    Dim IE As InternetExplorer
    Dim topDivs As Object, topDiv As Object
    Dim childDivs As Object, div As Object
    Dim tc As String
    Dim cntr As Long

    Set IE = New InternetExplorerMedium
    IE.Visible = False
    IE.Navigate2 "C:\Users\john\Documents\Test.html"

    ' Wait while IE is loading. Busy alone can already be False while the
    ' document is still incomplete, so the ready state is checked as well.
    Do While IE.Busy Or IE.ReadyState <> READY_COMPLETE
        Application.Wait DateAdd("s", 1, Now)
    Loop

    cntr = 2

    'get all the top-level divs
    Set topDivs = IE.document.getElementsByClassName("right-header")

    'loop over the top-level divs
    For Each topDiv In topDivs
        'get child divs for this top-level div
        Set childDivs = topDiv.getElementsByTagName("div")

        For Each div In childDivs
            tc = Trim(div.textContent)
            If tc <> "" Then
                ' The former Case "<div>" branch was removed: Right(tc, 1) is
                ' a single character and can never equal "<div>".
                Select Case Right(tc, 1)
                    Case "%"
                        Range("C" & cntr).Value = tc
                        ' Together with the increment after End If, a "%"
                        ' entry advances the output row by two.
                        cntr = cntr + 1
                    Case "0"
                        ' Compared as text ("0" instead of 0) so that a
                        ' non-numeric last character cannot cause a type
                        ' mismatch.
                        Range("C" & cntr).Value = tc
                    Case Else
                        Range("A" & cntr).Value = tc
                End Select
            End If
            ' The row advances for every child div, including empty ones.
            cntr = cntr + 1
        Next div
    Next topDiv

    'Sheets("Sheet3").Range("A1").Value = topDiv.textContent

    'Cleanup
    IE.Quit
    Set IE = Nothing
End Sub