一个简单的问题。我正在尝试编写一个过程来解析 this Site 的 HTML 源代码。
其中的一部分(第154到174行)足以作为范例:
<p>(British Aircraft Company)</p>
<ul>
<li><a href="/wiki/B.A.C._I" title="B.A.C. I" class="mw-redirect">B.A.C. I</a></li>
<li><a href="/wiki/B.A.C._II" title="B.A.C. II" class="mw-redirect">B.A.C. II</a></li>
<li><a href="/wiki/B.A.C._III" title="B.A.C. III" class="mw-redirect">B.A.C. III</a></li>
<li><a href="/wiki/B.A.C._IV" title="B.A.C. IV" class="mw-redirect">B.A.C. IV</a></li>
<li><a href="/wiki/B.A.C._V" title="B.A.C. V" class="mw-redirect">B.A.C. V</a></li>
<li><a href="/wiki/B.A.C._VI" title="B.A.C. VI" class="mw-redirect">B.A.C. VI</a></li>
<li><a href="/wiki/B.A.C._VII" title="B.A.C. VII" class="mw-redirect">B.A.C. VII</a></li>
<li><a href="/wiki/B.A.C._VII_Mk.2" title="B.A.C. VII Mk.2" class="mw-redirect">B.A.C. VII Mk.2</a></li>
<li><a href="/wiki/B.A.C._VII_Planette" title="B.A.C. VII Planette" class="mw-redirect">B.A.C. VII Planette</a></li>
<li><a href="/wiki/B.A.C._VIII" title="B.A.C. VIII" class="mw-redirect">B.A.C. VIII</a></li>
<li><a href="/wiki/B.A.C._VIII_Bat-Boat" title="B.A.C. VIII Bat-Boat" class="mw-redirect">B.A.C. VIII Bat-Boat</a></li>
<li><a href="/wiki/B.A.C._IX" title="B.A.C. IX" class="mw-redirect">B.A.C. IX</a></li>
<li><a href="/wiki/B.A.C._Cupid" title="B.A.C. Cupid" class="mw-redirect">B.A.C. Cupid</a></li>
<li><a href="/wiki/B.A.C._Drone" title="B.A.C. Drone" class="mw-redirect">B.A.C. Drone</a></li>
<li><a href="/wiki/B.A.C._Super_Drone" title="B.A.C. Super Drone" class="mw-redirect">B.A.C. Super Drone</a></li>
<li><a href="/wiki/B.A._Swallow_2" title="B.A. Swallow 2" class="mw-redirect">B.A. Swallow 2</a></li>
<li><a href="/wiki/B.A._Eagle_2" title="B.A. Eagle 2" class="mw-redirect">B.A. Eagle 2</a></li>
<li><a href="/wiki/B.A._Double_Eagle" title="B.A. Double Eagle" class="mw-redirect">B.A. Double Eagle</a></li>
</ul>
我正在努力想出一个办法。我已经可以定位到 <p> HTML 标签,
但是我无法访问列表项并循环查找我想要的内容,因为它们被进一步包含在 <ul></ul> 标签
之间。接下来应该怎么做?
' Scans every <p> element of the document loaded in IE, looking for the
' paragraph whose markup is exactly "(British Aircraft Company)".
' The body of the match branch is empty, so nothing happens when it is found.
' NOTE(review): IE, Results and itm are not declared in this snippet;
' IE is presumably an InternetExplorer instance created elsewhere - confirm.
Sub ICE()
' Collect all <p> elements of the current document.
Set Results = IE.document.getElementsByTagName("p")
For Each itm In Results
' Exact, case-sensitive comparison against the paragraph's inner markup.
If itm.innerHTML = "(British Aircraft Company)" Then
' Unfinished: no action is taken for the matching paragraph yet.
End If
Next itm
End Sub
为了更清楚地说明情况:我目前的学习基于 ron 在 VBA parsing of href
中给出的回答里的建议 -> 其中提到了 desired results。
我想要的是让 VBA 在运行时“点击”我选定的 href,因为它是一个实际的链接。我正在参考 ron 的代码(可以在 previous example 中看到):
' Fragment taken from inside a For Each loop over page elements (the loop
' header is not part of this excerpt): click the matching element, wait for
' the browser to finish loading, then leave the loop.
' NOTE(review): outerHTML normally includes the element's own tag markup, so
' an exact match against the bare text "B.A.C. VII" looks unlikely - confirm
' against the example this was copied from.
If itm.outerhtml = "B.A.C. VII" Then
itm.Click
' Keep yielding to the message pump until IE reports it is idle and the
' document readyState is 4.
Do Until Not IE.Busy And IE.readyState = 4
DoEvents
Loop
' Only the first match is clicked.
Exit For
End If
...这里使用了 outerHTML,但我遇到的主要困难在于循环和逻辑运算符。
我写了下面这段代码,然而它不起作用:
' Attempt to find the "(British Aircraft Company)" paragraph and then locate
' the "B.A.C. V" entry in a list.
' NOTE(review): IE, Results, Results2, itm and itm2 are not declared in this
' snippet; they are presumably set up elsewhere - confirm.
Set Results = IE.document.getElementsByTagName("p")
For Each itm In Results
If itm.innerHTML = "(British Aircraft Company)" Then
' This queries the whole document, so Results2 holds every <ul> on the page,
' not just the list that follows the matched paragraph.
Set Results2 = IE.document.getElementsByTagName("ul")
For Each itm2 In Results2
' itm2 is a <ul> element, so its innerHTML is the markup of all of its
' <li>/<a> children; it is compared here against the bare text of a
' single entry.
If itm2.innerHTML = "B.A.C. V" Then
MsgBox itm2.innerHTML
End If
Next itm2
End If
Next itm
答案 0 :(得分:3)
这将列出 “(British Aircraft Company)” 这个 <p> 标签后面的列表中的飞机:
Sub GetAircraft()
    ' Downloads the Wikipedia "List of aircraft (B)" page and prints, to the
    ' Immediate window, the text of every list item in the <ul> that directly
    ' follows the <p> whose text is "(British Aircraft Company)".
    '
    ' Uses early-bound MSXML2 and MSHTML types, so the VBA project needs
    ' references to the libraries that provide them.
    '
    ' If the request does not complete with HTTP status 200, a diagnostic line
    ' is printed and nothing is parsed (previously an error page would have
    ' been parsed silently and produced no output at all).

    Const PAGE_URL As String = "http://en.wikipedia.org/wiki/List_of_aircraft_%28B%29"
    Const HEADING_TEXT As String = "(British Aircraft Company)"
    Const STATE_DONE As Long = 4    ' XMLHTTP readyState value once the response is complete
    Const HTTP_OK As Long = 200

    Dim xHttp As MSXML2.XMLHTTP
    Dim hDoc As MSHTML.HTMLDocument
    Dim hUls As MSHTML.IHTMLElementCollection
    Dim hUl As MSHTML.HTMLListElement
    Dim hLi As MSHTML.HTMLLIElement

    Set xHttp = New MSXML2.XMLHTTP
    xHttp.Open "GET", PAGE_URL
    xHttp.send

    ' Yield to the host application until the response has fully arrived.
    Do
        DoEvents
    Loop Until xHttp.readyState = STATE_DONE

    ' Do not try to parse a failed response.
    If xHttp.Status <> HTTP_OK Then
        Debug.Print "GetAircraft: request for " & PAGE_URL & _
                    " failed with status " & xHttp.Status & " " & xHttp.statusText
        Exit Sub
    End If

    ' Load the response into an in-memory HTML document so it can be queried.
    Set hDoc = New HTMLDocument
    hDoc.body.innerHTML = xHttp.responseText

    Set hUls = hDoc.getElementsByTagName("ul")

    'Go through all the <ul> tags
    For Each hUl In hUls
        ' The checks are nested rather than joined with And because VBA's And
        ' does not short-circuit: each test is only safe once the previous
        ' one has passed.

        'Only if previous tag is something
        If Not hUl.PreviousSibling Is Nothing Then
            'Only if previous tag is <p>
            If TypeName(hUl.PreviousSibling) = "HTMLParaElement" Then
                'Only if previous paragraph is specified text
                If hUl.PreviousSibling.innerText = HEADING_TEXT Then
                    'loop through the <li> and print them out
                    For Each hLi In hUl.Children
                        Debug.Print hLi.innerText
                    Next hLi
                End If
            End If
        End If
    Next hUl
End Sub