我正在使用Excel VBA根据D列中每一行的URL启动IE浏览器选项卡。然后根据预定义的类提取相关的HTML代码,并将其填充在A到C列中。 / p>
很确定我错过了一步。该过程在D2处停止,并且不会继续从下一个URL(在D3,D4等单元格中)提取HTML。
提前感谢您的任何建议!
Sub useClassnames()
Dim element As IHTMLElement
Dim elements As IHTMLElementCollection
Dim IE As InternetExplorer
Dim html As HTMLDocument
Dim shellWins As New ShellWindows
Dim IE_TabURL As String
Dim intRowPosition As Integer
Set IE = New InternetExplorer
IE.Visible = False
intRowPosition = 2
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.navigate Sheet1.Range("D" & intRowPosition)
While IE.Busy
DoEvents
Wend
intRowPosition = intRowPosition + 1
While Sheet1.Range("D" & intRowPosition) <> vbNullString
IE.navigate Sheet1.Range("D" & intRowPosition), CLng(2048)
While IE.Busy
DoEvents
Wend
intRowPosition = intRowPosition + 1
Wend
Do While IE.readyState <> READYSTATE_COMPLETE
Application.StatusBar = "Loading Web page…"
DoEvents
Loop
Set html = IE.document
Set elements = html.getElementsByClassName("container-bs")
Dim count As Long
Dim erow As Long
count = 0
For Each element In elements
If element.className = "container-bs" Then
erow = Sheet1.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
Cells(erow, 1) = html.getElementsByClassName("pull-left")(count + 1).innerHTML
Cells(erow, 2) = html.getElementsByClassName("description")(count).innerHTML
Cells(erow, 3) = html.getElementsByClassName("related-articles")(count).innerHTML
count = count + 1
End If
Next element
Range("A2:C2000").Select
Columns("A:A").EntireColumn.AutoFit
Columns("B:B").ColumnWidth = 36
End Sub
答案 0 :(得分:1)
您的行
Set html = IE.document
Set elements = html.getElementsByClassName("container-bs")
etc发生在While
循环之后。它必须在里面。
您的If
语句:
If element.className = "container-bs"
应该是多余的,因为您已经遍历了该类名的集合;所以我删除了这个。
您不是在循环中使用element
,因此从本质上讲,您是在使用它来控制递增的计数器变量。这表明您可以使用更好的编码策略来检索感兴趣的项目。
始终声明父级工作表,并且不依赖隐式Activesheet引用-容易出错。
我希望结构更像如下(我无法考虑重构来删除element
)
Option Explicit
Public Sub UseClassnames()
Dim element As IHTMLElement, elements As IHTMLElementCollection, ie As InternetExplorer
Dim html As HTMLDocument, intRowPosition As Long
intRowPosition = 2
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
While Sheet1.Range("D" & intRowPosition) <> vbNullString
If intRowPosition = 2 Then
ie.navigate Sheet1.Range("D" & intRowPosition)
Else
ie.navigate Sheet1.Range("D" & intRowPosition), CLng(2048)
End If
While ie.Busy Or ie.readyState < 4: DoEvents: Wend
Set html = ie.document
Set elements = html.getElementsByClassName("container-bs")
Dim count As Long, erow As Long
count = 0
For Each element In elements
erow = Sheet1.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
With Sheet1
.Cells(erow, 1) = html.getElementsByClassName("pull-left")(count + 1).innerHTML
.Cells(erow, 2) = html.getElementsByClassName("description")(count).innerHTML
.Cells(erow, 3) = html.getElementsByClassName("related-articles")(count).innerHTML
End With
count = count + 1
Next element
intRowPosition = intRowPosition + 1
Wend
With Sheet1
.Range("A2:C2000").Select
.Columns("A:A").EntireColumn.AutoFit
.Columns("B:B").ColumnWidth = 36
End With
End Sub