我创建了一个 VBA 脚本,用来解析网页中各个帖子的时间(定义为 postTime)以及帖子的标题。尽管 postTime 在目标页面中也可以获得,但我想从着陆页(列表页)获取它,并与从目标页面收集到的 postTitle 一起打印。我已在脚本中定义了选择器,它们可以收集所需的内容。但是,我当前的尝试只会打印某一条帖子的 postTime,而我想打印的是多条帖子各自的 postTime。
如何在一个循环中打印另一个循环里获得的项目?
到目前为止,我的脚本是:
' CollectData: downloads the Stack Overflow "web-scraping" tag listing, gathers the
' question links found there, then requests each question page and prints a post
' time together with that page's title using Debug.Print.
'
' NOTE(review): as written, the same postTime is printed for every question.
' querySelector yields only the first ".user-action-time" match, and the single
' postTime string is reassigned on every pass of the first loop, so the second
' loop only ever sees the one value left behind by the first loop.
' NOTE(review): the loop counter I has no Dim statement in this procedure.
Sub CollectData()
Const baseUrl = "https://stackoverflow.com"
Dim Http As New XMLHTTP60, Html As New HTMLDocument
Dim post As Object, itemlist$, linklist As Variant
Dim qualifiedLink$, nlink As Variant, postTime$, postTitle$
' Request the listing page (third argument False = not asynchronous) and load
' the response markup into the in-memory document.
With Http
.Open "GET", "https://stackoverflow.com/questions/tagged/web-scraping", False
.send
Html.body.innerHTML = .responseText
End With
' One node per question link on the listing page.
Set post = Html.querySelectorAll(".summary .question-hyperlink")
For I = 0 To post.Length - 1
' Not indexed by I: this reads the same (first) time element on every iteration.
postTime = Html.querySelector(".user-action-time").innerText
' Keep the part of the href after "about:" and prepend the site root.
' Presumably the prefix appears because the document has no base URL - TODO confirm.
qualifiedLink = baseUrl & Split(post(I).getAttribute("href"), "about:")(1)
' Accumulate the links as one space-separated string (no leading separator).
itemlist = itemlist & IIf(itemlist = "", "", " ") & qualifiedLink
Next I
' Turn the space-separated string back into an array of URLs.
linklist = Split(itemlist, " ")
For Each nlink In linklist
' Fetch the question page; this replaces the listing markup held in Html.
With Http
.Open "GET", nlink, False
.send
Html.body.innerHTML = .responseText
End With
postTitle = Html.querySelector("h1[itemprop='name'] a").innerText
' postTime is not updated inside this loop; it still holds whatever the
' first loop assigned last, so every line printed shows the same time.
Debug.Print postTime, postTitle
Next nlink
End Sub
答案 0 :(得分:1)
您需要使用 querySelectorAll,并在第一个循环中按索引访问其结果,以确保每条帖子取到各自的发布时间。我会将这些时间存储在一个集合(Collection)中,然后在最后的循环中通过索引访问它们:
Option Explicit
' CollectData
' Reads the Stack Overflow "web-scraping" tag listing, remembers the post time
' shown for each listed question in a Collection, then visits every question
' page and prints the remembered time next to the title found on that page.
Public Sub CollectData()
    Const baseUrl = "https://stackoverflow.com"

    Dim request As New XMLHTTP60
    Dim page As New HTMLDocument
    Dim questionLinks As Object
    Dim postedTimes As Object
    Dim joinedUrls As String
    Dim absoluteUrl As String
    Dim stamp As String
    Dim heading As String
    Dim urlParts As Variant
    Dim idx As Long
    Dim k As Long

    Set postedTimes = New Collection

    ' Load the listing page into the in-memory document.
    request.Open "GET", "https://stackoverflow.com/questions/tagged/web-scraping", False
    request.send
    page.body.innerHTML = request.responseText

    Set questionLinks = page.querySelectorAll(".summary .question-hyperlink")

    For idx = 0 To questionLinks.Length - 1
        ' The idx-th time element belongs with the idx-th question link.
        stamp = page.querySelectorAll(".user-action-time").item(idx).innerText
        postedTimes.Add stamp

        ' Keep what follows "about:" in the href and prepend the site root.
        absoluteUrl = baseUrl & Split(questionLinks(idx).getAttribute("href"), "about:")(1)

        ' Build a space-separated list without a leading separator.
        If joinedUrls = "" Then
            joinedUrls = absoluteUrl
        Else
            joinedUrls = joinedUrls & " " & absoluteUrl
        End If
    Next idx

    urlParts = Split(joinedUrls, " ")

    For k = LBound(urlParts) To UBound(urlParts)
        ' Fetch the question page; the listing markup in page is replaced.
        request.Open "GET", urlParts(k), False
        request.send
        page.body.innerHTML = request.responseText

        heading = page.querySelector("h1[itemprop='name'] a").innerText

        ' The array from Split is 0-based while the Collection is 1-based.
        Debug.Print postedTimes(k + 1), heading
    Next k
End Sub
更好的做法是先把 querySelectorAll 返回的时间节点列表存入一个变量,而不是在循环内反复调用 querySelectorAll,因为这样效率更高:
Option Explicit
' CollectData
' Prints, for each question listed under the Stack Overflow "web-scraping" tag,
' the post time shown on the listing page next to the title read from the
' question's own page. Output is written with Debug.Print, one line per question.
'
' Uses early-bound XMLHTTP60 and HTMLDocument, so the project needs references
' that provide those types (presumably "Microsoft XML, v6.0" and
' "Microsoft HTML Object Library" - confirm under Tools > References).
Public Sub CollectData()
    Const baseUrl As String = "https://stackoverflow.com"
    Const listingUrl As String = "https://stackoverflow.com/questions/tagged/web-scraping"
    Const aboutPrefix As String = "about:"

    Dim Http As New XMLHTTP60, Html As New HTMLDocument
    Dim postLinks As Object, timeNodes As Object, titleNode As Object
    Dim times As Collection, links As Collection
    Dim href As String, postTitle As String
    Dim pairCount As Long, i As Long

    Set times = New Collection
    Set links = New Collection

    ' Load the listing page (synchronous request) into the in-memory document.
    With Http
        .Open "GET", listingUrl, False
        .send
        Html.body.innerHTML = .responseText
    End With

    ' Query both node lists once, outside the loop.
    Set postLinks = Html.querySelectorAll(".summary .question-hyperlink")
    Set timeNodes = Html.querySelectorAll(".user-action-time")

    ' Only pair up as many entries as both lists actually contain, so a missing
    ' time element cannot cause an "object variable not set" error.
    pairCount = postLinks.Length
    If timeNodes.Length < pairCount Then pairCount = timeNodes.Length

    For i = 0 To pairCount - 1
        times.Add timeNodes.item(i).innerText

        ' Drop the "about:" prefix when present instead of assuming it is
        ' always there (Split(...)(1) fails with subscript out of range otherwise).
        href = postLinks.item(i).getAttribute("href")
        If Left$(href, Len(aboutPrefix)) = aboutPrefix Then
            href = Mid$(href, Len(aboutPrefix) + 1)
        End If

        ' times and links are filled in lockstep, so index n in one matches
        ' index n in the other.
        links.Add baseUrl & href
    Next i

    ' Collections are 1-based.
    For i = 1 To links.Count
        ' Fetch the question page; this replaces the listing markup in Html.
        With Http
            .Open "GET", links(i), False
            .send
            Html.body.innerHTML = .responseText
        End With

        ' querySelector gives Nothing when the heading is absent; print an
        ' empty title in that case rather than stopping the whole run.
        Set titleNode = Html.querySelector("h1[itemprop='name'] a")
        If titleNode Is Nothing Then
            postTitle = vbNullString
        Else
            postTitle = titleNode.innerText
        End If

        Debug.Print times(i), postTitle
    Next i
End Sub