我正在寻找可以用类名循环头数组的代码,但不能包含标签名或ID。这只是为了确保是否不存在任何类,则应将相应的单元格留空,并复制下一个元素。
我试图添加标头数组
headers = Array("size", "features", "promo", "in store", "web")
但是它需要与我不想要的标签名一起循环。
还想要促销(类名是“ promo_offers
”)“第一个月免费!”在第2行中,问题在于此促销仅针对特定的单元格提供-因此数据会产生误导,我在第4个单元格中进行了促销,然后出现了错误。
但是,我只想复制提供了促销信息的那些单位的促销,否则单元格应该为空白或需要设置任何其他值。下面是代码...
请建议如何构建代码。
Sub GetClassNames()
Dim html As HTMLDocument
Dim objIE As Object
Dim element As IHTMLElement
Dim ie As InternetExplorer
Dim elements As IHTMLElementCollection
Dim result As String 'string variable that will hold our result link
Dim count As Long
Dim erow As Long
'initiating a new instance of Internet Explorer and asigning it to objIE
Set objIE = New InternetExplorer
'make IE browser visible (False would allow IE to run in the background)
objIE.Visible = True
'navigate IE to this web page (a pretty neat search engine really)
objIE.navigate "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"
'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
count = 0
Set html = objIE.document
Set elements = html.getElementsByClassName("unit_size medium")
For Each element In elements
If element.className = "unit_size medium" Then
erow = Sheet2.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
Cells(erow, 1) = html.getElementsByClassName("unit_size medium")(count).innerText
Cells(erow, 2) = html.getElementsByClassName("promo_offers")(count).innerText
count = count + 1
End If
Next element
End Sub
对于任何内容,即promo为null,则应将相应的单元格留空,然后复制下一个元素
答案 0 :(得分:1)
您可以使用xmlhttp获取所有这些信息。
我抓取了所有li
元素,然后循环将每个li
的html放入新的HTMLDocument
中。我使用该对象的querySelector方法使用css选择器获取每一行中的所有其他项目。我将选择内容包装在On Error Resume Next On Error GoTo 0
中,以掩盖尝试访问不存在的元素时的错误,例如有些行没有促销。然后,根据要求将这些条目留空。
Option Explicit
Public Sub GetInfo()
Dim ws As Worksheet, html As HTMLDocument, s As String
Const URL As String = "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423"
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
html.body.innerHTML = s
Dim headers(), results(), listings As Object
headers = Array("Size", "Features", "Promo", "In store", "Web")
Set listings = html.querySelectorAll(".li_unit_listing")
Dim rowCount As Long, numColumns As Long, r As Long, c As Long, item As Long
rowCount = listings.Length
numColumns = UBound(headers) + 1
ReDim results(1 To rowCount, 1 To numColumns)
Dim html2 As HTMLDocument
Set html2 = New HTMLDocument
For item = 0 To listings.Length - 1
r = r + 1
html2.body.innerHTML = listings.item(item).innerHTML
On Error Resume Next
results(r, 1) = Trim$(html2.querySelector(".unit_size").innerText)
results(r, 2) = Trim$(html2.querySelector(".features").innerText)
results(r, 3) = Trim$(html2.querySelector(".promo_offers").innerText)
results(r, 4) = html2.querySelector(".board_rate").innerText
results(r, 5) = html2.querySelector("[itemprop=price]").getAttribute("content")
On Error GoTo 0
Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
输出: