我正在从网站上提取数据,但需要帮助来提取整个字符串。
示例:
我试图研究网站源代码以了解它,但是进行不同的更改会产生不好的结果
Option Explicit
Public Sub GetInfo()
Dim html As HTMLDocument, csrft As String, lastRow As Long, sourceValues() '< VBE > Tools > References > Microsoft HTML Object Library
Set html = New HTMLDocument
Dim ws As Worksheet, i As Long
Set ws = ThisWorkbook.Worksheets("Sheet4")
lastRow = ws.Cells(ws.rows.Count, "B").End(xlUp).Row
sourceValues = ws.Range("B2:D" & lastRow).Value
Dim results()
ReDim results(1 To UBound(sourceValues, 1), 1 To 4)
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.partstown.com", False
.send
html.body.innerHTML = .responseText
csrft = html.querySelector("[name=CSRFToken]").Value
Stop
For i = LBound(sourceValues, 1) To UBound(sourceValues, 1)
If sourceValues(i, 1) <> vbNullString And sourceValues(i, 3) <> vbNullString Then
DoEvents
.Open "POST", "https://www.partstown.com/track-my-order", False
.setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
.setRequestHeader "Accept-Encoding", "gzip, deflate"
.setRequestHeader "Accept-Language", "en-US,en;q=0.9"
.send "orderNo=" & sourceValues(i, 1) & "&postalCode=" & sourceValues(i, 3) & "&CSRFToken=" & csrft
html.body.innerHTML = .responseText
Dim shipping As String, order As String, items() As String
shipping = html.querySelector("[data-label=Shipping]").innerText
order = html.querySelector(".order-history__item-descript--min").innerText
items = Split(order, vbNewLine)
Dim qtyOrdered As Long, qtyShipped As String, product As String
qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
product = html.querySelector(".details-table a").Title
results(i, 1) = shipping
results(i, 2) = qtyOrdered
results(i, 3) = qtyShipped
results(i, 4) = product
End If
'Application.Wait Now + TimeSerial(0, 0, 1)
Next
End With
'results written out from row 2 column E
ws.Cells(2, 5).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
我输入了包含多个部分的采购订单和邮政编码(4500987740和33314),返回的数据只是第一部分,而不是所有部分。
示例2:
我需要返回所有数据: 跟踪,订购数量,已装运数量,产品,订购数量,已装运产品等-基本上是字符串,直到显示所有零件为止
答案 0 :(得分:1)
问题在于使用querySelector
。 querySelector
仅返回第一个匹配项。在这种情况下,这意味着您只会考虑第一个
行。所需的修改是使用querySelectorAll
返回所有匹配项。然后循环这些匹配以提取每一行的信息。
此外,必须更改此选择器.details-table a
以仅返回感兴趣的项目,即返回到.details-table a[title]
的项目-
title
属性。
要适当地写出每一行,可以使用一个辅助函数来查找下一个空闲行。由于事先不知道行数,因此无法设置适当大小的数组来容纳所有结果,尽管您可以从一开始就加大数组的大小。后一点是您可以做出的修正。我改为循环写出数组。
Option Explicit
Public Sub GetInfo()
Dim html As HTMLDocument, csrft As String, ws As Worksheet
Dim lastRow As Long, wsTarget As Worksheet, j As Long '< VBE > Tools > References > Microsoft HTML Object Library
Dim sourceValues()
Set html = New HTMLDocument
Set ws = ThisWorkbook.Worksheets("Sheet4")
Set wsTarget = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "B").End(xlUp).Row
sourceValues = ws.Range("B2:D" & lastRow).Value
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.partstown.com", False
.send
html.body.innerHTML = .responseText
csrft = html.querySelector("[name=CSRFToken]").Value
For j = LBound(sourceValues, 1) To UBound(sourceValues, 1)
If sourceValues(j, 1) <> vbNullString And sourceValues(j, 3) <> vbNullString Then
DoEvents
.Open "POST", "https://www.partstown.com/track-my-order", False
.setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
.setRequestHeader "Accept-Encoding", "gzip, deflate"
.setRequestHeader "Accept-Language", "en-US,en;q=0.9"
.send "orderNo=" & sourceValues(j, 1) & "&postalCode=" & sourceValues(j, 3) & "&CSRFToken=" & csrft
html.body.innerHTML = .responseText
Dim shipping As String, orders As Object, items() As String
shipping = html.querySelector("[data-label=Shipping]").innerText
Set orders = html.querySelectorAll(".order-history__item-descript--min")
Dim i As Long, c As Long, results(), products As Object
ReDim results(1 To 1, 1 To 4 * orders.length)
Dim qtyOrdered As Long, qtyShipped As String, product As String
Set products = html.querySelectorAll(".details-table a[title]")
c = 1
For i = 0 To orders.length - 1
items = Split(orders.item(i).innerText, vbNewLine)
qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
results(1, c) = shipping
results(1, c + 1) = qtyOrdered
results(1, c + 2) = qtyShipped
results(1, c + 3) = products.item(i).Title
c = c + 4
Next
wsTarget.Cells(GetLastRow(wsTarget) + 1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End If
Next
End With
End Sub
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
With ws
GetLastRow = .Cells(.rows.Count, columnNumber).End(xlUp).Row
End With
End Function