更改代码以从站点提取其他数据

时间:2019-05-24 16:36:06

标签: excel vba web-scraping xmlhttprequest

我正在从网站上提取数据,但需要帮助来提取整个字符串。

示例:

Example

我试图研究网站源代码以了解它,但是进行不同的更改会产生不好的结果

Option Explicit

Public Sub GetInfo()
    Dim html As HTMLDocument, csrft As String, lastRow As Long, sourceValues() '<  VBE > Tools > References > Microsoft HTML Object Library
    Set html = New HTMLDocument
    Dim ws As Worksheet, i As Long
    Set ws = ThisWorkbook.Worksheets("Sheet4")
    lastRow = ws.Cells(ws.rows.Count, "B").End(xlUp).Row
    sourceValues = ws.Range("B2:D" & lastRow).Value
    Dim results()
    ReDim results(1 To UBound(sourceValues, 1), 1 To 4)
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.partstown.com", False
        .send
        html.body.innerHTML = .responseText

        csrft = html.querySelector("[name=CSRFToken]").Value
        Stop
        For i = LBound(sourceValues, 1) To UBound(sourceValues, 1)
            If sourceValues(i, 1) <> vbNullString And sourceValues(i, 3) <> vbNullString Then
                DoEvents
                .Open "POST", "https://www.partstown.com/track-my-order", False
                .setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
                .setRequestHeader "User-Agent", "Mozilla/5.0"
                .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
                .setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
                .setRequestHeader "Accept-Encoding", "gzip, deflate"
                .setRequestHeader "Accept-Language", "en-US,en;q=0.9"
                .send "orderNo=" & sourceValues(i, 1) & "&postalCode=" & sourceValues(i, 3) & "&CSRFToken=" & csrft

                html.body.innerHTML = .responseText

                Dim shipping As String, order As String, items() As String

                shipping = html.querySelector("[data-label=Shipping]").innerText
                order = html.querySelector(".order-history__item-descript--min").innerText
                items = Split(order, vbNewLine)

                Dim qtyOrdered As Long, qtyShipped As String, product As String

                qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
                qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
                product = html.querySelector(".details-table a").Title

                results(i, 1) = shipping
                results(i, 2) = qtyOrdered
                results(i, 3) = qtyShipped
                results(i, 4) = product
            End If
            'Application.Wait Now + TimeSerial(0, 0, 1)
        Next
    End With
    'results written out from row 2 column E
    ws.Cells(2, 5).Resize(UBound(results, 1), UBound(results, 2)) = results

End Sub

我输入了包含多个部分的采购订单和邮政编码(4500987740和33314),返回的数据只是第一部分,而不是所有部分。

示例2:

example 2

我需要返回所有数据: 跟踪,订购数量,已装运数量,产品,订购数量,已装运产品等-基本上是字符串,直到显示所有零件为止

1 个答案:

答案 0 :(得分:1)

问题在于使用querySelectorquerySelector仅返回第一个匹配项。在这种情况下,这意味着您只会考虑第一个 行。所需的修改是使用querySelectorAll返回所有匹配项。然后循环这些匹配以提取每一行的信息。

此外,必须更改此选择器.details-table a以仅返回感兴趣的项目,即返回到.details-table a[title]的项目- title属性。

要适当地写出每一行,可以使用一个辅助函数来查找下一个空闲行。由于事先不知道行数,因此无法设置适当大小的数组来容纳所有结果,尽管您可以从一开始就加大数组的大小。后一点是您可以做出的修正。我改为循环写出数组。

Option Explicit
Public Sub GetInfo()
    Dim html As HTMLDocument, csrft As String, ws As Worksheet
    Dim lastRow As Long, wsTarget As Worksheet, j As Long '<  VBE > Tools > References > Microsoft HTML Object Library
    Dim sourceValues()

    Set html = New HTMLDocument
    Set ws = ThisWorkbook.Worksheets("Sheet4")
    Set wsTarget = ThisWorkbook.Worksheets("Sheet1")
    lastRow = ws.Cells(ws.rows.Count, "B").End(xlUp).Row
    sourceValues = ws.Range("B2:D" & lastRow).Value

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.partstown.com", False
        .send

        html.body.innerHTML = .responseText

        csrft = html.querySelector("[name=CSRFToken]").Value
        For j = LBound(sourceValues, 1) To UBound(sourceValues, 1)
            If sourceValues(j, 1) <> vbNullString And sourceValues(j, 3) <> vbNullString Then
                DoEvents
                .Open "POST", "https://www.partstown.com/track-my-order", False
                .setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
                .setRequestHeader "User-Agent", "Mozilla/5.0"
                .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
                .setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
                .setRequestHeader "Accept-Encoding", "gzip, deflate"
                .setRequestHeader "Accept-Language", "en-US,en;q=0.9"
                .send "orderNo=" & sourceValues(j, 1) & "&postalCode=" & sourceValues(j, 3) & "&CSRFToken=" & csrft
                html.body.innerHTML = .responseText

                Dim shipping As String, orders As Object, items() As String

                shipping = html.querySelector("[data-label=Shipping]").innerText
                Set orders = html.querySelectorAll(".order-history__item-descript--min")

                Dim i As Long, c As Long, results(), products As Object
                ReDim results(1 To 1, 1 To 4 * orders.length)
                Dim qtyOrdered As Long, qtyShipped As String, product As String
                Set products = html.querySelectorAll(".details-table a[title]")
                c = 1
                For i = 0 To orders.length - 1
                    items = Split(orders.item(i).innerText, vbNewLine)
                    qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
                    qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
                    results(1, c) = shipping
                    results(1, c + 1) = qtyOrdered
                    results(1, c + 2) = qtyShipped
                    results(1, c + 3) = products.item(i).Title
                    c = c + 4
                Next
                wsTarget.Cells(GetLastRow(wsTarget) + 1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
            End If
        Next
    End With
End Sub
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    With ws
        GetLastRow = .Cells(.rows.Count, columnNumber).End(xlUp).Row
    End With
End Function