Excel亚马逊卖家平台网页抓取(Web Scraper)问题

时间:2016-02-26 14:01:31

标签: excel vba excel-vba web-scraping

我一直在尝试将此代码用于工作流效率目的,但我似乎无法使其正常运行。

步骤:

  1. 登录亚马逊卖家平台(Amazon Seller Central)
  2. 使用A列中的订单号,并将其填入搜索框进行搜索
  3. 在元素的innerText中查找“Estimated Delivery:”,并将对应信息抓取到订单号旁边的B列
  4. 转到下一个订单号并重复上述处理,直到订单号列为空。

网页代码(我想要获取的内容已高亮显示):

    enter image description here

    Option Explicit
    
    ' Module-level handles. HTMLDoc is assigned inside MyAmazonSellereEDD;
    ' MyBrowser is declared here but is not referenced anywhere in the code shown.
    ' NOTE(review): the early-bound HTMLDocument / InternetExplorer types presumably
    ' need project references to the MSHTML and Internet Controls libraries - confirm.
    Dim HTMLDoc As HTMLDocument
    Dim MyBrowser As InternetExplorer
    
    ' MyAmazonSellereEDD
    ' Opens Internet Explorer on the Seller Central home page, fills in and submits
    ' the sign-in form, then for each order number in column A (starting at row 2,
    ' stopping at the first empty cell) searches for the order and writes the text of
    ' the matching "Estimated Delivery:" table cell into column B of the same row.
    ' After each order, the label text is stripped from every used cell in column B.
    ' Takes no arguments and returns nothing; shows a message box when finished.
    ' Range/Cells are unqualified - presumably they resolve to the active sheet.
    Sub MyAmazonSellereEDD()
    
        ' NOTE(review): MyHTML_Element, oSignInLink, oInputEmail, oInputPassword,
        ' oInputSignInButton, AAOrder, AAws, AAws2, R and ar are declared below but
        ' never used by live code in this procedure (AAOrder/AAws appear only in
        ' commented-out lines).
        Dim MyHTML_Element As IHTMLElement
        Dim MyURL As String
        Dim oSignInLink As HTMLLinkElement
        Dim oInputEmail As HTMLInputElement
        Dim oInputPassword As HTMLInputElement
        Dim oInputSignInButton As HTMLInputButtonElement
    
        'InputSearchOrder will be the destination for order numbers taken from the workbook
        Dim InputSearchOrder As HTMLInputElement
    
        Dim InputSearchButton As HTMLInputButtonElement
        Dim IE As InternetExplorer
        Dim AAOrder As Workbook
        Dim AAws As Worksheet
        Dim AAws2 As Worksheet
    
        Dim R As Range
        Dim x As Integer
        Dim i As Long
        Dim ar As Variant
        Dim elems As IHTMLElementCollection
        Dim TDelement As HTMLTableCell
    
        Dim ExcludWords() As Variant, a As Range, b As Long, LR As Long
    
        ' Substrings removed from column B by the cleanup pass further down.
        ExcludWords = Array("Estimated Delivery:")
    
    
        MyURL = "https://sellercentral.amazon.com/gp/homepage.html"
    
        Set IE = New InternetExplorer
    
        ' Open the browser and navigate.
        With IE
            .Silent = True
            .navigate MyURL
            .Visible = True
            ' Spin (yielding to the message pump) until the page reports complete.
            Do
                DoEvents
            Loop Until .readyState = READYSTATE_COMPLETE
        End With
    
    
        ' Get the html document.
        ' NOTE(review): this is the only place HTMLDoc is assigned. It is reused after
        ' the sign-in click and after every search click; if those navigations produce
        ' a new document object, this reference may be stale - confirm and consider
        ' re-reading IE.document after each page load.
        Set HTMLDoc = IE.document
    
    
    
        ' Fill in the sign-in form by element id/name and submit it.
        With HTMLDoc
            .all.Item("username").Value = "blankityblank@blank.net"
            .all.Item("password").Value = "*********"
            .all.Item("sign-in-button").Click
        End With
    
          ' Wait for the post-login page, then pause a further fixed 8 seconds.
          Do
            DoEvents
        Loop Until IE.readyState = READYSTATE_COMPLETE
    
        Application.Wait (Now + TimeValue("0:00:08"))
    
        'Set AAOrder = Application.Workbooks.Open("Z:\Automation Anywhere\5 Automated Tracking Imports\Amazon Prime\PrimeOrdersWithNoFulfillment.csv")
        'Set AAws = AAOrder.Worksheets("PrimeOrdersWithNoFulfillment")
    
    
    
    
    
        ' Orders start on row 2.
        x = 2
        'Do Until Range("A" & x) = ""
        ' NOTE(review): this test runs once, for row 2 only. If B2 already has a value
        ' the whole loop is skipped; if B2 is empty, later rows are processed whether or
        ' not their column B cell is already filled.
        If Range("B" & x).Value = "" Then
    
            'If AAws.Range("B" & x).Value = "" Then
            'x = x + 1
            ' One iteration per order number; stops at the first empty cell in column A.
            Do Until Range("A" & x) = ""
                ' Put the order number into the search field and submit the search.
                Set InputSearchOrder = HTMLDoc.getElementById("sc-search-field")
                InputSearchOrder.Value = Range("A" & x)
    
                Set InputSearchButton = HTMLDoc.getElementsByClassName("sc-search-button")(0)
                InputSearchButton.Click
                  ' Wait for the results page, then pause a further fixed 5 seconds.
                  Do
                    DoEvents
                Loop Until IE.readyState = READYSTATE_COMPLETE
                Application.Wait (Now + TimeValue("0:00:05"))
    
    
                ' All table cells of the document referenced by HTMLDoc.
                Set elems = HTMLDoc.getElementsByTagName("td")
    
    
                'ExcludWords = Array("Package Weight:", "Tracking ID:", "Ship Date:", "Carrier:", "Shipping Service:")
    
    
                ' NOTE(review): i is set and incremented here but never read before the
                ' For i = 1 loop below overwrites it - it has no effect on the output.
                i = 2
    
                ' Write the text of any cell with class "data-display-field" whose text
                ' contains the label. InStr returns a position (0 when absent); combined
                ' with And, a non-zero position makes the condition true.
                ' NOTE(review): the value stored is the innerText of the matched cell
                ' itself. If that cell holds only the label and the date sits in a
                ' separate adjacent cell (as the described symptom suggests), the cleanup
                ' pass below strips the label and leaves column B empty. Reading the
                ' following sibling cell would be the likely fix - confirm against the page.
                For Each TDelement In elems
                    If TDelement.className = "data-display-field" And InStr(TDelement.innerText, "Estimated Delivery:") Then
                        Range("B" & x).Value = TDelement.innerText
                        i = i + 1
                    End If
                Next
    
    
    
    
    
    
    
                ' Cleanup pass: remove every ExcludWords entry from each cell in column B,
                ' from row 1 to the last used row. This runs inside the order loop, so the
                ' whole column is reprocessed after every order.
                LR = Range("B" & Rows.Count).End(xlUp).Row
                For i = 1 To LR
                Set a = Cells(i, "B")
                For b = 0 To UBound(ExcludWords)
                    a.Formula = Replace((a.Formula), ExcludWords(b), "")
                        Next b
                Next i
    
    
            'End If
            x = x + 1
            Loop
    
    
         'Loop
         End If
    
        ' NOTE(review): no On Error statement appears in this procedure, so nothing
        ' routes errors to this label; it is reached only by normal fall-through, at
        ' which point Err is expected to be 0 and the body is skipped.
        Err_Clear:
            If Err <> 0 Then
                Err.Clear
                Resume Next
            End If
    
        MsgBox ("Process is done! :)")
    
    
        End Sub
    

我的问题是:抓取数据时,“Estimated Delivery:”这个标签和本应抓取的实际预计送达日期在页面上是分开的,而日期本应一并写入B列的输出数据。实际情况是,代码只找到并写入了“Estimated Delivery:”,随后又按照代码的指示把这些字符删掉,结果单元格仍然是空白。我不确定问题出在哪里。

1 个答案:

答案 0 :(得分:2)

您在以下代码部分中选中的TDelement,其innerText中只包含“Estimated Delivery:”这段文字;带有日期的部分实际上位于另一个单独的TDelement中。

' Walks every element in elems; when an element has class "data-display-field" and
' its text contains the label, that same element's innerText is written to column B
' of row x. The counter i is incremented but not otherwise used in this excerpt.
For Each TDelement In elems
    If TDelement.className = "data-display-field" And InStr(TDelement.innerText, "Estimated Delivery:") Then
        Range("B" & x).Value = TDelement.innerText
        i = i + 1
    End If
Next

由于html代码中没有任何唯一信息(例如id、name等)可用于引用包含日期的TDelement,您可以在已经获得的引用上配合使用NextSibling,从而取得紧跟在包含文本“Estimated Delivery:”的元素之后的那个元素。也许可以试试这个(目前无法测试,但应该有效):

NextSibling