我如何从搜索栏中检索亚马逊的关键字/短语建议

时间:2019-05-02 16:57:11

标签: html excel vba dom web-scraping

下面是一些我找到并修改过的代码,试图从亚马逊的搜索栏中捕获关键字/短语建议。我对Web抓取的概念非常陌生,因此我知道这里给出的代码可能效率很低。我已经从F12 DOM Explorer和“网络”窗口中手动捕获了一些数据。如果最好的答案是网页抓取,则我需要它采用Excel VBA的形式。我在下面的某些图像中看到,“网络”窗口中某些条目的内容类型是“application/json”,Initiator/Type是“XMLHttpRequest”,但这只出现在它显示已与“https://completion.amazon.com”建立连接并完成身份验证之后。如果那是可行的路线,我不知道如何完成这些请求。非常感谢任何帮助。

到目前为止,我已经尝试通过代码中的脚本以编程方式触发搜索栏,但这无济于事。仅仅将关键字“粘贴”到搜索栏中并在其后附加一个“空格”,并不会产生建议的关键字;但是,在搜索栏中手动键入则可以。如果键入关键字,然后在下拉建议上选择“检查元素”,就会看到为显示建议而动态生成的HTML内容(此时可以得到所需的内容)。我一直未能以编程方式做到这一点。

' Click handler: opens amazon.com in Internet Explorer, fills the search box
' for each populated row of Sheet2 (column B), and dumps whatever suggestion
' elements are present in the DOM to the Immediate window.
'
' NOTE(review): the early-bound types used here (InternetExplorer,
' HTMLDocument, HTMLInputTextElement, ...) presumably require references to
' "Microsoft Internet Controls" and "Microsoft HTML Object Library" - confirm
' under VBE > Tools > References.
Private Sub CommandButton1_Click()

Dim MyURL As String

Dim AASearchRank As Workbook
Dim AAws As Worksheet
Dim HTMLDoc As HTMLDocument             ' was used without a declaration
Dim InputSearch As HTMLInputTextElement
Dim InputSearchOrder As Object          ' was used without a declaration
Dim elems As IHTMLElementCollection
Dim TDelement As HTMLTableCell
Dim elems2 As IHTMLElementCollection
Dim TDelement2 As HTMLDivElement
'Dim TDelement2 As HTMLInputTextElement

Dim InputSearchButton As HTMLInputButtonElement
Dim IE As InternetExplorer

Dim x As Long                           ' row counter; Long avoids Integer overflow on long sheets

MyURL = "https://www.amazon.com/"
Set IE = New InternetExplorer
With IE
    .Silent = True
    .Navigate MyURL
    .Visible = True
    ' Block (while still pumping events) until the page has finished loading.
    Do
        DoEvents
    Loop Until .ReadyState = READYSTATE_COMPLETE
End With
Set HTMLDoc = IE.Document

Set AASearchRank = Application.ThisWorkbook
Set AAws = AASearchRank.Worksheets("Sheet2")

Set InputSearchButton = HTMLDoc.getElementById("nav-search-submit-text")
Set InputSearchOrder = HTMLDoc.getElementById("twotabsearchtextbox")

' Only click when BOTH the search box and the button were found; the original
' tested the search box but dereferenced the button, which fails with
' "Object variable not set" when the button lookup returns Nothing.
If Not InputSearchOrder Is Nothing And Not InputSearchButton Is Nothing Then
    InputSearchButton.Click
    Do
        DoEvents
    Loop Until IE.ReadyState = READYSTATE_COMPLETE
    ' The click may have navigated; re-read the document so later lookups do
    ' not run against a stale page object.
    Set HTMLDoc = IE.Document
End If

x = 2
If AAws.Range("D" & x).Value = "" Then
    ' One pass per row, stopping at the first empty cell in column B.
    Do Until AAws.Range("B" & x) = ""
        Set InputSearch = HTMLDoc.getElementById("twotabsearchtextbox")
        If InputSearch Is Nothing Then
            ' Without the search box nothing below can work; stop instead of
            ' raising a runtime error on the next line.
            Debug.Print "Search box 'twotabsearchtextbox' not found; stopping."
            Exit Do
        End If
        InputSearch.Focus
        'When a keyword is typed in the search bar with a 'space' after, it invokes the suggestions I'm looking for.
        InputSearch.Value = "Travel "
        'InputSearch.Value = AAws.Range("C" & x) & " "

        Set InputSearchButton = HTMLDoc.getElementsByClassName("nav-input")(0)
        InputSearch.Focus

        'Here I was trying to invoke some script to see if it had any effect on the search bar drop down
        HTMLDoc.parentWindow.execScript "window.navmet.push({key:'UpNav',end:+new Date(),begin:window.navmet.tmp});"
        HTMLDoc.parentWindow.execScript "window.navmet.push({key:'Search',end:+new Date(),begin:window.navmet.tmp});"
        HTMLDoc.parentWindow.execScript "window.navmet.push({key:'NavBar',end:+new Date(),begin:window.navmet.main});"

        Do
            DoEvents
        Loop Until IE.ReadyState = READYSTATE_COMPLETE
        'Application.Wait (Now + TimeValue("0:00:05"))


        Set elems2 = HTMLDoc.getElementsByClassName("nav-issFlyout nav-flyout")
        For Each TDelement2 In elems2
            'Debug statements strictly for learning what each option/query returns
            Debug.Print TDelement2.innerText
            Debug.Print TDelement2.className
            Debug.Print TDelement2.dataFld

            Debug.Print TDelement2.innerHTML
            Debug.Print TDelement2.outerText
            Debug.Print TDelement2.outerHTML
            Debug.Print TDelement2.parentElement.className
            Debug.Print TDelement2.tagName
            Debug.Print TDelement2.ID

        Next

        'Once the searchbar is populated, and the drop down list provides suggestions,
        'the below code will give me what I want. If there's an easier solution,
        'I'm all for it
        Set elems = HTMLDoc.getElementsByClassName("s-suggestion")
        For Each TDelement In elems
            ' Only elements whose id starts with "issDiv" are printed.
            If Left(TDelement.ID, 6) = "issDiv" Then
                Debug.Print TDelement.innerText
                Debug.Print TDelement.ID
            End If

        Next
        x = x + 1
    Loop
End If

End Sub

一种理想的解决方案是通过触发搜索栏的动态HTML,或通过Amazon的completion网站来获得这些建议的关键字,但后者似乎并不对公众开放。感谢您的帮助,如果帖子有任何不妥之处,敬请见谅。

1 个答案:

答案 0 :(得分:0)

您可以在“网络”标签中找到一个API调用。它返回一个JSON字符串,您可以用JSON解析器对其进行解析以获得建议。我使用jsonconverter.bas,下载后将其添加到项目中,然后转到VBE>工具>引用>添加对Microsoft Scripting Runtime的引用。

URL本身是一个queryString,即它是由不同的参数构成的。例如,有一个limit参数,其值为11,它指定要返回的建议数。您可能可以更改和/或删除其中一些。下面,我将SEARCH_TERM常量连接到查询字符串中以表示您的搜索值(将在搜索框中键入的值)。

我不知道是否有参数是基于时间的(即会随时间过期——自您发布问题以来,我已经发出了许多请求,都没有问题)。如有必要,可以事先向Amazon搜索页面发出GET请求,以获取所需的基于时间的值。

# Query-string parameters of the suggestions request, as (name, value) pairs.
# "prefix" carries the text typed into the search box and "limit" is the
# number of suggestions to return; the other values are copied verbatim from
# a captured request (their exact meaning is not established here).
params = (
    ("session-id", "141-0042012-2829544"),
    ("customer-id", ""),
    ("request-id", "7E7YCB7AZZM1HQEZF2G1"),
    ("page-type", "Search"),
    ("lop", "en_US"),
    ("site-variant", "desktop"),
    ("client-info", "amazon-search-ui"),
    ("mid", "ATVPDKIKX0DER"),
    ("alias", "aps"),
    ("b2b", "0"),
    ("fresh", "0"),
    ("ks", "76"),
    ("prefix", "TRAVEL"),
    ("event", "onKeyPress"),
    ("limit", "11"),
    ("fb", "1"),
    # A list value: the parameter appears once per entry in the URL.
    ("suggestion-type", ["KEYWORD", "WIDGET"]),
    ("_", "1556820864750"),
)

VBA:

Option Explicit

' Requests keyword suggestions for SEARCH_TERM from the completion endpoint
' and prints each suggestion's "value" to the Immediate window.
'
' Requires the JsonConverter module (jsonconverter.bas) in the project.
' All query-string values other than "prefix" are copied from a captured
' browser request.
Public Sub GetTable()
    Dim json As Object, suggestion As Object                '<  VBE > Tools > References > Microsoft Scripting Runtime
    Dim encodedTerm As String
    Const SEARCH_TERM As String = "TRAVEL"

    ' Percent-encode the term so spaces and other reserved characters
    ' (e.g. a trailing space after the keyword) survive in the query string.
    ' NOTE(review): EncodeURL needs Excel 2013 or later - confirm target version.
    encodedTerm = Application.WorksheetFunction.EncodeURL(SEARCH_TERM)

    With CreateObject("MSXML2.XMLHTTP")
        ' Third argument False = synchronous request; .send blocks until done.
        .Open "GET", "https://completion.amazon.com/api/2017/suggestions?session-id=141-0042012-2829544" & _
        "&customer-id=&request-id=7E7YCB7AZZM1HQEZF2G1&page-type=Search&lop=en_US&site-variant=" & _
        "desktop&client-info=amazon-search-ui&mid=ATVPDKIKX0DER&alias=aps&b2b=0&fresh=0&ks=76&" & _
        "prefix=" & encodedTerm & "&event=onKeyPress&limit=11&fb=1&suggestion-type=KEYWORD&suggestion-type=" & _
        "WIDGET&_=1556820864750", False
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .send
        ' Do not hand an error page to the JSON parser; report and stop instead.
        If .Status <> 200 Then
            Debug.Print "Suggestion request failed: " & .Status & " " & .statusText
            Exit Sub
        End If
        Set json = JsonConverter.ParseJson(.responseText)("suggestions")
    End With
    For Each suggestion In json
        Debug.Print suggestion("value")
    Next
End Sub