使用VBA循环访问HTML代码

时间:2018-01-04 21:59:32

标签: excel vba excel-vba

我想从网站搜索HTML代码,只有在有结果的情况下才能使用Internet Explorer打开搜索。 我尝试根据自己的需要更改附加的代码,但我发誓。

这是我要搜索的网站。

PyPI

Sub SearchBot()

    'dimension (declare or set aside memory for) our variables
    Dim objIE As InternetExplorer 'special object variable representing the IE browser
    Dim aEle As HTMLLinkElement 'special object variable for an <a> (link) element
    Dim y As Integer 'integer variable we'll use as a counter
    Dim result As String 'string variable that will hold our result link

    'initiating a new instance of Internet Explorer and asigning it to objIE
    Set objIE = New InternetExplorer

    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True

    'navigate IE to this web page (a pretty neat search engine really)
    objIE.navigate "https://duckduckgo.com"

    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    'in the search box put cell "A2" value, the word "in" and cell "C1" value
    objIE.document.getElementById("search_form_input_homepage").Value = _
      Sheets("Sheet1").Range("A2").Value & " in " & Sheets("Sheet1").Range("C1").Value

    'click the 'go' button
    objIE.document.getElementById("search_button_homepage").Click

    'wait again for the browser
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    'the first search result will go in row 2
    y = 2

    'for each <a> element in the collection of objects with class of 'result__a'...
    For Each aEle In objIE.document.getElementsByClassName("result__a")

        '...get the href link and print it to the sheet in col C, row y
        result = aEle
        Sheets("Sheet1").Range("C" & y).Value = result

        '...get the text within the element and print it to the sheet in col D
        Sheets("Sheet1").Range("D" & y).Value = aEle.innerText
        Debug.Print aEle.innerText

        'is it a yellowpages link?
        If InStr(result, "yellowpages.com") > 0 Or InStr(result, "yp.com") > 0 Then
            'make the result red
            Sheets("Sheet1").Range("C" & y).Interior.ColorIndex = 3
            'place a 1 to the left
            Sheets("Sheet1").Range("B" & y).Value = 1
        End If

        'increment our row counter, so the next result goes below
        y = y + 1

    'repeat times the # of ele's we have in the collection
    Next

    'add up the yellowpages listings
    Sheets("Sheet1").Range("B1").Value = _
      Application.WorksheetFunction.Sum(Sheets("Sheet1").Range("B2:B100"))

    'close the browser
    objIE.Quit

'exit our SearchBot subroutine
End Sub

在A列是我的搜索循环列表中,搜索日期应该在B列中输入

https://www.boersen-zeitung.de/index.php?li=310&subm=suche&bzpro_suche=vw&bzpro_zeitraum=1%20Woche&page_number=1#jump

这是我尝试的方式。

enter image description here

1 个答案:

答案 0 :(得分:0)

到目前为止,我在这里取得了一些进展。它得到了结果!在抓取时,如果按类名选择(类名与id不同),则需要使用querySelector。

Option Explicit


'See this http://exceldevelopmentplatform.blogspot.co.uk/2018/01/vba-mshtml-webscraping-looking-for-new.html

Sub SearchBot()

    'dimension (declare or set aside memory for) our variables
    Dim objIE As InternetExplorer 'special object variable representing the IE browser
    Dim aEle As HTMLLinkElement 'special object variable for an <a> (link) element
    Dim y As Integer 'integer variable we'll use as a counter
    Dim result As String 'string variable that will hold our result link

    'initiating a new instance of Internet Explorer and asigning it to objIE
    Set objIE = New InternetExplorer

    'make IE browser visible (False would allow IE to run in the background)
    objIE.Visible = True

    'navigate IE to this web page (a pretty neat search engine really)
    objIE.navigate "https://www.boersen-zeitung.de/index.php?li=310&subm=suche"
    'objIE.navigate "https://duckduckgo.com"

    'wait here a few seconds while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    Dim vCarManufacturer As Variant
    vCarManufacturer = Sheets("Sheet1").Range("A2").value

    vCarManufacturer = "Daimler" 'overriden

    Dim vSearchURL As Variant
    vSearchURL = Sheets("Sheet1").Range("C1").value
    vSearchURL = "" 'overriden

    Debug.Assert Not objIE.document Is Nothing

    Dim htmlSuche As Object
    Set htmlSuche = objIE.document.querySelector("input.suche_unternehmen")
    Debug.Assert Not htmlSuche Is Nothing

    'in the search box put cell "A2" value, the word "in" and cell "C1" value
    htmlSuche.value = _
      vCarManufacturer '& " in " & vSearchURL

    'click the 'go' button
    'objIE.document.getElementById("search_button_homepage").Click
    objIE.document.querySelector("input.suche_button21").Click

    'wait again for the browser
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    'the first search result will go in row 2
    y = 2

    'for each <a> element in the collection of objects with class of 'result__a'...

    Dim objResults As Object
    Set objResults = objIE.document.querySelectorAll("a.ue_fl_l")


    For Each aEle In objResults ' objIE.document.querysselec("result__a")

        '...get the href link and print it to the sheet in col C, row y
        Dim anchorResult As MSHTML.IHTMLAnchorElement
        Set anchorResult = aEle

        result = aEle
        Sheets("Sheet1").Range("C" & y).value = anchorResult.href ' result

        '...get the text within the element and print it to the sheet in col D
        Sheets("Sheet1").Range("D" & y).value = anchorResult.innerText
        Debug.Print aEle.innerText

        'is it a yellowpages link?
        If InStr(result, "yellowpages.com") > 0 Or InStr(result, "yp.com") > 0 Then
            'make the result red
            Sheets("Sheet1").Range("C" & y).Interior.ColorIndex = 3
            'place a 1 to the left
            Sheets("Sheet1").Range("B" & y).value = 1
        End If

        'increment our row counter, so the next result goes below
        y = y + 1

    'repeat times the # of ele's we have in the collection
    Next

    'add up the yellowpages listings
    Sheets("Sheet1").Range("B1").value = _
      Application.WorksheetFunction.Sum(Sheets("Sheet1").Range("B2:B100"))

    'close the browser
    objIE.Quit

'exit our SearchBot subroutine
End Sub