在VBA中导航HTML - 不可靠的代码问题

时间:2017-10-16 17:02:02

标签: html excel vba web-scraping

我编写了导航到特定网站(http://www.boxofficemojo.com/)的代码,然后将电影标题从电子表格复制/粘贴到搜索栏中以搜索该标题并打开其各自的电影页面((在第一种情况下它是" Rogue One:一个星球大战的故事)。这将最终用于从我的电子表格中的200部电影中抓取本网站的数据

我的问题如下。当我手动单步执行此代码时,它可以按预期可靠地工作。当我自动运行它(f5)它不能可靠地工作 - 它可能工作一次但总是脱轨并最终导航到错误的网页。更奇怪的是,如果我将断点设置为最后一行代码,然后逐步执行最后一行,则可以正常工作。我无法弄清楚为什么会这样。代码如下,任何想法都将不胜感激!

Option Explicit

'Start new subroutine
Sub FilmScraper()

'dimension (declare or set aside memory for) our variables
Dim MovieCount As Integer  'counter

Dim objIE As New InternetExplorer 
Dim Doc As HTMLDocument
Dim oSearch As HTMLDivElement
Dim SearchElement As MSHTML.IHTMLElementCollection
Dim oResult As Object, Element As Object, myLink As Object

'Counting the number of titles to search for (will eventually be my main loop)
Sheets("2016").Select
MovieCount = 200

'open IE and navigate to box office mojo homepage
With objIE
    .Visible = True
    .Navigate "http://www.boxofficemojo.com/"
        Do While objIE.Busy Or objIE.ReadyState <> 4
            DoEvents
        Loop
    Set Doc = objIE.Document
End With

'search for 1st title name in excel sheet
Set oSearch = objIE.Document.forms("searchbox").elements("q")
    oSearch.Value = Sheets("2016").Range("c3").Value
    objIE.Document.forms("searchbox").getElementsByTagName("input")(1).Click

Do While objIE.Busy Or objIE.ReadyState <> 4
        DoEvents
Loop

'open title page in box office
Set Doc = objIE.Document
Set oResult = Doc.getElementById("body").getElementsByTagName("a")
For Each Element In oResult
    If Element.outerHTML Like "*/movies/?id=*" Then
        Set myLink = Element
        Exit For
    End If
Next Element

objIE.Navigate myLink

'Scrape website and paste into excel (TBD)

End Sub

1 个答案:

答案 0 :(得分:0)

我重写了你的代码

没有hickup就可以使用5部电影列表

Rogue One: A Star Wars Story
happy death day
marshall
the foreigner
the snowman

从单元格C3开始放置电影列表....根据您的喜好调整代码以供电影列表

Option Explicit

'   ref
'     Microsoft HTML Object Library
'     Microsoft internet controls

Sub FilmScraper()

    'dimension (declare or set aside memory for) our variables
    Dim MovieCount As Integer  'counter

    Dim objIE As New InternetExplorer
    Dim Doc As HTMLDocument
    Dim oSearch As HTMLDivElement
    Dim SearchElement As MSHTML.IHTMLElementCollection
    Dim oResult As Object, Element As Object, myLink As Object

    'Counting the number of titles to search for (will eventually be my main loop)

    'Sheets("2016").Select ' this line does not do anything useful

    MovieCount = 200
    MovieCount = 5      ' test with 5 movies

    'open IE and navigate to box office mojo homepage
    With objIE
        .Visible = True
        .Navigate "http://www.boxofficemojo.com/"
        Do While objIE.Busy Or objIE.ReadyState <> 4
            DoEvents
        Loop
    End With

    Set Doc = objIE.Document

    Dim aaa As Range
'   For Each aaa In Sheets("2016").Range("c3").Resize(MovieCount)
    For Each aaa In ActiveSheet.Range("c3").Resize(MovieCount)

        'search for title name in excel sheet
        Doc.forms("searchbox").elements("q").Value = aaa.Value
        Doc.forms("searchbox").getElementsByTagName("input")(1).Click

        Do While objIE.Busy Or objIE.ReadyState <> 4
            DoEvents
        Loop

        'open title page in box office
        Set Doc = objIE.Document
        Set oResult = Doc.getElementById("body").getElementsByTagName("a")
        For Each Element In oResult
            If Element.outerHTML Like "*/movies/?id=*" Then
                Set myLink = Element
                Exit For
            End If
        Next Element

        objIE.Navigate myLink

        Do While objIE.Busy Or objIE.ReadyState <> 4
            DoEvents
        Loop

        'Scrape website and paste into excel (TBD)

        Set Doc = objIE.Document
        Set oResult = Doc.getElementsByClassName("mp_box")(1).getElementsByClassName("mp_box_content")
        aaa.Offset(0, 2).Value = oResult(0).innerText   ' put result in second cell to right of movie name


    Next aaa
End Sub