VBA HTML数据抓取指南

时间:2016-06-04 03:39:59

标签: html excel vba excel-vba web-scraping

我尝试使用VBA从以下站点提取数据,输入城市,并选择输出到excel单元格的结果。我对此非常陌生,这是我的第三次尝试,但现在我得到了一个" Object Required"我尝试运行它时出错。我已经逐步完成了它,当然,它抛出了我试图创建的IE对象的错误。关于我可以做些什么来调整我的代码的任何建议?任何帮助将非常感激!谢谢。

代码

Private Sub CreditUnion()

If Target.Row = Range("City").Row And Target.Column = Range("City").Column Then

    Dim IE As Object

    Set IE = CreateObject("internetexplorer.application")


    IE.Navigate "http://mapping.ncua.gov/SingleResult.aspx"
    IE.Visible = False

    Do While IE.Busy

        DoEvents

    Loop

    Set TableResults = IE.document.getElementsByID("MainContent_newDetails")

    Dim City As String: City = TableResults.Cells(17).innerHTML
    Dim CreditUnion As String: CreditUnion = TableResults.Cells(0).innerHTML
    Dim Region As String: Region = TableResults.Cells(9).innerHTML
    Dim Status As String: Status = TableResults.Cells(3).innerHTML
    Dim Assets As String: Assets = TableResults.Cells(13).innerHTML
    Dim Members As String: Members = TableResults.Cells(15).innerHTML


    Range("B1").Value = City
    Range("C4").Value = CreditUnion
    Range("D4").Value = Region
    Range("E4").Value = Status
    Range("F4").Value = Assets
    Range("G4").Value = Members


    IE.Quit
    Set IE = Nothing

End If

End Sub

代码无法超越这一点 [代码卡在这里] [1]

我们越来越近了!它超越了第一个屏幕。它现在只是不在案例陈述中提取数据 [在此处输入图像说明] [2]

1 个答案:

答案 0 :(得分:0)

我以纽约为例,代码如下。

我在2016/6/7重写

Public Declare Sub Sleep Lib "kernel32.dll" (ByVal dwMilliseconds As Long)

Sub CreditUnion()
    Dim IE As Object, TableResults As Object, webRow As Object, charterInfo As Variant, page As Long, pageTotal As Long, r As Long
    Dim beginTime As Date, i As Long

    Set IE = CreateObject("internetexplorer.application")
    IE.navigate "http://mapping.ncua.gov/ResearchCreditUnion.aspx"
    IE.Visible = True

    Do While IE.Busy Or IE.readystate <> 4   '4 = READYSTATE_COMPLETE 
        DoEvents
    Loop

    'input city name into form
    IE.document.getelementbyid("MainContent_txtCity").Value = "new york"
    'click find button
    IE.document.getelementbyid("MainContent_btnFind").Click
    sleep 5 * 1000

    'total pages
    pageTotal = IE.document.getelementbyid("MainContent_pager_total").innertext
    page = 0

    Do Until page = pageTotal
        DoEvents
        page = IE.document.getelementbyid("MainContent_pager_to").innertext
        With IE.document.getelementbyid("MainContent_grid")
            For r = 1 To .Rows.Length - 1
                If Not IsArray(charterInfo) Then
                    ReDim charterInfo(7, 0) As Variant
                Else
                    ReDim Preserve charterInfo(7, UBound(charterInfo, 2) + 1) As Variant
                End If

                charterInfo(0, UBound(charterInfo, 2)) = .Rows(r).Cells(0).innertext
            Next r
        End With

        If page < pageTotal Then
            IE.document.getelementbyid("MainContent_pageNext").Click
            beginTime = Now
            Application.Wait (Now + TimeValue("00:00:05"))
        End If
    Loop

    For r = 0 To UBound(charterInfo, 2)
        IE.navigate "http://mapping.ncua.gov/SingleResult.aspx?ID=" & charterInfo(0, r)
        Do While IE.Busy Or IE.readystate <> 4   '4 = READYSTATE_COMPLETE 
            DoEvents
        Loop
        'wait 5 sec. for screen refresh
        sleep 5 * 1000

        With IE.document.getelementbyid("MainContent_newDetails")
            For i = 0 To .Rows.Length - 1
                DoEvents
                Select Case .Rows(i).Cells(0).innertext
                Case "Credit Union Name:"
                    charterInfo(1, r) = .Rows(i).Cells(1).innertext
                Case "Region:"
                    charterInfo(2, r) = .Rows(i).Cells(1).innertext
                Case "Credit Union Status:"
                    charterInfo(3, r) = .Rows(i).Cells(1).innertext
                Case "Assets:"
                    charterInfo(4, r) = Replace(Replace(.Rows(i).Cells(1).innertext, ",", ""), "$", "")
                Case "Number of Members:"
                    charterInfo(5, r) = Replace(.Rows(i).Cells(1).innertext, ",", "")
                Case "Address:"
                    charterInfo(6, r) = .Rows(i).Cells(1).innertext
                Case "Phone:"
                    charterInfo(7, r) = "'" & .Rows(i).Cells(1).innertext
                End Select
            Next i
        End With
    Next r


    IE.Quit
    Set IE = Nothing

    'post result on Excel cell
    Worksheets(1).Range("A1").Resize(UBound(charterInfo, 2) + 1, UBound(charterInfo, 1) + 1).Value = Application.Transpose(charterInfo)
End Sub