VBA单击下一页按钮,没有唯一标识符

时间:2017-10-30 19:17:30

标签: html excel-vba for-loop web-scraping buttonclick

下面粘贴的代码部分会尝试遍历网页搜索的每个页面。这样做的按钮也由下面的html定义。无论结果页数如何,循环仅通过第二页工作,此时,我收到权限被拒绝(错误70)。

Sub finrascrape()

Dim ie As Object
Dim pgcount, a, i, commaindex As Integer
Dim bname, cc, cnum, pg, nb As IHTMLElement
Dim blist, clist, pagelist, nextb, testtxt, testtxt2 As IHTMLElementCollection

Set ie = CreateObject("internetexplorer.application")
brokersearch = InputBox("ENTER BROKER NAME OR CRD#")
firmsearch = InputBox("ENTER FIRM NAME OR CRD#")
geosearch = InputBox("ENTER ZIP CODE (must be valid zip code, otherwise leave blank)")

With ie
.navigate "https://brokercheck.finra.org/"
.Visible = True
Do While .Busy Or .readyState <> 4
DoEvents
Loop
End With

Set intags = ie.document.getElementsByTagName("input")

For Each gat In intags
If gat.placeholder = "Name or CRD#" Then
gat.Value = brokersearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
ElseIf gat.placeholder = "Firm Name or CRD# (optional)" Then
gat.Value = firmsearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
ElseIf gat.placeholder = "City, State or Zip (optional)" Then
gat.Value = geosearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
Else
End If
Next gat

ie.document.getElementsByClassName("md-raised md-primary md-hue-2 md-button md-ink-ripple").Item.Click

Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop

Set pagelist = ie.document.getElementsByClassName("bold font-dark-blue ng-binding")

For Each pg In pagelist
If pg.className = "bold font-dark-blue ng-binding" Then
a = pg.innerText
Exit For
Debug.Print a
End If
Next pg

pgcount = WorksheetFunction.RoundUp(a / 12, 0)
Debug.Print pgcount

Sheets("Results").Select
Range("A1").Value = "BROKER NAME"
Range("B1").Value = "BROKER CRD#"
Range("C1").Value = "MAILING CITY"
Range("D1").Value = "MAILING STATE"
Range("E1").Value = "MAILING ZIP"
With Range(Cells(1, 1), Cells(1, Cells(1, Columns.Count).End(xlToLeft).Column))
    .Font.Bold = True
    .HorizontalAlignment = xlCenter
End With

For i = 1 To pgcount

On Error GoTo reportcomplete

Set blist = ie.document.getElementsByClassName("smaller ng-binding flex")

For Each bname In blist
Sheets("Results").Select
Sheets("Results").Range("A" & Range("A" & Rows.Count).End(xlUp).Row + 1).Value = bname.innerText
Next bname

Set clist = ie.document.getElementsByClassName("smaller")

For Each cnum In clist
Sheets("Process").Select
If cnum.className = "smaller" Then
Sheets("Process").Range("A" & Range("A" & Rows.Count).End(xlUp).Row + 1).Value = cnum.innerText
End If
Next cnum

Sheets("Process").Select
For a = 1 To Sheets("Process").Range("A" & Rows.Count).End(xlUp).Row
If Left(Sheets("Process").Range("A" & a).Value, 4) = "CRD#" Then
Sheets("Results").Select
Sheets("Results").Range("B" & Range("B" & Rows.Count).End(xlUp).Row + 1).Value = Right(Sheets("Process").Range("A" & a).Value, Len(Sheets("Process").Range("A" & a)) - 7)
If InStr(Sheets("Process").Range("A" & a + 1).Value, ",") = 0 Then
Sheets("Process").Range("A" & a + 1).Value = "UNAVAILABLE, NA XXXXX"
End If
ElseIf InStr(Sheets("Process").Range("A" & a).Value, ",") > 0 Then
commaindex = InStr(Sheets("Process").Range("A" & a).Value, ",")
Sheets("Results").Select
Sheets("Results").Range("C" & Sheets("Results").Range("C" & Rows.Count).End(xlUp).Row + 1).Value = Left(Sheets("Process").Range("A" & a).Value, commaindex - 1)
Sheets("Results").Range("D" & Sheets("Results").Range("D" & Rows.Count).End(xlUp).Row + 1).Value = Left(Right(Sheets("Process").Range("A" & a).Value, _
Len(Sheets("Process").Range("A" & a)) - commaindex - 1), 2)
Sheets("Results").Range("E" & Sheets("Results").Range("E" & Rows.Count).End(xlUp).Row + 1).Value = Left(Right(Sheets("Process").Range("A" & a).Value, _
Len(Sheets("Process").Range("A" & a)) - commaindex - 4), 5)
Else
End If
Sheets("Process").Select
Next a
Sheets("Process").Cells.Clear

Set testtxt = ie.document.getElementsByTagName("a")

For Each txt In testtxt
If txt.className = "ng-binding" Then
txt.Click
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Set testtxt2 = ie.document.getElementsByTagName("a")
For a = pgcount To 2 Step -1
For Each txt2 In testtxt2
If txt2.className = "ng-binding" And txt2.innerText = a & " of " & pgcount & " pages" Then
'Debug.Print a
GoTo end_of_for
End If
Next txt2
Next a
End If
Next txt

end_of_for:

a = pgcount

Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop

Next i

reportcomplete:
MsgBox "FINRA Web Scrape Complete.  Please review."

End Sub

HTML:

<li ng-if="::directionLinks" ng-class="{disabled: noNext()||ngDisabled}" class="pagination-next ng-scope">
<a href="" ng-click="selectPage(page + 1, $event)" class="ng-binding">›</a></li>

搜索着陆页的网址为https://brokercheck.finra.org。任何帮助将不胜感激。

2 个答案:

答案 0 :(得分:0)

我开发了如下循环,它成功地导航了“a”标记元素,根据先前元素的innerText标识了正确的按钮,并循环遍历每个页面。

Set testtxt = ie.document.getElementsByTagName("a")

If i < pgcount Then
For d = 1 To testtxt.Length
If testtxt.Item(d).innerText = i & " of " & pgcount & " pages" Then
testtxt.Item(d + 1).Click
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Exit For
End If
Next d
End If

答案 1 :(得分:0)

这是你可以做的遍历所有27页并获得经纪人名称。

Sub Get_Content()
    Dim ie As New InternetExplorer, html As HTMLDocument
    Dim itm As Object, post As Object, posts As Object, elem As Object

    With ie
        .Visible = True
        .navigate "https://brokercheck.finra.org/"
        Do Until .readyState = READYSTATE_COMPLETE: Loop
        Set html = .document
    End With

    Set evt = html.createEvent("keyboardevent")
    evt.initEvent "change", True, False

    For Each itm In html.getElementsByTagName("input")
        If InStr(itm.placeholder, "Name or CRD#") > 0 Then
            itm.Value = "Michael John"
            Exit For
        End If
    Next itm
    itm.dispatchEvent evt

    For Each post In html.getElementsByTagName("input")
        If InStr(post.placeholder, "Firm Name or CRD# (optional)") > 0 Then
            post.Value = "Morgan Stanley"
            Exit For
        End If
    Next post
    post.dispatchEvent evt

    html.getElementsByClassName("md-button")(0).Click
    Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop

    Do
        For Each elem In html.getElementsByClassName("smaller ng-binding flex")
            x = x + 1: Cells(x, 1) = elem.innerText
        Next elem

        html.getElementsByClassName("pagination-next")(0).getElementsByTagName("a")(0).Click
        Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop

    Loop Until InStr(html.body.innerHTML, " class=""pagination-last ng-scope disabled""") > 0
    ie.Quit
End Sub