下面粘贴的代码部分会尝试遍历网页搜索的每个页面。这样做的按钮也由下面的html定义。无论结果页数如何,循环仅通过第二页工作,此时,我收到权限被拒绝(错误70)。
Sub finrascrape()
Dim ie As Object
Dim pgcount, a, i, commaindex As Integer
Dim bname, cc, cnum, pg, nb As IHTMLElement
Dim blist, clist, pagelist, nextb, testtxt, testtxt2 As IHTMLElementCollection
Set ie = CreateObject("internetexplorer.application")
brokersearch = InputBox("ENTER BROKER NAME OR CRD#")
firmsearch = InputBox("ENTER FIRM NAME OR CRD#")
geosearch = InputBox("ENTER ZIP CODE (must be valid zip code, otherwise leave blank)")
With ie
.navigate "https://brokercheck.finra.org/"
.Visible = True
Do While .Busy Or .readyState <> 4
DoEvents
Loop
End With
Set intags = ie.document.getElementsByTagName("input")
For Each gat In intags
If gat.placeholder = "Name or CRD#" Then
gat.Value = brokersearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
ElseIf gat.placeholder = "Firm Name or CRD# (optional)" Then
gat.Value = firmsearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
ElseIf gat.placeholder = "City, State or Zip (optional)" Then
gat.Value = geosearch
Set evt = ie.document.createEvent("keyboardevent")
evt.initEvent "change", True, False
gat.dispatchEvent evt
Else
End If
Next gat
ie.document.getElementsByClassName("md-raised md-primary md-hue-2 md-button md-ink-ripple").Item.Click
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Set pagelist = ie.document.getElementsByClassName("bold font-dark-blue ng-binding")
For Each pg In pagelist
If pg.className = "bold font-dark-blue ng-binding" Then
a = pg.innerText
Exit For
Debug.Print a
End If
Next pg
pgcount = WorksheetFunction.RoundUp(a / 12, 0)
Debug.Print pgcount
Sheets("Results").Select
Range("A1").Value = "BROKER NAME"
Range("B1").Value = "BROKER CRD#"
Range("C1").Value = "MAILING CITY"
Range("D1").Value = "MAILING STATE"
Range("E1").Value = "MAILING ZIP"
With Range(Cells(1, 1), Cells(1, Cells(1, Columns.Count).End(xlToLeft).Column))
.Font.Bold = True
.HorizontalAlignment = xlCenter
End With
For i = 1 To pgcount
On Error GoTo reportcomplete
Set blist = ie.document.getElementsByClassName("smaller ng-binding flex")
For Each bname In blist
Sheets("Results").Select
Sheets("Results").Range("A" & Range("A" & Rows.Count).End(xlUp).Row + 1).Value = bname.innerText
Next bname
Set clist = ie.document.getElementsByClassName("smaller")
For Each cnum In clist
Sheets("Process").Select
If cnum.className = "smaller" Then
Sheets("Process").Range("A" & Range("A" & Rows.Count).End(xlUp).Row + 1).Value = cnum.innerText
End If
Next cnum
Sheets("Process").Select
For a = 1 To Sheets("Process").Range("A" & Rows.Count).End(xlUp).Row
If Left(Sheets("Process").Range("A" & a).Value, 4) = "CRD#" Then
Sheets("Results").Select
Sheets("Results").Range("B" & Range("B" & Rows.Count).End(xlUp).Row + 1).Value = Right(Sheets("Process").Range("A" & a).Value, Len(Sheets("Process").Range("A" & a)) - 7)
If InStr(Sheets("Process").Range("A" & a + 1).Value, ",") = 0 Then
Sheets("Process").Range("A" & a + 1).Value = "UNAVAILABLE, NA XXXXX"
End If
ElseIf InStr(Sheets("Process").Range("A" & a).Value, ",") > 0 Then
commaindex = InStr(Sheets("Process").Range("A" & a).Value, ",")
Sheets("Results").Select
Sheets("Results").Range("C" & Sheets("Results").Range("C" & Rows.Count).End(xlUp).Row + 1).Value = Left(Sheets("Process").Range("A" & a).Value, commaindex - 1)
Sheets("Results").Range("D" & Sheets("Results").Range("D" & Rows.Count).End(xlUp).Row + 1).Value = Left(Right(Sheets("Process").Range("A" & a).Value, _
Len(Sheets("Process").Range("A" & a)) - commaindex - 1), 2)
Sheets("Results").Range("E" & Sheets("Results").Range("E" & Rows.Count).End(xlUp).Row + 1).Value = Left(Right(Sheets("Process").Range("A" & a).Value, _
Len(Sheets("Process").Range("A" & a)) - commaindex - 4), 5)
Else
End If
Sheets("Process").Select
Next a
Sheets("Process").Cells.Clear
Set testtxt = ie.document.getElementsByTagName("a")
For Each txt In testtxt
If txt.className = "ng-binding" Then
txt.Click
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Set testtxt2 = ie.document.getElementsByTagName("a")
For a = pgcount To 2 Step -1
For Each txt2 In testtxt2
If txt2.className = "ng-binding" And txt2.innerText = a & " of " & pgcount & " pages" Then
'Debug.Print a
GoTo end_of_for
End If
Next txt2
Next a
End If
Next txt
end_of_for:
a = pgcount
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Next i
reportcomplete:
MsgBox "FINRA Web Scrape Complete. Please review."
End Sub
HTML:
<li ng-if="::directionLinks" ng-class="{disabled: noNext()||ngDisabled}" class="pagination-next ng-scope">
<a href="" ng-click="selectPage(page + 1, $event)" class="ng-binding">›</a></li>
搜索着陆页的网址为https://brokercheck.finra.org。任何帮助将不胜感激。
答案 0 :(得分:0)
我开发了如下循环,它成功地导航了“a”标记元素,根据先前元素的innerText标识了正确的按钮,并循环遍历每个页面。
Set testtxt = ie.document.getElementsByTagName("a")
If i < pgcount Then
For d = 1 To testtxt.Length
If testtxt.Item(d).innerText = i & " of " & pgcount & " pages" Then
testtxt.Item(d + 1).Click
Do While ie.Busy Or ie.readyState <> 4
DoEvents
Loop
Exit For
End If
Next d
End If
答案 1 :(得分:0)
这是你可以做的遍历所有27页并获得经纪人名称。
Sub Get_Content()
Dim ie As New InternetExplorer, html As HTMLDocument
Dim itm As Object, post As Object, posts As Object, elem As Object
With ie
.Visible = True
.navigate "https://brokercheck.finra.org/"
Do Until .readyState = READYSTATE_COMPLETE: Loop
Set html = .document
End With
Set evt = html.createEvent("keyboardevent")
evt.initEvent "change", True, False
For Each itm In html.getElementsByTagName("input")
If InStr(itm.placeholder, "Name or CRD#") > 0 Then
itm.Value = "Michael John"
Exit For
End If
Next itm
itm.dispatchEvent evt
For Each post In html.getElementsByTagName("input")
If InStr(post.placeholder, "Firm Name or CRD# (optional)") > 0 Then
post.Value = "Morgan Stanley"
Exit For
End If
Next post
post.dispatchEvent evt
html.getElementsByClassName("md-button")(0).Click
Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop
Do
For Each elem In html.getElementsByClassName("smaller ng-binding flex")
x = x + 1: Cells(x, 1) = elem.innerText
Next elem
html.getElementsByClassName("pagination-next")(0).getElementsByTagName("a")(0).Click
Do While ie.Busy Or ie.readyState <> 4: DoEvents: Loop
Loop Until InStr(html.body.innerHTML, " class=""pagination-last ng-scope disabled""") > 0
ie.Quit
End Sub