第一次在这里发布,希望能得到一些好的反馈!
我正在尝试自动化从以下网站检索数据的过程:https://hazards.atcouncil.org/#/seismic?lat=38.461982&lng=-122.425394&address=
返回的参数取决于参考文档,风险类别和场地类别这三个下拉列表的取值。使用Excel VBA,我已经能够导航到网页,从下拉列表中选择所需的项目,并读取输出中的参数。但是,当我进行选择时,网页不会更新输出值(参数),因此实际上我读取到的只是默认参数。页面似乎在等待我说“GO”或触发某种“事件”,但是我对用VBA操作HTML的了解还不够,无法弄清楚该怎么做。在来这里提问之前,我进行了很多搜索,发现其他人也遇到过类似的问题,但是本网站下拉菜单的HTML代码结构似乎与我在其他地方看到的有所不同。我正在使用Internet Explorer 11。
我真的希望这是一个简单的解决方法。预先感谢您的帮助!
这是我的代码(Excel 2016):
' Opens the ATC hazards page for a fixed latitude/longitude in Internet
' Explorer, chooses "ASCE7-10", "IV" and "D - Stiff Soil" in the drop-downs
' inside #seismic-selector, then copies the text of every "table-row" element
' into the "Test" worksheet (one page row per worksheet row, at most 20 rows).
'
' Requires a reference to the Microsoft HTML Object Library (MSHTML).
Sub ScrapeData()
    Dim objIE As Object
    Dim Latitude As Double
    Dim Longitude As Double
    Dim valArray() As String
    Dim ElementCol3 As MSHTML.IHTMLElementCollection
    Dim divElm3 As MSHTML.HTMLDivElement
    Dim choice As Variant
    Dim i As Long
    Dim j As Long

    'Define the latitude and longitude
    Latitude = 38.221565
    Longitude = -122.46558

    'Create the Internet Explorer object
    Set objIE = CreateObject("InternetExplorer.Application")
    objIE.Top = 0
    objIE.Left = 0
    objIE.Visible = True

    'This will navigate to the website given the latitude and longitude
    objIE.navigate ("https://hazards.atcouncil.org/#/seismic?lat=" & Latitude & "&lng=" & Longitude & "&address=")

    'Wait here while the browser is busy
    Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop

    'Extra fixed delay because the page is sometimes not ready yet.
    'Now + TimeSerial keeps the date part, so the wait is correct across midnight.
    Application.Wait Now + TimeSerial(0, 0, 3)

    'Bring the web page to the front
    objIE.Visible = True

    'Pick each required item and notify the page of the change. The event has
    'to be raised on the <select> element itself (not on the <option>), and for
    'every drop-down, otherwise the page keeps showing the default parameters.
    For Each choice In Array("ASCE7-10", "IV", "D - Stiff Soil")
        If SelectOptionByText(objIE.document, "seismic-selector", CStr(choice)) Then
            'Give the page time to refresh its output after each change
            Application.Wait Now + TimeSerial(0, 0, 3)
        Else
            Debug.Print "ScrapeData: option not found: " & choice
        End If
    Next choice

    Set ElementCol3 = objIE.document.getElementsByClassName("table-row")
    i = 1
    For Each divElm3 In ElementCol3
        'The row text contains line feeds; split it into one value per cell
        valArray() = Split(divElm3.innerText, vbLf)
        For j = 1 To (UBound(valArray()) + 1)
            'This puts the values into the worksheet on the "Test" page
            Worksheets("Test").Cells(i, j).Value = Application.Clean(Trim(valArray(j - 1)))
        Next j
        i = i + 1
        'The page yields every row twice, so stop after the first 20 rows
        If i > 20 Then Exit For
    Next divElm3
End Sub

' Selects the first <option> whose visible text equals optionText in any
' <select> found inside the element with id containerId, then dispatches a
' bubbling DOM "change" event on that <select>.
' Returns True when a matching option was found and selected, False otherwise
' (including when the container element does not exist).
' NOTE(review): IE11 document modes are documented as dropping fireEvent in
' favour of dispatchEvent; whether this page's script reacts to a synthetic
' "change" event could not be verified here - confirm against the live page.
Private Function SelectOptionByText(ByVal doc As Object, ByVal containerId As String, ByVal optionText As String) As Boolean
    Dim container As Object
    Dim selects As Object
    Dim sel As Object
    Dim opts As Object
    Dim evt As Object
    Dim s As Long
    Dim o As Long

    Set container = doc.getElementById(containerId)
    If container Is Nothing Then Exit Function

    Set selects = container.getElementsByTagName("select")
    For s = 0 To selects.Length - 1
        Set sel = selects.Item(s)
        Set opts = sel.getElementsByTagName("option")
        For o = 0 To opts.Length - 1
            If opts.Item(o).innerText = optionText Then
                sel.Focus
                sel.selectedIndex = o
                Set evt = doc.createEvent("HTMLEvents")
                evt.initEvent "change", True, False
                sel.dispatchEvent evt
                SelectOptionByText = True
                Exit Function
            End If
        Next o
    Next s
End Function
这是HTML代码的相关部分:
<!-- Selector panel: three drop-downs (Reference Document, Risk Category,
     Site Class) followed by a free-text Report Title input. The <select>
     elements carry no id or name attribute, so they can only be addressed
     by their position inside #seismic-selector. -->
<div id="seismic-selector">
<!-- Drop-down 1: Reference Document -->
<div class="form-group">
<span class="label">Reference Document</span>
<select>
<option value="asce7-16">ASCE7-16</option>
<option value="asce7-10">ASCE7-10</option>
<option value="asce7-05">ASCE7-05</option>
<option value="asce41-17">ASCE41-17</option>
<option value="asce41-13">ASCE41-13</option>
<option value="nehrp-2015">NEHRP-2015</option>
<option value="nehrp-2009">NEHRP-2009</option>
<option value="ibc-2015">IBC-2015</option>
<option value="ibc-2012">IBC-2012</option>
</select>
</div>
<!-- Drop-down 2: Risk Category -->
<div class="form-group">
<span class="label">Risk Category</span>
<select>
<option value="I">I</option>
<option value="II">II</option>
<option value="III">III</option>
<option value="IV">IV</option>
</select>
</div>
<!-- Drop-down 3: Site Class (option value is the letter, visible text adds a description) -->
<div class="form-group">
<span class="label">Site Class</span>
<select>
<option value="A">A - Hard Rock</option>
<option value="B">B - Rock</option>
<option value="C">C - Very Dense Soil and Soft Rock</option>
<option value="D">D - Stiff Soil</option>
<option value="E">E - Soft Clay Soil</option>
<option value="F">F - Site Response Analysis</option>
</select>
</div>
<!-- Free-text report title; this line also closes the form-group and the #seismic-selector container -->
<div class="form-group">
<span class="label">Report Title</span>
<input type="text" value="" placeholder="Enter a title..."></div></div>
答案 0 :(得分:1)
这是使用Selenium Basic的版本,因为该页面能够响应自动化浏览器所做的选择。在这个原理性示例中,我直接在URL中写入了lat和long的值,目的只是向您展示基本的方法。如果需要,在循环中拼接这些值也很容易。
这个页面有点奇怪,把表格写入工作表的过程也挺有意思。
下载Selenium Basic后,您需要转到VBE > Tools > References,并添加对Selenium Type Library的引用。它还支持其他一些浏览器,包括IE和FireFox。
对图片的尺寸表示歉意——我试图通过在链接末尾添加后缀s或m来缩小图片,但是s太小了。
Option Explicit
' Drives a headless Chrome session through Selenium Basic: loads the hazards
' page for a fixed coordinate, picks one entry in each of the three drop-downs
' under #seismic-selector, waits until at least one element with class "table"
' exists, and writes every non-empty table to Sheet1 below the existing data
' (a header row followed by one worksheet row per "table-row" element).
Public Sub GetInfo()
    Const url = "https://hazards.atcouncil.org/#/seismic?lat=38.221565&lng=-122.46558&address="

    Dim driver As WebDriver
    Dim selectBoxes As WebElements
    Dim resultTables As WebElements
    Dim tbl As Object, rowElm As Object, cellElm As Object
    Dim outRow As Long, outCol As Long
    Dim sheet As Worksheet
    Dim headers()

    Set driver = New ChromeDriver
    Application.ScreenUpdating = False

    driver.AddArgument "--headless"
    driver.Start "Chrome"
    driver.Get url

    ' The three drop-downs are addressed by position within the selector panel
    Set selectBoxes = driver.FindElementsByCss("#seismic-selector select")
    selectBoxes.Item(1).AsSelect.SelectByText "ASCE7-10"
    selectBoxes.Item(2).AsSelect.SelectByText "II"
    selectBoxes.Item(3).AsSelect.SelectByText "D - Stiff Soil"

    ' Keep polling until the result tables have been rendered
    Do
        Set resultTables = driver.FindElementsByClass("table", timeout:=7000)
    Loop Until resultTables.Count <> 0

    headers = Array("Name", "Value", "Description")
    Set sheet = ThisWorkbook.Worksheets("Sheet1")

    For Each tbl In resultTables
        If tbl.Text <> vbNullString Then
            ' Leave one blank row between the previous content and this table
            outRow = GetLastRow(sheet, 1) + 2
            sheet.Cells(outRow, 1).Resize(1, UBound(headers) + 1).Value = headers

            For Each rowElm In tbl.FindElementsByClass("table-row")
                outRow = outRow + 1
                outCol = 0
                For Each cellElm In rowElm.FindElementsByTag("div")
                    outCol = outCol + 1
                    sheet.Cells(outRow, outCol).Value = cellElm.Text
                Next cellElm
            Next rowElm
        End If
    Next tbl

    driver.Quit
    Application.ScreenUpdating = True
End Sub
' Returns the row number reached by going up (xlUp) from the bottom cell of
' the given column on worksheet ws. columnNumber defaults to column 1.
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    Dim bottomCell As Range

    Set bottomCell = ws.Cells(ws.Rows.Count, columnNumber)
    GetLastRow = bottomCell.End(xlUp).Row
End Function
网页示例:
工作表示例输出:
Option Explicit
' Internet Explorer variant: opens the hazards page for a fixed coordinate,
' moves each of the three drop-downs under #seismic-selector with keyboard
' input (focus + SendKeys), waits until an element with class "table" exists,
' and writes every non-empty table to Sheet1 below the existing data
' (a header row followed by one worksheet row per "table-row" element).
Public Sub ScrapeData()
    Dim browser As Object, selectBoxes As Object
    Dim Latitude As Double, Longitude As Double
    Dim resultTables As Object, tbl As Object, rowElm As Object, cellElm As Object
    Dim outRow As Long, outCol As Long
    Dim sheet As Worksheet
    Dim headers()

    Latitude = 38.221565
    Longitude = -122.46558

    Set browser = CreateObject("InternetExplorer.Application")
    Application.ScreenUpdating = True

    ' The window must be visible so the drop-downs can receive the key strokes
    browser.Visible = True
    browser.navigate ("https://hazards.atcouncil.org/#/seismic?lat=" & Latitude & "&lng=" & Longitude & "&address=")

    Do Until (Not browser.Busy) And (browser.readyState = 4)
        DoEvents
    Loop

    ' Step each drop-down down from its initial entry using the keyboard
    Set selectBoxes = browser.document.querySelectorAll("#seismic-selector select")
    selectBoxes.Item(0).Focus
    SendKeys "{down}"
    selectBoxes.Item(1).Focus
    SendKeys "{down}"
    selectBoxes.Item(2).Focus
    SendKeys "{down 3}"

    headers = Array("Name", "Value", "Description")
    Set sheet = ThisWorkbook.Worksheets("Sheet1")

    ' Keep polling until the result tables have been rendered
    Do
        DoEvents
        Set resultTables = browser.document.getElementsByClassName("table")
    Loop Until resultTables.Length <> 0

    For Each tbl In resultTables
        If tbl.innerText <> vbNullString Then
            ' Leave one blank row between the previous content and this table
            outRow = GetLastRow(sheet, 1) + 2
            sheet.Cells(outRow, 1).Resize(1, UBound(headers) + 1).Value = headers

            For Each rowElm In tbl.getElementsByClassName("table-row")
                outRow = outRow + 1
                outCol = 0
                For Each cellElm In rowElm.getElementsByTagName("div")
                    outCol = outCol + 1
                    sheet.Cells(outRow, outCol).Value = cellElm.innerText
                Next cellElm
            Next rowElm
        End If
    Next tbl

    browser.Quit
    Application.ScreenUpdating = True
End Sub
' Returns the row number reached by going up (xlUp) from the bottom cell of
' the given column on worksheet ws. columnNumber defaults to column 1.
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
    Dim bottomCell As Range

    Set bottomCell = ws.Cells(ws.Rows.Count, columnNumber)
    GetLastRow = bottomCell.End(xlUp).Row
End Function