这是该问题的延续 using InStr to search for quotes, spaces, colons, etc
我也在尝试在下面获取所有这些数据。我本来打算搜索<td align="left">
的循环
但我有一种感觉,它将带来大量垃圾以及所需的结果。我想知道是否还有更好的方法。
<b>Total Hospital Beds</b></td>
<td align="left">Adult ICU (intensive care unit) CCU (critical care unit)</td>
<td align="left">26</td>
<td align="left">Medical/surgical</td>
<td align="left">198</td>
<td align="left">Pediatric</td>
<td align="left">20</td>
<td align="center" colspan="2"><b>Services</b></td>
<td align="left">Acute Hemodialoysis Service</td>
<td align="left">Chronic Hemodialysis Stations</td>
<td align="left">Magnetic Resonance Imaging - On Site</td>
<td align="left">Mixed OR's</td>
<td align="left">7</td>
这就是我所拥有的,但是现在它在第一页之后停止,加粗的行突出显示了
Public Sub VisitPages()
Dim IE As New InternetExplorer
With IE
.Visible = True
.navigate "http://healthapps.state.nj.us/facilities/acSetSearch.aspx?by=county"
While .Busy Or .ReadyState < 4: DoEvents: Wend
With .Document
.querySelector("#middleContent_cbType_1").Click
.querySelector("#middleContent_cbType_4").Click
.querySelector("#middleContent_btnGetList").Click
End With
While .Busy Or .ReadyState < 4: DoEvents: Wend
Dim list As Object, i As Long
Set list = .Document.querySelectorAll("#main_table [href*=doPostBack]")
For i = 0 To list.Length - 1
list.Item(i).Click
While .Busy Or .ReadyState < 4: DoEvents: Wend
' Application.Wait Now + TimeSerial(0, 0, 3) '<== Delete me later. This is just to demo page changes
**Debug.Print .Document.getElementById("middleContent_lbName_county").outerHTML**
'do stuff with new page
Dim FirstOcc As Long
Dim TtlHosp As Variant
Dim FLine As Variant
Dim FLineFixed As Variant
TtlHosp = Mid(.Document.Body.innerHTML, InStr(.Document.Body.innerHTML, "Total Hospital Beds"), 4000)
Do Until InStr(TtlHosp, "<td align=" & Chr(34) & "left" & Chr(34) & ">") = 0
FirstOcc = InStr(TtlHosp, "<td align=" & Chr(34) & "left" & Chr(34) & ">")
FLine = Mid(TtlHosp, FirstOcc + 17, 150)
FLineFixed = Mid(FLine, 1, InStr(FLine, "</td>") - 1)
Debug.Print FLineFixed
TtlHosp = Mid(TtlHosp, FirstOcc + 17, 2000)
Loop
.Navigate2 .Document.URL '<== back to homepage
While .Busy Or .ReadyState < 4: DoEvents: Wend
Set list = .Document.querySelectorAll("#main_table [href*=doPostBack]") 'reset list (often required in these scenarios)
Next
Stop '<== Delete me later
'.Quit '<== Remember to quit application
End With
End Sub
答案 0 :(得分:2)
该信息似乎仅用于特殊医院的选择,而床位信息在第三张表中。 (注意:结果当前是用Excel编写的)
Option Explicit
Public Sub VisitPages()
Dim IE As New InternetExplorer, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
With IE
.Visible = True
.navigate "http://healthapps.state.nj.us/facilities/acSetSearch.aspx?by=county"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("#middleContent_cbType_5").Click
.querySelector("#middleContent_btnGetList").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
Dim list As Object, i As Long
Set list = .document.querySelectorAll("#main_table [href*=doPostBack]")
For i = 0 To list.Length - 1
list.item(i).Click
While .Busy Or .readyState < 4: DoEvents: Wend
WriteTable .document.getElementsByTagName("table")(3), .document.getElementById("middleContent_lbName_county").innerText, GetLastRow(ws, 3) + 1, ws
'do stuff with new page
.Navigate2 .document.URL '<== back to homepage
While .Busy Or .readyState < 4: DoEvents: Wend
Set list = .document.querySelectorAll("#main_table [href*=doPostBack]") 'reset list (often required in these scenarios)
Next
.Quit '<== Remember to quit application
End With
End Sub
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
With ws
GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
End With
End Function
Public Sub WriteTable(ByVal hTable As HTMLTable, facility As String, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long, titleRow As Long
r = startRow: titleRow = startRow
With ws
Set tRow = hTable.getElementsByTagName("tr")
.Cells(titleRow, 1) = facility
For Each tr In tRow
r = r + 1
Set tCell = tr.getElementsByTagName("td")
c = 2
For Each td In tCell
.Cells(r, c).Value = td.innerText
c = c + 1
Next td
Next tr
End With
End Sub