我正在尝试使用VBA从html表中获取数据。从列表框中选择一个值后,填充一个文本框并单击一个按钮,表格就会出现。但是该网站的网址不会更改。
我的程序确实填充了该框,选择了列表框的值,然后单击“搜索”按钮,但是我无法从表中获取数据。
我需要页面末尾的表格单元格的值。 (第二
这是页面的url:
Sub Info()
Dim enlace As String
Dim id As String
Dim lista
Dim rut As Integer
Dim i As Integer
Dim largo As Integer
largo = Worksheets("Lista").Cells(rows.Count, 1).End(xlUp).Row
id = Worksheets("Lista").Cells(2, 1).Value
lista = Split(id, "-")
rut = lista(0)
enlace = "http://www.cmfchile.cl/institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=" & rut & "&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw+cAAhAABP4MAAz&control=svs&pestania=1"
Set objIE = CreateObject("InternetExplorer.application")
objIE.Visible = False
objIE.Navigate (enlace)
Do
If objIE.ReadyState = 4 Then
objIE.Visible = False
Exit Do
Else
DoEvents
End If
Loop
Dim button_name As String
button_name = "Aportantes"
Set link = objIE.document.getElementsByTagName("A")
For Each Hyperlink In link
If InStr(Hyperlink.innerText, button_name) > 0 Then
Hyperlink.Click
Exit For
End If
Next
Dim nuevoLink As String
nuevoLink = Hyperlink
objIE.Quit
Set ie = CreateObject("InternetExplorer.application")
ie.Visible = False
ie.Navigate (nuevoLink)
Do
If ie.ReadyState = 4 Then
ie.Visible = False
Exit Do
Else
DoEvents
End If
Loop
Dim sem As String
Dim ano As Integer
sem = "03"
ano = 2018
Dim aportantes As Object
Dim cuotas_emitidas As Object
ie.document.getElementById("semestre").Value = sem
ie.document.getElementById("aa").Value = ano
Set elems = ie.document.getElementsByTagName("input")
For Each e In elems
If (e.getAttribute("value") = "Consultar") Then
e.Click
''HERE IS THE PROBLEM
Set aportantes = ie.document.getElementsByTagName("table")(1).getElementsByTagName("tr")(0).getElementsByTagName("tr")(1)
ThisWorkbook.Worksheets("Lista").Cells(i, 4).Value = aportantes
Set cuotas_emitidas = ie.document.getElementsByTagName("table")(1).getElementsByTagName("tr")(1).getElementsByTagName("tr")(1).innerText
ThisWorkbook.Worksheets("Lista").Cells(i, 5).Value = cuotas_emitidas
End If
Next e
End Sub
HTML:
<table>
<tbody>
<tr>
<td class="fondoOscuro">2.01.60 TOTAL APORTANTES</td>
<td>58</td>
</tr>
<tr>
<td class="fondoOscuro">2.01.70 CUOTAS EMITIDAS</td>
<td>20000000 </td>
</tr>
<tr>
<td class="fondoOscuro">2.01.71 CUOTAS PAGADAS</td>
<td>7691000</td>
</tr>
<tr>
<td class="fondoOscuro">2.01.72 CUOTAS SUSCRITAS Y NO PAGADAS</td>
<td>0 </td>
</tr>
<tr>
<td class="fondoOscuro">2.01.73 NUMERO DE CUOTAS CON PROMESA DE SUSCRIPCION Y PAGO</td>
<td>0 </td>
</tr>
<tr>
<td class="fondoOscuro">2.01.74 NUMERO DE CONTRATOS DE PROMESAS DE SUSCRIPCION Y PAGO</td>
<td>0</td>
</tr>
<tr>
<td class="fondoOscuro">2.01.75 NUMERO DE PROMITENTES SUSCRIPTORES DE CUOTAS</td>
<td>0 </td>
</tr>
<tr>
<td class="fondoOscuro">2.01.80 VALOR LIBRO DE LA CUOTA</td>
<td>1.0059 </td>
</tr>
</tbody></table>
'
答案 0 :(得分:1)
您可以使用XHR进行整个操作,而无需打开浏览器即可进行抓取。将Activesheet输出更改为要将表写入到的工作表(WriteTable hTable, 1, ActiveSheet
)。
请注意POST正文的参数包括:
代码:
Public Sub GetTable()
Dim sResponse As String, hTable As Object, id As String, lista() As String, rut As String
Dim strBody As String
id = Worksheets("Lista").Cells(2, 1).Value
lista = Split(id, "-")
rut = lista(0)
strBody = "mm=12&aa=2017&rut=" & rut
With CreateObject("MSXML2.XMLHTTP")
.Open "POST", "http://www.cmfchile.cl//institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=9278&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw%20cAAhAABP4MAAz&control=svs&pestania=27", False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send strBody
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With CreateObject("htmlFile")
.Write sResponse
Set hTable = .getElementsByTagName("table")(1)
End With
Application.ScreenUpdating = False
WriteTable hTable, 1, ActiveSheet
Application.ScreenUpdating = True
End Sub
Public Sub WriteTable(ByVal hTable As Object, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tSection As Object, tRow As Object, tCell As Object, tr As Object, td As Object, R As Long, C As Long, tBody As Object
R = startRow
With ws
Set tBody = hTable.getElementsByTagName("tbody")
For Each tSection In tBody 'HTMLTableSection
Set tRow = tSection.getElementsByTagName("tr") 'HTMLTableRow
For Each tr In tRow
R = R + 1
Set tCell = tr.getElementsByTagName("td")
C = 1
For Each td In tCell 'DispHTMLElementCollection
.Cells(R, C).Value = td.innerText 'HTMLTableCell
C = C + 1
Next td
Next tr
Next tSection
End With
End Sub
Option Explicit
Public Sub GetInfo()
Dim ie As New InternetExplorer, hTable As HTMLTable
Application.ScreenUpdating = False
With ie
.Visible = True
.navigate "http://www.cmfchile.cl/institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=9278&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw%20cAAhAABP4MAAz&control=svs&pestania=27"
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("aa").Value = 2017
.document.forms("consulta").submit
Do
DoEvents
On Error Resume Next
Set hTable = .document.getElementsByTagName("table")(1)
On Error GoTo 0
Loop While hTable Is Nothing
WriteTable hTable, 1, ActiveSheet
End With
Application.ScreenUpdating = True
End Sub
输出:
参考:
通过VBE>工具>引用的HTML对象库
调整代码轮廓,但仍使用pestania = 27
Option Explicit
Public Sub GetInfo()
Dim ie As New InternetExplorer, hTable As HTMLTable, lista() As String, id As String, rut As String, enlace As String
Application.ScreenUpdating = False
id = Worksheets("Lista").Cells(2, 1).Value
lista = Split(id, "-")
rut = lista(0)
enlace = "http://www.cmfchile.cl/institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=" & rut & "&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw+cAAhAABP4MAAz&control=svs&pestania=27"
With ie
.Visible = True
.navigate enlace '"http://www.cmfchile.cl/institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=9278&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw%20cAAhAABP4MAAz&control=svs&pestania=27"
While .Busy Or .readyState < 4: DoEvents: Wend
.document.getElementById("aa").Value = 2017
.document.forms("consulta").submit
Do
DoEvents
On Error Resume Next
Set hTable = .document.getElementsByTagName("table")(1)
On Error GoTo 0
Loop While hTable Is Nothing
WriteTable hTable, 1, ActiveSheet
End With
Application.ScreenUpdating = True
End Sub
答案 1 :(得分:1)
您已经有了不错的答案。问题是QHarr
决定参加比赛时,他几乎没有任何选择让其他人站出来。但是,以下脚本将为您节省一些额外时间。我使用IE获取了page source
,然后应用了更快的方法来管理其余部分。我试图解析针对2016
年填充的相关表格数据。可以根据需要随意更改年份。
Sub ScrapeTabularInfo()
Dim IE As New InternetExplorer, Html As HTMLDocument
Dim Htmldoc As New HTMLDocument, post As Object, elem As Object
Dim trow As Object, R&, C&
With IE
.Visible = False
.navigate "http://www.cmfchile.cl/institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=9278&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw%20cAAhAABP4MAAz&control=svs&pestania=27"
While .Busy Or .readyState < 4: DoEvents: Wend
Set Html = .document
Html.querySelector("#aa").innerText = 2016
Html.querySelector("input[value='Consultar']").Click
Do: Set post = Html.getElementsByTagName("table")(1): DoEvents: Loop While post Is Nothing
End With
Htmldoc.body.innerHTML = Html.DocumentElement.outerHTML
For Each elem In Htmldoc.getElementsByTagName("table")(1).Rows
For Each trow In elem.Cells
C = C + 1: Cells(R + 1, C) = trow.innerText
Next trow
C = 0: R = R + 1
Next elem
IE.Quit
End Sub
这里最好的方法是利用post
请求,您已经有一个演示。
添加到库中的参考(考虑到您拥有IE9
或更高版本才能使.querySelector()
正常工作):
Microsoft Internet Controls
Microsoft HTML Object Libray