我正在尝试从网站中提取特定数据并粘贴到工作表中以每天更新数据库。但是由于无法以excel或csv格式下载表,因此我应该直接从网站中提取表。
按照我的代码执行,并在哪里遇到问题(哪里有“ HERE”)。
Sub Scrape_Stats()
'Create Internet Explorer Browser
Dim appIE As Object
Set appIE = CreateObject("internetexplorer.application")
'Ask Browser to navigate to website (.Visible=False will hide IE when running)
With appIE
.Navigate "https://www.infomoney.com.br/mercados/ferramentas/contratos-di-futuro"
.Visible = True
End With
'Have the macro pause while IE is busy opening and navigating
Do While appIE.Busy
DoEvents
Loop
'Designate the table to be extracted and Copy the data from table - HERE
'Close IE and clear memory
appIE.Quit
Set appIE = Nothing
'Clear area and paste extracted text into the appropriate sheet/cells - HERE
Worksheets("Sheet1").Range("A2:H1000").ClearContents
Sheets("PPG").Select
Range("A2").Select
End Sub
答案 0 :(得分:3)
应该可以,我正在使用剪贴板一次移动表中的数据。
Sub Scrape_Stats()
Dim Clip As Object: Set Clip = CreateObject("new:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Dim Text As String
Dim ws As Worksheet: Set ws = ThisWorkbook.Worksheets("Sheet1")
With CreateObject("internetexplorer.application")
.Navigate "https://www.infomoney.com.br/mercados/ferramentas/contratos-di-futuro"
Do While .Busy And .readyState <> 4: DoEvents: Loop
Text = .Document.getElementsByTagName("Table")(1).outerhtml
.Quit
End With
Clip.SetText Text
Clip.PutInClipboard
ws.Range("A2:H1000").ClearContents
ws.Range("A2").Select
ws.PasteSpecial Format:="Unicode Text"
Set Clip = Nothing
End Sub
答案 1 :(得分:3)
发出xmlhtttp请求而不打开浏览器并解析隐藏在响应的一个属性(data-DIContracts
中的json的速度要快得多。
我使用的是jsonconverter.bas,您可以从here下载。将.bas添加到项目后,请转到vbe>工具>引用,然后添加对Microsoft Scripting Runtime
的引用,并为Microsoft HTML Object Library
添加一个引用。
行
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
防止由于频繁的页面更新而提供缓存结果。
Option Explicit
Public Sub GetTable()
Dim sResponse As String, html As HTMLDocument, json As Object, i As Long
Application.ScreenUpdating = False
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.infomoney.com.br/mercados/ferramentas/contratos-di-futuro", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
With html
.body.innerHTML = sResponse
Set json = JsonConverter.ParseJson(.querySelector("#serverDI").getAttribute("data-DIContracts"))
End With
With ThisWorkbook.Worksheets("Sheet1")
.Cells.ClearContents
.Cells(1, 1).Resize(1, UBound(json(1).keys) + 1) = json(1).keys
For i = 1 To json.Count
.Cells(i + 1, 1).Resize(1, UBound(json(i).keys) + 1) = json(i).Items
Next
End With
Application.ScreenUpdating = True
End Sub
答案 2 :(得分:2)
您可以尝试另一种方法。我在脚本中使用了选择器来消除冗长的内容。
Sub FetchTabularContent()
Dim IE As New InternetExplorer, Html As HTMLDocument
Dim I&, C&, N&, R&
With IE
.Visible = False
.navigate "https://www.infomoney.com.br/mercados/ferramentas/contratos-di-futuro"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set Html = .document
With Html.querySelectorAll("table tr")
For N = 1 To .Length - 1
With .item(N).querySelectorAll("th,td")
For I = 0 To .Length - 1
C = C + 1: ThisWorkbook.Worksheets("Sheet1").Cells(R + 1, C) = .item(I).innerText
Next I
C = 0: R = R + 1
End With
Next N
End With
End With
End Sub
执行前要添加的参考:
Microsoft Internet Controls
Microsoft HTML Object Library