我写了一个宏去WU获取历史数据,并且在大多数情况下都有效。但是,我认为该宏的运行速度太快,无法从网站上获取数据。
https://www.wunderground.com/history/daily/us/tx/el-paso/KELP/date/2017-1-3 网站和我要获取的表格是否可以通过tableaw排序?
我尝试了以下操作:DoEvents
和Application.Wait (Now + TimeValue("00:00:01"))
以尝试减慢该过程。
Sub BrowseToWU()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument
Dim RowAddress As Integer
Dim WebAddress As String
Dim DateSheet As Date
Dim WkDay As Integer
Dim DateSheetName As String
'Application.ScreenUpdating = False
'Application.StatusBar = True
RowAddress = 2
IE.Visible = True
Do Until RowAddress = 60
WebAddress = Range("A" & RowAddress)
DateSheet = Right(WebAddress, 8)
DateSheetName = Right(WebAddress, 8)
WkDay = Weekday(DateSheet, vbSunday)
If WkDay < 3 Then
RowAddress = RowAddress + 1
ElseIf WkDay > 6 Then
RowAddress = RowAddress + 1
Else
IE.Navigate WebAddress
Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop
Set HTMLDoc = IE.Document
DoEvents
Application.Wait (Now + TimeValue("00:00:05"))
DoEvents
ProcessHTMLPage HTMLDoc
DateSheet = Right(WebAddress, 8)
DoEvents
Application.Wait (Now + TimeValue("00:00:01"))
ActiveSheet.Name = DateSheetName
DoEvents
RowAddress = RowAddress + 1
'IE.Quit
Worksheets("Sheet1").Activate
End If
Loop
End Sub
Option Explicit
Sub ProcessHTMLPage(HTMLPage As MSHTML.HTMLDocument)
Dim HTMLTable As MSHTML.IHTMLElement
Dim HTMLTables As MSHTML.IHTMLElementCollection
Dim HTMLRow As MSHTML.IHTMLElement
Dim HTMLCell As MSHTML.IHTMLElement
Dim RowNum As Long, ColNum As Integer
'Dim IE As New SHDocVw.InternetExplorer
'Dim Ws As Worksheet
Set HTMLTables = HTMLPage.getElementsByClassName("tablesaw-sortable")
'DoEvents
For Each HTMLTable In HTMLTables
Worksheets.Add
DoEvents
Range("A1").Value = HTMLTable.className
Range("B1").Value = Now
RowNum = 2
For Each HTMLRow In HTMLTable.getElementsByTagName("tr")
'Debug.Print vbTab & HTMLRow.innerText
ColNum = 1
For Each HTMLCell In HTMLRow.Children
Cells(RowNum, ColNum) = HTMLCell.innerText
ColNum = ColNum + 1
Next HTMLCell
RowNum = RowNum + 1
Next HTMLRow
Next HTMLTable
DoEvents
'IE.Quit
End Sub
如果宏满足一周中某天的条件,则应该遍历sheet1来获取历史数据的网址。
IE将打开,然后将跳至下一个接收数据的模块。
将创建一个新的工作表,并将数据粘贴到新的工作表中。
工作表已重命名为数据日期。
该网址表再次被激活,并且过程重新开始。
我得到的错误是数据不是从网站上获取的,所以For语句结束并且Web地址表被重命名并且发生错误。
答案 0 :(得分:3)
一种解决方法是调用页面用来获取该信息的API。
API返回json,您可以使用json解析器进行解析。我使用jsonconverter.bas。在该链接中的代码安装到名为JsonConverter的标准模块中之后,请转到VBE>工具>引用>添加对Microsoft脚本运行时的引用。
查找API:
如果按 F12 打开开发人员工具并转到Network
标签,然后按 F5 刷新任何感兴趣的URL,您将看到记录的网络流量。您可以在此处找到API调用。
有关如何使用您希望在响应中看到的特定观察值搜索网络流量的方法,请参见我的答案here-这会将网络流量列表过滤到包含感兴趣值的那些项目。选择值时要明智-您希望其他地方不大可能发生某些事情。您也可以仅将网络流量过滤到XHR。
API响应:
API返回json。更具体地说,它返回包含2个键的字典。第二个键“ observations
”可用于返回字典(由[]
表示)的集合(由{}
表示)。
每个字典代表表格的一行(每日观察)。您可以循环此集合,然后循环内部字典,以访问表行值并通过填充数组来重建表。探索示例json响应here。
json结构的说明:
单击here放大
代码说明:
该代码分为多个辅助子和函数,将某些任务分配给每个子和函数,以 使代码更易于调试和遵循,并更好地与面向对象的编程原理保持一致。
整个过程是:
Worksheet("Sheet1")
的网址。辅助功能GetAllUrls
。"yyyymmdd"
的字符串,因此可以在以后传递给API。这由辅助函数GetOnlyQualifyingUrlsDates
和IncludeThisDate
处理。 IncludeThisDate
执行是否包括在内的检查; GetOnlyQualifyingUrlsDates
处理结果的循环和格式化。GetTables
执行的。CreateWorksheet
处理。此函数调用另一个辅助函数SheetExists
,以确保仅在不存在工作表的情况下创建工作表,否则,将使用具有该名称的现有工作表。WriteOutResults
,该子程序接受json变量和输出工作表对象作为参数。它从json响应中提取所有信息;本质上是重建表格。它将表和标题添加到适当的工作表。
它调用辅助函数Epoch2Date
,该函数处理json对象中两个unix字段的unix时间戳到日期时间的转换。待办事项:
VBA:
Option Explicit
Public Sub GetTables()
'VBE > Tools > References > Microsoft Scripting Runtime
Dim json As Object, qualifyingUrlsDates(), urls(), url As String
Dim ws As Worksheet, wsOutput As Worksheet, i As Long, startDate As String, endDate As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
urls = GetAllUrls(2, ws, "A")
qualifyingUrlsDates = GetOnlyQualifyingUrlsDates(urls)
'API key may be not be valid over time so look at obtaining by prior request
With CreateObject("MSXML2.XMLHTTP") 'issue xmlhttp request for each valid date (this would be better done using start and enddate to specify entire range _
of batches e.g. months within total range to cut down on requests
For i = LBound(qualifyingUrlsDates) To UBound(qualifyingUrlsDates)
startDate = qualifyingUrlsDates(i)
endDate = startDate ' a little verbose but useful for explaining
url = "https://api.weather.com/v1/geocode/31.76/-106.49/observations/historical.json?apiKey=6532d6454b8aa370768e63d6ba5a832e&startDate=" & startDate & "&endDate=" & endDate & "&units=e"
.Open "GET", url, False
.send
Set json = JsonConverter.ParseJson(.responseText)("observations")
Set wsOutput = CreateWorksheet(qualifyingUrlsDates(i))
WriteOutResults wsOutput, json
Next
End With
End Sub
Public Sub WriteOutResults(ByVal wsOutput As Worksheet, ByVal json As Object)
'json is a collection of dictionaries. Each dictionary is a time period reading from the day i.e. one row in output
Dim results(), item As Object, headers(), r As Long, c As Long, key As Variant
headers = json.item(1).keys 'get the headers which are the keys of each dictionary
ReDim results(1 To json.Count, 1 To UBound(headers) + 1)
For Each item In json
r = r + 1: c = 0 'increase row in results array to store results for table row
For Each key In item.keys
c = c + 1 'increase column number in results array for writing out results
Select Case key
Case "valid_time_gmt", "expire_time_gmt" 'convert unix timestamp fields to datetime
results(r, c) = Epoch2Date(item(key))
Case Else
results(r, c) = item(key)
End Select
Next
Next
With wsOutput
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetOnlyQualifyingUrlsDates(ByVal urls As Variant) As Variant
Dim i As Long, output(), counter As Long
ReDim output(1 To UBound(urls))
For i = LBound(urls) To UBound(urls)
If IncludeThisDate(urls(i)) Then 'check if weekday is to be included
counter = counter + 1
output(counter) = Format$(Right$(urls(i), 8), "yyyymmdd") 'if to include then add to output array of urls of interest
End If
Next
ReDim Preserve output(1 To counter)
GetOnlyQualifyingUrlsDates = output
End Function
Public Function IncludeThisDate(ByVal url As String) As Boolean
'tue, wed, thurs are valid
IncludeThisDate = Not IsError(Application.Match(Weekday(Right$(url, 8), vbSunday), Array(3, 4, 5)))
End Function
Public Function SheetExists(ByVal sheetName As String) As Boolean '<== function by @Rory
SheetExists = Evaluate("ISREF('" & sheetName & "'!A1)")
End Function
Public Function GetAllUrls(ByVal startRow As Long, ByVal ws As Worksheet, ByVal columnName As String) As Variant
'transpose used based on premise no more than a couple of years of dates
'startRow is start row for urls, ws is sheet where urls found, columnName is string representation of column for urls e.g. "A"
With ws
GetAllUrls = Application.Transpose(ws.Range("A" & startRow & ":A" & .Cells(.rows.Count, columnName).End(xlUp).Row).Value)
End With
End Function
Public Function CreateWorksheet(ByVal sheetName As String) As Worksheet
Dim ws As Worksheet
If SheetExists(sheetName) Then
Set ws = ThisWorkbook.Worksheets(sheetName)
'do something.... clear it? Then add new data to it?
Else
Set ws = ThisWorkbook.Worksheets.Add
ws.Name = sheetName
End If
Set CreateWorksheet = ws
End Function
Public Function Epoch2Date(ByVal E As Currency, Optional msFrac) As Date '@ Schmidt http://www.vbforums.com/showthread.php?805245-EPOCH-to-Date-and-vice-versa
Const Estart As Double = #1/1/1970#
msFrac = 0
If E > 10000000000@ Then E = E * 0.001: msFrac = E - Int(E)
Epoch2Date = Estart + (E - msFrac) / 86400
End Function