所以我有这样的链接
https://finance.yahoo.com/most-active?offset=0&count=100
我想使用如下查询
let
Source = Web.Page(Web.Contents("https://finance.yahoo.com/most-active?offset=0&count=100")),
Data0 = Source{0}[Data],
#"Changed Type" = Table.TransformColumnTypes(Data0,{{"Column1", type text}, {"Column2", type text}, {"Column3", type number}, {"Column4", type number}, {"Column5", Percentage.Type}, {"Column6", type text}, {"Column7", type text}, {"Column8", type text}, {"Column9", type text}, {"Column10", type text}})
in
#"Changed Type"
但是,我需要它遍历URL并更改offset = 0&count = 100。它需要将偏移量设置为0、100、200、300、400、500 ... 6000。 我不习惯VBA,并且在添加for循环时遇到很多语法错误。
答案 0 :(得分:0)
据我所知,您正在循环播放。当前有180个结果,即2页。下面的代码查找结果总数,仅发出所需数量的请求。 52周的范围是由javascript生成的画布标签内容,无法直接从页面上刮取(Canvas内容不是fallover内容的一部分,而是DOM的一部分。)如果我能算出如何生成适当的内容内容我将更新此答案。可能需要为此打开浏览器,并且XHR将作为检索方法被删除。
我使用一个基本类来保存XMLHTTP对象,以避免创建和销毁该对象的开销。如果将来有大量的数据集,则有必要对其进行调整以运行成批的请求。
警告:点击该网站太多次,太快,您可能会受到限制(不是字面上的意思)。
类模块clsHTTP:
Option Explicit
Private http As Object
Private Sub Class_Initialize()
Set http = CreateObject("MSXML2.XMLHTTP")
End Sub
Public Function GetString(ByVal url As String) As String
Dim sResponse As String
With http
.Open "GET", url, False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
GetString = sResponse
End With
End Function
标准模块:
Option Explicit
Public Sub GetTables()
Dim sResponse As String, html As HTMLDocument, hTable As HTMLTable, http As clsHTTP
Dim headers(), url As String, offset As Long, ws As Worksheet, results As Long
Application.ScreenUpdating = False
On Error GoTo errhand
Set http = New clsHTTP: Set ws = ThisWorkbook.Worksheets("Sheet1")
headers = Array("Symbol", "Name", "Price (intraday)", "Change", "% Change", "Volume", "Avg Vol(3 month)", "Market Cap", "PE ratio (TTM)", "52 Week Range")
url = "https://finance.yahoo.com/most-active?offset=0&count=100"
Set html = New HTMLDocument
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
With html
.body.innerHTML = http.GetString(url)
Set hTable = .querySelector("table[class^=W]")
results = Split(Split(.querySelector("#fin-scr-res-table [class*='Mstart']").innerText, "of ")(1), " results")(0)
WriteTable hTable, GetLastRow(ws, 1) + 1, ws
End With
For offset = 100 To results Step 100
url = "https://finance.yahoo.com/most-active?offset=" & offset & "&count=100"
With html
.body.innerHTML = http.GetString(url)
Set hTable = .querySelector("table[class^=W]")
WriteTable hTable, GetLastRow(ws, 1) + 1, ws
End With
Next
errhand:
Application.ScreenUpdating = True
End Sub
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
With ws
GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
End With
End Function
Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tSection As Object, tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long, tBody As Object
r = startRow
With ws
Set tBody = hTable.getElementsByTagName("tbody")
For Each tSection In tBody
Set tRow = tSection.getElementsByTagName("tr")
For Each tr In tRow
Set tCell = tr.getElementsByTagName("td")
c = 1
For Each td In tCell
.Cells(r, c).Value = td.innerText
c = c + 1
Next td
r = r + 1
Next tr
Next tSection
End With
End Sub
作为暂时的解决方法,您可以使用返回的符号从报价登陆页面中检索52周的范围信息:
Dim lastRow As Long, i As Long, symbols(), rowCounter As Long
lastRow = GetLastRow(ws, 1): rowCounter = 2
symbols = ws.Range("A2:A" & lastRow).Value
For i = LBound(symbols, 1) To 3 'UBound(symbols, 1)
With html
.body.innerHTML = http.GetString("https://finance.yahoo.com/quote/" & symbols(i, 1) & "/key-statistics?p=" & symbols(i, 1))
ws.Cells(rowCounter, UBound(headers) + 1) = .querySelector("[data-test='FIFTY_TWO_WK_RANGE-value']").innerText
End With
Next
我强烈建议您寻找一个API来提供此信息。
使用画布探索的途径(我会逐步添加):
答案 1 :(得分:0)
既然您发布了一个M代码段,那么我想您知道将以下内容复制粘贴到哪里了。
您的代码段已变成名为getYahooData
的函数,该函数接受offset
的数字参数,以便我们可以重复使用。
我们基本上会生成一个offsets
的列表(0到6000,以100为步长),然后遍历该列表,同时每次都调用getYahooData
函数。每次,如果成功,该函数将返回一个数据表;如果失败,则返回null
。
该函数返回null
后,将假定在其余offsets
处不存在任何表(这意味着我们不再向Yahoo提出任何数据请求)。
let
getYahooData = (offset as number) =>
let
Source = Web.Page(Web.Contents("https://finance.yahoo.com/most-active?offset=" & Text.From(offset) & "&count=100")),
dataWasReturned = Table.Contains(Source, [ClassName="W(100%)"]),
nullOrTable = if not dataWasReturned then null else
let
dataTable = Source{0}[Data],
changedTypes = Table.TransformColumnTypes(dataTable, {{"Column1", type text}, {"Column2", type text}, {"Column3", type number}, {"Column4", type number}, {"Column5", Percentage.Type}, {"Column6", type text}, {"Column7", type text}, {"Column8", type text}, {"Column9", type text}, {"Column10", type text}})
in
changedTypes
in
nullOrTable,
offsets = List.Numbers(0, 61, 100),
// Loop over the list to produce a separate list (which will contain tables or null)
data = List.Accumulate(offsets, {}, (listOfTables, currentOffset) =>
let
runOutOfResults = List.Contains(listOfTables, null),
toCombine = if runOutOfResults then {null} else {getYahooData(currentOffset)},
appendToList = List.Combine({listOfTables, toCombine})
in
appendToList
),
toTable = Table.FromColumns({offsets, data}, {"Offsets", "Data"}),
nestedColumnNames =
let
excludeNulls = Table.SelectRows(toTable, each not([Data] is null)),
combineNestedTables = Table.Combine(excludeNulls[Data]),
columnNames = Table.ColumnNames(combineNestedTables)
in
columnNames,
expandColumn = Table.ExpandTableColumn(toTable, "Data", nestedColumnNames)
in
expandColumn
在我的屏幕上,Column6
至Column9
尚未转换为数字类型,因为数字以字符串M
,B
,T
结尾。您可以使用Table.TransformColumns
,Number.FromText
和/或其他M函数将它们分别转换为数百万,数十亿,万亿。