我设法从EDGAR DB中提取数据。但是,我已从所有实例文档中提取所有数据。无论我多少尝试找到一种方法只从选定的实例元素中挑选选定的元素文档,我找不到方法。代码如下:
Sub READSITE()
Dim IE As InternetExplorer
Dim els, el, colDocLinks As New Collection
Dim lnk, res
Dim Ticker As String
Dim colXMLPaths As New Collection
Dim XMLElement As String
Dim fillingType As String
Set IE = New InternetExplorer
IE.Visible = False
Ticker = Worksheets("Sheet1").Range("A1").Value
fillingType = Worksheets("Sheet3").Range("L1").Value
LoadPage IE, "https://www.sec.gov/cgi-bin/browse-edgar?" & _
"action=getcompany&CIK=" & Ticker & "&type=" & fillingType & _
"&dateb=&owner=exclude&count=20"
Set els = IE.Document.getelementsbytagname("a")
For Each el In els
If Trim(el.innertext) = "Documents" Then
colDocLinks.Add el.href
End If
Next el
For Each lnk In colDocLinks
LoadPage IE, CStr(lnk)
For Each el In IE.Document.getelementsbytagname("a")
If el.href Like "*[0-9].xml" Then
Debug.Print el.innertext, el.href
colXMLPaths.Add el.href
End If
Next el
Next lnk
XMLElement = Range("C1").Value
'For each link, open the URL and display the Debt Instrument Insterest Rate
For Each lnk In colXMLPaths
res = GetData(CStr(lnk), XMLElement)
With Worksheets("Sheet1").Cells(Rows.Count, 1).End(xlUp).Offset(1, 0)
.NumberFormat = "@"
.Value = Ticker
.Offset(0, 1).Value = lnk
.Offset(0, 2).Value = res
End With
Next lnk
End Sub
Function GetData(sURL As String, sXMLElement As String)
Dim strXMLSite As String
Dim objXMLHTTP As New MSXML2.XMLHTTP
Dim objXMLDoc As New MSXML2.DOMDocument
Dim objXMLNodexbrl As MSXML2.IXMLDOMNode
Dim objXMLNodeElement As MSXML2.IXMLDOMNode
Dim objXMLNodeStkhldEq As MSXML2.IXMLDOMNode
'''''''''''''''''''''
Dim userBeanList As MSXML2.IXMLDOMNodeList
Dim userbean As MSXML2.IXMLDOMNode
Dim beanChild As MSXML2.IXMLDOMNode
Dim i As Long
'''''''''''''''''''''
' In Sheet 3 determine if Row 2 is free of data and set start row to 2. Else get the last free row in column b
Sheets("Sheet3").Select
Sheets("Sheet3").Range("B2").Select
If ActiveCell.Value = "" Then
i = 2
Else
Sheets("Sheet3").Range("B1").Select
Selection.End(xlDown).Select
ActiveCell.Offset(1, -1).Range("A1").Select
i = ActiveCell.Row
End If
'Get tge XML from SEc
GetData = "?" 'No data from XML
objXMLHTTP.Open "GET", sURL, False '<<EDIT: GET the site
objXMLHTTP.send
objXMLDoc.LoadXML objXMLHTTP.responseText
objXMLDoc.setProperty "SelectionNamespaces", "xmlns:r='http://www.xbrl.org/2003/instance'"
Set objXMLNodexbrl = objXMLDoc.SelectSingleNode("r:xbrl")
'Get a single element value from the returned XML
Set objXMLNodeElement = objXMLNodexbrl.SelectSingleNode(sXMLElement)
If Not objXMLNodeElement Is Nothing Then
GetData = objXMLNodeElement.Text
End If
'Print all nodes name and value for each Element in the XML
Set userBeanList = objXMLDoc.SelectNodes("r:xbrl")
For Each userbean In userBeanList
Worksheets("Sheet3").Cells(i, 1).Value = sURL
For Each beanChild In userbean.ChildNodes
With Worksheets("Sheet3")
.Cells(i, 2).Value = beanChild.nodeName
.Cells(i, 3).Value = beanChild.Text
End With
i = i + 1
Next beanChild
Next userbean
End Function
Sub LoadPage(IE As Object, url As String)
IE.Navigate url
Do While IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE
DoEvents
Loop
End Sub
如何为每个实例文档提取所有数据而不是每个实例文档选择15个元素?
答案 0 :(得分:1)
如果您希望每个实例最多只获得15个元素,请为循环添加一个条件/添加一个if语句,该语句将在15次迭代后退出循环。就像你使用i = i + 1来控制你正在打印的行一样,使用一个新变量(x,y无关紧要)来计算你运行beanChild循环的次数。如果您需要实际代码让我知道,但如果您自己完成了所有这些编码,那么您似乎已经足够熟练了解它:)
编辑:
好的,这是我所得到的一个例子。如果你想每个源最多15个元素,你可以这样做:
For Each userbean In userBeanList
Worksheets("Sheet3").Cells(i, 1).Value = sURL
x = 0
For Each beanChild In userbean.ChildNodes
If x < 15 then
With Worksheets("Sheet3")
.Cells(i, 2).Value = beanChild.nodeName
.Cells(i, 3).Value = beanChild.Text
End With
i = i + 1
x = x + 1
Next beanChild
Else
Exit For
End If
Next userbean