按实例过滤特定元素

时间:2014-03-04 13:45:04

标签: xml excel vba excel-vba

我设法从EDGAR DB中提取数据。但是,我已从所有实例文档中提取所有数据。无论我多少尝试找到一种方法只从选定的实例元素中挑选选定的元素文档,我找不到方法。代码如下:

Sub READSITE()

    Dim IE As InternetExplorer
    Dim els, el, colDocLinks As New Collection
    Dim lnk, res
    Dim Ticker As String
    Dim colXMLPaths As New Collection
    Dim XMLElement As String
    Dim fillingType As String

    Set IE = New InternetExplorer

    IE.Visible = False

    Ticker = Worksheets("Sheet1").Range("A1").Value

    fillingType = Worksheets("Sheet3").Range("L1").Value

    LoadPage IE, "https://www.sec.gov/cgi-bin/browse-edgar?" & _
                  "action=getcompany&CIK=" & Ticker & "&type=" & fillingType & _
                  "&dateb=&owner=exclude&count=20"

    Set els = IE.Document.getelementsbytagname("a")
    For Each el In els
        If Trim(el.innertext) = "Documents" Then
            colDocLinks.Add el.href
        End If
    Next el

    For Each lnk In colDocLinks
        LoadPage IE, CStr(lnk)
        For Each el In IE.Document.getelementsbytagname("a")
            If el.href Like "*[0-9].xml" Then
                Debug.Print el.innertext, el.href
                colXMLPaths.Add el.href
            End If
        Next el
    Next lnk

    XMLElement = Range("C1").Value

    'For each link, open the URL and display the Debt Instrument Insterest Rate
    For Each lnk In colXMLPaths
        res = GetData(CStr(lnk), XMLElement)
        With Worksheets("Sheet1").Cells(Rows.Count, 1).End(xlUp).Offset(1, 0)
            .NumberFormat = "@"
            .Value = Ticker
            .Offset(0, 1).Value = lnk
            .Offset(0, 2).Value = res
        End With
    Next lnk

End Sub

Function GetData(sURL As String, sXMLElement As String)
    Dim strXMLSite As String
    Dim objXMLHTTP As New MSXML2.XMLHTTP
    Dim objXMLDoc As New MSXML2.DOMDocument
    Dim objXMLNodexbrl As MSXML2.IXMLDOMNode
    Dim objXMLNodeElement As MSXML2.IXMLDOMNode
    Dim objXMLNodeStkhldEq As MSXML2.IXMLDOMNode

    '''''''''''''''''''''
    Dim userBeanList As MSXML2.IXMLDOMNodeList
    Dim userbean As MSXML2.IXMLDOMNode
    Dim beanChild As MSXML2.IXMLDOMNode
    Dim i As Long
    '''''''''''''''''''''

    ' In Sheet 3 determine if Row 2 is free of data and set start row to 2. Else get the last free row in column b
    Sheets("Sheet3").Select
    Sheets("Sheet3").Range("B2").Select
    If ActiveCell.Value = "" Then
        i = 2
    Else
        Sheets("Sheet3").Range("B1").Select
        Selection.End(xlDown).Select
        ActiveCell.Offset(1, -1).Range("A1").Select
        i = ActiveCell.Row
    End If


    'Get tge XML from SEc

    GetData = "?" 'No data from XML
    objXMLHTTP.Open "GET", sURL, False  '<<EDIT: GET the site
    objXMLHTTP.send
    objXMLDoc.LoadXML objXMLHTTP.responseText
    objXMLDoc.setProperty "SelectionNamespaces", "xmlns:r='http://www.xbrl.org/2003/instance'"

    Set objXMLNodexbrl = objXMLDoc.SelectSingleNode("r:xbrl")

    'Get a single element value from the returned XML
    Set objXMLNodeElement = objXMLNodexbrl.SelectSingleNode(sXMLElement)

    If Not objXMLNodeElement Is Nothing Then
        GetData = objXMLNodeElement.Text
    End If

    'Print all nodes name and value for each Element in the XML
    Set userBeanList = objXMLDoc.SelectNodes("r:xbrl")
    For Each userbean In userBeanList
    Worksheets("Sheet3").Cells(i, 1).Value = sURL
        For Each beanChild In userbean.ChildNodes
                With Worksheets("Sheet3")
                    .Cells(i, 2).Value = beanChild.nodeName
                    .Cells(i, 3).Value = beanChild.Text
                End With
                i = i + 1
        Next beanChild
    Next userbean

End Function

Sub LoadPage(IE As Object, url As String)
    IE.Navigate url
    Do While IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE
        DoEvents
    Loop
End Sub

如何为每个实例文档提取所有数据而不是每个实例文档选择15个元素?

1 个答案:

答案 0 :(得分:1)

如果您希望每个实例最多只获得15个元素,请为循环添加一个条件/添加一个if语句,该语句将在15次迭代后退出循环。就像你使用i = i + 1来控制你正在打印的行一样,使用一个新变量(x,y无关紧要)来计算你运行beanChild循环的次数。如果您需要实际代码让我知道,但如果您自己完成了所有这些编码,那么您似乎已经足够熟练了解它:)

编辑:

好的,这是我所得到的一个例子。如果你想每个源最多15个元素,你可以这样做:

For Each userbean In userBeanList
Worksheets("Sheet3").Cells(i, 1).Value = sURL

x = 0  

For Each beanChild In userbean.ChildNodes
     If x < 15 then
           With Worksheets("Sheet3")
                .Cells(i, 2).Value = beanChild.nodeName
                .Cells(i, 3).Value = beanChild.Text
            End With
            i = i + 1
            x = x + 1
    Next beanChild
    Else
        Exit For
    End If
Next userbean