VBA XML DOM搜索可能并不总是存在的项目

时间:2015-04-18 20:11:20

标签: xml vba dom xpath xml-parsing

如何在为其他节点解析数据的同时为可能并不总是属于其父节点的节点创建循环?

假设您有一个包含多个这些项目的非常大的文件,但为了简单起见,请使用此XML(请注意第一个'图书ID'没有我们想要的节点,所以我们的循环已经失败):

<?xml version="1.0"?>
<catalog>
<book id="Adventure" ISBN="00113" version="13">
   <author>Ralls, Kim</author>
   <title>XML Developer's Guide</title>
   <price>44.95</price>
   <misc>
        <editor id="9B">
            <editorBrand>Partial Edit</editorBrand>
            <editorEmphasis>Minimal</editorEmphasis>
        </editor>
   </misc>
</book>
<book id="Adventure" ISBN="00114" version="14">
   <author>Ralls, Kim</author>
   <title>Midnight Rain</title>
   <price>5.95</price>
   <misc>
        <Publisher id="5691">
            <PublisherLocation>Los Angeles</PublisherLocation>
        </Publisher>
        <PublishedAuthor id="Ralls">
            <StoreLocation>Store A/8</StoreLocation>
            <seriesTitle>AAA</seriesTitle>
                <store id="8">
                    <copies>26</copies>
                </store>
    </misc>
</book>
<book id="Adventure" ISBN="00115" version="14">
   <author>Ralls, Kim</author>
   <title>Mist</title>
   <price>15.95</price>
   <misc>
        <Publisher id="8101">
            <PublisherLocation>New Mexico</PublisherLocation>
        </Publisher>
        <PublishedAuthor id="Ralls">
            <StoreLocation>Market C/13</StoreLocation>
            <seriesTitle>BBB</seriesTitle>
                <store id="9">
                    <copies>150</copies>
                </store>
                <store id="13">
                    <copies>60</copies>
                </store>
        </PublishedAuthor>
    </misc>
</book>
<book id="Mystery" ISBN="00116" version="13">
   <author>Bill, Simmons</author>
   <title>NBA Insider</title>
   <price>16.99</price>
   <misc>
        <editor id="11N">
            <editorBrand>Full Edit</editorBrand>
            <editorEmphasis>Full</editorEmphasis>
        </editor>
    </misc>
</book>
</catalog>

我们的VBA代码:

Sub mySub()

Dim XMLFile As Variant
Dim seriesTitle As Variant
Dim series As String, Author As String, Title As String, StoreLocation As String
Dim ISBN As String, copies As String, storelc As String
Dim seriesArray() As String, AuthorArray() As String, BookTypeArray() As String, TitleArray() As String
Dim StoreLocationArray() As String, ISBNArray() As String, copiesArray() As String
Dim i As Long, x As Long, j As Long, pn As Object, loc As Object, arr, ln As String, loc2 As Object

Dim mainWorkBook As Workbook
Dim n As IXMLDOMNode
Set mainWorkBook = ActiveWorkbook
Set XMLFile = CreateObject("Microsoft.XMLDOM")
XMLFile.Load ("C:\Books.xml")
XMLFile.setProperty "SelectionLanguage", "XPath"

x = 1
j = 0

Set seriesTitle = XMLFile.SelectNodes("/catalog/book/misc/PublishedAuthor/seriesTitle")
For i = 0 To (seriesTitle.Length - 1)

series = seriesTitle(i).Text
storelc = seriesTitle(i).SelectSingleNode("store/copies").Text

If series = "AAA" Or series = "BBB" Then

    Set pn = seriesTitle(i).ParentNode
    StoreLocation = pn.getElementsByTagName("StoreLocation").Item(0).nodeTypedValue
    Author = pn.ParentNode.ParentNode.getElementsByTagName("author").Item(0).nodeTypedValue
    Title = pn.ParentNode.ParentNode.getElementsByTagName("title").Item(0).nodeTypedValue
    ISBN = pn.ParentNode.ParentNode.getAttribute("ISBN")

    Set loc = pn.SelectSingleNode("seriesTitle/store[@id='" & storelc & "']/copies")
    If loc Is Nothing Then
        arr = Split(storelc, "/")
        ln = Trim(arr(UBound(arr)))
        Set loc = pn.SelectSingleNode("seriesTitle/store[@id='" & ln & "']/copies")
    End If

    If Not loc Is Nothing Then
        copies = loc.Text
    Else
        copies = "?"
    End If

    AddValue seriesArray, series
    AddValue AuthorArray, Author
    AddValue TitleArray, Title
    AddValue StoreLocationArray, StoreLocation
    AddValue ISBNArray, ISBN
    AddValue copiesArray, copies

    j = j + 1
    x = x + 1
End If
Next

Range("A3").Resize(j, 1).Value = WorksheetFunction.Transpose(AuthorArray)
Range("B3").Resize(j, 1).Value = WorksheetFunction.Transpose(TitleArray)
Range("C3").Resize(j, 1).Value = WorksheetFunction.Transpose(ISBNArray)
Range("D3").Resize(j, 1).Value = WorksheetFunction.Transpose(seriesArray)
Range("E3").Resize(j, 1).Value = WorksheetFunction.Transpose(StoreLocationArray)
Range("F3").Resize(j, 1).Value = WorksheetFunction.Transpose(copiesArray)

End Sub

'Utility method - resize an array as needed, and add a new value

Sub AddValue(arr, v)
    Dim i As Long
    i = -1
    On Error Resume Next
    i = UBound(arr) + 1
    On Error GoTo 0
    If i = -1 Then i = 0
    ReDim Preserve arr(0 To i)
    arr(i) = v
End Sub

我的目标是搜索&#34; seriesTitle&#34;。所以我将专门创建一个For循环,搜索找到的项目的长度,然后解析&#34; seriesTitle&#34;以及ISBN,StoreLocation,作者,书名和副本。

  1. 如果seriesTitle存在 - 它的版本14那么 - 我想打印出seriesTitle,ISBN,StoreLocation,Author,Book Title和copy。
  2. 如果seriesTitle不存在 - 它的版本13那么 - 我只想打印ISBN,作者和书名。
  3. 但问题在于,对于每本书的内容都是如此。存在,不一定是&#34; seriesTitle&#34; - 我们可以绘制的唯一关系是,当版本= 13&#39;没有系列标题。

    • 如果您没有创建For循环搜索的对象,您将如何遍历整个文档?当&#34; seriesTitle&#34;不存在,您将如何继续向ISBN,作者和书名列表中添加项目?

    感谢您教导我任何有用的意见和建议!

2 个答案:

答案 0 :(得分:1)

首先,您的xml包含错误。您缺少结束标记。请参阅下面的新XML

&#13;
&#13;
<?xml version="1.0"?>
<catalog>
  <book id="Adventure" ISBN="00113" version="13">
    <author>Ralls, Kim</author>
    <title>XML Developer's Guide</title>
    <price>44.95</price>
    <misc>
      <editor id="9B">
        <editorBrand>Partial Edit</editorBrand>
        <editorEmphasis>Minimal</editorEmphasis>
      </editor>
    </misc>
  </book>
  <book id="Adventure" ISBN="00114" version="14">
    <author>Ralls, Kim</author>
    <title>Midnight Rain</title>
    <price>5.95</price>
    <misc>
      <Publisher id="5691">
        <PublisherLocation>Los Angeles</PublisherLocation>
      </Publisher>
      <PublishedAuthor id="Ralls">
        <StoreLocation>Store A/8</StoreLocation>
        <seriesTitle>AAA</seriesTitle>
        <store id="8">
          <copies>26</copies>
        </store>
      </PublishedAuthor>
      </misc>
  </book>
  <book id="Adventure" ISBN="00115" version="14">
    <author>Ralls, Kim</author>
    <title>Mist</title>
    <price>15.95</price>
    <misc>
      <Publisher id="8101">
        <PublisherLocation>New Mexico</PublisherLocation>
      </Publisher>
      <PublishedAuthor id="Ralls">
        <StoreLocation>Market C/13</StoreLocation>
        <seriesTitle>BBB</seriesTitle>
        <store id="9">
          <copies>150</copies>
        </store>
        <store id="13">
          <copies>60</copies>
        </store>
      </PublishedAuthor>
    </misc>
  </book>
  <book id="Mystery" ISBN="00116" version="13">
    <author>Bill, Simmons</author>
    <title>NBA Insider</title>
    <price>16.99</price>
    <misc>
      <editor id="11N">
        <editorBrand>Full Edit</editorBrand>
        <editorEmphasis>Full</editorEmphasis>
      </editor>
    </misc>
  </book>
</catalog>​
&#13;
&#13;
&#13;

如果您有多级对象且缺少级别,则必须一次搜索一个级别。每本书都有一个&#34; misc&#34;标签。因此,您首先必须按照&#34; misc&#34;列举书籍。然后测试孩子是否存在。

&#13;
&#13;
Set misc = XMLFile.SelectNodes("catalog/book/misc")
For a = 0 To (misc.Length - 1)
   Set publishedAuthor = XMLFile.SelectNodes("/catalog/book/misc/PublishedAuthor/seriesTitle")
   If Not publishedAuthor Is Nothing Then

   End If
Next a
&#13;
&#13;
&#13;

答案 1 :(得分:1)

根据我的评论,似乎你最好只是循环遍历所有<book>元素并读取他们的子节点以获取所需的值,而不是在DOM树上上下导航

Sub Tester()

Dim d As New MSXML2.DOMDocument
Dim bks As MSXML2.IXMLDOMNodeList
Dim bk As Object
Dim cat As Object, sertitle
Dim isbn, storeLoc, auth, seriesTitle, vsn, copies, title

    d.setProperty "SelectionLanguage", "XPath"
    d.LoadXML Sheet1.Range("A1").Value

    Set bks = d.SelectNodes("/catalog/book")
    For Each bk In bks

        vsn = bk.getAttribute("version")
        isbn = bk.getAttribute("ISBN")
        title = GetTextSafely(bk, "title")
        storeLoc = GetTextSafely(bk, "misc/PublishedAuthor/StoreLocation")
        seriesTitle = GetTextSafely(bk, "misc/PublishedAuthor/seriesTitle")
        auth = GetTextSafely(bk, "author")

        copies = "??" '  I'm unclear exactly what you're doing here....

        Debug.Print vsn, isbn, storeLoc, seriesTitle, auth, title, copies

    Next bk

End Sub

'utility function: get a node's value if it exists
Function GetTextSafely(el As Object, path As String)
    Dim nd, rv
    Set nd = el.SelectSingleNode(path)
    If Not nd Is Nothing Then rv = nd.nodeTypedValue
    GetTextSafely = rv
End Function