使用VBA宏解析xml / html文本文件

时间:2015-02-01 00:08:59

标签: xml vba parsing

我在一个文本文件中有一些原始数据输出来自我需要存储然后格式化的程序。文件输出如下:

<Request>
    <code_set>1604</code_set>
    <start_code_value>-1</start_code_value>
    <block_size>10000</block_size>
    <ignore_access_ind>0</ignore_access_ind>
</Request>

所有数据都存储在请求或回复块

所有数据都位于文本文件的 单行

每个请求/回复块都有许多子块 - 这是有条件的;可能有1,可能有1,000

块内的数据也是有条件的 - 可能有数据,可能没有

我需要解析它,因此在excel表的A列中,它读取子块的名称,列B包含数据(如果有的话)。

1 个答案:

答案 0 :(得分:1)

Option Explicit

Sub ExtractParams()
    Dim sRawData, arrCells(), arrBlocks, arrBlock, arrSubBlocks, j, n, k
    ' Read raw data from the specified file
    sRawData = ReadTextFile("C:\Users\DELL\Desktop\tmp\tmp.txt", -2)
    j = 0
    ' Put each Request / Reply block name-content pairs into array
    ParseText sRawData, "<(Request|Reply)>([\s\S]*?)<\/\1>", arrBlocks
    For Each arrBlock In arrBlocks
        ' Put each subblock name-content pairs into array
        ParseText arrBlock(1), "<(\w*?)[\s\S]*?>([\s\S]*?)<\/\1>", arrSubBlocks
        n = UBound(arrSubBlocks)
        ' Converting array of arrays to 2-dimensional array
        Erase arrCells
        ReDim Preserve arrCells(n, 1)
        For k = 0 To n
            arrCells(k, 0) = arrSubBlocks(k)(0)
            arrCells(k, 1) = arrSubBlocks(k)(1)
        Next
        ' Output 2-dimensional array starting from next unfilled row
        ActiveSheet.Range(Cells(j + 1, 1), Cells(j + n + 1, 2)).Value = arrCells
        ActiveSheet.Columns.AutoFit
        j = j + n + 1
    Next
End Sub

Sub ParseText(sText, sPattern, arrItems)
    Dim oMatch, sSubMatch, arrItem
    arrItems = Array()
    With CreateObject("VBScript.RegExp")
        .Global = True
        .MultiLine = True
        .Pattern = sPattern
        ' Process matches
        For Each oMatch In .Execute(sText)
            arrItem = Array()
            ' Process submatches
            For Each sSubMatch In oMatch.SubMatches
                ' Each submatch string add to array of submatches
                PushItem arrItem, sSubMatch
            Next
            ' Array of submatches for each match add to output array
            PushItem arrItems, arrItem
        Next
    End With
End Sub

Sub PushItem(arrList, varItem)
    ReDim Preserve arrList(UBound(arrList) + 1)
    arrList(UBound(arrList)) = varItem
End Sub

Function ReadTextFile(sPath, iFormat)
    ' iFormat defines the file format
    ' -2 - system default
    ' -1 - Unicode
    ' 0 - ASCII
    With CreateObject("Scripting.FileSystemObject").OpenTextFile(sPath, 1, False, iFormat)
        ReadTextFile = ""
        If Not .AtEndOfStream Then ReadTextFile = .ReadAll
        .Close
    End With
End Function