我的目标如下:
我必须面对的其他约束:
我在下面提供输入和输出示例。如果有人可以帮助我,我将不胜感激。
//INPUT
<SequenceNumber="1">
<PurchaseSegment DayDateTime="2020-02-29T06:45:00" ArrivalDateTime="2020-02-29T09:40:00" StopQuantity="0" PurchaseNumber="229" ElapsedTime="115">"
<DayPoS LocationCode="AAA" DockID="4" />"
<ArrivalPoS LocationCode="CCC" />"
</SequenceNumber>
<SequenceNumber="2">
<PurchaseSegment DayDateTime="2019-09-28T06:41:00" ArrivalDateTime="2020-02-29T09:40:00" StopQuantity="1" PurchaseNumber="123" ElapsedTime="115">"
<DayPoS LocationCode="AAA" DockID="3" />"
<ArrivalPoS LocationCode="QQC" />"
</SequenceNumber>
在这里您还可以找到我的VBA尝试,我利用了一些已经找到但未成功的VBA代码。
Public Sub TextDataToColumn()
Dim val As Variant val = "PurchaseSegment DayDateTime" // it would be great to have a list of paramaters here...
Set c = Cells.Find(val, LookIn:=xlValues, MatchCase:=False)
If Not c Is Nothing Then
Do
MsgBox "Value of val is found at " & c.Address & vbCrLf & c.Offset(0, 1).Value & vbCrLf & c.Offset(0, 2).Value
Set c = Cells.FindNext(c)
Loop While Not c Is Nothing And c.Address <> firstaddress
End If
End Sub
答案 0 :(得分:2)
您可以使用VBA文本功能解析文本。
正如我在评论中提到的那样,您发布的内容不是有效的XML文档。
我将使用Dictionary和Classs所做的事情改编为使用Collection和Array。 (尽管有一个Mac加载项可以启用字典对象的使用。)
在对文本行进行预处理以使其易于解析之后,我们遍历所有文本行并将适当的项目存储在数组中定义的位置。
我们将项目的每一行收集到收集对象中,然后将它们输出到工作表上。
它适用于您发布的样本数据,但是如果您的数据除了无效的xml之外,在不同节点的命名和格式方面也存在不规则之处,则您将需要一种更复杂的解析方法。
Option Explicit
Option Compare Text
Sub splitSeq()
Dim cS As Collection
Dim WB As Workbook, wsSrc As Worksheet, wsRes As Worksheet
Dim rRes As Range
Dim vSrc As Variant, vRes As Variant
Dim I As Long, v, w, x
'Set workbook, worksheet and range parameters
Set WB = ThisWorkbook
With WB
Set wsSrc = .Worksheets("Sheet4") 'or wherever the data exists
On Error Resume Next 'add a sheet if not present
Set wsRes = Worksheets("Results")
Select Case Err.Number
Case 9 'need to add a sheet
Set wsRes = WB.Worksheets.Add
wsRes.Name = "Results"
Case Is <> 0 'Something else went wrong
MsgBox "Error number " & Err.Number & vbLf & Err.Description
Err.Clear
End Select
End With
'set results range
Set rRes = wsRes.Cells(1, 1)
'read data into array for processing speed
'assuming all data is in column A
With wsSrc
vSrc = .Range(.Cells(1, 1), .Cells(.Rows.Count, 1).End(xlUp))
End With
'loop through data and save the Sequence objects
'Each starts with <sequence and ends with </sequence
Set cS = New Collection
For I = 1 To UBound(vSrc, 1)
If vSrc(I, 1) Like "<Sequence*" Then
ReDim vRes(1 To 8) 're-initialize array
Else
'Remove confusing spaces in node names and <> in attributes
vSrc(I, 1) = Replace(vSrc(I, 1), "Segment DayDate", "SegmentDayDate")
vSrc(I, 1) = Replace(vSrc(I, 1), "Pos Location", "PosLocation")
vSrc(I, 1) = Replace(vSrc(I, 1), "<", "")
vSrc(I, 1) = Replace(vSrc(I, 1), ">""", "")
vSrc(I, 1) = Replace(vSrc(I, 1), ">", "")
v = Split(vSrc(I, 1))
For Each w In v
x = Split(w, "=")
'Remove leading and trailing double quotes
If Left(x(1), 1) = """" And Right(x(1), 1) = """" Then
x(1) = Mid(x(1), 2)
x(1) = Left(x(1), Len(x(1)) - 1)
End If
Select Case x(0)
Case "PurchaseSegmentDayDateTime"
vRes(1) = x(1)
Case "ArrivalDateTime"
vRes(2) = x(1)
Case "StopQuantity"
vRes(3) = x(1)
Case "PurchaseNumber"
vRes(4) = x(1)
Case "ElapsedTime"
vRes(5) = x(1)
Case "DayPosLocationCode"
vRes(6) = x(1)
Case "ArrivalPosLocationCode"
vRes(8) = x(1)
Case "DockID"
vRes(7) = x(1)
Case "/SequenceNumber"
cS.Add vRes
End Select
Next w
End If
Next I
'set up results array
ReDim vRes(0 To cS.Count, 1 To 8)
'Headers
vRes(0, 1) = "PurchaseSegment DayDateTime"
vRes(0, 2) = "ArrivalDateTime"
vRes(0, 3) = "StopQuantity"
vRes(0, 4) = "PurchaseNumber"
vRes(0, 5) = "ElapsedTime"
vRes(0, 6) = "DayPoS LocationCode"
vRes(0, 7) = "DockID"
vRes(0, 8) = "ArrivalPoS LocationCode"
'fill in the data
I = 0
For Each v In cS
I = I + 1
With v
vRes(I, 1) = v(1)
vRes(I, 2) = v(2)
vRes(I, 3) = v(3)
vRes(I, 4) = v(4)
vRes(I, 5) = v(5)
vRes(I, 6) = v(6)
vRes(I, 7) = v(7)
vRes(I, 8) = v(8)
End With
Next v
'Set Results range
Set rRes = rRes.Resize(UBound(vRes, 1) + 1, UBound(vRes, 2))
'Write and format results
With rRes
.EntireColumn.Clear
.Value2 = vRes
.Style = "Output"
.EntireColumn.AutoFit
End With
End Sub