我有以下数据集
Key ID Status 1 Status 2 Order ID
1 A1 FALSE TRUE 1234-USF-0025
1 A1 FALSE TRUE 1234-USF-0026
1 A1 FALSE TRUE 1234-USF-0027
2 A1 TRUE TRUE 1234-USF-0025
2 A1 TRUE TRUE 1234-USF-0026
2 A1 TRUE TRUE 1234-USF-0027
3 A1 FALSE TRUE 1234-USF-0025
3 A1 FALSE TRUE 1234-USF-0026
3 A1 FALSE TRUE 1234-USF-0027
4 A2 TRUE TRUE 1234-USF-0028
4 A2 TRUE TRUE 1234-USF-0029
4 A2 TRUE TRUE 1234-USF-0030
5 A3 TRUE TRUE 1234-USF-0031
5 A3 TRUE TRUE 1234-USF-0032
5 A3 TRUE TRUE 1234-USF-0033
6 A4 TRUE TRUE 1234-USF-0034
6 A4 TRUE TRUE 1234-USF-0035
6 A4 TRUE TRUE 1234-USF-0036
我需要以下内容
Order ID ID TRUE FALSE
1234-USF-0025 A1 2 1,3
1234-USF-0026 A1 2 1,3
1234-USF-0027 A1 2 1,3
1234-USF-0028 A2 4
1234-USF-0029 A2 4
1234-USF-0030 A2 4
1234-USF-0031 A3 5
1234-USF-0032 A3 5
1234-USF-0033 A3 5
1234-USF-0034 A4 6
1234-USF-0035 A4 6
1234-USF-0036 A4 6
在第二个表(我需要的表)中,每个Order ID
都在相应的ID
旁边列出。尽管A1
在原始数据集中列出了9次,但A1
总共只有3个唯一的Order ID
。但是,A1
也与3个不同的键相关联。
目标是:对于Status 1 和Status 2 均为TRUE 的每个Order ID 和ID 组合,合并其Keys 并将它们列在TRUE 列中。
对于其中至少一个Status 为FALSE 的Order ID 和ID 组合,应在FALSE 列下列出Keys 。
我尝试过的事情
我从TRUE 列开始,使用INDEX-MATCH作为数组公式。尽管我知道以下公式无法满足我想要的最终目标,但我还是尝试从小处着手,并以此作为基础公式。不幸的是,我对数组的了解有限,不确定该如何继续,因为我不明白数组为什么会返回这样的结果,也不知道从这里如何达到我的目标。
数组公式:=INDEX($C$2:$C$19,MATCH(1,($H2 = $B$2:$B$19) * ($G2 = $E$2:$E$19)))
TRUE:=IF(AND($C2=TRUE,$D2=TRUE),$A2,"")
FALSE:=IF(OR($C2<>TRUE,$D2<>TRUE),$A2,"")
注释:
每个ID 至少与一个Key 相关联,但也可以有更多。
同一ID 下的Order ID 可以重复,但只能针对不同的Keys 重复。
我也对基于VBA、Python 或R 的解决方案持开放态度,但不确定如何为该任务着手编写脚本,因此我一直专注于Excel公式。
答案 0 :(得分:1)
这是一个比较冗长的解决方案,并假定您的数据布局与您发布的数据完全相同(并且位于Sheet1上),但它可以正常工作(我认为)。您还需要为输出数据创建第二张工作表。如果您不确定该代码应放在哪里或如何运行,请告诉我。
Sub DoStuff()
    ' Builds a per-Order-ID summary on Sheet2 from the data on Sheet1.
    '
    ' Input  (Sheet1, header in row 1): A = Key, B = ID, C = Status 1,
    '        D = Status 2, E = Order ID.
    ' Output (Sheet2, header in row 1): A = Order ID, B = ID,
    '        C = keys whose Status 1 and Status 2 are both True,
    '        D = all other keys. Keys are joined with ", ".
    '
    ' Sheet2 is cleared completely before the summary is written.
    Dim lastRow As Long, newRow As Long
    Dim i As Long, j As Long, k As Long
    Dim exists As Boolean
    Dim orderId As Variant
    Dim trueKeys As String, falseKeys As String

    'Initialize the output sheet
    Sheet2.Cells.Clear
    Sheet2.Cells(1, 1) = "Order ID"
    Sheet2.Cells(1, 2) = "ID"
    Sheet2.Cells(1, 3) = "TRUE"
    Sheet2.Cells(1, 4) = "FALSE"
    newRow = 2

    'Last used row of the Order ID column. End(xlUp) gives a row index even
    'when the column contains blanks or formulas, unlike counting constants.
    lastRow = Sheet1.Cells(Sheet1.Rows.Count, 5).End(xlUp).Row

    'Loop through the first sheet and remove duplicates
    For i = 2 To lastRow
        orderId = Sheet1.Cells(i, 5).Value

        'Blank Order IDs are skipped explicitly
        If CStr(orderId) <> "" Then
            'Only rows already written (2 .. newRow - 1) can hold a duplicate.
            'Compare against the stored cell values, as Excel may have
            'coerced them when they were written.
            exists = False
            For j = 2 To newRow - 1
                If orderId = Sheet2.Cells(j, 1).Value Then
                    exists = True
                    Exit For
                End If
            Next j

            If Not exists Then
                Sheet2.Cells(newRow, 1) = orderId
                Sheet2.Cells(newRow, 2) = Sheet1.Cells(i, 2).Value

                'Populate the true and false columns: collect the keys of
                'every source row that carries this Order ID
                trueKeys = ""
                falseKeys = ""
                For k = 2 To lastRow
                    If Sheet1.Cells(k, 5).Value = orderId Then
                        If Sheet1.Cells(k, 3).Value = True And Sheet1.Cells(k, 4).Value = True Then
                            If Len(trueKeys) > 0 Then trueKeys = trueKeys & ", "
                            trueKeys = trueKeys & Sheet1.Cells(k, 1).Value
                        Else
                            If Len(falseKeys) > 0 Then falseKeys = falseKeys & ", "
                            falseKeys = falseKeys & Sheet1.Cells(k, 1).Value
                        End If
                    End If
                Next k

                'Write each list once; an empty list leaves the cell blank
                If Len(trueKeys) > 0 Then Sheet2.Cells(newRow, 3).Value = trueKeys
                If Len(falseKeys) > 0 Then Sheet2.Cells(newRow, 4).Value = falseKeys

                newRow = newRow + 1
            End If
        End If
    Next i
End Sub
使用您发布的数据得出的结果:
答案 1 :(得分:0)
我使用了字典和Class模块来帮助收集和转换数据。 由于命名的参数或多或少是显而易见的,它还具有易于跟踪和维护的优点。
我还使用VBA数组来“完成工作”,因为对于任何大型数据集,执行速度都会快得多。
在代码中应该清楚地定义要用于源数据和结果的工作表和范围
Option Explicit
'Set reference to Microsoft Scripting Runtime
Sub orgOrders()
    ' Groups the source rows by Order ID and writes one summary row per
    ' Order ID: Order ID, ID, keys with both statuses True, remaining keys.
    '
    ' Source columns (header in row 1): 1 = Key, 2 = ID, 3 = Status 1,
    ' 4 = Status 2, 5 = Order ID.
    ' Requires a reference to Microsoft Scripting Runtime (Dictionary) and
    ' the cOrder class module.
    Dim wsSrc As Worksheet, wsRes As Worksheet, rRes As Range
    Dim vSrc As Variant, vRes As Variant
    Dim dOrds As Dictionary, cOrd As cOrder
    Dim I As Long, V As Variant
    Dim sKey As String

    'set source and result worksheet and range
    'NOTE: source and results both point at "Sheet2"; results start at
    'column 10 (J). Adjust these three lines to match your workbook.
    Set wsSrc = Worksheets("Sheet2")
    Set wsRes = Worksheets("Sheet2")
    Set rRes = wsRes.Cells(1, 10)

    'read source data into array (columns 1-5, down to the last used row
    'of column 5)
    With wsSrc
        vSrc = .Range(.Cells(1, 1), .Cells(.Rows.Count, 5).End(xlUp))
    End With

    'Read into order dictionary, keyed by Order ID.
    'A cOrder is created only the first time an Order ID is seen; later rows
    'for the same Order ID just add their key to the existing object.
    Set dOrds = New Dictionary
    For I = 2 To UBound(vSrc, 1)
        sKey = vSrc(I, 5) 'Order ID
        If dOrds.Exists(sKey) Then
            Set cOrd = dOrds(sKey)
        Else
            Set cOrd = New cOrder
            cOrd.ID = vSrc(I, 2)
            cOrd.Key = vSrc(I, 1)
            cOrd.Status1 = vSrc(I, 3)
            cOrd.Status2 = vSrc(I, 4)
            dOrds.Add Key:=sKey, Item:=cOrd
        End If
        'Same coercions the cOrder property setters would apply
        cOrd.addTrueFalse CLng(vSrc(I, 1)), CBool(vSrc(I, 3)), CBool(vSrc(I, 4))
    Next I

    'Dim Results array: row 0 holds the headers, rows 1..Count the data
    ReDim vRes(0 To dOrds.Count, 1 To 4)

    'Headers
    vRes(0, 1) = "Order ID"
    vRes(0, 2) = "ID"
    vRes(0, 3) = "TRUE"
    vRes(0, 4) = "FALSE"

    'Data
    I = 0
    For Each V In dOrds.Keys
        I = I + 1
        Set cOrd = dOrds(V)
        vRes(I, 1) = V
        vRes(I, 2) = cOrd.ID
        'A bucket that was never filled yields Empty, leaving the cell blank
        vRes(I, 3) = cOrd.TrueFalse(True)
        vRes(I, 4) = cOrd.TrueFalse(False)
    Next V

    'Write results (+1 row because vRes is 0-based in its first dimension)
    Set rRes = rRes.Resize(UBound(vRes, 1) + 1, UBound(vRes, 2))
    With rRes
        .EntireColumn.Clear
        .Value = vRes
        .Rows(1).Font.Bold = True
        With .EntireColumn
            .HorizontalAlignment = xlCenter
            .AutoFit
        End With
    End With
End Sub
将此类模块重命名为cOrder
Option Explicit

' cOrder: one order record plus two comma-separated key lists, stored in a
' Dictionary under the Boolean keys True and False.
' Requires a reference to Microsoft Scripting Runtime (Dictionary).

Private mKey As Long
Private mID As String
Private mStatus1 As Boolean
Private mStatus2 As Boolean
Private mBuckets As Dictionary

' Creates the empty bucket dictionary when the object is instantiated.
Private Sub Class_Initialize()
    Set mBuckets = New Dictionary
End Sub

' --- Key ---------------------------------------------------------------
Public Property Get Key() As Long
    Key = mKey
End Property

Public Property Let Key(Value As Long)
    mKey = Value
End Property

' --- ID ----------------------------------------------------------------
Public Property Get ID() As String
    ID = mID
End Property

Public Property Let ID(Value As String)
    mID = Value
End Property

' --- Status1 -----------------------------------------------------------
Public Property Get Status1() As Boolean
    Status1 = mStatus1
End Property

Public Property Let Status1(Value As Boolean)
    mStatus1 = Value
End Property

' --- Status2 -----------------------------------------------------------
Public Property Get Status2() As Boolean
    Status2 = mStatus2
End Property

Public Property Let Status2(Value As Boolean)
    mStatus2 = Value
End Property

' Files Key under the True bucket when both statuses are True, otherwise
' under the False bucket. The first key in a bucket is stored as-is; later
' keys are appended to it with a "," separator.
Public Function addTrueFalse(Key As Long, Status1 As Boolean, Status2 As Boolean)
    Dim bucket As Boolean

    bucket = (Status1 And Status2)

    If mBuckets.Exists(bucket) Then
        mBuckets(bucket) = mBuckets(bucket) & "," & Key
    Else
        mBuckets.Add Key:=bucket, Item:=Key
    End If
End Function

' Exposes the bucket dictionary (keys: True / False).
Public Property Get TrueFalse() As Dictionary
    Set TrueFalse = mBuckets
End Property