TraMineR::seqecmpgroup 报错:变量长度不一致(variable lengths differ,found for 'group')

时间:2016-04-21 09:26:50

标签: r traminer

考虑以下数据集:

# Example data set in long (one row per event) format, stored as a dput()-style
# literal. It is declared with 300 rows (row.names = c(NA, 300L)) and four
# integer columns:
#   CustumerId     - customer identifier, values 1..34 (name spelled as in the
#                    original data; later code refers to it with this spelling)
#   ProductId      - product identifier, values 1..10
#   DaysSinceEpoch - integer day offset of the event
#   BoughtPAD      - 0/1 flag recorded on each event row
SimulatedDated <- structure(list(CustumerId = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 
13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 
15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 
17L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 
18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 20L, 20L, 
20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 
23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 
26L, 26L, 26L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 28L, 
28L, 28L, 28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
29L, 29L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 31L, 31L, 
31L, 31L, 31L, 31L, 31L, 31L, 31L, 32L, 32L, 32L, 32L, 32L, 32L, 
32L, 32L, 32L, 32L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 
34L, 34L, 34L, 34L, 34L), ProductId = c(6L, 3L, 4L, 9L, 8L, 10L, 
1L, 5L, 7L, 1L, 5L, 3L, 4L, 2L, 7L, 6L, 10L, 8L, 7L, 4L, 10L, 
5L, 1L, 3L, 8L, 6L, 2L, 9L, 6L, 1L, 2L, 4L, 7L, 8L, 5L, 9L, 10L, 
3L, 2L, 5L, 9L, 4L, 10L, 3L, 6L, 1L, 8L, 8L, 10L, 2L, 4L, 3L, 
9L, 5L, 6L, 5L, 6L, 4L, 9L, 10L, 8L, 2L, 7L, 1L, 3L, 10L, 3L, 
2L, 8L, 9L, 7L, 5L, 4L, 1L, 7L, 1L, 3L, 2L, 4L, 8L, 9L, 6L, 5L, 
10L, 1L, 9L, 2L, 4L, 7L, 3L, 8L, 7L, 9L, 8L, 4L, 10L, 3L, 5L, 
1L, 6L, 2L, 6L, 4L, 9L, 3L, 10L, 1L, 8L, 7L, 5L, 2L, 9L, 5L, 
7L, 4L, 10L, 1L, 3L, 2L, 6L, 5L, 9L, 2L, 4L, 3L, 8L, 1L, 10L, 
6L, 7L, 10L, 9L, 2L, 1L, 5L, 8L, 6L, 4L, 7L, 3L, 9L, 8L, 3L, 
5L, 6L, 10L, 1L, 7L, 4L, 1L, 6L, 9L, 10L, 3L, 4L, 2L, 8L, 7L, 
10L, 8L, 1L, 6L, 4L, 5L, 9L, 3L, 7L, 2L, 4L, 8L, 3L, 7L, 10L, 
1L, 6L, 5L, 5L, 6L, 4L, 7L, 1L, 10L, 3L, 10L, 8L, 3L, 1L, 4L, 
5L, 6L, 2L, 9L, 5L, 6L, 4L, 8L, 2L, 10L, 3L, 1L, 8L, 4L, 10L, 
6L, 9L, 7L, 2L, 3L, 8L, 3L, 6L, 7L, 9L, 4L, 5L, 2L, 10L, 1L, 
5L, 9L, 3L, 7L, 6L, 10L, 8L, 2L, 4L, 8L, 7L, 1L, 4L, 2L, 10L, 
10L, 3L, 8L, 1L, 7L, 5L, 4L, 6L, 2L, 10L, 6L, 1L, 2L, 5L, 4L, 
8L, 1L, 10L, 8L, 3L, 2L, 9L, 5L, 6L, 4L, 9L, 10L, 6L, 2L, 1L, 
7L, 4L, 8L, 5L, 1L, 5L, 9L, 10L, 3L, 8L, 7L, 2L, 4L, 10L, 1L, 
5L, 7L, 6L, 2L, 3L, 4L, 9L, 8L, 1L, 5L, 2L, 7L, 3L, 6L, 10L, 
4L, 9L, 9L, 5L, 10L, 8L, 2L), DaysSinceEpoch = c(7L, 20L, 31L, 
40L, 105L, 146L, 162L, 169L, 212L, 10L, 18L, 31L, 65L, 84L, 122L, 
156L, 202L, 206L, 1L, 4L, 7L, 11L, 14L, 24L, 25L, 100L, 148L, 
149L, 3L, 10L, 12L, 14L, 18L, 26L, 35L, 41L, 96L, 147L, 9L, 22L, 
66L, 80L, 102L, 104L, 170L, 199L, 234L, 10L, 24L, 36L, 38L, 75L, 
122L, 163L, 169L, 9L, 16L, 35L, 39L, 54L, 58L, 79L, 116L, 133L, 
224L, 27L, 35L, 37L, 49L, 73L, 91L, 105L, 141L, 252L, 16L, 28L, 
51L, 73L, 76L, 83L, 126L, 202L, 97L, 105L, 150L, 172L, 203L, 
207L, 223L, 256L, 259L, 25L, 28L, 38L, 40L, 63L, 100L, 120L, 
176L, 186L, 191L, 7L, 22L, 36L, 37L, 40L, 41L, 53L, 67L, 114L, 
233L, 1L, 16L, 17L, 23L, 40L, 52L, 125L, 184L, 186L, 12L, 42L, 
53L, 65L, 67L, 69L, 83L, 149L, 154L, 265L, 10L, 14L, 33L, 47L, 
67L, 106L, 133L, 181L, 247L, 258L, 6L, 21L, 26L, 41L, 49L, 68L, 
89L, 112L, 119L, 9L, 34L, 88L, 91L, 102L, 110L, 132L, 171L, 200L, 
6L, 14L, 21L, 36L, 40L, 60L, 64L, 88L, 109L, 208L, 8L, 17L, 21L, 
55L, 77L, 85L, 97L, 168L, 18L, 28L, 42L, 44L, 70L, 77L, 101L, 
14L, 23L, 33L, 84L, 107L, 123L, 124L, 125L, 25L, 29L, 33L, 57L, 
79L, 83L, 98L, 112L, 119L, 5L, 31L, 64L, 91L, 102L, 131L, 222L, 
234L, 27L, 46L, 48L, 60L, 61L, 64L, 72L, 103L, 161L, 8L, 24L, 
27L, 50L, 60L, 62L, 92L, 99L, 147L, 159L, 16L, 19L, 20L, 84L, 
175L, 202L, 17L, 21L, 25L, 46L, 69L, 121L, 161L, 175L, 267L, 
10L, 14L, 20L, 39L, 58L, 90L, 229L, 32L, 35L, 39L, 40L, 60L, 
66L, 98L, 153L, 173L, 2L, 3L, 25L, 46L, 51L, 80L, 96L, 166L, 
202L, 43L, 70L, 76L, 77L, 115L, 160L, 183L, 202L, 223L, 25L, 
33L, 61L, 72L, 74L, 77L, 85L, 91L, 152L, 265L, 16L, 62L, 63L, 
64L, 66L, 82L, 104L, 126L, 181L, 47L, 49L, 55L, 58L, 67L), BoughtPAD = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L)), .Names = c("CustumerId", 
"ProductId", "DaysSinceEpoch", "BoughtPAD"), row.names = c(NA, 
300L), class = "data.frame")

然后运行以下代码:

# Reproducer for the error quoted below; kept exactly as posted.
# TraMineR provides seqecreate(), seqefsub() and seqecmpgroup().
library(TraMineR)
# Build the event-sequence object from the long-format rows: CustumerId is
# passed as the sequence id, DaysSinceEpoch as the timestamp and ProductId as
# the event.
SimSeq <- seqecreate(id = SimulatedDated$CustumerId, 
                        timestamp = SimulatedDated$DaysSinceEpoch,
                        event = SimulatedDated$ProductId)
# Cohort is built directly from the per-row BoughtPAD column, so it has one
# element per row of SimulatedDated (i.e. per event), not one per customer id.
# NOTE(review): factor() applies labels in level order, so 0 -> "PAD" and
# 1 -> "NPAD"; that looks inverted relative to the column name -- confirm.
Cohort <- factor(SimulatedDated$BoughtPAD, labels = c("PAD", "NPAD"))
# Extract subsequences from SimSeq using pMinSupport = .01
# (presumably a minimum support expressed as a proportion -- see TraMineR docs).
Fsubseq <- seqefsub(seq = SimSeq, pMinSupport = .01)
# Compare the subsequences across the Cohort groups. This is the call that
# raises "variable lengths differ (found for 'group')".
DiscrCohort <- seqecmpgroup(subseq = Fsubseq, group = Cohort)

产生:

Error in model.frame.default(formula = ww ~ group + seqmatrix[, index]) : 
  variable lengths differ (found for 'group')

我想知道,是什么原因可能导致了这个问题?

1 个答案:

答案 0 :(得分:1)

group 变量的长度应等于序列的数量,即您的例子中的客户数。此外,在同一条序列(同一个客户)内,它的取值应保持不变(您的示例中并非如此)。

您用作 group 参数的 Cohort 变量的长度等于事件总数(300),而您只有 34 个客户。因此,您需要按 CustumerId 对它进行聚合。

下面是具体做法(这里对每个客户取 BoughtPAD 的最大值作为该客户的分组值):

# Collapse the per-event rows to one row per customer so that the grouping
# vector has one entry per CustumerId instead of one per event row.
# aggregate() applies max within each id group, so a customer's BoughtPAD is
# the largest value recorded on any of that customer's rows.
bylist <- list(id = SimulatedDated[["CustumerId"]])
agg.PAD <- aggregate(
  x = SimulatedDated[, c("CustumerId", "BoughtPAD")],
  by = bylist,
  FUN = max
)
# Per-customer group indicator, in the row order returned by aggregate().
Cohort <- agg.PAD[["BoughtPAD"]]

现在,您可以查找最能区分群组的子序列

# Re-run the group comparison with the per-customer Cohort vector, then show
# the first ten entries of the result.
DiscrCohort <- seqecmpgroup(
  subseq = Fsubseq,
  group = Cohort
)
print(DiscrCohort[seq_len(10)])

希望这有帮助。