我有一些看起来像这样的数据:
E_Add Action ActionType Call Callback Email
xxxx Task Call 1 0 0
xxxx Task Call 1 0 0
xxxx Event Start 0 0 0
xxxx Task Call 1 0 0
xxxx Event Trial 0 0 0
yyyy Task Call 1 0 0
yyyy Task Callback 0 1 0
yyyy Task Email 0 0 1
yyyy Task Call 1 0 0
yyyy Event Start 0 0 0
我希望它看起来像这样:
E_Add Action ActionType Call Callback Email CallSum CallbackSum EmailSum
xxxx Task Call 1 0 0
xxxx Task Call 1 0 0 2
xxxx Event Start 0 0 0
xxxx Task Call 1 0 0 1
xxxx Event Trial 0 0 0
yyyy Task Call 1 0 0
yyyy Task Callback 0 1 0 1
yyyy Task Email 0 0 1 1
yyyy Task Call 1 0 0 2
yyyy Event Start 0 0 0
输出不必与此完全一致,但大致就是这个思路。我想按电子邮件地址(E_Add)分组,对 Call、Callback 和 Email 三列分别求和,并把结果放入新列中。但每当 Action 列中出现 “Event” 时,Call、Callback 和 Email 的累计和都要重置;当然,这种重置同样是在每个电子邮件地址分组内进行的。
答案 0 :(得分:2)
您想要的输出不是很清楚,但我认为下面的代码可以满足您的需求(另外,您的数据中 Email 列出现了两次)
library(data.table)
# Names of the columns to be totalled; change this vector to total other
# columns.
cols <- c("Call", "Callback", "Email") # Choose columns to modify
第一个解决方案(简单版)
# Convert df to a data.table by reference, then add one "<col>Sum" column for
# every name in cols. Rows are grouped by E_Add together with a counter that
# increases on each "Event" row, so every Event row opens a new group. Within
# a group the total goes on the last row and all other rows get 0.
setDT(df)
df[, paste0(cols, "Sum") := lapply(.SD, function(v) {
  out <- rep(0L, .N)
  out[.N] <- sum(v)
  out
}),
by = .(E_Add, cumsum(Action == "Event")),
.SDcols = cols][]
# E_Add Action ActionType Call Callback Email.1 CallSum CallbackSum EmailSum
# 1: xxxx Task Call 1 0 0 0 0 0
# 2: xxxx Task Call 1 0 0 2 0 0
# 3: xxxx Event Start 0 0 0 0 0 0
# 4: xxxx Task Call 1 0 0 1 0 0
# 5: xxxx Event Trial 0 0 0 0 0 0
# 6: yyyy Task Call 1 0 0 0 0 0
# 7: yyyy Task Callback 0 1 0 0 0 0
# 8: yyyy Task Email 0 0 1 0 0 0
# 9: yyyy Task Call 1 0 0 2 1 1
# 10: yyyy Event Start 0 0 0 0 0 0
第二个解决方案(与期望输出完全匹配)
# Add one "<col>Sum" column for every name in cols. Rows are grouped by E_Add
# together with a counter that increases on each "Event" row. Within a group
# the total is written on the last row where the column is non-zero; every
# other row gets 0. A group with no non-zero value gets 0 everywhere.
setDT(df)[, paste0(cols, "Sum") := lapply(.SD, function(x) {
  # Zero-filled result that keeps the length and numeric type of x, so every
  # group returns the same type.
  out <- x
  out[] <- 0L
  # Test for "non-zero" rather than "== 1" so counts larger than 1 still
  # count; which() drops NA, so missing values cannot break the condition.
  hits <- which(x != 0L)
  if (length(hits) > 0L) {
    out[hits[length(hits)]] <- sum(x, na.rm = TRUE)
  }
  out
}),
by = .(E_Add, cumsum(Action == "Event")),
.SDcols = cols][]
# E_Add Action ActionType Call Callback Email.1 CallSum CallbackSum EmailSum
# 1: xxxx Task Call 1 0 0 0 0 0
# 2: xxxx Task Call 1 0 0 2 0 0
# 3: xxxx Event Start 0 0 0 0 0 0
# 4: xxxx Task Call 1 0 0 1 0 0
# 5: xxxx Event Trial 0 0 0 0 0 0
# 6: yyyy Task Call 1 0 0 0 0 0
# 7: yyyy Task Callback 0 1 0 0 1 0
# 8: yyyy Task Email 0 0 1 0 0 1
# 9: yyyy Task Call 1 0 0 2 0 0
# 10: yyyy Event Start 0 0 0 0 0 0
根据评论进行修改(如果您想在 Event 行上显示总和)
# Variant that shows each total on the closing "Event" row. The group counter
# is built from the lagged Event flag, so it increases on the row after an
# Event; the Event row is therefore the last row of its group and receives
# the total, while all other rows get 0.
# setDT() makes the snippet work even when df is still a plain data.frame,
# and shift(..., fill = FALSE) keeps the lagged flag the same length as the
# table for any number of rows.
setDT(df)[, paste0(cols, "Sum") :=
  lapply(.SD, function(x) c(rep(0L, .N - 1L), sum(x))),
  by = .(E_Add, cumsum(shift(Action == "Event", fill = FALSE))),
  .SDcols = cols][]
# E_Add Action ActionType Call Callback Email CallSum CallbackSum EmailSum
# 1: xxxx Task Call 1 0 0 0 0 0
# 2: xxxx Task Call 1 0 0 0 0 0
# 3: xxxx Event Start 0 0 0 2 0 0
# 4: xxxx Task Call 1 0 0 0 0 0
# 5: xxxx Event Trial 0 0 0 1 0 0
# 6: yyyy Task Call 1 0 0 0 0 0
# 7: yyyy Task Callback 0 1 0 0 0 0
# 8: yyyy Task Email 0 0 1 0 0 0
# 9: yyyy Task Call 1 0 0 0 0 0
# 10: yyyy Event Start 0 0 0 2 1 1
答案 1 :(得分:1)
# Example data: one row per logged action. "Email" holds the address,
# "Action"/"ActionType" describe the action, and Call/Callback/Emails are
# 0/1 indicator columns for the three task types.
df <- data.frame(
  Email = factor(rep(c("xxxx", "yyyy"), each = 5L)),
  Action = factor(c("Task", "Task", "Event", "Task", "Event",
                    "Task", "Task", "Task", "Task", "Event")),
  ActionType = factor(
    c("Call", "Call", "Start", "Call", "Trial",
      "Call", "Callback", "Email", "Call", "Start"),
    levels = c("Call", "Callback", "Email", "Start", "Trial")
  ),
  Call = c(1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L),
  Callback = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L),
  Emails = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L)
)

# Add per-block totals to the "Event" rows of `data`.
#
# A block is the run of rows for one value of `group_col` up to and including
# an "Event" row. For each column in `value_cols` the block total is written
# to the matching column in `sum_cols` on the Event row; all other rows hold
# "". Totals never carry over from one group to the next, even when a group
# has no closing Event row.
#
# Arguments:
#   data       data.frame to extend.
#   group_col  name of the grouping column (the e-mail address).
#   action_col name of the column that contains "Event" markers.
#   value_cols names of the numeric columns to total.
#   sum_cols   names of the result columns, parallel to value_cols.
# Returns: `data` with the `sum_cols` columns added as character vectors.
add_event_sums <- function(data,
                           group_col = "Email",
                           action_col = "Action",
                           value_cols = c("Call", "Callback", "Emails"),
                           sum_cols = c("CallSum", "CallBackSum", "EmailSum")) {
  stopifnot(length(value_cols) == length(sum_cols))

  # %in% never yields NA, so a missing Action is simply "not an Event".
  is_event <- data[[action_col]] %in% "Event"

  # Lag the flag by one row so the counter increases on the row after an
  # Event; the Event row thereby closes the block it belongs to.
  closed_before <- c(FALSE, is_event)[seq_along(is_event)]
  block <- interaction(data[[group_col]], cumsum(closed_before), drop = TRUE)

  for (k in seq_along(value_cols)) {
    totals <- ave(data[[value_cols[k]]], block, FUN = sum)
    # Totals are stored as text so that non-Event rows print as blanks.
    data[[sum_cols[k]]] <- ifelse(is_event, as.character(totals), "")
  }
  data
}

df <- add_event_sums(df)
Email Action ActionType Call Callback Emails CallSum CallBackSum EmailSum
1 xxxx Task Call 1 0 0
2 xxxx Task Call 1 0 0
3 xxxx Event Start 0 0 0 2 0 0
4 xxxx Task Call 1 0 0
5 xxxx Event Trial 0 0 0 1 0 0
6 yyyy Task Call 1 0 0
7 yyyy Task Callback 0 1 0
8 yyyy Task Email 0 0 1
9 yyyy Task Call 1 0 0
10 yyyy Event Start 0 0 0 2 1 1
答案 2 :(得分:1)
这是我的尝试。我最终直接覆盖了三列(即 Call、Callback 和 Email.1)。这可能是得到您想要结果的一种方式。首先,我在第一个 mutate 中创建了一个分组变量。然后按 Email 和该分组变量对数据进行分组,并计算 Call、Callback 和 Email.1 的总和。最后,我希望非 Event 行显示零而不是留空,所以我在最后一个 mutate 中使用了 replace()。
library(zoo)
library(dplyr)
# Overwrite Call, Callback and Email.1 with per-block totals shown on the
# "Event" rows (0 on every other row).
mydf %>%
  group_by(Email) %>%
  mutate(
    # Tag each Event row with its row number inside the Email group, then
    # fill the tag backwards so preceding rows share the tag of the Event
    # that closes their block. na.rm = FALSE keeps rows after the last Event
    # (they stay NA) so the vector length still matches the group size.
    group = ifelse(Action == "Event", row_number(), NA),
    group = na.locf(group, fromLast = TRUE, na.rm = FALSE)
  ) %>%
  group_by(Email, group) %>%
  # Replace every value by its block total ...
  mutate(across(Call:Email.1, function(col) sum(col, na.rm = TRUE))) %>%
  # ... and keep the total only on Event rows.
  mutate(
    across(Call:Email.1, function(col) replace(col, which(Action != "Event"), 0))
  ) %>%
  ungroup() %>%
  select(-group)
# Email Action ActionType Call Callback Email.1
#1 xxxx Task Call 0 0 0
#2 xxxx Task Call 0 0 0
#3 xxxx Event Start 2 0 0
#4 xxxx Task Call 0 0 0
#5 xxxx Event Trial 1 0 0
#6 yyyy Task Call 0 0 0
#7 yyyy Task Callback 0 0 0
#8 yyyy Task Email 0 0 0
#9 yyyy Task Call 0 0 0
#10 yyyy Event Start 2 1 1
DATA
# Example data: one row per logged action. Email, Action and ActionType are
# factors; Call, Callback and Email.1 are integer 0/1 indicator columns.
mydf <- data.frame(
  Email = factor(rep(c("xxxx", "yyyy"), each = 5L),
                 levels = c("xxxx", "yyyy")),
  Action = factor(
    c("Task", "Task", "Event", "Task", "Event",
      "Task", "Task", "Task", "Task", "Event"),
    levels = c("Event", "Task")
  ),
  ActionType = factor(
    c("Call", "Call", "Start", "Call", "Trial",
      "Call", "Callback", "Email", "Call", "Start"),
    levels = c("Call", "Callback", "Email", "Start", "Trial")
  ),
  Call = c(1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L),
  Callback = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L),
  Email.1 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L)
)