> tempDT <- data.table(colA = c("E","E","A","C","E","C","E","C","E"), colB = c(20,30,40,30,30,40,30,20,10), group = c(1,1,1,1,2,2,2,2,2), want = c(NA, 30, 40, 70,NA,40,70,20,30))
> tempDT
colA colB group want
1: E 20 1 NA
2: E 30 1 30
3: A 40 1 40
4: C 30 1 70
5: E 30 2 NA
6: C 40 2 40
7: E 30 2 70
8: C 20 2 20
9: E 10 2 30
我有三列:'colA'、'colB' 和 'group'。在每个 'group' 内,我想自下而上对 'colB' 累加求和,直到遇到 'colA' 为 'E' 的行为止。
答案 0 :(得分:0)
根据预期的 'want' 列,我们先通过检查 'colA' 中的值是否为 'E' 来创建游程 ID 列 'grp';然后按 'grp' 和 'group' 分组,把 'colB' 的累积和赋给 'want1';
接着获取 'colA' 中重复出现(duplicated)且值为 'E' 的元素的行索引('i1'),并把这些行的 'colB' 值赋给 'want1'。
# Helper column: every run of consecutive non-"E" rows gets its own positive
# run id, while "E" rows are zeroed out.
tempDT[, grp := rleid(colA == "E") * (colA != "E")]

# Running total of colB inside each non-"E" run, restarted per `group`.
tempDT[grp != 0, want1 := cumsum(colB), by = .(grp, group)]

# Row numbers (in the whole table) of "E" rows whose colA value already
# occurred earlier in the same group, i.e. every "E" except the group's first.
i1 <- tempDT[, .I[duplicated(colA) & colA == "E"], by = group]$V1

# NOTE: these "E" rows receive only their own colB; nothing is accumulated
# from non-"E" rows that sit between them and the previous "E".
tempDT[i1, want1 := colB]

# Drop the helper column and print the result.
tempDT[, grp := NULL]
tempDT[]
# colA colB group want want1
#1: E 20 1 NA NA
#2: E 30 1 30 30
#3: A 40 1 40 40
#4: C 30 1 70 70
#5: E 30 2 NA NA
#6: C 30 2 30 30
答案 1 :(得分:0)
希望这有帮助!
library(dplyr)

# Within each group, keep colB on the rows that pass the "below the E row"
# comparison and put the total of those rows on the group's last row; all
# remaining rows become NA.
# NOTE: when a group holds several "E" rows, the comparisons against
# `rev_idx[which(colA == "E")]` rely on R's vector recycling.
df %>%
  group_by(group) %>%
  mutate(
    # position counted from the bottom of the group (last row is 1)
    rev_idx = rev(seq_len(n())),
    sum_colB = sum(colB[rev_idx < rev_idx[which(colA == "E")]]),
    # 0 for rows at or above the compared "E" position, 1 otherwise
    below_e = ifelse(rev_idx >= rev_idx[which(colA == "E")], 0, 1)
  ) %>%
  mutate(
    sum_colB = ifelse(
      below_e == 0,
      NA,
      ifelse(rev_idx == 1, sum_colB, colB)
    )
  ) %>%
  select(-below_e, -rev_idx) %>%
  data.frame()
输出为:
colA colB group want sum_colB
1 E 20 1 NA NA
2 E 30 1 30 NA
3 A 40 1 40 40
4 C 30 1 70 70
5 E 30 2 NA NA
6 C 30 2 30 30
示例数据:
# Six-row sample: colA is a factor with levels A, C, E; the other columns
# are plain doubles. `want` holds the expected result.
df <- data.frame(
  colA = factor(c("E", "E", "A", "C", "E", "C"), levels = c("A", "C", "E")),
  colB = c(20, 30, 40, 30, 30, 30),
  group = c(1, 1, 1, 1, 2, 2),
  want = c(NA, 30, 40, 70, NA, 30)
)
答案 2 :(得分:0)
这里有一种方法:先找出行引用(上一个 'E' 所在的行),再求和。
# input data
tempDT <- data.table(
  colA = c("E", "E", "A", "C", "E", "C", "E", "C", "E"),
  colB = c(20, 30, 40, 30, 30, 40, 30, 20, 10),
  group = c(1, 1, 1, 1, 2, 2, 2, 2, 2),
  want = c(NA, 30, 40, 70, NA, 40, 70, 20, 30)
)
tempDT

# Row reference: position of the closest "E" strictly before position `i`
# in `colA`, or 0L when there is no earlier "E".
#   i    - position to look back from (1-based).
#   colA - vector to search; defaults to the whole table column so that
#          lastEpos(i) can still be called with a single argument.
# Returning 0L (rather than an empty vector) keeps the result length-one, and
# seq_len() avoids the 1:0 trap at i = 1.
lastEpos <- function(i, colA = tempDT$colA) {
  earlier_e <- which(colA[seq_len(i - 1L)] == "E")
  if (length(earlier_e) == 0L) {
    return(0L)
  }
  earlier_e[length(earlier_e)]
}
# Positions are taken inside each group (seq_len(.N) and the group's own
# colA), so the look-back can never reach into a previous group.
tempDT[, rowRef := vapply(seq_len(.N), lastEpos, integer(1), colA = colA),
       by = "group"]

# Sum of `colB` from the row after the referenced "E" up to position `i`.
#   i      - position of the current row (1-based).
#   rowRef - row references as produced by lastEpos(); always < i.
#   colB   - values to add up.
# Both vectors default to the whole table columns for single-argument calls.
sumEpos <- function(i, rowRef = tempDT$rowRef, colB = tempDT$colB) {
  sum(colB[(rowRef[i] + 1L):i])
}
tempDT[, want1 := vapply(seq_len(.N), sumEpos, numeric(1),
                         rowRef = rowRef, colB = colB),
       by = "group"]

# deal with first row in every group
tempDT[, want1 := c(NA, want1[-1]), by = "group"]

# clean output
tempDT[, rowRef := NULL]
tempDT