我想分别计算两个组各自的累计和(cumulative sum),并把这些累计和放在各自单独的列中。
这是数据帧,根据时间排序:
time group value
0 A 0
0 B 0
0 A 0
1 A 0
1 B 1
1 B 0
2 B 1
2 A 1
2 A 1
2 A -1
3 A 0
3 B 1
这是我目前按组计算cumsum并创建cumsum列的做法:
# Running total of `value` within each group, written back in the original
# row order (ave() keeps the result aligned with its input).
df[["cumsum"]] <- ave(df[["value"]], df[["group"]], FUN = cumsum)
time group value cumsum
0 A 0 0
0 B 0 0
0 A 0 0
1 A 0 0
1 B 1 1
1 B 0 1
2 B 1 2
2 A 1 1
2 A 1 2
2 A -1 1
3 A 0 1
3 B 1 3
如何把结果拆分成两列,一列对应A组,一列对应B组?或者,能否直接计算按条件的累计和?无论采用哪种方式,我都希望结果如下所示:
time group value cumsum_A cumsum_B
0 A 0 0 0
0 B 0 0 0
0 A 0 0 0
1 A 0 0 0
1 B 1 0 1
1 B 0 0 1
2 B 1 0 2
2 A 1 1 2
2 A 1 2 2
2 A -1 1 2
3 A 0 1 2
3 B 1 1 3
谢谢!
答案 0 :(得分:1)
您可以先找出group列中的unique值,然后用sapply / lapply对这些值进行循环,按条件为每个值分别计算cumsum。
# Distinct group labels, in order of first appearance.
unique_val <- unique(df$group)

# For each label, zero out `value` on rows outside that group and accumulate,
# giving one running total per group across the full row order.
running_totals <- lapply(unique_val, function(grp) {
  in_group <- df$group == grp
  cumsum(in_group * df$value)
})
df[paste0("cumsum_", unique_val)] <- running_totals
df
# time group value cumsum_A cumsum_B
#1 0 A 0 0 0
#2 0 B 0 0 0
#3 0 A 0 0 0
#4 1 A 0 0 0
#5 1 B 1 0 1
#6 1 B 0 0 1
#7 2 B 1 0 2
#8 2 A 1 1 2
#9 2 A 1 2 2
#10 2 A -1 1 2
#11 3 A 0 1 2
#12 3 B 1 1 3
答案 1 :(得分:1)
您也可以使用if_else,在某一行不属于目标组时将value替换为0,如下所示。这里其实并不一定需要dplyr(可以改用base::ifelse,并避免使用mutate)。
library(tidyverse)

# Example data, already ordered by time.
df1 <- structure(
  list(
    time = c(0L, 0L, 0L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L),
    group = c("A", "B", "A", "A", "B", "B", "B", "A", "A", "A", "A", "B"),
    value = c(0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, -1L, 0L, 1L)
  ),
  class = "data.frame",
  row.names = c(NA, -12L)
)

# Per-group running totals: rows belonging to the other group contribute 0L,
# so each column only moves when its own group appears.
df1 %>%
  mutate(
    cumsum_A = cumsum(if_else(group != "A", 0L, value)),
    cumsum_B = cumsum(if_else(group != "B", 0L, value))
  )
#> time group value cumsum_A cumsum_B
#> 1 0 A 0 0 0
#> 2 0 B 0 0 0
#> 3 0 A 0 0 0
#> 4 1 A 0 0 0
#> 5 1 B 1 0 1
#> 6 1 B 0 0 1
#> 7 2 B 1 0 2
#> 8 2 A 1 1 2
#> 9 2 A 1 2 2
#> 10 2 A -1 1 2
#> 11 3 A 0 1 2
#> 12 3 B 1 1 3
由reprex package(v0.3.0)于2019-06-25创建
答案 2 :(得分:0)
这里是一个使用table和colCumsums的方案:
library(matrixStats)
# Indicator table: one row per observation, one column per group, with a 1
# where the observation belongs to that group and 0 elsewhere.
group_ind <- table(seq_len(nrow(df1)), df1$group)

# table() orders its columns by sorted level, not by first appearance in the
# data, so the new column names are taken from the table itself. Using
# unique(df1$group) here would mislabel the columns whenever the groups do not
# first appear in sorted order.
nm1 <- paste0("cumsum_", colnames(group_ind))

# The indicator is 0 outside each group, so the product keeps only that
# group's values; colCumsums() then accumulates down each column.
df1[nm1] <- colCumsums(group_ind * df1$value)
df1
# time group value cumsum_A cumsum_B
#1 0 A 0 0 0
#2 0 B 0 0 0
#3 0 A 0 0 0
#4 1 A 0 0 0
#5 1 B 1 0 1
#6 1 B 0 0 1
#7 2 B 1 0 2
#8 2 A 1 1 2
#9 2 A 1 2 2
#10 2 A -1 1 2
#11 3 A 0 1 2
#12 3 B 1 1 3
另一个选择是使用model.matrix:
# One 0/1 indicator column per group (intercept removed), multiplied by value
# and then accumulated down each column.
group_dummies <- model.matrix(~ group - 1, data = df1)
colCumsums(group_dummies * df1$value)
或者将model.matrix与tidyverse结合使用:
library(tidyverse)
# Build per-group indicator columns, turn each into a running total of value,
# rename the "group*" columns to "cumsum*", and attach them to df1.
group_totals <- model.matrix(~ group - 1, data = df1) %>%
  as_tibble() %>%
  mutate_all(function(ind) cumsum(ind * df1$value)) %>%
  rename_all(function(nm) str_replace(nm, "group", "cumsum"))
bind_cols(df1, group_totals)
# time group value cumsumA cumsumB
#1 0 A 0 0 0
#2 0 B 0 0 0
#3 0 A 0 0 0
#4 1 A 0 0 0
#5 1 B 1 0 1
#6 1 B 0 0 1
#7 2 B 1 0 2
#8 2 A 1 1 2
#9 2 A 1 2 2
#10 2 A -1 1 2
#11 3 A 0 1 2
#12 3 B 1 1 3
或者将count与spread结合使用:
# Tag each row with its position, count (group, row) pairs, and spread to wide
# form so every group becomes a 0/1 indicator column keyed by row number.
indicators <- df1 %>%
  mutate(rn = row_number()) %>%
  dplyr::count(group, rn) %>%
  mutate(group = str_c("cumsum", group)) %>%
  spread(group, n, fill = 0)

# Accumulate value through every indicator column (all but the first, `rn`),
# drop the helper key, and attach the totals to the original data.
indicators %>%
  mutate_at(-1, function(ind) cumsum(ind * df1$value)) %>%
  select(-rn) %>%
  bind_cols(df1, .)
# Example data used by the answers above: 12 observations ordered by time,
# each with a group label ("A"/"B") and an integer value.
df1 <- data.frame(
  time = c(0L, 0L, 0L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L),
  group = c("A", "B", "A", "A", "B", "B", "B", "A", "A", "A", "A", "B"),
  value = c(0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, -1L, 0L, 1L),
  stringsAsFactors = FALSE
)