我有一个数据集df
> df
date group x
1 197302 A 0.53
2 197303 A 0.60
3 197304 A 0.57
4 197302 B 0.68
5 197303 B 0.71
6 197304 B 0.65
7 197302 C 0.16
8 197303 C 0.25
9 197304 C 0.22
10 197302 D 0.31
11 197303 D 0.39
12 197304 D 0.36
我想创建一个新列“x.total”,其中一些 x 值根据两个条件求和:
按照这些条件,结果应该是这样的:
date group x x.total
1 197302 A 0.53 1.21
2 197303 A 0.60 1.31
3 197304 A 0.57 1.22
4 197302 B 0.68 1.21
5 197303 B 0.71 1.31
6 197304 B 0.65 1.22
7 197302 C 0.16 0.47
8 197303 C 0.25 0.64
9 197304 C 0.22 0.58
10 197302 D 0.31 0.47
11 197303 D 0.39 0.64
12 197304 D 0.36 0.58
有人知道我该怎么做吗?
答案 0 :(得分:1)
你可以试试tidyverse
library(tidyverse)
read.table(text = "date group x
1 197302 A 0.53
2 197303 A 0.60
3 197304 A 0.57
4 197302 B 0.68
5 197303 B 0.71
6 197304 B 0.65
7 197302 C 0.16
8 197303 C 0.25
9 197304 C 0.22
10 197302 D 0.31
11 197303 D 0.39
12 197304 D 0.36") %>%
mutate(gr = fct_collapse(group, AB = c("A", "B"), CD = c("C", "D"))) %>%
group_by(date, gr) %>%
mutate(total = sum(x))
# A tibble: 12 x 5
# Groups: date, gr [6]
date group x gr total
<int> <chr> <dbl> <fct> <dbl>
1 197302 A 0.53 AB 1.21
2 197303 A 0.6 AB 1.31
3 197304 A 0.570 AB 1.22
4 197302 B 0.68 AB 1.21
5 197303 B 0.71 AB 1.31
6 197304 B 0.65 AB 1.22
7 197302 C 0.16 CD 0.47
8 197303 C 0.25 CD 0.64
9 197304 C 0.22 CD 0.580
10 197302 D 0.31 CD 0.47
11 197303 D 0.39 CD 0.64
12 197304 D 0.36 CD 0.580
答案 1 :(得分:0)
这是一种方法,通过使用 tidyverse 并对组进行一些重构
library(tidyverse)
df <- tibble::tribble(
~date, ~group, ~num,
197302L, "A", 0.53,
197303L, "A", 0.6,
197304L, "A", 0.57,
197302L, "B", 0.68,
197303L, "B", 0.71,
197304L, "B", 0.65,
197302L, "C", 0.16,
197303L, "C", 0.25,
197304L, "C", 0.22,
197302L, "D", 0.31,
197303L, "D", 0.39,
197304L, "D", 0.36
)
df %>%
mutate(group_2 = ifelse(group %in% c("A","B"),"AB",
ifelse(group %in% c("C","D"),"CD","other"))) %>%
group_by(date,group_2) %>%
mutate(sum = sum(num))
答案 2 :(得分:0)
这也行
library(tidyverse)
df <- read.table(text = "date group x
1 197302 A 0.53
2 197303 A 0.60
3 197304 A 0.57
4 197302 B 0.68
5 197303 B 0.71
6 197304 B 0.65
7 197302 C 0.16
8 197303 C 0.25
9 197304 C 0.22
10 197302 D 0.31
11 197303 D 0.39
12 197304 D 0.36")
df %>% group_by(date, group %in% c("A", "B")) %>%
mutate(Total = sum(x))
# A tibble: 12 x 5
# Groups: date, group %in% c("A", "B") [6]
date group x `group %in% c("A", "B")` Total
<int> <chr> <dbl> <lgl> <dbl>
1 197302 A 0.53 TRUE 1.21
2 197303 A 0.6 TRUE 1.31
3 197304 A 0.570 TRUE 1.22
4 197302 B 0.68 TRUE 1.21
5 197303 B 0.71 TRUE 1.31
6 197304 B 0.65 TRUE 1.22
7 197302 C 0.16 FALSE 0.47
8 197303 C 0.25 FALSE 0.64
9 197304 C 0.22 FALSE 0.580
10 197302 D 0.31 FALSE 0.47
11 197303 D 0.39 FALSE 0.64
12 197304 D 0.36 FALSE 0.580
答案 3 :(得分:0)
library(data.table)
df <- fread("
date group x
197302 A 0.53
197303 A 0.60
197304 A 0.57
197302 B 0.68
197303 B 0.71
197304 B 0.65
197302 C 0.16
197303 C 0.25
197304 C 0.22
197302 D 0.31
197303 D 0.39
197304 D 0.36"
)
group1 <- c("A","B")
group2 <- c("C","D")
df[, x.sum := fcase(
group %chin% group1, sum(.SD[group %chin% group1,x]),
group %chin% group2, sum(.SD[group %chin% group2,x])
),
by=date][]
答案 4 :(得分:0)
您可以在 sum
中将 ave
用于组 date
和 group %in% c("A", "B")
:
df$x.total <- ave(df$x, df$date, df$group %in% c("A", "B"), FUN=sum)
df
# date group x x.total
#1 197302 A 0.53 1.21
#2 197303 A 0.60 1.31
#3 197304 A 0.57 1.22
#4 197302 B 0.68 1.21
#5 197303 B 0.71 1.31
#6 197304 B 0.65 1.22
#7 197302 C 0.16 0.47
#8 197303 C 0.25 0.64
#9 197304 C 0.22 0.58
#10 197302 D 0.31 0.47
#11 197303 D 0.39 0.64
#12 197304 D 0.36 0.58