# Example data: two products with six seller offers each. `Dich` marks the
# reference offer (1) inside each product; `price` is the offer price.
test <- data.frame(
  prod_id = rep(c("shoe", "boat"), each = 6),
  seller_id = c(
    "a", "b", "c", "d", "e", "f",
    "a", "g", "h", "r", "q", "b"
  ),
  Dich = c(
    1, 0, 0, 0, 0, 0,
    0, 0, 1, 0, 0, 0
  ),
  price = c(
    120, 20, 10, 4, 3, 4,
    30, 43, 56, 88, 75, 44
  )
)
# Show the data frame.
test
prod_id seller_id Dich price
1 shoe a 1 120
2 shoe b 0 20
3 shoe c 0 10
4 shoe d 0 4
5 shoe e 0 3
6 shoe f 0 4
7 boat a 0 30
8 boat g 0 43
9 boat h 1 56
10 boat r 0 88
11 boat q 0 75
12 boat b 0 44
我想创建一个新列:在每个 prod_id 组内,计算每个观察值的 price 与该组中 Dich == 1 的那个观察值的 price 之差。实现这一点的语法如下。
# Within each product, express every price relative to the price of the
# row flagged with Dich == 1; products without such a row get NA.
test %>%
  group_by(prod_id) %>%
  mutate(
    diff_p = if (!any(Dich == 1)) {
      NA
    } else {
      price - price[Dich == 1]
    }
  )
prod_id seller_id Dich price diff_p
1 shoe a 1 120 0
2 shoe b 0 20 -100
3 shoe c 0 10 -110
4 shoe d 0 4 -116
5 shoe e 0 3 -117
6 shoe f 0 4 -116
7 boat a 0 30 -26
8 boat g 0 43 -13
9 boat h 1 56 0
10 boat r 0 88 32
11 boat q 0 75 19
12 boat b 0 44 -12
现在,我想把相同的语法封装成一个函数,以便在新的数据框上调用该函数并得到相同的结果。但是,当我尝试这样做时,新创建的列中只有 NA 值。我猜问题可能出在函数内部使用了 mutate?
# Add a `diff_p` column: within each `prod_id` group, the difference between
# every `price` and the price of the group's reference row (`Dich == 1`).
#
# e: data frame with the columns `prod_id`, `Dich` and `price`.
# Returns `e` grouped by `prod_id` with the extra column `diff_p`. Groups
# that have no reference row get NA.
trans <- function(e) {
  # Difference of each value to the value of the first flagged row.
  diff_to_reference <- function(flag, value) {
    # which() drops NA flags, so a missing flag can neither break the `if`
    # nor inject NA positions into the lookup.
    ref_rows <- which(flag == 1)
    if (length(ref_rows) == 0L) {
      return(NA)
    }
    # Use only the first reference row; subtracting several reference
    # prices at once would silently recycle them across the group.
    value - value[ref_rows[1L]]
  }

  # Namespace-qualified verbs: an unqualified `mutate` can be masked by
  # another attached package (e.g. plyr), which would ignore the grouping.
  grouped <- dplyr::group_by(e, prod_id)
  dplyr::mutate(grouped, diff_p = diff_to_reference(Dich, price))
}
答案 0 :(得分:2)
一种选择是使用 quosure(用 `enquo()` 捕获参数),并通过 `!!` 对其求值:
library(tidyverse)
# Add a `diff_p` column holding, within each group, the difference between
# every value and the value of the group's reference row (flag == 1).
#
# dat:      data frame to transform.
# groupCol: unquoted name of the grouping column.
# valCol1:  unquoted name of the flag column; 1 marks the reference row.
# valCol2:  unquoted name of the value column the differences are taken on.
# Returns `dat` grouped by `groupCol` with the extra column `diff_p`. Groups
# that have no reference row get NA.
trans <- function(dat, groupCol, valCol1, valCol2) {
  # Capture the column arguments unevaluated so they can be unquoted with
  # `!!` inside the dplyr verbs.
  group_quo <- dplyr::enquo(groupCol)
  flag_quo <- dplyr::enquo(valCol1)
  value_quo <- dplyr::enquo(valCol2)

  # Difference of each value to the value of the first flagged row.
  diff_to_reference <- function(flag, value) {
    # which() drops NA flags, so a missing flag can neither break the `if`
    # nor inject NA positions into the lookup.
    ref_rows <- which(flag == 1)
    if (length(ref_rows) == 0L) {
      return(NA)
    }
    # Use only the first reference row; subtracting several reference
    # values at once would silently recycle them across the group.
    value - value[ref_rows[1L]]
  }

  grouped <- dplyr::group_by(dat, !!group_quo)
  dplyr::mutate(
    grouped,
    diff_p = diff_to_reference(!!flag_quo, !!value_quo)
  )
}
# Apply the helper to the example data: group by `prod_id`, use `Dich` as the
# reference flag and `price` as the value column. Expected output below.
trans(test, prod_id, Dich, price)
# A tibble: 12 x 5
# Groups: prod_id [2]
# prod_id seller_id Dich price diff_p
# <fct> <fct> <dbl> <dbl> <dbl>
# 1 shoe a 1 120 0
# 2 shoe b 0 20 -100
# 3 shoe c 0 10 -110
# 4 shoe d 0 4 -116
# 5 shoe e 0 3 -117
# 6 shoe f 0 4 -116
# 7 boat a 0 30 -26
# 8 boat g 0 43 -13
# 9 boat h 1 56 0
#10 boat r 0 88 32
#11 boat q 0 75 19
#12 boat b 0 44 -12
注意:将列名作为参数传入,可以使该函数更通用,便于应用到其他数据集。