Question

当我只向 mutate_at 提供单个变量时，它的行为似乎退化成了 mutate，如下面的代码所示。这是一个 bug，还是我的用法有误？

library("tidyverse")
# Small example table with two integer columns, `a` and `b`.
x <- data_frame("a" = 1:3, "b" = 4:6)
# Doubles its input; the integer literal `2L` keeps integer input integer.
f <- function(y){2L * y}

# Two distinct columns selected, with the function named `x` inside funs().
x %>% mutate_at(vars(c("a", "b")), funs(x = f))

# The same named function applied to the single column "a", selected four
# ways: bare character vector, vars(), and each of those with "a" repeated.
# NOTE(review): the question reports that these single-variable calls behave
# like plain mutate(); their output is not shown here -- confirm by running.
x %>% mutate_at(c("a"),            funs(x = f))
x %>% mutate_at(vars(c("a")),      funs(x = f))
x %>% mutate_at(vars(c("a", "a")), funs(x = f))
x %>% mutate_at(c("a", "a"),       funs(x = f))

Answer 1

这不是错误，这是mutate_at工作方式的一部分。从帮助文件：

默认情况下，新创建的列会使用能够唯一标识输出所需的最短名称。若希望在不需要时也强制包含名称，请为输入命名（详细信息请参阅示例）。

# Keep only the iris columns whose names end in "Length".
smalliris <- select(iris, ends_with("Length"))

# Preview the first rows of the reduced table (output shown below).
smalliris %>% head

# Sepal.Length Petal.Length
# 1          5.1          1.4
# 2          4.9          1.4
# 3          4.7          1.3
# 4          4.6          1.5
# 5          5.0          1.4
# 6          5.4          1.7

# One unnamed function: per the output below, the selected columns are
# overwritten in place and no new columns are added.
smalliris %>% 
    mutate_at(vars(ends_with("Length")), funs(sd)) %>% head

# Sepal.Length Petal.Length
# 1    0.8280661     1.765298
# 2    0.8280661     1.765298
# 3    0.8280661     1.765298
# 4    0.8280661     1.765298
# 5    0.8280661     1.765298
# 6    0.8280661     1.765298

# Two functions: per the output below, the original columns are kept and new
# columns suffixed with the function names (`_sd`, `_var`) are appended.
smalliris %>% 
    mutate_at(vars(ends_with("Length")), funs(sd, var)) %>% head

# Sepal.Length Petal.Length Sepal.Length_sd Petal.Length_sd Sepal.Length_var Petal.Length_var
# 1          5.1          1.4       0.8280661        1.765298        0.6856935         3.116278
# 2          4.9          1.4       0.8280661        1.765298        0.6856935         3.116278
# 3          4.7          1.3       0.8280661        1.765298        0.6856935         3.116278
# 4          4.6          1.5       0.8280661        1.765298        0.6856935         3.116278
# 5          5.0          1.4       0.8280661        1.765298        0.6856935         3.116278
# 6          5.4          1.7       0.8280661        1.765298        0.6856935         3.116278

# One function, explicitly named: per the output below, the name forces new
# `<column>_myname` columns instead of overwriting the originals.
smalliris %>% 
    mutate_at(vars(ends_with("Length")), funs(myname = sd)) %>% head

# Sepal.Length Petal.Length Sepal.Length_myname Petal.Length_myname
# 1          5.1          1.4           0.8280661            1.765298
# 2          4.9          1.4           0.8280661            1.765298
# 3          4.7          1.3           0.8280661            1.765298
# 4          4.6          1.5           0.8280661            1.765298
# 5          5.0          1.4           0.8280661            1.765298
# 6          5.4          1.7           0.8280661            1.765298

Answer 2

我在github上问了这个问题，并给出了一个很好的答案：

手动方式：只需确保列名向量带有名称，即使用 c(a = "a")。
使用辅助函数：使用 tidyselect::vars_select。

当输入单个变量时，dplyr::mutate_at 的行为类似于 mutate

2 个答案: