我正在尝试按行计算数据框中某些列的平均值和标准差,并将这些值作为新列添加到数据框中。对于平均值,我可以这样实现:
# dplyr provides mutate().
library(dplyr)
# Add a `mean` column holding, for each row, the average of hp, drat and wt,
# written out as explicit arithmetic on the three columns.
mtcars = mutate(mtcars, mean=(hp+drat+wt)/3)
然而,当我尝试对标准差做同样的事情时遇到了问题,因为标准差的公式不像平均值那样容易硬编码。所以,我尝试使用一个函数,如下所示:
# Attempt to add a per-row standard deviation by handing the three columns to
# sd() as three separate arguments. This call fails with the error quoted
# below: sd() takes one data vector, so wt ends up as an unused argument.
mtcars = mutate(mtcars, mean=(hp+drat+wt)/3, stdev = sd(hp,drat,wt))
这会导致错误 "Error in sd(hp, drat, wt) : unused argument (wt)"(未使用的参数 wt)。我该如何纠正我的语法?谢谢。
答案 0 :(得分:5)
你可以尝试
library(dplyr)
library(matrixStats)
# Names of the columns that are summarised row by row.
nm1 <- c('hp', 'drat', 'wt')
# Inside the pipe, `.` is the data frame on the left of %>%, so .[nm1] is the
# sub-data-frame holding only those columns. rowMeans() yields one mean per
# row; rowSds() (from matrixStats) is applied to the same columns after
# converting them with as.matrix() and yields one standard deviation per row.
res1 <- mtcars %>%
mutate(Mean= rowMeans(.[nm1]), stdev=rowSds(as.matrix(.[nm1])))
# Show the first three rows of the result.
head(res1,3)
# mpg cyl disp hp drat wt qsec vs am gear carb Mean stdev
#1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 38.84000 61.62969
#2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 38.92500 61.55489
#3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 33.05667 51.91809
或使用do
# Alternative using rowwise() + do(): the do() expression is evaluated once
# per row, with `.` standing for the current row. nm1 is the character vector
# of column names defined in the previous snippet. unlist(.[nm1]) flattens the
# selected values of the row into one vector for mean() and sd(), and
# data.frame(., ...) returns the row with Mean and stdev appended.
res2 <- mtcars %>%
rowwise() %>%
do(data.frame(., Mean=mean(unlist(.[nm1])),
stdev=sd(unlist(.[nm1]))))
# Show the first three rows of the result.
head(res2,3)
# mpg cyl disp hp drat wt qsec vs am gear carb Mean stdev
#1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 38.84000 61.62969
#2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 38.92500 61.55489
#3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 33.05667 51.91809
答案 1 :(得分:2)
您也可以编写自己的矢量化 RowSD 函数:
# Row-wise sample standard deviation (n - 1 denominator), vectorised over rows.
#
# Arguments:
#   x      Numeric matrix or data frame; each row is one set of observations.
#   na.rm  If TRUE, missing values are dropped row by row before computing the
#          standard deviation. Defaults to FALSE, which reproduces the
#          original behaviour (any NA in a row makes that row's result NA).
#
# Returns:
#   A numeric vector with one standard deviation per row of `x`. With
#   na.rm = TRUE, rows with fewer than two non-missing values yield NA, as
#   sd() does for a vector of length one.
RowSD <- function(x, na.rm = FALSE) {
  # Subtracting the vector of row means recycles it down the columns, so each
  # element has the mean of its own row removed.
  centered <- x - rowMeans(x, na.rm = na.rm)

  if (na.rm) {
    # Count the values actually used in each row; rows that cannot support a
    # sample standard deviation get an NA denominator instead of 0 or -1.
    n <- rowSums(!is.na(x))
    n[n < 2] <- NA
  } else {
    n <- ncol(x)
  }

  sqrt(rowSums(centered^2, na.rm = na.rm) / (n - 1))
}
然后
# cbind() assembles hp, drat and wt into a matrix with one column per
# variable; RowSD() then returns one standard deviation per row, which mutate()
# stores in the new stdev column next to the hard-coded mean.
mtcars %>%
mutate(mean = (hp + drat + wt)/3, stdev = RowSD(cbind(hp, drat, wt)))
## mpg cyl disp hp drat wt qsec vs am gear carb mean stdev
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 38.84000 61.62969
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 38.92500 61.55489
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 33.05667 51.91809
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 38.76500 61.69136
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 60.53000 99.13403
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 37.07333 58.82726
## ...
答案 2 :(得分:2)
不需要进行太多更改,只需添加 rowwise()(感谢 @akrun 的评论),并将您的列名包装在 c(...) 中(以修复错误):
library(dplyr)
# rowwise() makes mutate() evaluate its expressions one row at a time, so
# c(hp, drat, wt) holds just that row's three values and sd() receives them as
# a single vector. The result stays grouped by row, as the printed output
# below indicates.
mtcars %>%
rowwise() %>%
mutate(mean=(hp+drat+wt)/3, stdev = sd(c(hp,drat,wt)))
## Source: local data frame [32 x 13]
## Groups: <by row>
## mpg cyl disp hp drat wt qsec vs am gear carb mean stdev
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 38.84000 61.62969
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 38.92500 61.55489
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 33.05667 51.91809
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 38.76500 61.69136
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 60.53000 99.13403
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 37.07333 58.82726
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 83.92667 139.49371
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 22.96000 33.81056
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 34.02333 52.80875
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 43.45333 68.88985
## .. ... ... ... ... ... ... ... .. .. ... ... ... ...
答案 3 :(得分:0)
@r2evans 我使用相同的命令,得到的每一行 sd 值都相同,而平均值的计算是正常的。见下面的输出:
> mtcars %>%
+ rowwise() %>%
+ mutate(mean=(hp+drat+wt)/3, stdev = sd(c(hp,drat,wt)))
Source: local data frame [32 x 13]
Groups: <by row>
# A tibble: 32 x 13
mpg cyl disp hp drat wt qsec vs am gear carb mean stdev
* <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 38.84000 78.38681
2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 38.92500 78.38681
3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 33.05667 78.38681
4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 38.76500 78.38681
5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 60.53000 78.38681
6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 37.07333 78.38681
7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 83.92667 78.38681
8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 22.96000 78.38681
9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 34.02333 78.38681
10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 43.45333 78.38681
# ... with 22 more rows