我有以下数据框 ddf(下面是 dput(ddf) 的输出):
structure(list(vnum1 = c(-1.38, 1.22, -0.17, -0.47, -0.08, -1.11,
-1.56, -0.14, 0.55, -0.43, 0.25, 0.8, 0.77, -0.1, -0.21, -0.62,
-0.6, -0.19, -0.41, 0.11, -0.46, -3.08, -2.09, 1.27, -1.5, 0.57,
-1.69, 0.86, -0.12, -0.22, -0.85, 0.66, 0.11, -1.15, 0.32, -0.36,
-0.42, -1.17, -0.71, 0.45, -0.41, 0.43, 2.18, 0.39, 0.1, -0.12,
1.64, -1.24, -1.14, 1.22), vint1 = c(7L, 7L, 9L, 6L, 6L, 2L,
8L, 10L, 8L, 8L, 10L, 5L, 7L, 4L, 7L, 4L, 2L, 9L, 3L, 7L, 4L,
9L, 3L, 4L, 10L, 10L, 1L, 6L, 4L, 2L, 1L, 6L, 10L, 9L, 3L, 9L,
3L, 8L, 7L, 7L, 3L, 4L, 5L, 6L, 5L, 9L, 3L, 10L, 10L, 4L), vfac1 = structure(c(2L,
4L, 2L, 1L, 1L, 2L, 3L, 3L, 3L, 2L, 4L, 2L, 2L, 3L, 2L, 3L, 3L,
3L, 1L, 2L, 1L, 2L, 3L, 3L, 3L, 1L, 2L, 2L, 3L, 2L, 1L, 3L, 3L,
2L, 4L, 2L, 4L, 3L, 1L, 1L, 2L, 4L, 3L, 4L, 1L, 1L, 2L, 1L, 1L,
4L), .Label = c("1", "2", "3", "4"), class = "factor")), .Names = c("vnum1",
"vint1", "vfac1"), row.names = c(NA, -50L), class = "data.frame")
> head(ddf)
vnum1 vint1 vfac1
1 -1.38 7 2
2 1.22 7 4
3 -0.17 9 2
4 -0.47 6 1
5 -0.08 6 1
6 -1.11 2 2
>
我想创建一个矩阵,以vint1的唯一值作为行,以vfac1的唯一值作为列。矩阵的每个单元格需要填入对应vint1和vfac1组合下vnum1的平均值。我尝试了以下函数:
# Print a tab-separated table of mean(vnum1) per vint1/vfac1 combination of gdf.
# Output goes to the console through cat(); no matrix object is built or returned.
# NOTE(review): the loops below print the same values on every row -- see the
# notes on `i` and `j`.
df2mat = function(gdf){
# Header: one label per distinct vint1 value. vint1 is referenced bare here, so
# it is resolved outside gdf (presumably a global or attached copy -- confirm).
# After this loop finishes, i keeps the last (largest) vint1 value.
for(i in sort(unique(vint1))) cat("\t",i)
cat("\n")
# One printed row per vfac1 level, labelled "j:".
for(j in sort(levels(vfac1))) {
cat("j:",j)
# NOTE(review): sum is assigned but never used in this function.
sum =0
# NOTE(review): this loop reuses j, so the factor level from the outer loop is
# overwritten by 1..10, while i is never varied here; each row therefore
# computes the means for the last vint1 value against j = 1..10.
for(j in 1:10){
# mean() of an empty selection is NaN, which is printed for absent combinations.
cat(with(gdf[vint1==i & vfac1==j,], mean(vnum1, na.rm=T)),"\t")
#cat("\t")
}
cat("\n")
}
cat("\n")
}
> df2mat(ddf)
1 2 3 4 5 6 7 8 9 10
j: 1-0.6033333 NaN -0.51 0.25 NaN NaN NaN NaN NaN NaN
j: 2-0.6033333 NaN -0.51 0.25 NaN NaN NaN NaN NaN NaN
j: 3-0.6033333 NaN -0.51 0.25 NaN NaN NaN NaN NaN NaN
j: 4-0.6033333 NaN -0.51 0.25 NaN NaN NaN NaN NaN NaN
输出不正确:每一行都重复了第一行的值。此外,没有数据的组合会得到NaN。另外,我怎样才能把结果变成一个真正的矩阵对象?我该如何纠正这些问题?谢谢你的帮助。
答案 0 :(得分:4)
这正好可以用标准的tapply函数配合两个分组因子来完成。你可以这样做:
# Mean of vnum1 for each vint1 (rows) x vfac1 (columns) cell, evaluated inside
# ddf; combinations with no observations come back as NA (see output below).
with(ddf, tapply(vnum1, list(vint1,vfac1), mean))
# 1 2 3 4
#1 -0.8500000 -1.6900 NA NA
#2 NA -0.6650 -0.6000000 NA
#3 -0.4100000 0.6150 -2.0900000 -0.050
#4 -0.4600000 NA 0.1075000 0.825
#5 0.1000000 0.8000 2.1800000 NA
#6 -0.2750000 0.8600 0.6600000 0.390
#7 -0.1300000 -0.1775 NA 1.220
#8 NA -0.4300 -0.7266667 NA
#9 -0.1200000 -1.1900 -0.1900000 NA
#10 -0.6033333 NA -0.5100000 0.250
答案 1 :(得分:1)
您可以使用reshape2包中的acast函数,它可以满足您的需求:
# Cast ddf to a vint1 (rows) x vfac1 (columns) table, aggregating vnum1 with
# mean(); combinations with no observations show NaN in the output below.
library(reshape2)
acast(ddf, vint1 ~ vfac1, fun.aggregate = mean, value.var = 'vnum1')
1 2 3 4
1 -0.8500000 -1.6900 NaN NaN
2 NaN -0.6650 -0.6000000 NaN
3 -0.4100000 0.6150 -2.0900000 -0.050
4 -0.4600000 NaN 0.1075000 0.825
5 0.1000000 0.8000 2.1800000 NaN
6 -0.2750000 0.8600 0.6600000 0.390
7 -0.1300000 -0.1775 NaN 1.220
8 NaN -0.4300 -0.7266667 NaN
9 -0.1200000 -1.1900 -0.1900000 NaN
10 -0.6033333 NaN -0.5100000 0.250