从3个矩阵创建data.frame

时间:2014-09-29 14:16:45

标签: r matrix

我有以下数据和代码:

DATA:

> ddf
           vnum1      vnum2 vint1 vint2 vyear
1   0.3878784349 0.61856052     3     9  2013
2  -0.1715074407 0.53045776     8     4  2014
3  -0.5835102451 0.06261218     7     6  2009
4  -0.0001529602 0.52475223     9     4  2012
5  -0.3788561360 0.73767935     7     8  2011
6   0.4655585989 0.35300322     3    11  2013
7   0.6647057814 0.53344731     5     7  2014
8   1.1537467543 0.83244251     6     7  2012
9   2.7525645298 0.66707480     1     9  2009
10  0.9063236184 0.89991709    10     4  2014
11 -1.2079772974 0.83952909     4     3  2013
12 -0.9229426008 0.59305684     9     3  2010
13 -0.9189135265 0.67158655     4     9  2011
14  0.0171407600 0.21849402     6     6  2012
15  0.3238003823 0.72707166     1     2  2012
16  0.7023232273 0.10858232    10     3  2013
17  1.8774988873 0.66128645     8     4  2011
18 -1.0178875266 0.10277654     3    12  2014
19 -1.5038029776 0.74933385     2    11  2011
20 -2.2545370826 0.52037556     7     1  2013
21 -0.5070672623 0.70556583    10    11  2012
22  0.5332096642 0.05624691     2     4  2009
23 -0.1016724979 0.30406996     3     8  2010
24  1.6554541496 0.24258070    10     5  2011
25 -1.8601185335 0.34306609     7     6  2010
> 
> dput(ddf)
structure(list(vnum1 = c(0.387878434882536, -0.171507440740263, 
-0.583510245133433, -0.000152960203773939, -0.378856135972161, 
0.465558598897948, 0.664705781421418, 1.15374675427851, 2.75256452978797, 
0.90632361837742, -1.20797729742402, -0.922942600847586, -0.918913526542506, 
0.0171407600378223, 0.323800382345413, 0.702323227252284, 1.87749888731167, 
-1.01788752655329, -1.50380297756717, -2.25453708258335, -0.507067262341542, 
0.53320966418502, -0.101672497856537, 1.65545414961551, -1.86011853354809
), vnum2 = c(0.618560523493215, 0.530457757413387, 0.062612181995064, 
0.524752234807238, 0.737679345766082, 0.353003220865503, 0.533447309629992, 
0.832442505517974, 0.667074795579538, 0.899917090777308, 0.839529090793803, 
0.593056835001335, 0.67158655426465, 0.218494015280157, 0.727071655681357, 
0.108582322485745, 0.661286452319473, 0.102776538114995, 0.749333853134885, 
0.520375560736284, 0.705565832322463, 0.056246911874041, 0.304069962818176, 
0.242580699035898, 0.343066089553759), vint1 = c(3L, 8L, 7L, 
9L, 7L, 3L, 5L, 6L, 1L, 10L, 4L, 9L, 4L, 6L, 1L, 10L, 8L, 3L, 
2L, 7L, 10L, 2L, 3L, 10L, 7L), vint2 = c(9L, 4L, 6L, 4L, 8L, 
11L, 7L, 7L, 9L, 4L, 3L, 3L, 9L, 6L, 2L, 3L, 4L, 12L, 11L, 1L, 
11L, 4L, 8L, 5L, 6L), vyear = c(2013L, 2014L, 2009L, 2012L, 2011L, 
2013L, 2014L, 2012L, 2009L, 2014L, 2013L, 2010L, 2011L, 2012L, 
2012L, 2013L, 2011L, 2014L, 2011L, 2013L, 2012L, 2009L, 2010L, 
2011L, 2010L)), .Names = c("vnum1", "vnum2", "vint1", "vint2", 
"vyear"), class = "data.frame", row.names = c(NA, -25L))
> 

我对上面的数据使用 rcorr()（Hmisc 包）得到了相关系数、p 值和样本数的矩阵。

> cor <- rcorr(as.matrix(ddf), type="pearson")
> cor$r
      vnum1 vnum2 vint1 vint2 vyear
vnum1  1.00  0.05 -0.01  0.00 -0.09
vnum2  0.05  1.00 -0.01 -0.01  0.22
vint1 -0.01 -0.01  1.00 -0.37  0.17
vint2  0.00 -0.01 -0.37  1.00 -0.02
vyear -0.09  0.22  0.17 -0.02  1.00


> cor$P
      vnum1  vnum2  vint1  vint2  vyear 
vnum1        0.8290 0.9772 0.9999 0.6631
vnum2 0.8290        0.9729 0.9473 0.2980
vint1 0.9772 0.9729        0.0671 0.4030
vint2 0.9999 0.9473 0.0671        0.9057
vyear 0.6631 0.2980 0.4030 0.9057       
> 

> cor$n
      vnum1 vnum2 vint1 vint2 vyear
vnum1    25    25    25    25    25
vnum2    25    25    25    25    25
vint1    25    25    25    25    25
vint2    25    25    25    25    25
vyear    25    25    25    25    25

如何组合这三个矩阵以获得包含以下列的data.frame:

var1    var2    r_value     p_value     N
vnum1   vnum1   1.00        NA          25
vnum1   vnum2   0.05        0.8290      25
vnum1   vint1   -0.01       0.9772      25
...

我试过了:

# Asker's attempt: start from an empty data.frame with the desired columns
# (note that no N column is declared here).
outdf = data.frame(var1=character(), var2=character(), r_value=numeric(), p_value=numeric(), stringsAsFactors=FALSE)
# NOTE(review): `df` is never defined in this snippet -- the data is called
# `ddf` -- so colnames(df) presumably does not yield the variable names;
# `ddf` was probably intended.
outdf[,1:2] = expand.grid(colnames(df), colnames(df))
# NOTE(review): indexing a matrix with two name vectors selects the whole
# sub-matrix of all row/column combinations, not one element per
# (var1, var2) pair, so the result does not fit a single column.
outdf$r_value = cor$r[outdf$var1, outdf$var2]
outdf

但它不起作用。谢谢你的帮助。

编辑:它还应该处理缺失值(NA)。

2 个答案:

答案 0 :(得分:3)

您可以尝试:

library(reshape2)

# Collect the three rcorr matrices in a named list; melt() stores each
# element's name in the L1 column of the long table.
lst1 <- list(r_value = cor$r, P_value = cor$P, N = cor$n)
long <- melt(lst1)

# Spread L1 back out: one row per (Var1, Var2) pair, one column per matrix.
res <- dcast(long, Var1 + Var2 ~ L1, value.var = "value")
res$r_value <- round(res$r_value, 2)
head(res, 2)
#  Var1  Var2  N  P_value r_value
#1 vnum1 vnum1 25       NA    1.00
#2 vnum1 vnum2 25 0.828973    0.05

或者按照 @Ananda Mahto 的建议:

# Drop the class attribute so melt() treats the rcorr result as a plain list
# of matrices, then cast to one row per (Var1, Var2) pair.
cor_long <- melt(unclass(cor))
res2 <- dcast(cor_long, Var1 + Var2 ~ L1)

并相应地更改列名

答案 1 :(得分:1)

以下代码适用于您的示例:

# Enumerate every (Var1, Var2) pair of variable names.
varnames <- expand.grid(names(ddf), names(ddf))

# Indexing a matrix with a two-column character matrix picks one element per
# row by dimnames, so all pairs are looked up in a single vectorised step.
# This replaces a row-wise apply() over the data.frame. expand.grid() returns
# factors, hence the explicit as.character().
idx <- cbind(as.character(varnames$Var1), as.character(varnames$Var2))

# Missing entries (e.g. the diagonal of cor$P) simply come through as NA.
outdf <- data.frame(
  varnames,
  COR = cor$r[idx],
  PVAL = cor$P[idx],
  N = cor$n[idx]
)