我有以下数据和代码:
DATA:
> ddf
vnum1 vnum2 vint1 vint2 vyear
1 0.3878784349 0.61856052 3 9 2013
2 -0.1715074407 0.53045776 8 4 2014
3 -0.5835102451 0.06261218 7 6 2009
4 -0.0001529602 0.52475223 9 4 2012
5 -0.3788561360 0.73767935 7 8 2011
6 0.4655585989 0.35300322 3 11 2013
7 0.6647057814 0.53344731 5 7 2014
8 1.1537467543 0.83244251 6 7 2012
9 2.7525645298 0.66707480 1 9 2009
10 0.9063236184 0.89991709 10 4 2014
11 -1.2079772974 0.83952909 4 3 2013
12 -0.9229426008 0.59305684 9 3 2010
13 -0.9189135265 0.67158655 4 9 2011
14 0.0171407600 0.21849402 6 6 2012
15 0.3238003823 0.72707166 1 2 2012
16 0.7023232273 0.10858232 10 3 2013
17 1.8774988873 0.66128645 8 4 2011
18 -1.0178875266 0.10277654 3 12 2014
19 -1.5038029776 0.74933385 2 11 2011
20 -2.2545370826 0.52037556 7 1 2013
21 -0.5070672623 0.70556583 10 11 2012
22 0.5332096642 0.05624691 2 4 2009
23 -0.1016724979 0.30406996 3 8 2010
24 1.6554541496 0.24258070 10 5 2011
25 -1.8601185335 0.34306609 7 6 2010
>
> dput(ddf)
structure(list(vnum1 = c(0.387878434882536, -0.171507440740263,
-0.583510245133433, -0.000152960203773939, -0.378856135972161,
0.465558598897948, 0.664705781421418, 1.15374675427851, 2.75256452978797,
0.90632361837742, -1.20797729742402, -0.922942600847586, -0.918913526542506,
0.0171407600378223, 0.323800382345413, 0.702323227252284, 1.87749888731167,
-1.01788752655329, -1.50380297756717, -2.25453708258335, -0.507067262341542,
0.53320966418502, -0.101672497856537, 1.65545414961551, -1.86011853354809
), vnum2 = c(0.618560523493215, 0.530457757413387, 0.062612181995064,
0.524752234807238, 0.737679345766082, 0.353003220865503, 0.533447309629992,
0.832442505517974, 0.667074795579538, 0.899917090777308, 0.839529090793803,
0.593056835001335, 0.67158655426465, 0.218494015280157, 0.727071655681357,
0.108582322485745, 0.661286452319473, 0.102776538114995, 0.749333853134885,
0.520375560736284, 0.705565832322463, 0.056246911874041, 0.304069962818176,
0.242580699035898, 0.343066089553759), vint1 = c(3L, 8L, 7L,
9L, 7L, 3L, 5L, 6L, 1L, 10L, 4L, 9L, 4L, 6L, 1L, 10L, 8L, 3L,
2L, 7L, 10L, 2L, 3L, 10L, 7L), vint2 = c(9L, 4L, 6L, 4L, 8L,
11L, 7L, 7L, 9L, 4L, 3L, 3L, 9L, 6L, 2L, 3L, 4L, 12L, 11L, 1L,
11L, 4L, 8L, 5L, 6L), vyear = c(2013L, 2014L, 2009L, 2012L, 2011L,
2013L, 2014L, 2012L, 2009L, 2014L, 2013L, 2010L, 2011L, 2012L,
2012L, 2013L, 2011L, 2014L, 2011L, 2013L, 2012L, 2009L, 2010L,
2011L, 2010L)), .Names = c("vnum1", "vnum2", "vint1", "vint2",
"vyear"), class = "data.frame", row.names = c(NA, -25L))
>
我对上面的数据使用 rcorr（Hmisc 包），得到相关系数和 p 值的矩阵。
> cor <- rcorr(as.matrix(ddf), type="pearson")
> cor$r
vnum1 vnum2 vint1 vint2 vyear
vnum1 1.00 0.05 -0.01 0.00 -0.09
vnum2 0.05 1.00 -0.01 -0.01 0.22
vint1 -0.01 -0.01 1.00 -0.37 0.17
vint2 0.00 -0.01 -0.37 1.00 -0.02
vyear -0.09 0.22 0.17 -0.02 1.00
> cor$P
vnum1 vnum2 vint1 vint2 vyear
vnum1 0.8290 0.9772 0.9999 0.6631
vnum2 0.8290 0.9729 0.9473 0.2980
vint1 0.9772 0.9729 0.0671 0.4030
vint2 0.9999 0.9473 0.0671 0.9057
vyear 0.6631 0.2980 0.4030 0.9057
>
> cor$n
vnum1 vnum2 vint1 vint2 vyear
vnum1 25 25 25 25 25
vnum2 25 25 25 25 25
vint1 25 25 25 25 25
vint2 25 25 25 25 25
vyear 25 25 25 25 25
如何组合这两个矩阵以获得包含以下列的data.frame:
var1 var2 r_value p_value N
vnum1 vnum1 1.00 NA 25
vnum1 vnum2 0.05 0.8290 25
vnum1 vint1 -0.01 0.9772 25
...
我试过了:
# Asker's attempt: start from an empty (zero-row) data frame with typed columns
# and try to fill it with every variable pair plus the matching r value.
outdf = data.frame(var1=character(), var2=character(), r_value=numeric(), p_value=numeric(), stringsAsFactors=FALSE)
# NOTE(review): `df` is not defined in the code shown; the data frame printed
# above is named `ddf`, so this presumably should be colnames(ddf) - confirm.
outdf[,1:2] = expand.grid(colnames(df), colnames(df))
# NOTE(review): indexing a matrix as [rows, cols] with two vectors selects the
# whole rows-by-columns sub-matrix, not one element per (var1, var2) pair;
# that is likely why this does not produce the desired r_value column.
outdf$r_value = cor$r[outdf$var1, outdf$var2]
outdf
但它不起作用。谢谢你的帮助。
编辑:它还应该处理缺失值(NA)。
答案 0 :(得分:3)
您可以尝试:
library(reshape2)
# Collect the three rcorr matrices under the column names wanted in the result.
lst1 <- list(r_value = cor$r, P_value = cor$P, N = cor$n)
# melt() stacks the named matrices into long form (Var1, Var2, value, L1);
# dcast() then spreads the L1 labels into one column per statistic.
res <- dcast(
  melt(lst1),
  Var1 + Var2 ~ L1,
  value.var = "value"
)
# Round only the correlation coefficients for display.
res[["r_value"]] <- round(res[["r_value"]], digits = 2)
head(res, n = 2)
# Var1 Var2 N P_value r_value
#1 vnum1 vnum1 25 NA 1.00
#2 vnum1 vnum2 25 0.828973 0.05
或者@Ananda Mahto建议
# Melt the rcorr result directly: unclass() exposes its list components, and
# the L1 column produced by melt() names the component each value came from.
# value.var is passed explicitly so dcast() does not have to guess the value
# column (and stays consistent with the call used for `res`).
res2 <- dcast(melt(unclass(cor)), Var1 + Var2 ~ L1, value.var = "value")
并相应地更改列名
答案 1 :(得分:1)
以下代码适用于您的示例:
# Every (Var1, Var2) combination of variable names; expand.grid() names the
# two columns Var1 and Var2.
varnames <- expand.grid(names(ddf), names(ddf))
# Look up r, P and n for all pairs at once: a two-column character matrix
# indexes a matrix element-wise by its dimnames, which avoids a row-wise
# apply() over the data frame. NA entries in the matrices are carried through.
outdf <- local({
  pair_idx <- cbind(as.character(varnames$Var1), as.character(varnames$Var2))
  data.frame(
    varnames,
    COR = cor$r[pair_idx],
    PVAL = cor$P[pair_idx],
    N = cor$n[pair_idx]
  )
})