我有以下数据:
# Simulated example data: columns x1-x3 and y1-y2, 100 draws each, sampled
# with replacement. No seed is set, so the values differ from run to run.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
x1 <- sample(1:10, 100, replace = TRUE)
x2 <- sample(1:3, 100, replace = TRUE)
x3 <- sample(50:100, 100, replace = TRUE)
y1 <- sample(50:100, 100, replace = TRUE)
y2 <- sample(50:100, 100, replace = TRUE)
mydf <- data.frame(x1, x2, x3, y1, y2)
head(mydf)
x1 x2 x3 y1 y2
1 2 2 96 100 73
2 5 2 77 93 52
3 10 1 86 54 80
4 3 2 98 59 94
5 2 2 85 94 85
6 9 2 56 79 99
基于以上数据,我想做相关分析并产生以下输出:
x1 x2 x3
y1 r.value; p.value r.value; p.value r.value; p.value
y2 r.value; p.value r.value; p.value r.value; p.value
r 值需要四舍五入到 2 位小数,p 值需要四舍五入到 3 位小数。
如何做到这一点?谢谢你的帮助。
我试过以下:
library(Hmisc)
# Pearson correlation of every column of mydf against every other column.
# The printed result shows the coefficient matrix, n, and the p-value matrix.
res <- rcorr(as.matrix(mydf), type = "pearson")
print(res)
x1 x2 x3 y1 y2
x1 1.00 -0.01 -0.16 -0.28 -0.21
x2 -0.01 1.00 -0.20 -0.10 -0.13
x3 -0.16 -0.20 1.00 0.14 -0.09
y1 -0.28 -0.10 0.14 1.00 0.12
y2 -0.21 -0.13 -0.09 0.12 1.00
n= 100
P
x1 x2 x3 y1 y2
x1 0.9520 0.1089 0.0047 0.0364
x2 0.9520 0.0444 0.3463 0.1887
x3 0.1089 0.0444 0.1727 0.3948
y1 0.0047 0.3463 0.1727 0.2482
y2 0.0364 0.1887 0.3948 0.2482
# NOTE: the two subsets do not line up. res[[1]][, 1:3] selects 5 rows x 3
# columns (15 values) while res[[3]][1:2, ] selects 2 rows x 5 columns
# (10 values), so paste0() recycles the shorter vector and pairs each
# correlation with a p-value taken from a different cell.
matrix(paste0(round(res[[1]][,1:3],2),';',round(res[[3]][1:2,],4)),ncol=3)
[,1] [,2] [,3]
[1,] "1;NA" "-0.01;0.0444" "-0.16;NA"
[2,] "-0.01;0.952" "1;0.0047" "-0.2;0.952"
[3,] "-0.16;0.952" "-0.2;0.3463" "1;0.952"
[4,] "-0.28;NA" "-0.1;0.0364" "0.14;NA"
[5,] "-0.21;0.1089" "-0.13;0.1887" "-0.09;0.1089"
但这种组合并不正确。
答案 0 :(得分:2)
您也可以按如下方式操作,这样就不需要指定所需行/列的具体位置:
# Combine each correlation (2 decimals) with its p-value (3 decimals) as
# "r;p", keeping the shape and the row/column names of the correlation matrix.
matrix(
  paste(round(res$r, 2), round(res$P, 3), sep = ";"),
  nrow = nrow(res$r),
  dimnames = dimnames(res$r)
)
更新:我添加了 dimnames 参数,这样原相关矩阵的行名和列名会被带到结果矩阵中。
例如,通过我的随机抽样,您将得到:
x1 x2 x3 y1 y2
x1 "1;NA" "-0.2;0.052" "0.02;0.833" "-0.04;0.674" "0.02;0.819"
x2 "-0.2;0.052" "1;NA" "-0.13;0.202" "-0.01;0.896" "0.05;0.653"
x3 "0.02;0.833" "-0.13;0.202" "1;NA" "-0.05;0.636" "-0.13;0.185"
y1 "-0.04;0.674" "-0.01;0.896" "-0.05;0.636" "1;NA" "-0.02;0.858"
y2 "0.02;0.819" "0.05;0.653" "-0.13;0.185" "-0.02;0.858" "1;NA"
答案 1 :(得分:1)
尝试
# y1/y2 (rows 4:5 of the rcorr result) against x1..x3 (columns 1:3):
# each cell is "r; p" with r rounded to 2 and p rounded to 4 decimals.
r2 <- matrix(
  paste(round(res$r[4:5, 1:3], 2), round(res$P[4:5, 1:3], 4), sep = "; "),
  nrow = 2,
  ncol = 3,
  dimnames = list(c("y1", "y2"), c("x1", "x2", "x3"))
)
您可以创建如下所示的函数:
# Build a character matrix of "r; p" strings correlating the two groups of
# columns found in `df`. Columns are sorted by name and grouped by their name
# with the first run of digits removed (e.g. x1, x2, ... versus y1, y2, ...).
#
# df: data frame whose column names form exactly two such groups; the column
#     order does not matter because the columns are sorted by name first.
#
# Returns a character matrix with the second group (after sorting) as rows and
# the first group as columns. Each cell is "<r, 2 decimals>; <p, 4 decimals>".
# Stops with an error when the names do not form exactly two groups.
# Relies on rcorr() being available (library(Hmisc) attached).
f1 <- function(df) {
  df1 <- df[order(colnames(df))]
  indx <- sub("\\d+", "", colnames(df1))
  # Position(s) where the name prefix changes between neighbouring columns.
  indx1 <- which(indx[-1] != indx[-length(indx)])
  # Exactly one change point is required; otherwise the index arithmetic
  # below would fail with an obscure error.
  if (length(indx1) != 1L) {
    stop("column names must form exactly two prefix groups (e.g. x1.., y1..)",
         call. = FALSE)
  }
  first_grp <- seq_len(indx1)
  indx2 <- (indx1 + 1):ncol(df1)
  r2 <- matrix(0, ncol = indx1, nrow = ncol(df1) - indx1,
               dimnames = list(colnames(df1)[indx2], colnames(df1)[first_grp]))
  r1 <- rcorr(as.matrix(df1), type = "pearson")
  r2[] <- paste(round(r1$r[indx2, first_grp], 2),
                round(r1$P[indx2, first_grp], 4),
                sep = "; ")
  r2
}
f1(mydf) # using your dataset; the numbers differ from yours because the random seed is different
# x1 x2 x3
#y1 "0.07; 0.4773" "0.02; 0.84" "0.21; 0.0385"
#y2 "-0.08; 0.4363" "0.08; 0.4146" "0.02; 0.8599"
下面用列顺序被打乱的数据集进行测试:
# mydf1 holds the columns x1-x4, y1, y2 in shuffled order (it is built from
# mydfN in the data section of this post).
f1(mydf1)
# x1 x2 x3 x4
#y1 "-0.08; 0.4086" "0.17; 0.0945" "-0.25; 0.0112" "-0.16; 0.1025"
#y2 "0.07; 0.5174" "-0.1; 0.3054" "0.03; 0.7478" "-0.06; 0.5776"
如果希望函数以数字索引作为参数,可以这样写:
# Correlate the columns picked by `v2` (rows of the result) with the columns
# picked by `v1` (columns of the result).
#
# df: data frame passed to rcorr() after conversion to a matrix.
# v1: column indices of df used as the result's columns.
# v2: column indices of df used as the result's rows.
#
# Returns a character matrix whose cells are "<r, 2 decimals>; <p, 4 decimals>".
f2 <- function(df, v1, v2) {
  fit <- rcorr(as.matrix(df), type = "pearson")
  col_names <- colnames(df)
  out <- matrix(0, nrow = length(v2), ncol = length(v1),
                dimnames = list(col_names[v2], col_names[v1]))
  r_part <- round(fit$r[v2, v1], 2)
  p_part <- round(fit$P[v2, v1], 4)
  # Assigning through out[] keeps the dimensions and dimnames set above.
  out[] <- paste(r_part, p_part, sep = "; ")
  out
}
# Columns 1:3 (x1, x2, x3) become the result's columns, columns 4:5 (y1, y2) its rows.
f2(mydf, 1:3, 4:5)
# Columns 1 and 3 (x1, x3) become the result's columns; columns 2, 4, 5 (x2, y1, y2) its rows.
f2(mydf, c(1,3), c(2,4,5))
# Data for the shuffled-column example: six variables (x1-x4, y1, y2) drawn
# with a fixed seed, collected in mydfN.
set.seed(29)
x1 <- sample(1:10, 100, replace = TRUE)
x2 <- sample(1:3, 100, replace = TRUE)
x3 <- sample(50:100, 100, replace = TRUE)
x4 <- sample(40:80, 100, replace = TRUE)
y1 <- sample(50:100, 100, replace = TRUE)
y2 <- sample(50:100, 100, replace = TRUE)
mydfN <- data.frame(x1, x2, x3, x4, y1, y2)
# Re-seed, then put the columns of mydfN in random order to get mydf1.
set.seed(25)
mydf1 <- mydfN[sample(colnames(mydfN))]