我拼命想用R corrplot解决问题。我有一个小矩阵,我想使用R和corrplot()
函数进行可视化。我使用以下脚本生成我的corrplot:
library(corrplot)
corr_rohdaten<-read.csv(file="path_only.txt", sep="\t", header=TRUE)
M<-cor(corr_rohdaten)
cor.mtest <- function(mat, conf.level = 0.95) {
mat <- as.matrix(mat)
n <- ncol(mat)
p.mat <- lowCI.mat <- uppCI.mat <- matrix(NA, n, n)
diag(p.mat) <- 0
diag(lowCI.mat) <- diag(uppCI.mat) <- 1
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j], conf.level = conf.level)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
lowCI.mat[i, j] <- lowCI.mat[j, i] <- tmp$conf.int[1]
uppCI.mat[i, j] <- uppCI.mat[j, i] <- tmp$conf.int[2]
}
}
return(list(p.mat, lowCI.mat, uppCI.mat))
}
res1 <- cor.mtest(M, 0.95)
res2 <- cor.mtest(M, 0.99)
## specialized the insignificant value according to the significant level
##corrplot(M, p.mat = res1[[1]], sig.level = 0.2)
corrplot(M, p.mat = res1[[1]], insig = "blank", method="color", tl.col="black", type="lower", tl.cex=1.2)
现在的问题是,corrplot显示了很多insig字段,应该根据spss和R是不重要的并被清除。
path_only.txt如下所示:
MyoD9w MyoD52w TNC9w TNC52w OxCI9w OxCI52w OxCII9w OxCII52w OxCIII9w OxCIII52w OxCV9w OxCV52w Myogenin9w Myogenin52w VEGF9w VEGF52w COX4I29w COX4I252w
0.6508 0.3862 0.0888 0.8239 1.3390 4.2471 2.7513 6.1901 4.9440 6.0180 1.1619 0.9240 2.0130 1.5483 1.0000 0.6016 0.7826 0.8244
0.1956 0.3954 0.1959 3.1073 1.3103 2.2127 0.9862 5.4116 1.7748 5.3906 0.7411 1.0165 2.1557 2.6942 1.1199 1.0128 1.4144 1.8681
0.6217 1.0000 0.4912 1.0000 0.4237 2.1208 0.7313 2.7154 0.5653 0.9250 0.9000 0.7145 4.8147 6.3509 1.2985 1.4768 2.2194 1.0000
我的假设是,这里计算或比较p值是错误的,或者它们是四舍五入的。
对于像你这样的人来说,错误在几秒钟之内就能看出来。我花了几个小时在Google上搜索。
我有兴趣解决的另一个问题是:我希望仅显示与p <0.05和r ^ 2&gt;的相关性。 0.7分r ^ 2&lt; 0.7。如果可以在此图表中完成,我会将一些啤酒运送给第一个正确解决此问题的啤酒!
答案 0 :(得分:0)
我认为你想要corr_rohdaten
中列之间相关性的重要性。以下是使用cor.test
从outer
函数中提取统计信息以将测试应用于各种列组合的方法。
## Define a function to operate on combinations of columns,
## vectorized for arguments x and y so we can pass it to `outer`
f <- Vectorize(function(x, y, data, statistic, dim=1, ...)
cor.test(data[,x], data[,y], ...)[[statistic]][[dim]], vec=c("x", "y"))
## Wrap this in a another function to get the p.value and confidence intervals
myCorr <- function(data, conf.level=0.95) {
pvals <- outer(names(data), names(data), f,
data=data, statistic="p.value", # additional arguments to f
conf.level=conf.level) # additional to cor.test
lower <- outer(names(data), names(data), f, data=data, statistic="conf.int")
upper <- outer(names(data), names(data), f, data=data, statistic="conf.int", dim=2)
list(pvals=pvals, lower=lower, upper=upper)
}
## To recreate the plot you have, you would need to test the correlation between
## the columns of the correlation matrix, M. I wasn't sure if this was what you wanted.
pvals2 <- outer(1:ncol(M), 1:ncol(M), f, data=M, statistic="p.value", conf.level=0.95)
## Make corrplot using p.values from test between columns of corr_rohdaten
## The upper and lower confidence intervals will be NULL because there are only
## 3 observations (conf.int requires at least 4)
stats <- myCorr(corr_rohdaten, conf.level = 0.95)
library(corrplot)
corrplot(M, p.mat = stats[["pvals"]], insig = "blank", method="color", tl.col="black",
type="lower", tl.cex=1.2, diag=F)
这不是最有效的方法。它将重新运行相关测试三次,但我发现它相当简单,outer
非常快。
要修复您的功能以重现这些结果非常简单。只需将corr_rohdatem
而不是M
传递给它,并在提取conf.int
之前添加一个检查以获取足够的观察结果。
cor.mtest <- function(mat, conf.level = 0.95) {
mat <- as.matrix(mat)
n <- ncol(mat)
p.mat <- lowCI.mat <- uppCI.mat <- matrix(NA, n, n)
diag(p.mat) <- 0
diag(lowCI.mat) <- diag(uppCI.mat) <- 1
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j], conf.level = conf.level)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
if (dim(mat)[1] > 3) {
lowCI.mat[i, j] <- lowCI.mat[j, i] <- tmp$conf.int[1]
uppCI.mat[i, j] <- uppCI.mat[j, i] <- tmp$conf.int[2]
}
}
}
list(p=p.mat, lower=lowCI.mat, upper=uppCI.mat)
}
res <- cor.mtest(corr_rhodaten)
corrplot(M, p.mat = res[["p"]], insig = "blank", method="color", tl.col="black",
type="lower", tl.cex=1.2, diag=F)