Question

我在R中有一个大的相关矩阵结果 - 现在大约有30个项目彼此相关 - 所以这个数组有大约10,000个单元格。我想找到最大的5个和最小的5个结果。我怎么能这样做？

这是一个非常小的部分 - 左上角 - 看起来像：

               PL1         V3          V4         V5
PL1     1.00000000 0.19905701 -0.02994034 -0.1533846
V3      0.19905701 1.00000000  0.09036472  0.1306054
V4     -0.02994034 0.09036472  1.00000000  0.1848030
V5     -0.15338465 0.13060539  0.18480296  1.0000000

表中的值始终介于1和1之间。 -1如果它有帮助，作为一个相关矩阵，对角线上方的上半部分是对角线下半部分的副本。

我需要最小的5小于1和最大的负5包括-1，如果它存在。

提前致谢。

Answer 1

这是另一种粗略的方法（毫无疑问有一种更简单的方法），但是将它包装在一个函数中并不太难：

编辑：缩短了代码。

 # Simulate correlation matrix (taken from Patrick's answer)
set.seed(1)
n<-100
x<-matrix(runif(n^2),n,n)
cor<-cor(x)

# Set diagonal and one triangle to to 0:
diag(cor) <- 0
cor[upper.tri(cor)] <- 0

# Get sorted values:
sort <- sort(cor)

# Create a dummy matrix and get lowest 5:
min <- matrix(cor %in% sort[1:5] ,n,n)
which(min,arr.ind=T)

# Same for highest 5:
max <- matrix(cor %in% sort[(n^2-5):(n^2)] ,n,n)
which(max,arr.ind=T)

正如ulidtko所说，另一种选择是制作图表。您可以尝试我的名为qgraph的程序包，该程序包可用于将关联矩阵可视化为网络：

library(qgraph)
qgraph(cor(x),vsize=2,minimum=0.2,filetype="png")

qgraph output in PNG format

Answer 2

有趣的网络图Sacha。这是真实的数据。似乎我有更强的积极而不是负相关。

enter image description here

Answer 3

您希望找到最大和最小的相关性，并且可能不仅知道什么，而且知道这些值的来源。这很容易。

x<-matrix(runif(25),5,5)
cor<-cor(x)
l <- length(cor)
l1 <- length(cor[cor<1])

#the actual high and low correlation indexes 
corHigh <- order(cor)[(l1-4):l1]
corLow <- order(cor)[1:5]
#(if you just want to view the correlations cor[corLow] or cor[corHigh] works fine)

#isolate them in the matrix so you can see where they came from easily
corHighView <- cor
corHighView[!1:l %in% corHigh] <- NA
corLowView <- cor
corLowView[!1:l %in% corLow] <- NA

#look at your matrix with your target correlations sticking out like a sore thumb
corLowView
corHighView

Answer 4

有点脏：

x<-matrix(runif(25),5,5)
cor<-cor(x)
max1<-max(cor)
max2<-max(cor[cor!=max1])
max3<-max(cor[cor!=max1 & cor!=max2])
max4<-max(cor[cor!=max1& cor!=max2& cor!=max3])
max5<-max(cor[cor!=max1& cor!=max2& cor!=max3& cor!=max4])
max6<-max(cor[cor!=max1& cor!=max2& cor!=max3& cor!=max4& cor!=max5])
maxes<-c(max2,max3,max4,max5,max6)
maxes
matrix(cor %in% maxes,5,5)

Answer 5

一个漂亮的奶油情节怎么样？ :)

> m <- matrix(runif(100)*2-1, ncol=10)
> colnames(m) <- rownames(m) <- paste("V", 1:10, sep="")
> m
             V1          V2         V3         V4         V5         V6           V7           V8         V9         V10
V1  -0.40101571 -0.27049070  0.2414295 -0.1889384  0.6459941 -0.8851884  0.332284597 -0.431312791  0.3828374  0.46398193
V2   0.38557771  0.37083911 -0.3004923  0.1253908 -0.4405188 -0.5424613  0.869493425  0.023291914  0.9625392 -0.83196773
V3   0.61923503 -0.27615909  0.1759168 -0.7333568 -0.4256801 -0.6170807  0.438613391 -0.003632086  0.4113488 -0.40590330
V4   0.72093123  0.68479573  0.5032486  0.3720876 -0.6775834  0.2445693  0.353658359 -0.839104640 -0.8122970 -0.42322187
V5  -0.08640529  0.04432795 -0.5120129 -0.9327905 -0.5821378  0.4671473 -0.367677007  0.483375219 -0.7849003  0.57686729
V6  -0.72451704  0.75814550  0.7838393 -0.7650238  0.6742669  0.2260757  0.001645839  0.570753074  0.1944579  0.07917656
V7   0.64516271  0.51994540  0.9057388 -0.3976167 -0.7403159 -0.2873382 -0.809354444  0.319095368 -0.9766422 -0.71981321
V8  -0.51509049  0.18727837 -0.1971454 -0.4290346  0.5657622  0.5324266  0.451608266 -0.715594335 -0.2749510  0.38234855
V9   0.49035803  0.50252397  0.7736783  0.3342899 -0.2732427  0.1128947  0.870315070 -0.291482237  0.5171181 -0.59784449
V10 -0.51811224 -0.67159723  0.8903813 -0.7562222 -0.9790557 -0.5830560 -0.715136643  0.167987391 -0.0529399  0.44570592

> library(ggplot2)
> p <- ggplot(data=melt(m), aes(x=X1, y=X2, color=value))
> p + geom_point(size=5, alpha=0.7) + scale_color_gradient2()

the plot

我不认为看100x100的情节并用眼睛找到极端值是不难的。 :)

Answer 6

我不相信这一点，只是在the link死亡的情况下张贴代码...在r-help列表中归功于Dimitris。它返回一个 p 顶部相关的列表，涉及每个变量，已排序。

cor.mat <- cor(matrix(rnorm(100*1000), 1000, 100)) 
p <- 30 # how many top items
n <- ncol(cor.mat)
cmat <- col(cor.mat)
ind <- order(-cmat, cor.mat, decreasing = TRUE) - (n * cmat - n)
dim(ind) <- dim(cor.mat)
ind <- ind[seq(2, p + 1), ]
out <- cbind(ID = c(col(ind)), ID2 = c(ind)) 
as.data.frame(cbind(out,cor = cor.mat[out]))

找到最大值5，小于1，最低5个值

6 个答案: