我有一个由 62 个变量(分类变量和数值变量)和 60,000 个观测值组成的数据集。我想根据为预测变量计算的距离矩阵进行图(网络)分析。由于所有被评估的单位两两之间都存在距离,因此每个节点的边数(度)都相同。我只想保留那些距离最短(关系最强)的单位之间的连接。我怎样才能做到这一点?有什么方法可以对距离矩阵加权,并将其转换为二值邻接矩阵吗?
可复现的示例:
library(RColorBrewer)
library(igraph)
# Simulate a small reproducible data set: three standard-normal predictors
# (x1-x3) and three 0/1 indicator columns (x4-x6), one row per unit.
size <- 200
set.seed(1)
df <- data.frame(
  x1 = rnorm(size),
  x2 = rnorm(size),
  x3 = rnorm(size),
  x4 = sample(c(1, 0), size, replace = TRUE),
  x5 = sample(c(1, 0), size, replace = TRUE),
  x6 = sample(c(1, 0), size, replace = TRUE)
)
# Drawn right after the predictors so the random stream is consumed in the
# same order; it is not included in df.
y <- rnorm(size)
# Label every unit "ID_1", "ID_2", ... so rows can be referenced by name.
rownames(df) <- paste0("ID_", seq_len(size))
# Pairwise Euclidean distances between all units (rows of df).
d <- dist(df, method = "euclidean")
# Long-format edge list with one row per unordered pair of units; the pair
# order produced by combn() lines up with the order of the values in d.
m <- setNames(
  data.frame(t(combn(rownames(df), 2)), as.numeric(d)),
  c("c1", "c2", "distance")
)
# Vertex table: unit id plus a random group label (A-E). It is kept as an
# unnamed matrix on purpose: the label column is later read back as the
# vertex attribute "V2".
n <- cbind(
  paste0("ID_", seq_len(size)),
  sample(LETTERS[1:5], size, replace = TRUE)
)
# Undirected graph in which every pair of units is connected by an edge that
# carries its "distance" as an edge attribute.
unities <- graph_from_data_frame(d = m, directed = FALSE, vertices = n)
# Plot the graph with vertices coloured by their group label (attribute V2).
# The seed is fixed so the force-directed layout is reproducible.
set.seed(1001)
# Number of edges attached to each vertex, used to scale vertex size.
unities_deg <- degree(unities, mode = "all")
# One colour per distinct group label.
pal <- brewer.pal(length(unique(V(unities)$V2)), "Set3")
plot(
  unities,
  edge.color = "black",
  vertex.label.cex = 0.5,
  vertex.color = pal[as.integer(as.factor(vertex_attr(unities, "V2")))],
  vertex.size = sqrt(unities_deg) / 3,
  # The edge list names its numeric column "distance", so that is the edge
  # attribute to read here; E(unities)$weight does not exist and would yield
  # a zero-length width vector.
  edge.width = sqrt(E(unities)$distance / 800),
  # layout_with_fr is the current name of the Fruchterman-Reingold layout
  # (layout.fruchterman.reingold is the deprecated alias).
  layout = layout_with_fr
)