根据 walktrap 聚类结果创建新图,并以连接各组的客户端作为边

时间:2018-11-26 09:41:20

标签: r cluster-analysis igraph

library(network)
library(networkD3)
library(igraph)
library(visNetwork)

# Edge list of the example network: each row is one src -> target link
# between two clients.
df <- read.table(header = TRUE,
                 text = "src   target
                 cllient1  cllient2
                 cllient1  cllient4
                 cllient1  cllient6
                 cllient2  cllient3
                 cllient4  cllient1
                 cllient4  cllient3
                 cllient5  cllient6
                 cllient6  cllient5")

# Build an igraph graph from the edge list and draw the raw edge list with
# simpleNetwork().
df_graph <- graph_from_data_frame(df)
# TRUE is spelled out: T is an ordinary variable that can be reassigned.
simpleNetwork(df, zoom = TRUE, fontSize = 9)

enter image description here

# Detect communities with the walktrap algorithm and read off, for every
# vertex, the community it was assigned to.
wc <- cluster_walktrap(df_graph)

members <- membership(wc)

# Merge all vertices of one community into a single vertex; vertex attributes
# of the merged vertices are combined with toString().
df_graph_cntrctd <- contract(df_graph, members, vertex.attr.comb = toString)

df_graph_cntrctd <- as.undirected(df_graph_cntrctd)

# Flatten the contracted graph into a data frame with one row per edge.
df_graph_cntrctd <- as_long_data_frame(df_graph_cntrctd)

# Columns are picked by position: column 2 is "to"; column 4 is presumably the
# combined client names of the "to" vertex (the original code addressed it by
# the generated name "ver2[el[, 2], ]") -- TODO confirm on the installed igraph.
idLabel <- df_graph_cntrctd[, c(2, 4)]

# De-duplicate on the selected columns themselves. Looking the second column
# up by its generated name "ver2[el[, 2], ]" fails as soon as
# as_long_data_frame() names that column differently.
idLabel <- idLabel[!duplicated(idLabel), ]

colnames(idLabel) <- c("id", "title")
idLabel[["label"]] <- idLabel$id

# Unique (from, to) pairs of contracted vertices become the edges.
FromTo <- df_graph_cntrctd[, c(1, 2)]
FromTo <- FromTo[!duplicated(FromTo[c("from", "to")]), ]

# NOTE(review): nodes are derived only from the "to" side of the edges, so a
# community that never appears as a target would be missing from `nodes`.
nodes <- data.frame(id = idLabel$id,
                    label = idLabel$label,
                    title = idLabel$title)

edges <- data.frame(from = FromTo$from, to = FromTo$to)


network <- visNetwork(nodes, edges, width = "100%", height = 900) %>%
  visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE)

enter image description here 到目前为止,我们已经建立了一个网络:)并且有可能获得客户端列表,通过该列表,walktrap创建的组之间将具有连接。这个想法是要向边缘展示那些客户。创建的是下面的代码行,这些代码行以降序显示所有连接的列表

# Name every vertex after its community so the edge list below is expressed
# in community ids.
V(df_graph)$name <- members
x <- as_edgelist(df_graph, names = TRUE)
# Rename the vertices to their numeric ids; seq_len() stays correct even when
# the graph has no vertices, unlike 1:vcount().
V(df_graph)$name <- seq_len(vcount(df_graph))
# Keep only the edges whose two ends were assigned to different communities.
E(df_graph)[x[, 1] != x[, 2]]

结果我们得到

+ 1/8 edge from c92bcba (vertex names):
[1] 1->5

据我理解,这表示标签为“1”的组与标签为“2”的组,是通过 ID 为 1 的“client1”和 ID 为 5 的“client6”相连的。我的问题是:如何得到类似下面这样的结果表:

  from  to  label
    1   1   NA
    1   2   Client1,Client6
    2   2   NA

其中 from 和 to 是由聚类分析得到的组的名称,而 Client1 和 Client6 是连接这两个组的客户端

enter image description here

2 个答案:

答案 0 :(得分:1)

这有效,但不太优雅:

# Example edge list (src -> target), parsed from inline text.
df <- read.table(
  text = "src   target
             cllient1  cllient2
             cllient1  cllient4
             cllient1  cllient6
             cllient2  cllient3
             cllient4  cllient1
             cllient4  cllient3
             cllient5  cllient6
             cllient6  cllient5",
  header = TRUE
)

# Graph built from the edge list, plus an untouched copy whose vertex names
# are needed again later.
df_graph <- graph_from_data_frame(df)
df_graph0 <- df_graph

# Walktrap communities; each vertex is then renamed to its community id.
wc <- cluster_walktrap(df_graph)
V(df_graph)$name <- membership(wc)

根据社区成员关系获取边列表,这对应您所要求的 from 和 to 两列:

# Edge list taken while the vertex names hold the community membership.
# TRUE is spelled out because T is an ordinary, reassignable variable.
x <- as_edgelist(df_graph, names = TRUE)
# End points of every edge, expressed through the current vertex names.
communities <- ends(df_graph, E(df_graph))

将名称重置为顶点ID(不要求,但可能有用)

# Rename the vertices to their numeric ids; seq_len() stays correct even when
# the graph has no vertices, unlike 1:vcount().
V(df_graph)$name <- seq_len(vcount(df_graph))
# End points of every edge, expressed through those numeric ids.
ids <- ends(df_graph, E(df_graph))

设置对应于客户端(label)的顶点名称

# Copy the vertex names of the untouched graph back onto the working graph,
# then read the end points of every edge through those names.
V(df_graph)$name <- vertex_attr(df_graph0, "name")
label <- ends(df_graph, es = E(df_graph), names = TRUE)

存储在dataframe

# One row per edge: the two end points taken from `communities`, `label`
# and `ids` respectively.
df_result <- data.frame(
  from = communities[, 1],
  to = communities[, 2],
  label1 = label[, 1],
  label2 = label[, 2],
  ids1 = ids[, 1],
  ids2 = ids[, 2]
)

这将导致以下结果:

  from to   label1   label2 ids1 ids2
1    1  1 cllient1 cllient2    1    2
2    1  1 cllient1 cllient4    1    3
3    1  2 cllient1 cllient6    1    5
4    1  1 cllient2 cllient3    2    6
5    1  1 cllient4 cllient1    3    1
6    1  1 cllient4 cllient3    3    6
7    2  2 cllient5 cllient6    4    5
8    2  2 cllient6 cllient5    5    4

此外,您可以把 label1 和 label2 粘贴(paste)在一起,得到一个以逗号分隔的 label 列。

编辑:为了“收缩”标签,您可以执行以下操作:

 library(tidyr) 
 library(dplyr) 
 # Combine both end-point names into one comma-separated label per edge.
 df_result$label <- paste(df_result$label1, df_result$label2, sep = ",")
 # Gather the labels of every (from, to) pair into a list-column named "data".
 # The nested column is named explicitly because the unnamed nest(-from, -to)
 # form is deprecated in tidyr >= 1.0.0.
 df_nested <- df_result %>%
   select(from, to, label) %>%
   nest(data = label)

要将这些嵌套标签用作边缘标签或字符串,请将它们粘贴在一起:

 # Collapse each nested label table into one space-separated string.
 # vapply() over the list-column always yields a character vector and, unlike
 # iterating over 1:nrow(df_nested), is safe when df_nested has zero rows.
 df_nested$data <- vapply(
   df_nested$data,
   function(labels) paste(unlist(labels), collapse = " "),
   character(1)
 )

答案 1 :(得分:0)

使用Ben Nutzer的代码:

# Comma-separated pair of end-point names for every edge.
df_result[["label"]] <- paste(
  df_result[["label1"]],
  df_result[["label2"]],
  sep = ","
)

使用此功能,我们可能会发现社区之间有多少联系:

library(plyr)
# Number of edges for every (from, to) pair of communities.
ddply(df_result, c("from", "to"), nrow)

并获得:

  from to V1
1    1  1  5
2    1  2  1
3    2  2  2

这告诉我们组之间只有一个联系