我的数据:
# Sample membership matrix: 6 clustering methods (rows) x 3 items (columns).
# Values are filled column by column, one column per item.
data <- matrix(
  c(
    1, 1, 2, 1, 1, 3,
    1, 1, 2, 1, 1, 1,
    2, 2, 1, 2, 1, 2
  ),
  nrow = 6,
  dimnames = list(paste("method", 1:6), paste("item", 1:3))
)
我希望输出是根据多数投票得到的两个社区(及其各自的元素)。例如:group1={item1, item2},group2={item3}。
答案 0 :(得分:1)
您可以尝试以下基于 base R 的做法:
# Most frequent label of each item (column of `data`) across the methods
# (rows). Raw label values are tabulated, so this approach treats a given
# label as meaning the same cluster in every row.
res <- apply(data, 2, function(u) {
  # `TRUE` rather than `T`: `T` is an ordinary variable and can be reassigned.
  as.numeric(names(sort(table(u), decreasing = TRUE))[1])
})
# Collect the item names under each distinct majority label; the list is
# named by those labels.
setNames(
  lapply(unique(res), function(u) names(res)[res == u]),
  unique(res)
)
#$`1`
#[1] "item 1" "item 2"
#$`2`
#[1] "item 3"
答案 1 :(得分:1)
该函数接收一个矩阵,其中每一列代表一个项目,每一行是某种聚类方法给出的划分所对应的成员标签向量。每一行中的元素(数字)仅用于指示成员归属,本身没有其他含义,并且同样的数字会在不同行中重复使用。该函数返回多数投票得到的划分。当某个项目无法达成共识时,以第一行给出的划分为准。例如,这样就可以把各划分(行)按模块度从高到低排序,让模块度最高的划分在平局时胜出。
# Majority-vote consensus partition across several clustering methods.
#
# Arguments:
#   data  Matrix with one row per method and one column per item; entry
#         [i, j] is the cluster label method i gives to item j. Labels are
#         only compared within a row, so their actual values carry no
#         meaning. Column names identify the items and are required.
#
# Value: a list (returned visibly) with
#   group         list of character vectors, one per consensus group;
#   unclassified  items for which no strict majority existed and which were
#                 placed using the first retained method's partition (they
#                 also appear in `group`).
#
# Raises an error when `data` has no dimensions, no rows, or no column names.
consensus.final <-
  function(data) {
    if (is.null(dim(data)) || nrow(data) < 1L) {
      stop("`data` must be a matrix with at least one row (one per method)",
           call. = FALSE)
    }
    items <- colnames(data)
    if (is.null(items)) {
      stop("`data` must have column names identifying the items",
           call. = FALSE)
    }

    # Convert one membership vector into a list of groups (character vectors
    # of item names). Groups come out in order of first appearance along the
    # columns, which is what lets the i-th groups of different methods be
    # compared with each other below.
    as_groups <- function(membership) {
      unique(lapply(seq_along(membership), function(j) {
        items[which(membership == membership[j])]
      }))
    }
    partitions <- lapply(seq_len(nrow(data)), function(i) {
      as_groups(as.numeric(data[i, ]))
    })

    # Mode of the number of groups found by each method; only the methods
    # that found exactly that many groups take part in the vote.
    sizes <- vapply(partitions, length, integer(1))
    n_groups <- as.numeric(names(sort(table(sizes), decreasing = TRUE))[1])
    partitions <- partitions[sizes == n_groups]
    n_methods <- length(partitions)

    # Does `item` belong to group `g`?
    is_member <- function(item, g) item %in% g

    # For each group position: start from the items every method agrees on,
    # then add the items that a strict majority of methods place there.
    groups <- lapply(seq_len(n_groups), function(i) {
      candidates <- lapply(partitions, function(p) unlist(p[i]))
      core <- Reduce(intersect, candidates)
      extras <- setdiff(unlist(candidates), core)
      votes <- vapply(extras, function(item) {
        sum(vapply(candidates, function(g) is_member(item, g), logical(1)))
      }, numeric(1), USE.NAMES = FALSE)
      unique(c(core, extras[votes > n_methods / 2]))
    })

    # Items without a majority fall back to the partition of the first
    # retained method (this allows ordering the rows of `data` by decreasing
    # modularity, for instance, so the best partition wins ties).
    unclassified <- setdiff(items, unlist(groups))
    for (item in unclassified) {
      home <- which(vapply(partitions[[1]], function(g) is_member(item, g),
                           logical(1)))
      groups[[home]] <- c(groups[[home]], item)
    }

    list(group = groups, unclassified = unclassified)
  }
一些例子:
# Example 1: four of the six methods split the items into
# {item 1, item 2} and {item 3}, so that two-group partition is the consensus.
data <- cbind(c(1, 1, 2, 1, 1, 3), c(1, 1, 2, 1, 1, 1), c(2, 2, 1, 2, 1, 2))
colnames(data) <- paste("item", 1:3)
rownames(data) <- paste("method", 1:6)
data
consensus.final(data)$group
# [[1]]
# [1] "item 1" "item 2"
# [[2]]
# [1] "item 3"
# Example 2: most methods put all three items in a single group, so the
# consensus has one group.
data <- cbind(c(1, 1, 1, 1, 1, 3), c(1, 1, 1, 1, 1, 1), c(1, 1, 1, 2, 1, 2))
colnames(data) <- paste("item", 1:3)
rownames(data) <- paste("method", 1:6)
data
consensus.final(data)$group
# [[1]]
# [1] "item 1" "item 2" "item 3"
# Example 3: every method places each item in its own group (the label values
# differ between rows, but only equality within a row matters).
data <- cbind(c(1, 3, 2, 1), c(2, 2, 3, 3), c(3, 1, 1, 2))
colnames(data) <- paste("item", 1:3)
rownames(data) <- paste("method", 1:4)
data
consensus.final(data)$group
# [[1]]
# [1] "item 1"
# [[2]]
# [1] "item 2"
# [[3]]
# [1] "item 3"