Question

我的数据：

# Membership labels: one row per clustering method, one column per item.
data <- cbind(
  c(1, 1, 2, 1, 1, 3),
  c(1, 1, 2, 1, 1, 1),
  c(2, 2, 1, 2, 1, 2)
)
dimnames(data) <- list(paste("method", 1:6), paste("item", 1:3))

我希望根据多数投票得到两个社区（及其各自包含的元素）作为输出。例如：group1={item1, item2}，group2={item3}。

Answer 1

您可以试试下面这种只用基础 R（base R）的做法：

# For every item (column of `data`), keep the label that occurs most often
# across the methods (rows). `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
res <- apply(data, 2, function(u) {
  as.numeric(names(sort(table(u), decreasing = TRUE))[1])
})

# Gather the item names under each winning label; the list is named by label,
# in order of first appearance.
winners <- unique(res)
setNames(lapply(winners, function(u) names(res)[res == u]), winners)
#$`1`
#[1] "item 1" "item 2"

#$`2`
#[1] "item 3"

Answer 2

该函数接收一个矩阵：每一列代表一个项目，每一行是某种聚类方法对这些项目所做划分的成员标签向量。每行中的数字只用于标示成员归属，本身没有其他含义；同一数字可以在不同行中重复使用，但不同行之间的编号并不对应。函数返回按多数投票得到的划分。当某个项目无法达成共识时，以第一行给出的划分为准；因此可以事先对各行排序（例如按模块度从高到低），使排在最前面的划分在没有共识时优先。

    #' Majority-vote consensus of several partitions of the same items
    #'
    #' Each row of `data` is turned into a list of groups of item names, ordered
    #' by the first appearance of each label in the row. Only the rows whose
    #' number of groups equals the most frequent number of groups are kept.
    #' Group `i` of the result starts from the items that every kept row puts in
    #' its `i`-th group and is extended with the items that a strict majority of
    #' the kept rows put there. Items left without a group are placed according
    #' to the first kept row.
    #'
    #' @param data Matrix of membership labels with one row per clustering
    #'   method and one named column per item. Labels only indicate membership
    #'   within a row; they must not be missing.
    #' @return A list with two elements: `group`, a list of character vectors
    #'   (the item names of each consensus group), and `unclassified`, the item
    #'   names for which no majority existed and the first kept row decided.
    consensus.final <- function(data) {
      if (length(dim(data)) != 2L || nrow(data) < 1L || ncol(data) < 1L) {
        stop("`data` must be a matrix with at least one row and one column",
             call. = FALSE)
      }
      item_names <- colnames(data)
      if (is.null(item_names)) {
        stop("`data` must have column names identifying the items",
             call. = FALSE)
      }
      if (anyNA(data)) {
        stop("`data` must not contain missing membership labels",
             call. = FALSE)
      }

      # One membership vector -> list of groups (item names), ordered by the
      # first appearance of each label.
      as_groups <- function(labels) {
        unique(lapply(seq_along(labels), function(j) {
          item_names[which(labels == labels[j])]
        }))
      }

      partitions <- lapply(seq_len(nrow(data)), function(i) {
        as_groups(as.numeric(data[i, ]))
      })

      # Mode of the number of groups found by each method; methods that found
      # a different number of groups are dropped.
      n_groups <- vapply(partitions, length, integer(1))
      consensus_n <- as.numeric(
        names(sort(table(n_groups), decreasing = TRUE))[1]
      )
      partitions <- partitions[n_groups == consensus_n]

      # Consensus for group position `i`: the unanimous core plus every other
      # candidate that more than half of the kept methods put in that group.
      vote_group <- function(i) {
        candidates <- lapply(partitions, function(p) p[[i]])
        core <- Reduce(intersect, candidates)
        extras <- unlist(lapply(candidates, setdiff, core))
        votes <- vapply(extras, function(item) {
          sum(vapply(candidates, function(m) item %in% m, logical(1)))
        }, integer(1), USE.NAMES = FALSE)
        unique(c(core, extras[votes > length(candidates) / 2]))
      }
      group <- lapply(seq_len(consensus_n), vote_group)

      # Items for which no consensus was reached follow the first kept
      # partition (this allows ordering the rows by decreasing modularity,
      # for instance).
      unclassified <- setdiff(item_names, unlist(group))
      for (item in unclassified) {
        home <- which(
          vapply(partitions[[1]], function(m) item %in% m, logical(1))
        )
        group[[home]] <- c(group[[home]], item)
      }

      # Returned visibly so that a bare call prints its result.
      list(group = group, unclassified = unclassified)
    }

一些例子：

# Example 1: four of the six methods split the items as {1, 2} versus {3}.
data <- cbind(
  c(1, 1, 2, 1, 1, 3),
  c(1, 1, 2, 1, 1, 1),
  c(2, 2, 1, 2, 1, 2)
)
dimnames(data) <- list(paste("method", 1:6), paste("item", 1:3))
data
consensus.final(data)$group

[[1]]
[1] "item 1" "item 2"

[[2]]
[1] "item 3"

# Example 2: four of the six methods put all three items in a single group.
data <- cbind(
  c(1, 1, 1, 1, 1, 3),
  c(1, 1, 1, 1, 1, 1),
  c(1, 1, 1, 2, 1, 2)
)
dimnames(data) <- list(paste("method", 1:6), paste("item", 1:3))
data
consensus.final(data)$group

[[1]]
[1] "item 1" "item 2" "item 3"

# Example 3: every method separates all three items, each with its own
# numbering of the groups.
data <- cbind(
  c(1, 3, 2, 1),
  c(2, 2, 3, 3),
  c(3, 1, 1, 2)
)
dimnames(data) <- list(paste("method", 1:4), paste("item", 1:3))
data
consensus.final(data)$group

[[1]]
[1] "item 1"

[[2]]
[1] "item 2"

[[3]]
[1] "item 3"

在R中获得多个分区方法的一致意见

2 个答案: