我在igraph中使用了spinglass算法来分析网络中的聚类。由于spinglass使用随机方法,如果我们在一个网络上多次运行算法,结果会有所不同。这当然是可以预料的。
然而,我遇到了一件我无法解释的事情。
当我们在相同的网络结构上运行spinglass 1000次,10次重复(10x1000次)时,复制1-9中大约90%的运行返回2个集群:
但是,复制10中大约90%的运行返回3个集群。
这种行为已在不同的结构中得到复制。模特儿复制(请参阅下面的可重现示例的代码)。
似乎igraph / spinglass有某种顺序(而不是随机)的内部种子计数器,导致后续迭代不相互独立(另见https://lists.nongnu.org/archive/html/igraph-help/2010-02/msg00031.html)。
你知道这可能是什么吗?
即使spinglass使用这样的种子,考虑到网络的明确聚类,为什么特定种子会导致如此奇怪(而且不太可能)的结果呢?
感谢大家的时间和帮助!
下面你可以找到运行相同类型的迭代估计的代码(请注意,由于估计的数量,运行大约需要40分钟 - 1小时;我们需要这么多的迭代和估计,因为奇怪的结果只会出现在几次之后,即1000次估计的10次迭代)。除了运行迭代的spinglass估计之外,此代码还重新关联cluster-node-membership以使这些成员在不同的估计中具有可比性(因为spinglass可以在一次估算中将1分配给一个集群,但在后续估算中将其标记为2)。 p>
# create network
net <- matrix(0,18,18)
net[1:9,1:9] <- 0.1
net[10:18,10:18] <- 0.1
net[1,10] <- net[10,1] <- 0.1
net[2,11] <- net[11,2] <- 0.1
diag(net) <- 0
# Draw:
library("qgraph")
qgraph(net, layout = "spring", title = "True network", esize = 2, vsize = 5)
# Generate data and bootstrap
library("bootnet")
# Setseed for simulation of network
#set.seed(1)
# setwd!
setwd("C:/")
Data <- ggmGenerator()(500, net)
n1 <- qgraph(cor_auto(Data), graph="EBICglasso", sampleSize=500,
layout="spring")
# SPINGLASS PLOTTING FUNCTION
# the number of spins is now equal to the number of nodes, which is logical
spinComRec <- function(graphQgraph, # qgraph object
numberSpins = graphQgraph$graphAttributes$Graph$nNodes,
numberIterations = 10,
numberEstimations = 1000
){
library(igraph)
graphIgraph <- as.igraph(graphQgraph)
for (yIt in 1:numberIterations){
# Repeated estimation with stable number of spins
#set.seed(3+yIt)
clusterStabilitySpin <- list()
for (i in 1:numberEstimations){
#set.seed(3+i)
clusterStabilitySpin[[i]] <- cluster_spinglass(graphIgraph,
weights = E(graphIgraph)$weight,
spins = numberSpins,
start.temp = 1,
stop.temp = 0.01,
cool.fact = 0.99) # default settings
# we do not specify gamma but it should be 1 which = missing/non-missing equally important
# also, they weights are taken into consideration
# we do not use the version that takes negative edges into account as
# indicating edges between different communities because that is not
# logical for psychological networks
}
# how many clusters were estimated in each estimation?
clusterStabilitySpinNumber <- c()
for (i in 1:length(clusterStabilitySpin)){
clusterStabilitySpinNumber[i] <- length(clusterStabilitySpin[[i]])
}
### Spinglass Recoding
# Checking whether algorithm switches community-node assignment
# (within one and the same spin number)
# compare each node membership with each other node membership
testVector <- c()
testMatrix <- matrix(ncol = graphQgraph$graphAttributes$Graph$nNodes, nrow = graphQgraph$graphAttributes$Graph$nNodes)
testList <- list()
for (k in length(clusterStabilitySpin):1){
for (j in length(clusterStabilitySpin[[k]]$membership):1){
for (i in length(clusterStabilitySpin[[k]]$membership):1){
testVector[i] <- clusterStabilitySpin[[k]]$membership[j] == clusterStabilitySpin[[k]]$membership[i]
}
testMatrix[,j] <- testVector
}
testList[[k]] <- testMatrix
}
# testlist represents each node compared to all others (logical values
# indicating whether nodes belong to the same cluster)
newMatrix <- list()
for (l in 1:length(testList)){
newMatrix[[l]] <- unique(testList[[l]], MARGIN = 2)
}
# now we took all unique logical vectors (we only have vectors that
# logically indicate clusters) - each column in newmatrix represents one cluster
# newMatrix holds all unique vectors
# the following substitutes each TRUE for its respective column number for each
# list element in newMatrix and thereby recodes membership labels
membershipVector <- c()
membershipMatrix <- matrix(nrow = graphQgraph$graphAttributes$Graph$nNodes, ncol = length(clusterStabilitySpin))
for (m in 1:length(newMatrix)){
for (n in 1:ncol(newMatrix[[m]])){
for (o in 1:length(which(newMatrix[[m]][,n]))){
membershipVector[which(newMatrix[[m]][,n])[o]] <- n
}
}
membershipMatrix[,m] <- membershipVector
}
# Loop that compares all logical matrices to all matrices
identMatrix <- matrix(ncol = length(newMatrix), nrow = length(newMatrix))
for (i in 1:length(newMatrix)){
for (j in 1:length(newMatrix)){
identMatrix[i,j] <- identical(newMatrix[[i]], newMatrix[[j]])
}
}
uniqIdentMatrix <- unique(identMatrix, MARGIN = 2)
# extracting unique comparisons (i.e., matrices that are equal to each other
# (indicated by the row number)
# See how many comparable to each other
equalMatrixList <- list()
for (k in 1:ncol(uniqIdentMatrix)){
equalMatrixList[[k]] <- which(uniqIdentMatrix[,k]==TRUE)
}
# equalMatrixList # each list element lists the indexes of those matrices that are equal to each other
# sapply(equalMatrixList, length)
# which(sapply(equalMatrixList, length)==max(sapply(equalMatrixList, length)))
# for each of the unique memberships we get the frequency and the one with the most frequent est
uniqueMembershipVectors <- matrix(nrow = nrow(membershipMatrix), ncol = length(equalMatrixList))
for (i in 1:length(equalMatrixList)){
uniqueMembershipVectors[,i] <- as.matrix(membershipMatrix[,equalMatrixList[[i]]])[,1]
}
# order the membership estimations according to frequency
equalMatrixList <- equalMatrixList[order(sapply(equalMatrixList,length),decreasing=T)]
# run one spinglass estimation to use as object to put in the new membershipvectors
communitySGPlot1 <- cluster_spinglass(graphIgraph,
weights = E(graphIgraph)$weight,
spins = 2)
# create a pdf for each iteration with a network on each page
# the network shows the communities, the number of communities, and the proportion
# of all estimations in which this constellation has come up
# the plotting starts with the estimation that has come up most frequently
lalaName <- paste("AllSpinglassCommunities", yIt, ".pdf", sep = "")
pdf(lalaName, width=10, height=10)
for (i in 1:ncol(uniqueMembershipVectors)){
communitySGPlot1$membership <- uniqueMembershipVectors[,i]
plot(communitySGPlot1, graphIgraph, layout = graphQgraph$layout)
text(1,1,labels = length(unique(uniqueMembershipVectors[,i])))
text(1,1.1, labels = (sapply(equalMatrixList, length)[i])/length(clusterStabilitySpin))
}
dev.off()
# same as above but next to each other on one page (i.e., in groups of 5)
lalaName <- paste("AllSpinglassCommunitiesOnepage", yIt, ".pdf", sep = "")
pdf(lalaName, width = 200, height = 200)
par(mfrow = c(5,5))
for (i in 1:ncol(uniqueMembershipVectors)){
communitySGPlot1$membership <- uniqueMembershipVectors[,i]
plot(communitySGPlot1, graphIgraph, layout = graphQgraph$layout)
text(1,1,labels = length(unique(uniqueMembershipVectors[,i])))
text(1,1.1, labels = (sapply(equalMatrixList, length)[i])/length(clusterStabilitySpin))
}
dev.off()
}
}
spinComRec(n1, numberIterations = 10, numberEstimations = 1000)