我正在搜索一个包来计算两个聚类之间调整后的互信息。我只通过谷歌发现了一些python代码,有没有人知道R的包?
此致
答案 0 :(得分:4)
这是一个链接 https://github.com/defleury/adjusted_mutual_information
它说:“包含用于在R中快速、并行地计算聚类的调整互信息(AMI)、归一化互信息(NMI)和调整兰德指数(ARI)的代码。”
对于小规模的聚类,这里有3个函数。函数f_rez()以2个向量作为输入(向量中的数字表示对应元素所属的分区),并返回NMI和AMI。
在双核机器上不使用并行计算时,处理3对长度为N = 11.117的聚类大约需要30秒。
# Build the contingency table n(i, j) of two label vectors.
#
# Arguments:
#   v1, v2  Label vectors of equal length; labels must be whole numbers in
#           1..l1 (for v1) and 1..l2 (for v2).
#   l1, l2  Number of rows and columns of the resulting table.
#
# Returns an l1 x l2 numeric (double) matrix whose cell (i, j) counts the
# positions k with v1[k] == i and v2[k] == j.
#
# Stops with an error when the vectors differ in length or when a label is
# NA or outside its valid range, instead of silently dropping such pairs.
f_nij <- function(v1, v2, l1, l2) {
  if (length(v1) != length(v2)) {
    stop("v1 and v2 must have the same length", call. = FALSE)
  }
  if (length(v1) == 0L) {
    return(matrix(0, l1, l2))
  }

  v1 <- as.integer(v1)
  v2 <- as.integer(v2)
  valid <- !is.na(v1) & !is.na(v2) &
    v1 >= 1L & v1 <= l1 &
    v2 >= 1L & v2 <= l2
  if (!all(valid)) {
    stop("labels must be integers in 1..l1 (v1) and 1..l2 (v2)", call. = FALSE)
  }

  # Column-major linear index of cell (v1[k], v2[k]); counting these indices
  # in one pass replaces the element-by-element loop.
  cell <- (v2 - 1L) * l1 + v1
  matrix(as.numeric(tabulate(cell, nbins = l1 * l2)), nrow = l1, ncol = l2)
}
# Expected mutual information (EMI) of two clusterings under the
# hypergeometric model of randomness, in nats.
#
# Arguments:
#   s1, s2  Cluster sizes (marginal counts) of the two clusterings.
#   l1, l2  Number of entries of s1 and s2 to use.
#   n       Total number of clustered elements.
#
# Returns a single numeric value: the sum over all cluster pairs (i, j) and
# all feasible overlap sizes n_ij of
#   (n_ij / n) * log(n_ij * n / (a_i * b_j)) * P(n_ij),
# where P is the hypergeometric probability, evaluated via log-factorials.
#
# Empty clusters (size 0, e.g. from gaps in the label numbering) contribute
# nothing and are skipped.
f_emi <- function(s1, s2, l1, l2, n) {
  # Work in double precision: with integer inputs n_ij * n and a * b can
  # exceed .Machine$integer.max and turn into NA.
  s1 <- as.numeric(s1)
  s2 <- as.numeric(s2)
  n <- as.numeric(n)

  lfact_n <- lfactorial(n)
  s_emi <- 0

  for (i in seq_len(l1)) {
    a <- s1[i]
    if (a <= 0) {
      next
    }
    for (j in seq_len(l2)) {
      b <- s2[j]
      if (b <= 0) {
        next
      }

      lo <- max(1, a + b - n)
      hi <- min(a, b)
      if (lo > hi) {
        # No feasible overlap size; seq(lo, hi) would count backwards.
        next
      }
      n_ij <- seq(lo, hi)

      # Mutual-information term for each feasible overlap size.
      t1 <- (n_ij / n) * log((n_ij * n) / (a * b))
      # Hypergeometric probability of each overlap size.
      t2 <- exp(
        lfactorial(a) + lfactorial(b) +
          lfactorial(n - a) + lfactorial(n - b) -
          lfact_n - lfactorial(n_ij) -
          lfactorial(a - n_ij) - lfactorial(b - n_ij) -
          lfactorial(n - a - b + n_ij)
      )

      s_emi <- s_emi + sum(t1 * t2)
    }
  }

  s_emi
}
# Normalized and adjusted mutual information of two clusterings.
#
# Arguments:
#   v1, v2  Label vectors of equal length; element k holds the cluster
#           number (positive integer) of item k in each clustering.
#
# Returns a numeric vector of length 2: c(NMI, AMI), both normalized by
# max(H(v1), H(v2)). When both clusterings consist of a single cluster the
# normalizer is 0 and the result is not finite.
#
# Requires the 'infotheo' package for the mutual-information estimate; it is
# called through its namespace so the caller's search path is not modified.
f_rez <- function(v1, v2) {
  if (length(v1) != length(v2)) {
    stop("v1 and v2 must have the same length", call. = FALSE)
  }

  s1 <- tabulate(v1)
  s2 <- tabulate(v2)
  N <- length(v1)

  # Entropy (nats) from cluster sizes; unused labels have a count of 0 and
  # are dropped so that 0 * log(0) does not produce NaN.
  entropy <- function(counts) {
    p <- counts[counts > 0] / N
    -sum(p * log(p))
  }
  h_max <- max(entropy(s1), entropy(s2))

  mi <- infotheo::mutinformation(v1, v2) # mutual information
  emi <- f_emi(s1, s2, length(s1), length(s2), N) # expected MI

  nmi <- mi / h_max # NMI: normalized MI
  ami <- (mi - emi) / h_max # AMI: adjusted MI
  c(nmi, ami)
}