我是R语言的初学者。我正在尝试运行k-means代码,但是在运行它时出现错误,无法解决。
代码如下:
require(MASS)
require(ggplot2)
set.seed(1234)
set1=mvrnorm(n = 300, c(-4,10), matrix(c(1.5,1,1,1.5),2))
set2=mvrnorm(n = 300, c(5,7), matrix(c(1,2,2,6),2))
set3=mvrnorm(n = 300, c(-1,1), matrix(c(4,0,0,4),2))
set4=mvrnorm(n = 300, c(10,-10), matrix(c(4,0,0,4),2))
set5=mvrnorm(n = 300, c(3,-3), matrix(c(4,0,0,4),2))
DF = data.frame(
rbind(set1,set2,set3,set4,set5),
cluster=as.factor(c(rep(1:5,each=300))))
ggplot(DF,aes(x=X1,y=X2,color=cluster))+geom_point()
kmeans=function(data,K=4,stopcrit=10e-5)
{
#Initialisation of clusters
centroids=data[sample.int(nrow(data),K),]
current_stopcrit=1000
cluster=rep(0,nrow(data))
converged=F
it=1
while(current_stopcrit>=stopcrit & converged==F) {
it=it+1
if (current_stopcrit<=stopcrit) {
converged=T }
old_centroids=centroids
##Assigning each point to a centroid
for (i in 1:nrow(data)) {
min_dist=10e10
for (centroid in 1:nrow(centroids)) {
distance_to_centroid=sum((centroids[centroid,]-data[i,])^2)
if (distance_to_centroid<=min_dist) {
cluster[i]=centroid
min_dist=distance_to_centroid
}
}
}
##Assigning each point to a centroid
for (i in 1:nrow(centroids)) {
centroids[i,]=apply(data[cluster==i,],2,mean)
}
current_stopcrit=mean((old_centroids-centroids)^2)
}
return(list(data=data.frame(data,cluster),centroids=centroids))
}
res=kmeans(DF[1:2],K=5)
res$centroids$cluster=1:5
res$data$isCentroid=F
res$centroids$isCentroid=T
data_plot=rbind(res$centroids,res$data)
ggplot(data_plot,aes(x=X1,y=X2,color=as.factor(cluster),size=isCentroid,alpha=isCentroid))+geom_point()
正在发生的错误:
res=kmeans(DF[1:2],K=5)
Error in while (current_stopcrit >= stopcrit & converged == F) { :
missing value where TRUE/FALSE needed
In addition: Warning message:
In mean.default((old_centroids - centroids)^2) :
argument is not numeric or logical: returning NA
> res$centroids$cluster=1:5
Error in res$centroids$cluster = 1:5 : object 'res' not found
> res$data$isCentroid=F
Error in res$data$isCentroid = F : object 'res' not found
> res$centroids$isCentroid=T
Error in res$centroids$isCentroid = T : object 'res' not found
> data_plot=rbind(res$centroids,res$data)
Error in rbind(res$centroids, res$data) : object 'res' not found
> ggplot(data_plot,aes(x=X1,y=X2,color=as.factor(cluster),size=isCentroid,alpha=isCentroid))+geom_point()
Error in ggplot(data_plot, aes(x = X1, y = X2, color = as.factor(cluster), :
object 'data_plot' not found