在R中运行K-均值聚类时出错

时间:2020-05-16 18:06:27

标签: r ggplot2

我是R语言的初学者。我正在尝试运行k-means代码,但是在运行它时出现错误,无法解决。

代码如下:

require(MASS)
require(ggplot2)

set.seed(1234)

set1=mvrnorm(n = 300, c(-4,10), matrix(c(1.5,1,1,1.5),2))
set2=mvrnorm(n = 300, c(5,7), matrix(c(1,2,2,6),2))
set3=mvrnorm(n = 300, c(-1,1), matrix(c(4,0,0,4),2))
set4=mvrnorm(n = 300, c(10,-10), matrix(c(4,0,0,4),2))
set5=mvrnorm(n = 300, c(3,-3), matrix(c(4,0,0,4),2))

DF = data.frame(
     rbind(set1,set2,set3,set4,set5),
    cluster=as.factor(c(rep(1:5,each=300))))

ggplot(DF,aes(x=X1,y=X2,color=cluster))+geom_point()

 kmeans=function(data,K=4,stopcrit=10e-5)
 {

   #Initialisation of clusters
   centroids=data[sample.int(nrow(data),K),]
   current_stopcrit=1000
   cluster=rep(0,nrow(data))
   converged=F
   it=1
   while(current_stopcrit>=stopcrit & converged==F)  {
     it=it+1
     if (current_stopcrit<=stopcrit)     {
       converged=T     }
     old_centroids=centroids

     ##Assigning each point to a centroid
     for (i in 1:nrow(data)) {
       min_dist=10e10
       for (centroid in 1:nrow(centroids))  {
         distance_to_centroid=sum((centroids[centroid,]-data[i,])^2)
         if (distance_to_centroid<=min_dist)  {
           cluster[i]=centroid
           min_dist=distance_to_centroid
         }
       }
     }

     ##Assigning each point to a centroid
     for (i in 1:nrow(centroids))   {
       centroids[i,]=apply(data[cluster==i,],2,mean)
     }

     current_stopcrit=mean((old_centroids-centroids)^2)
   }

   return(list(data=data.frame(data,cluster),centroids=centroids))
 }

  res=kmeans(DF[1:2],K=5)
  res$centroids$cluster=1:5
  res$data$isCentroid=F
  res$centroids$isCentroid=T
  data_plot=rbind(res$centroids,res$data) 
  ggplot(data_plot,aes(x=X1,y=X2,color=as.factor(cluster),size=isCentroid,alpha=isCentroid))+geom_point()

正在发生的错误:

res=kmeans(DF[1:2],K=5)

Error in while (current_stopcrit >= stopcrit & converged == F) { : 
  missing value where TRUE/FALSE needed
In addition: Warning message:
In mean.default((old_centroids - centroids)^2) :
  argument is not numeric or logical: returning NA

>   res$centroids$cluster=1:5

Error in res$centroids$cluster = 1:5 : object 'res' not found

>   res$data$isCentroid=F 

Error in res$data$isCentroid = F : object 'res' not found

>   res$centroids$isCentroid=T

Error in res$centroids$isCentroid = T : object 'res' not found

>   data_plot=rbind(res$centroids,res$data) 

Error in rbind(res$centroids, res$data) : object 'res' not found

>   ggplot(data_plot,aes(x=X1,y=X2,color=as.factor(cluster),size=isCentroid,alpha=isCentroid))+geom_point()
Error in ggplot(data_plot, aes(x = X1, y = X2, color = as.factor(cluster),  : 

  object 'data_plot' not found

0 个答案:

没有答案