R将自组织映射的k均值聚类映射回数据

时间:2017-10-28 04:42:25

标签: r

我使用K-means聚类对自组织映射(SOM)的码本向量进行分类,并希望利用这些SOM聚类结果对原始数据进行反向编码(即把聚类标签映射回每条数据)。
下面的示例脚本。

# Load package. library() stops with an error if kohonen is not installed,
# whereas require() would only return FALSE and let the script fail later.
library(kohonen)

# Set data
data(iris)

# Scale and centre the four numeric measurement columns
dt <- scale(iris[, 1:4], center = TRUE)

# Prepare SOM: 6 x 6 hexagonal grid, 500 training iterations.
# The seed is fixed so the trained map is reproducible.
set.seed(590507)
som1 <- som(
  dt,
  somgrid(6, 6, "hexagonal"),
  rlen = 500,
  keep.data = TRUE
)

# Colour ramp used for the codebook-vector segments in the SOM plots
myPal1 <- colorRampPalette(c("black", "orange", "red", "green"))

# Draw the codes map of the trained SOM
plot(
  som1,
  main = "Codes",
  type = "codes",
  shape = "straight",
  border = "gray",
  palette.name = myPal1
)

# Extract the codebooks from SOM
cds <- as.data.frame(som1$codes)

# Compute WSS for up to 6 clusters for codebook vectors.
# wss[1] is the total sum of squares (the one-cluster case); the remaining
# entries come from k-means. Several random starts (nstart) are used so each
# entry reflects the best of many solutions rather than one random
# initialisation, matching the nstart used for the final clustering.
max_k <- 6L
wss <- numeric(max_k)
wss[1] <- (nrow(cds) - 1) * sum(vapply(cds, var, numeric(1)))
for (k in 2:max_k) {
  wss[k] <- kmeans(cds, centers = k, nstart = 20)$tot.withinss
}

# Plot the scree plot
par(mar = c(8, 5, 8, 2))
plot(
  seq_along(wss),
  wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares",
  main = "Within cluster sum of squares (WCSS)",
  col = "blue",
  lwd = 2
)

# Scree plot - 3 clusters look like a sensible choice
nCls <- 3
som1.km <- kmeans(cds, centers = nCls, nstart = 20)


# Plot the SOM codes map with the 3 clusters as background.
# som1.km$cluster has one entry per SOM cell, so indexing the palette with it
# gives one background colour per cell.
MyPal3 <- c("grey80", "aquamarine", "burlywood1")
par(mar = c(0, 5, 0, 2))

plot(
  som1,
  type = "codes",
  palette.name = myPal1,
  bgcol = MyPal3[som1.km$cluster],
  main = "k-mean cluster",
  shape = "straight",
  border = "gray"
)

# Place the legend at explicit plot coordinates. The position keyword
# "right" was dropped: with x and y already named, a positional "right"
# would be matched to legend()'s `col` argument, not to a position.
legend(
  x = 7,
  y = 4,
  cex = 1.5,
  title = "Cluster",
  legend = seq_len(nCls),
  fill = MyPal3[seq_len(nCls)]
)


# Get the SOM cell number associated with each of the 150 data rows
# (one winning cell per observation, in the original row order)
SOM.clss <- data.frame(Cell.Nmbr = som1$unit.classif)
unique(SOM.clss)

# Get the k-means 3-class classification of the 36 SOM cells
kMns.clst <- data.frame(Clstr = som1.km$cluster)

# Add a SOM cell reference for a lookup table
kMns.clst$Cell.Nmbr <- seq_len(nrow(kMns.clst))

# Use the lookup table to map the cluster number to each datum.
# match() keeps the observations in their original order. merge() must not
# be used here: by default it sorts the result by the key column, so the
# clusters would no longer line up with the rows of iris in the cbind() below.
dt.clst <- SOM.clss
dt.clst$Clstr <- kMns.clst$Clstr[match(dt.clst$Cell.Nmbr, kMns.clst$Cell.Nmbr)]
stopifnot(nrow(dt.clst) == nrow(iris), !anyNA(dt.clst$Clstr))

# Add the cluster column to the original data
iris.clst <- cbind(iris, dt.clst)

# Compute per-cluster means of the four measurements as a reality check
aggregate(
  iris.clst[, 1:4],
  by = list(iris.clst$Clstr),
  FUN = mean
)

结果看起来合理,但我不确定这种方法是否正确。这种方法正确吗?如果正确,是否有更高效的方法来完成这种反向编码?

0 个答案:

没有答案