我使用 K-means 聚类对自组织映射(SOM)的码本向量进行分类,并希望利用得到的 SOM 聚类对原始数据进行反向编码(即为每条原始数据标注其所属的聚类)。
示例脚本如下。
# Load package. library() stops with an error if kohonen is not installed,
# whereas require() only warns and lets the script fail later at som().
library(kohonen)
# Set data
data(iris)
# Centre and scale the four measurement columns (scale = TRUE is the default)
dt <- scale(iris[, 1:4], center = TRUE)
# Train the SOM on a 6 x 6 hexagonal grid; the seed is fixed so that the
# trained map is reproducible between runs
set.seed(590507)
som1 <- som(
  dt,
  somgrid(6, 6, "hexagonal"),
  rlen = 500,
  keep.data = TRUE
)
# Plot the codebook vectors of every SOM unit
myPal1 <- colorRampPalette(c("black", "orange", "red", "green"))
plot(
  som1,
  type = "codes",
  palette.name = myPal1,
  main = "Codes",
  shape = "straight",
  border = "gray"
)
# Codebook vectors of the SOM units as a data frame (one row per unit)
cds <- as.data.frame(som1$codes)
# Within-cluster sum of squares for 1 to 6 clusters of the codebook vectors.
# For a single cluster this is the total sum of squares: (n - 1) times the
# summed column variances.
max_k <- 6
total_ss <- (nrow(cds) - 1) * sum(vapply(cds, var, numeric(1)))
# k-means is run once per k, in increasing order of k
wss <- c(
  total_ss,
  vapply(
    2:max_k,
    function(k) kmeans(cds, centers = k)$tot.withinss,
    numeric(1)
  )
)
# Scree plot of WSS against the number of clusters
par(mar = c(8, 5, 8, 2))
plot(
  seq_len(max_k),
  wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares",
  main = "Within cluster sum of squares (WCSS)",
  col = "blue",
  lwd = 2
)
# Scree plot - 3 clusters look a sensible choice
nCls <- 3
# Cluster the SOM codebook vectors; nstart = 20 tries 20 random
# initialisations and keeps the best solution
som1.km <- kmeans(cds, nCls, nstart = 20)
# Plot the SOM codes map with the 3 clusters as background colours
# (one background colour per SOM unit, chosen by that unit's cluster)
MyPal3 <- c("grey80", "aquamarine", "burlywood1")
par(mar = c(0, 5, 0, 2))
plot(
  som1,
  type = "codes",
  palette.name = myPal1,
  bgcol = MyPal3[som1.km$cluster],
  main = "k-mean cluster",
  shape = "straight",
  border = "gray"
)
# Position the legend by explicit coordinates only. The original call also
# passed a positional "right", which (with x and y already named) was
# silently matched to an unrelated argument rather than to the position.
legend(
  x = 7,
  y = 4,
  cex = 1.5,
  title = "Cluster",
  legend = seq_len(nCls),
  fill = MyPal3[seq_len(nCls)]
)
# Get the SOM cell number associated with each of the 150 data rows
# (in the same order as the rows of the input data)
SOM.clss <- data.frame(Cell.Nmbr = som1$unit.classif)
unique(SOM.clss)
# Lookup table: k-means cluster of each SOM cell, keyed by cell number
kMns.clst <- data.frame(
  Clstr = som1.km$cluster,
  Cell.Nmbr = seq_along(som1.km$cluster)
)
# Map the cluster number to each datum with an order-preserving lookup.
# merge() must NOT be used here: it sorts its result by the key column, so
# the rows would no longer line up with the rows of iris in the cbind() below
# and cluster labels would be attached to the wrong observations.
dt.clst <- data.frame(
  Cell.Nmbr = SOM.clss$Cell.Nmbr,
  Clstr = kMns.clst$Clstr[match(SOM.clss$Cell.Nmbr, kMns.clst$Cell.Nmbr)]
)
# Add the cell and cluster columns to the original data (row i of dt.clst
# describes row i of iris)
iris.clst <- cbind(iris, dt.clst)
# Compute per-cluster means of the four measurements as a reality check
aggregate(
  iris.clst[, 1:4],
  by = list(iris.clst$Clstr),
  FUN = mean
)
结果看起来合理,但我不确定这种方法是否正确。这种做法对吗?如果正确,是否有更高效的方法来完成这种反向编码?