我想用一个已经拟合好的无监督 SOM 模型对新的数据集进行预测,但不确定自己的思路是否正确。感谢您的指导。
目标:使用先前在训练集上拟合得到的聚类,对新数据集进行分类。
我不确定的地方:
a. 测试集中分配的簇编号是否与训练集中的含义一致,即测试集中的第 1 组必须与训练集中的第 1 组具有相同的特征。
b. 对 iris(鸢尾花)数据集的无监督拟合效果似乎不太理想。
# Fit an unsupervised SOM on a training split of iris, group the map units
# into clusters, and assign held-out observations to those SAME clusters.
library('kohonen')
set.seed(1)

# Split: 120 randomly chosen rows for training, the remaining rows for testing.
idx_n <- sample(nrow(iris), 120)
train <- iris[idx_n, ]
row.names(train) <- NULL
test <- iris[-idx_n, ]
row.names(test) <- NULL

# Preprocess: standardise the measurement columns (column 5, Species, is
# dropped so the label never enters the unsupervised fit).
train.sc <- scale(train[, -5])

# Train the model on a 5 x 5 hexagonal, non-toroidal grid.
som_grid <- somgrid(
  xdim = 5,
  ydim = 5,
  topo = "hexagonal",
  toroidal = FALSE
)
som.iris <- som(
  train.sc,
  grid = som_grid,
  rlen = 200,
  alpha = c(0.05, 0.01),
  keep.data = TRUE
)

# Number of clusters to cut the codebook dendrogram into.
set_cluster <- 3

# Use hierarchical clustering to cluster the codebook vectors. The result has
# one cluster id per map unit, so it acts as a unit -> cluster lookup table.
som.iris.hc <- cutree(hclust(dist(som.iris$codes[[1]])), set_cluster)

# --------- Predict new dataset ----------
# Scale the test set with the centering/scaling computed on the training set,
# so both sets live in the same feature space as the codebook vectors.
test.sc <- scale(
  test[, -5],
  center = attr(train.sc, "scaled:center"),
  scale = attr(train.sc, "scaled:scale")
)

# Map every test observation onto the trained SOM.
test.pred <- predict(som.iris, newdata = test.sc)

# Re-use the training cluster definition: look up the cluster of the winning
# unit of each test row. Running a fresh hclust()/cutree() on the test
# predictions would produce arbitrary cluster numbers unrelated to the
# training clusters, so cluster 1 in test would not mean cluster 1 in train.
som.iris.hc_test <- som.iris.hc[test.pred$unit.classif]

# Attach cluster groups (same unit -> cluster lookup for both data sets).
train_final <- cbind(train, cluster = som.iris.hc[som.iris$unit.classif])
test_final <- cbind(test, cluster = som.iris.hc_test)

# Explore each cluster.
by(train_final, train_final$cluster, summary)
by(test_final, test_final$cluster, summary)

# Cross-tabulate the known species against the assigned training clusters.
table(train_final$Species, train_final$cluster)
答案 0 :(得分:0)
我目前的解决方法是:先拟合无监督的 SOM 模型;一旦确定了簇的数量,就用聚类结果给训练样本打上标签,再将其重新训练为有监督的 SOM 模型。这样,我就可以更有针对性地对新数据集进行预测。想听听您的想法。
# Two-stage approach: (1) fit an unsupervised SOM and cluster its codebook
# vectors, (2) use the resulting cluster ids as labels to train a supervised
# SOM, which is then used to predict the cluster of new observations.
library('kohonen')
set.seed(1)

# Split: 120 randomly chosen rows for training, the remaining rows for testing.
idx_n <- sample(nrow(iris), 120)
train <- iris[idx_n, ]
row.names(train) <- NULL
test <- iris[-idx_n, ]
row.names(test) <- NULL

# Preprocess: standardise the measurement columns (column 5 is dropped).
train.sc <- scale(train[, -5])

# Train model as unsupervised on a 5 x 5 hexagonal, non-toroidal grid.
som_grid <- somgrid(
  xdim = 5,
  ydim = 5,
  topo = "hexagonal",
  toroidal = FALSE
)
som.iris <- som(
  train.sc,
  grid = som_grid,
  rlen = 200,
  alpha = c(0.05, 0.01),
  keep.data = TRUE
)

# Number of clusters to cut the codebook dendrogram into.
set_cluster <- 3

# Use hierarchical clustering to cluster the codebook vectors, then give each
# training row the cluster of its winning unit. Names are stripped and the
# ids converted to a factor so they can serve as a class label layer.
som.iris.hc <- cutree(hclust(dist(som.iris$codes[[1]])), set_cluster)
train_cluster <- as.factor(as.vector(som.iris.hc[som.iris$unit.classif]))

# Assign new clusters into training set: layer x = features, layer y = labels.
train.l.sc <- list(x = train.sc, y = train_cluster)

# Retrain model as supervised learning on both layers.
mygrid <- somgrid(5, 5, "hexagonal")
som.iris.l <- supersom(train.l.sc, grid = mygrid, maxNA.fraction = .5)

# --------- Predict new dataset ----------
# Scale the test set with the centering/scaling computed on the training set.
# Only the x layer is supplied; the y layer is what gets predicted.
test.l.sc <- list(
  x = as.matrix(scale(
    test[, -5],
    center = attr(train.sc, "scaled:center"),
    scale = attr(train.sc, "scaled:scale")
  ))
)
test.pred <- predict(som.iris.l, newdata = test.l.sc)

# Attach cluster groups: training labels come from the unsupervised stage,
# test labels from the supervised SOM's prediction for layer y.
train_final <- cbind(train, cluster = train_cluster)
test_final <- cbind(test, cluster = test.pred$predictions$y)

# Explore each cluster.
by(train_final, train_final$cluster, summary)
by(test_final, test_final$cluster, summary)