R 自组织映射(SOM)- 使用已拟合的无监督SOM预测新数据

时间:2018-07-24 13:37:58

标签: r cluster-analysis prediction unsupervised-learning som

我想用已经拟合好的无监督SOM模型对一个新的数据集进行预测,但不确定自己的做法是否正确。感谢您的指导。

目标:使用先前在训练集上拟合得到的聚类,对新数据集进行分类。

我不确定的地方:

a。测试集中分配的聚类编号是否与训练集中的分配方式一致。也就是说,测试集中的第1组必须与训练集中的第1组具有相同的特征。

b。对鸢尾花(iris)数据集的无监督拟合效果似乎不太理想。

library('kohonen')
# Fit an unsupervised SOM on a training split of iris, group the SOM units
# into clusters, and assign new (test) observations to those SAME clusters.
set.seed(1)

# Draw 120 row indices for training; the remaining rows form the test set.
idx_n <- sample(nrow(iris), 120)

train <- iris[idx_n, ]
row.names(train) <- NULL

test <- iris[-idx_n, ]
row.names(test) <- NULL

# preprocess: standardise every column except column 5 (dropped via [, -5])
train.sc <- scale(train[, -5])

# train model
som_grid <- somgrid(
  xdim = 5,
  ydim = 5,
  topo = "hexagonal",
  toroidal = FALSE
)
som.iris <- som(
  train.sc,
  grid = som_grid,
  rlen = 200,
  alpha = c(0.05, 0.01),
  keep.data = TRUE
)

set_cluster <- 3

## use hierarchical clustering to cluster the codebook vectors
## som.iris.hc holds one cluster label per SOM unit (row of the codebook).
som.iris.hc <- cutree(hclust(dist(som.iris$codes[[1]])), set_cluster)

# --------- Predict new dataset ----------
# Scale the test set with the centre/scale computed on the training set.
# The attributes are read from train.sc itself so this does not depend on
# the fitted object having kept its data (and their attributes).
test.sc <- scale(
  test[, -5],
  center = attr(train.sc, "scaled:center"),
  scale = attr(train.sc, "scaled:scale")
)

test.pred <- predict(som.iris, newdata = test.sc)

# Do NOT re-cluster the test predictions: a fresh hclust/cutree on the test
# set produces labels 1..k that are unrelated to the training clusters.
# Instead, look up the winning unit of each test row in the unit -> cluster
# vector learned on the training codebook, so cluster k means the same thing
# in both sets.
som.iris.hc_test <- som.iris.hc[test.pred$unit.classif]

# attach cluster groups (same unit -> cluster lookup for train and test)
train_final <- cbind(train, cluster = som.iris.hc[som.iris$unit.classif])
test_final <- cbind(test, cluster = som.iris.hc_test)

# explore each cluster
by(train_final, train_final$cluster, summary)
by(test_final, test_final$cluster, summary)

# results: cross-tabulate species against assigned cluster
table(train_final$Species, train_final$cluster)

1 个答案:

答案 0 :(得分:0)

我目前的解决方法是:先拟合无监督的SOM模型;一旦确定了聚类的数量,就用聚类结果给训练数据打上标签,再重新训练一个有监督的SOM模型。这样,我就可以更有针对性地预测新数据集。想听听您的想法。

library('kohonen')
# Workaround: fit an unsupervised SOM, cluster its units, label the training
# rows with those clusters, then retrain a supervised SOM on (x, cluster) so
# that new data can be assigned a cluster via predict().
set.seed(1)

# Draw 120 row indices for training; the remaining rows form the test set.
idx_n <- sample(nrow(iris), 120)

train <- iris[idx_n, ]
row.names(train) <- NULL

test <- iris[-idx_n, ]
row.names(test) <- NULL

# preprocess: standardise every column except column 5 (dropped via [, -5])
train.sc <- scale(train[, -5])

# train model as unsupervised
som_grid <- somgrid(
  xdim = 5,
  ydim = 5,
  topo = "hexagonal",
  toroidal = FALSE
)
som.iris <- som(
  train.sc,
  grid = som_grid,
  rlen = 200,
  alpha = c(0.05, 0.01),
  keep.data = TRUE
)

set_cluster <- 3

## use hierarchical clustering to cluster the codebook vectors
som.iris.hc <- cutree(hclust(dist(som.iris$codes[[1]])), set_cluster)
# Cluster label of each training row = cluster of its winning unit;
# as.vector() drops the unit names before the labels become a factor.
train_cluster <- as.factor(as.vector(som.iris.hc[som.iris$unit.classif]))

# assign new clusters into training set (layer x = features, y = labels)
train.l.sc <- list(x = train.sc, y = train_cluster)

# retrain model as supervised learning
mygrid <- somgrid(5, 5, "hexagonal")
som.iris.l <- supersom(train.l.sc, grid = mygrid, maxNA.fraction = .5)

# --------- Predict new dataset ----------
# Scale the test set with the centre/scale computed on the training set.
# The attributes are read from train.sc itself rather than from the data
# stored inside the fitted object, so this does not rely on the model having
# kept its data with the scaling attributes intact.
test.l.sc <- list(
  x = scale(
    test[, -5],
    center = attr(train.sc, "scaled:center"),
    scale = attr(train.sc, "scaled:scale")
  )
)

test.pred <- predict(som.iris.l, newdata = test.l.sc)

# attach cluster groups
train_final <- cbind(train, cluster = train_cluster)
test_final <- cbind(test, cluster = test.pred$predictions$y)

# explore each cluster
by(train_final, train_final$cluster, summary)
by(test_final, test_final$cluster, summary)