我正在尝试使用 kohonen 包进行自组织映射(SOM)分析。我参考了这篇教程:https://www.shanelynn.ie/self-organising-maps-for-customer-segmentation-using-r/。这是我的代码:
require(kohonen)
# Raw input matrix for the SOM example: 900 literal values arranged as
# 100 rows x 9 columns, filled row by row (byrow = TRUE).
# Columns 3 and 6 hold the same value in every row (80 and 0 respectively).
# NOTE(review): the meaning of the individual columns is not shown here.
data = matrix(
c(6, 6, 80, 280, 404, 0, 158, 158197, 158197233,
6, 13, 80, 280, 404, 0, 158, 158197, 158197233,
6, 13, 80, 283, 404, 0, 158, 158197, 158197233,
6, 35, 80, 321, 301, 0, 158, 158197, 158197233,
6, 35, 80, 3131, 200, 0, 158, 158197, 158197233,
6, 35, 80, 20073, 200, 0, 158, 158197, 158197233,
6, 35, 80, 183, 200, 0, 158, 158197, 158197233,
6, 35, 80, 25, 302, 0, 158, 158197, 158197233,
6, 35, 80, 13744, 200, 0, 158, 158197, 158197233,
6, 35, 80, 280, 404, 0, 158, 158197, 158197233,
9, 33, 80, 2859, 200, 0, 158, 158197, 15819736,
9, 33, 80, 463, 200, 0, 158, 158197, 15819736,
9, 33, 80, 2065, 200, 0, 158, 158197, 15819736,
9, 33, 80, 1298, 200, 0, 158, 158197, 15819736,
9, 33, 80, 86659, 200, 0, 158, 158197, 15819736,
9, 33, 80, 942, 200, 0, 158, 158197, 15819736,
9, 33, 80, 2027, 200, 0, 158, 158197, 15819736,
9, 33, 80, 2979, 200, 0, 158, 158197, 15819736,
9, 33, 80, 127390, 200, 0, 158, 158197, 15819736,
9, 33, 80, 1150, 200, 0, 158, 158197, 15819736,
9, 33, 80, 1150, 200, 0, 158, 158197, 15819736,
9, 33, 80, 942, 200, 0, 158, 158197, 15819736,
9, 40, 80, 287, 200, 0, 158, 158197, 15819736,
9, 40, 80, 90, 200, 0, 158, 158197, 15819736,
9, 40, 80, 2, 200, 0, 158, 158197, 15819736,
9, 40, 80, 3314, 200, 0, 158, 158197, 15819736,
9, 40, 80, 483, 200, 0, 158, 158197, 15819736,
9, 40, 80, 2, 200, 0, 158, 158197, 15819736,
9, 40, 80, 543, 200, 0, 158, 158197, 15819736,
9, 40, 80, 439, 200, 0, 158, 158197, 15819736,
9, 40, 80, 689, 200, 0, 158, 158197, 15819736,
9, 40, 80, 797, 200, 0, 158, 158197, 15819736,
9, 40, 80, 732, 200, 0, 158, 158197, 15819736,
9, 40, 80, 5403, 200, 0, 158, 158197, 15819736,
9, 40, 80, 496, 200, 0, 158, 158197, 15819736,
9, 40, 80, 743, 200, 0, 158, 158197, 15819736,
9, 40, 80, 3049, 200, 0, 158, 158197, 15819736,
9, 40, 80, 3064, 200, 0, 158, 158197, 15819736,
9, 40, 80, 3042, 200, 0, 158, 158197, 15819736,
9, 40, 80, 3077, 200, 0, 158, 158197, 15819736,
12, 8, 80,305 ,301 , 0, 142, 1424, 14245,
12, 8, 80, 10531,200 , 0, 142, 1424, 14245,
12, 8, 80, 445,400 , 0, 142, 1424, 14245,
12, 8, 80, 445,400 , 0, 142, 1424, 14245,
12, 8, 80, 445,400 , 0, 142, 1424, 14245,
12, 8, 80, 324,400 , 0, 142, 1424, 14245,
12, 8, 80, 445,400 , 0, 142, 1424, 14245,
0, 0, 80,0 ,0 , 0, 42, 424, 4245,
12, 25, 80, 171,200 , 0, 42, 424, 4245,
12, 25, 80, 2970,200 , 0,42, 424, 4245,
12, 25, 80, 171,200 , 0, 42, 424, 4245,
12, 25, 80, 2970,200 , 0, 42, 424, 4245,
12, 25, 80, 171,200 , 0, 42, 424, 4245,
12, 25, 80, 2970,200 , 0, 42, 424, 4245,
15, 32, 80, 2860,200 , 0, 88, 88212, 8821237,
15, 32, 80, 2859,200 , 0, 88, 88212, 8821237,
15, 32, 80, 86659,200 , 0, 88, 88212, 8821237,
15, 32, 80, 22495,200 , 0, 88, 88212, 8821237,
15, 32, 80, 949,200 , 0, 88, 88212, 8821237,
15, 32, 80, 1298,200 , 0, 88, 88212, 8821237,
15, 32, 80, 2027,200 , 0, 88, 88212, 8821237,
15, 32, 80, 2979,200 , 0, 88, 88212, 8821237,
15, 32, 80, 103646,200 , 0, 88, 88212, 8821237,
15, 32, 80, 406,200 , 0, 88, 88212, 8821237,
9, 34, 80, 1169,200 , 0, 88, 88212, 8821237,
9, 34, 80, 212,200 , 0, 88, 88212, 8821237,
9, 34, 80, 2250,200 , 0, 88, 88212, 8821237,
9, 34, 80, 8496,200 , 0, 88, 88212, 8821237,
9, 34, 80, 0,200 , 0, 88, 88212, 8821237,
9, 34, 80, 946,200 , 0, 88, 88212, 8821237,
9, 34, 80, 716263,200 , 0, 88, 88212, 8821237,
9, 34, 80, 63,200 , 0, 88, 88212, 8821237,
9, 34, 80, 63,200 , 0, 88, 88212, 8821237,
9, 34, 80, 1573,200 , 0, 88, 88212, 8821237,
9, 34, 80, 63,200 , 0, 88, 88212, 8821237,
9, 34, 80, 974,200 , 0, 88, 88212, 8821237,
9, 34, 80, 63,200 , 0, 88, 88212, 8821237,
9, 34, 80, 926,200 , 0, 88, 88212, 8821237,
9, 34, 80, 1150,200 , 0, 88, 88212, 8821237,
9, 34, 80, 800,200 , 0, 88, 88212, 8821237,
9, 34, 80, 225,200 , 0, 88, 88212, 8821237,
9, 34, 80, 79,200 , 0, 88, 88212, 8821237,
9, 34, 80, 408,200 , 0, 88, 88212, 8821237,
9, 34, 80, 535,200 , 0, 88, 88212, 8821237,
3, 21, 80, 208,404 , 0, 207, 20746, 2074613,
3, 49, 80, 1298,200 , 0, 207, 20746, 2074613,
5, 17, 80, 302,230 , 0, 207, 20746, 2074613,
8, 28, 80, 2857,200 , 0, 5, 5188, 518810,
13, 08, 80, 2860,200 , 0, 5, 5188, 518810,
14, 08, 80, 2860,200 , 0, 5, 5188, 518810,
15, 58, 80, 208,404 , 0, 66, 66249, 6624966,
15, 58, 80, 463,200 , 0, 66, 66249, 6624966,
15, 58, 80, 2065,200 , 0, 66, 66249, 6624966,
15, 58, 80, 2065,200 , 0, 66, 66249, 6624966,
13, 05, 80, 608,200 , 0, 88, 88212, 8821240,
13, 08, 80, 608,200 , 0, 88, 88212, 8821240,
13, 11, 80, 608,200 , 0, 88, 88212, 8821240,
13, 14, 80, 608,200 , 0, 88, 88212, 8821240,
13, 17, 80, 608,200 , 0, 88, 88212, 8821240,
13, 20, 80, 608,200 , 0, 88, 88212, 8821240 ),
nrow=100,
ncol=9,
byrow = TRUE)
# Train a 3 x 4 hexagonal SOM on the scaled training columns and plot
# the training diagnostics.

# Fix the RNG seed before training: som() starts from randomly chosen codes,
# so without a seed the node numbering (V1..V12) and the plots differ on
# every run and results cannot be compared or reproduced.
set.seed(42)

# Keep columns 1, 2, 4, 5, 7, 8 and 9 of `data`. Columns 3 and 6 are left out;
# they are constant in this data set, so scale() would turn them into NaN.
data_train <- data[, c(1, 2, 4, 5, 7, 8, 9)]
#data_train <- data[, c(2,4,5,8)]

# Centre and standardise every column so that no variable dominates the
# distance computation purely because of its magnitude.
data_train_matrix <- as.matrix(scale(data_train))

som_grid <- somgrid(xdim = 3, ydim = 4, topo = "hexagonal")
som_model <- som(data_train_matrix,
                 grid = som_grid,
                 rlen = 500,
                 alpha = c(0.05, 0.01),
                 keep.data = TRUE)

# training process
plot(som_model, type = "changes")
# nodes
plot(som_model, type = "count", main = "Node Counts")
# distance
#plot(som_model, type="dist.neighbours", main = "SOM neighbour distances")
# codes and weight vectors
#plot(som_model, type="codes")
# heatmap
# Column 4 of the codes is the 4th *training* column, i.e. column 5 of the
# original `data` matrix (columns 3 and 6 were dropped above).
plot(som_model, type = "property", property = getCodes(som_model)[, 4], main = "Heat map - parameter 4")
这是我的地图可视化:
地图本身没有问题。我的问题是:怎样才能找出落在白色节点中的数据?我查看了 getCodes(som_model),但其中只有各节点的码本值,所以我只知道白色节点是 V4。那么节点 V4 中包含哪些数据呢?我查看了 som_model 的所有字段,但没有找到答案。有什么想法吗?
> getCodes(som_model)[,4]
V1 V2 V3 V4 V5 V6 V7 V8
-0.727734454 -0.183272487 -0.342681905 2.361366190 -0.343764866 -0.343764866 1.298987948 -0.343532184
V9 V10 V11 V12
-0.343764307 1.350552793 -0.003492471 -0.343764866
我希望结果像这样:
> inV4
[1] 2 25 0
kohonen 包能做到这一点吗?
更新:看起来用 kohonen 包无法轻松实现,所以我尝试了下面的做法:
# Find the "white" node of the heat map (the node with the largest code value
# for training variable 4) and print the input rows that were mapped to it.

# Code value of variable 4 for every node; the names are "V1", "V2", ...
q <- getCodes(som_model)[, 4]

# which.max() picks the node with the highest value directly. This replaces
# the loop with the hard-coded "> 2" threshold, which found nothing when no
# value exceeded 2 and also overwrote the base function t().
white <- which.max(q)

# name of the node, e.g. "V4"
node <- names(white)

# Numeric node index. The position in the codes matrix is the node number
# used by unit.classif, so no string manipulation of the name is needed.
mynode <- unname(white)

# unit.classif holds, for every training row, the index of the node that the
# row was mapped to.
mydata2 <- som_model$unit.classif
print(mydata2)

# logical mask of the inputs that fall into the selected node
result <- mydata2 == mynode

# row numbers of those inputs
result2 <- which(result)

# Print all matching input rows at once; drop = FALSE keeps the matrix shape
# even when only one (or no) row matches.
print(data[result2, , drop = FALSE])
但我不确定这样做是否正确,也不确定它给出的是否就是映射到该节点的输入数据。有什么方法可以验证吗?
答案 0 :(得分:0)
很抱歉,这个回答来得太晚,可能已经帮不上您了,但或许能帮到其他人。
由于您没有设置随机种子,我们无法完全重现您的结果。我运行您的代码时,得到了下面这张图:
我得到的码本值(codes)如下:
getCodes(som_model)[,4]
V1 V2 V3 V4 V5 V6 V7
-0.3437649 -0.3437649 2.3146262 0.4037323 -0.3437649 -0.6034393 -0.3434484
V8 V9 V10 V11 V12
0.6730257 0.2089917 -0.3437649 -0.1038754 2.6302823
在我的结果中,白色节点是 V12。查看 som 对象中的 unit.classif(每个数据点所属节点的编号),即可得到落在 V12 中的数据点。
som_model$unit.classif
[1] 12 12 12 9 9 9 9 9 9 12 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
[26] 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 3 2 3 3 3 3 3 6 6 6
[51] 6 6 6 6 1 1 1 1 1 1 1 1 1 1 5 5 5 5 5 5 7 5 5 5 5
[76] 5 5 5 5 5 5 5 5 5 8 8 8 6 6 6 4 4 4 4 2 2 2 2 2 2
要获取 V12 中的数据点,只需使用:
which(som_model$unit.classif == 12)
[1] 1 2 3 10