查找由hclust函数生成的簇的质心

时间:2018-07-17 08:24:35

标签: r

我需要获取通过层次聚类方法得到的每个簇的质心。首先,下面是数据集的一部分,用作可复现的示例:

> dput(DATABASE[1:20,])
structure(list(TYPE_PEAU = c(2L, 2L, 3L, 2L, 2L, 2L, 2L, 4L, 
3L, 2L, 2L, 2L, 2L, 1L, 4L, 2L, 2L, 2L, 4L, 2L), SENSIBILITE = c(3L, 
2L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 
1L, 3L, 3L), IMPERFECTIONS = c(2L, 2L, 3L, 3L, 1L, 2L, 2L, 3L, 
2L, 2L, 2L, 1L, 1L, 1L, 3L, 1L, 2L, 1L, 2L, 2L), BRILLANCE = c(3L, 
3L, 1L, 3L, 1L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 3L, 
3L, 3L, 3L), GRAIN_PEAU = c(3L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 3L, 
2L, 1L, 3L, 1L, 1L, 3L, 1L, 3L, 3L, 1L, 3L), RIDES_VISAGE = c(3L, 
1L, 1L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 2L, 1L, 3L, 1L, 3L, 3L, 3L, 
3L, 3L, 3L), MAINS = c(2L, 2L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 
3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L), PEAU_CORPS = c(2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 
2L, 1L), INTERET_ALIM_NATURELLE = c(1L, 1L, 3L, 1L, 1L, 1L, 1L, 
1L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L), INTERET_ORIGINE_GEO = c(1L, 
1L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 3L, 3L, 1L, 
1L, 1L, 1L), INTERET_VACANCES = c(1L, 2L, 3L, 1L, 1L, 1L, 1L, 
2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 2L), INTERET_ENVIRONNEMENT = c(1L, 
3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), AGE_INTERVAL = c(3L, 3L, 4L, 2L, 2L, 3L, 3L, 4L, 
4L, 3L, 4L, 2L, 1L, 3L, 3L, 2L, 2L, 2L, 2L, 3L), ATTENTE_BEAUTE_1 = c(1L, 
6L, 4L, 4L, 6L, 6L, 3L, 1L, 1L, 4L, 3L, 6L, 2L, 5L, 5L, 6L, 7L, 
4L, 6L, 3L), ATTENTE_BEAUTE_2 = c(2L, 2L, 3L, 6L, 4L, 1L, 4L, 
7L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 2L, 6L, 2L, 2L, 2L), MILIEU_VIE = c(1L, 
1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L), PROFIL_SELECTIONNE = c(1L, 32L, 21L, 23L, 34L, 31L, 
15L, 6L, 1L, 20L, 14L, 34L, 9L, 28L, 28L, 32L, 42L, 20L, 32L, 
14L), NOMBRE_ACHAT = c(14L, 6L, 3L, 9L, 8L, 13L, 10L, 14L, 4L, 
3L, 10L, 8L, 12L, 3L, 7L, 6L, 4L, 13L, 3L, 3L), NOMBRE_CADEAU = c(2L, 
1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
2L, 1L, 1L)), .Names = c("TYPE_PEAU", "SENSIBILITE", "IMPERFECTIONS", 
"BRILLANCE", "GRAIN_PEAU", "RIDES_VISAGE", "MAINS", "PEAU_CORPS", 
"INTERET_ALIM_NATURELLE", "INTERET_ORIGINE_GEO", "INTERET_VACANCES", 
"INTERET_ENVIRONNEMENT", "AGE_INTERVAL", "ATTENTE_BEAUTE_1", 
"ATTENTE_BEAUTE_2", "MILIEU_VIE", "PROFIL_SELECTIONNE", "NOMBRE_ACHAT", 
"NOMBRE_CADEAU"), row.names = c(NA, 20L), class = "data.frame")

然后我按如下方式对数据进行层次聚类,并将结果切分为 3 个簇:

# Pairwise distance matrix between the rows of DATABASE
# (dist() uses its default metric here, since none is specified).
mydist <- dist(DATABASE)
# Build the hierarchical clustering from the distances (default linkage),
# then cut the resulting tree into exactly 3 groups; `clusters` holds one
# integer cluster label per row of DATABASE.
clusters <- cutree(tree = hclust(d = mydist), k = 3)

> clusters
  [1] 1 2 3 3 2 2 3 1 1 3 1 2 1 3 2 2 2 3 2 1 3 2 1 1 1 1 2 1 2 1 3 3 2 3 2 2 1 1 1 1 3 2 1 1 3 2 1 2 2 1 2 2 3 1 3 1 3
 [58] 1 3 2 2 1 1 2 1 2 2 2 3 2 3 1 2 2 1 1 3 3 2 1 2 2 1 2 3 3 3 1 2 1 2 1 1 1 1 1 3 2 2 2 1 1 3 2 2 1 1 1 2 1 1 1 1 3
[115] 1 2 2 1 2 3 1 1 2 3 1 1 1 2 1 3 1 2 3 2 2 1 2 1 1 3 3 2 1 2 2 1 1 1 1 2 1 2 2 3 3 1 1 3 1 3 3 3 3 2 3 1 2 3 3 3 1
[172] 1 2 2 1 1 2 1 2 2 1 3 3 1 2 2 1 1 1 2 2 1 1 1 1 3 2 3 3 1 1 2 2 2 3 1 1 1 2 2 1 2 1 3 1 2 1 3 3 1 1 1 1 2 1 2 2 2
[229] 3 3 1 1 2 1 3 2 2 2 1 1 2 1 3 1 2 1 3 1 3 1 3 1 1 1 1 2 2 1 3 3 3 2 1 2 3 2 2 1 1 3 1 2 3 1 1 2 1 1 1 1 2 2 2 3 2
[286] 1 2 1 1 2 1 2 1 2 2 1 2 3 1 3 1 3 1 1 3 1 1 2 2 1 3 3 2 2 1 2 1 1 2 2 1 3 3 2 2 1 3 3 3 1 1 1 1 3 3 2 1 3 1 2 1 2
[343] 1 2 3 3 2 3 1 3 2 3 3 1 2 2 1 2 2 3 2 1 3 2 2 1 2 3 2 3 3 3 2 2 3 2 1 1 1 2 3 2 2 1 2 2 2 1 2 1 1 1 3 1 2 2 1 1 2
[400] 1 1 1 1 1 2 2 2

请注意,我的目标是计算簇内(intra)惯性和簇间(inter)惯性:因此,我需要计算每个质心与其所在簇中所有点之间的距离。

也就是说,我需要先计算每个质心与其所属簇中各点之间的距离,然后用这些距离来计算簇内惯性和簇间惯性。

2 个答案:

答案 0 :(得分:0)

您可能希望将 k 值指定为 1:3,而不是 3。

here is the code and how to find the center (mean)

答案 1 :(得分:0)

您可以在 `DATABASE` 中将质心定义为每个簇的变量均值,例如:`aggregate(DATABASE, by = list(cluster = clusters), FUN = mean)`。

DATABASE