计算分组的MDS

时间:2019-04-22 10:11:00

标签: r text distance mds

我有此数据:

glimpse(merged_dat2)
Observations: 15
Variables: 3
Groups: Brand, topic [15]
$ Brand <fct> Samsung, BLU, Apple, Samsung, BLU, Apple, Samsung, BLU, Apple, Samsung, BLU, App...
$ topic <int> 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5
$ term  <chr> "tri price defect never samsung warranti water brand time wellno version", "tri ...

这些数据是对客户评论进行潜在狄利克雷分配(Latent Dirichlet Allocation,LDA)后得到的结果。Brand 是评论所针对的品牌;topic 是LDA得到的主题编号;term 是该主题下的词项(以空格分隔)。

我的目标是用多维尺度分析(MDS)将这些结果可视化。具体做法应当是:针对每个主题,计算各品牌的词向量(terms)之间的距离。结果应类似下图(假设两个维度分别对应主题1和主题2)。我的问题是:如何制作这样的分组MDS?

enter image description here

structure(list(Brand = structure(c(3L, 2L, 1L, 3L, 2L, 1L, 3L, 
2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L), .Label = c("Apple", "BLU", "Samsung"
), class = "factor"), topic = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 
3L, 4L, 4L, 4L, 5L, 5L, 5L), term = c("tri price defect never samsung warranti water brand time wellno version", 
"tri att week plenti ram purchas ship", "replac", "work fast econom flash refurbish excel funtion intuit", 
"work fast dont hour junk earphon life everyth seller", "work inch", 
"doesnt even data answer call number record phonework problem trueli", 
"doesnt even right ear fact fall unlock", "doesnt livabl top imag damag", 
"phone issu supos rep fantasticthanx much", "phone screen confort walk gift", 
"phone screen issu finger still", "servic just option buy databas imeiseri month support flawlessno hard", 
"servic just crack money text wast heavi photo sometim bang busi far", 
"alway annoy though trustful")), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -15L), vars = c("Brand", 
"topic"), labels = structure(list(Brand = structure(c(1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("Apple", 
"BLU", "Samsung"), class = "factor"), topic = c(1L, 2L, 3L, 4L, 
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L)), class = "data.frame", row.names = c(NA, 
-15L), vars = c("Brand", "topic"), labels = structure(list(Brand = structure(c(1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("Apple", 
"BLU", "Samsung"), class = "factor"), topic = c(1L, 2L, 3L, 4L, 
5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L)), row.names = c(NA, 
-15L), class = "data.frame", vars = c("Brand", "topic"), drop = TRUE, indices = list(
    20L, c(27L, 43L), c(46L, 63L, 64L, 65L, 66L), c(79L, 80L, 
    82L, 83L, 86L, 94L, 95L), 120:123, c(1L, 2L, 3L, 4L, 15L, 
    16L, 17L, 18L, 19L), c(24L, 25L, 26L, 29L, 36L, 37L, 38L, 
    39L, 40L, 41L, 42L), c(45L, 48L, 58L, 59L, 60L, 61L, 62L), 
    c(72L, 73L, 74L, 75L, 76L, 77L, 78L, 81L, 91L, 92L, 93L), 
    c(98L, 101L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 
    118L, 119L), c(0L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
    14L), c(21L, 22L, 23L, 28L, 30L, 31L, 32L, 33L, 34L, 35L), 
    c(44L, 47L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L), 
    c(67L, 68L, 69L, 70L, 71L, 84L, 85L, 87L, 88L, 89L, 90L), 
    c(96L, 97L, 99L, 100L, 102L, 103L, 104L, 105L, 106L, 107L, 
    108L, 109L)), group_sizes = c(1L, 2L, 5L, 7L, 4L, 9L, 11L, 
7L, 11L, 12L, 11L, 10L, 11L, 11L, 12L), biggest_group_size = 12L, labels = structure(list(
    Brand = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 3L, 3L), .Label = c("Apple", "BLU", "Samsung"
    ), class = "factor"), topic = c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 
    3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L)), row.names = c(NA, -15L), class = "data.frame", vars = c("Brand", 
"topic"), drop = TRUE, indices = list(120L, 121L, 51L, 3:4, 117L, 
    11L, 118L, 103L, 52L, 91L, 110L, 66L, 49:50, 104L, 6L, 44:46, 
    36L, 59L, 39L, 30L, 47:48, 41L, 33L, 60L, 61L, 89L, 119L, 
    28:29, 94L, 31L, 108L, 34L, 93L, 109L, 114L, 37L, 65L, 105L, 
    43L, 35L, 84:86, 38L, 99:101, 40L, 63L, 111L, 106L, 90L, 
    7L, 53L, 102L, 67:80, 55L, 115L, 16L, 5L, 56L, 18L, 17L, 
    54L, 32L, 88L, 20L, 58L, 8L, 81:83, 42L, 96:98, 19L, 116L, 
    95L, 87L, 107L, 112L, 122L, 12L, 64L, 0:2, 57L, 123L, 62L, 
    14L, 92L, 9L, 113L, 10L, 15L, 13L, 21:27), group_sizes = c(1L, 
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 14L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 7L), biggest_group_size = 14L, labels = structure(list(
    term = c("alway", "annoy", "answer", "att", "bang", "brand", 
    "busi", "buy", "call", "confort", "crack", "damag", "data", 
    "databas", "defect", "doesnt", "dont", "ear", "earphon", 
    "econom", "even", "everyth", "excel", "fact", "fall", "fantasticthanx", 
    "far", "fast", "finger", "flash", "flawlessno", "funtion", 
    "gift", "hard", "heavi", "hour", "imag", "imeiseri", "inch", 
    "intuit", "issu", "junk", "just", "life", "livabl", "money", 
    "month", "much", "never", "number", "option", "phone", "phonework", 
    "photo", "plenti", "price", "problem", "purchas", "ram", 
    "record", "refurbish", "rep", "replac", "right", "samsung", 
    "screen", "seller", "servic", "ship", "sometim", "still", 
    "supos", "support", "text", "though", "time", "top", "tri", 
    "trueli", "trustful", "unlock", "version", "walk", "warranti", 
    "wast", "water", "week", "wellno", "work")), row.names = c(NA, 
-89L), class = "data.frame", vars = "term", drop = TRUE))), indices = list(
    18L, c(21L, 37L), c(40L, 56L, 57L, 58L, 59L), c(62L, 64L, 
    66L, 74L, 75L), 98:101, c(1L, 2L, 13L, 14L, 15L, 16L, 17L
    ), c(20L, 23L, 30L, 31L, 32L, 33L, 34L, 35L, 36L), c(39L, 
    42L, 51L, 52L, 53L, 54L, 55L), c(61L, 63L, 71L, 72L, 73L), 
    c(77L, 79L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 
    97L), c(0L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), c(19L, 
    22L, 24L, 25L, 26L, 27L, 28L, 29L), c(38L, 41L, 43L, 44L, 
    45L, 46L, 47L, 48L, 49L, 50L), c(60L, 65L, 67L, 68L, 69L, 
    70L), c(76L, 78L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L)), drop = TRUE, group_sizes = c(1L, 
2L, 5L, 5L, 4L, 7L, 9L, 7L, 5L, 12L, 11L, 8L, 10L, 6L, 10L), biggest_group_size = 12L), indices = list(
    2L, 5L, 8L, 11L, 14L, 1L, 4L, 7L, 10L, 13L, 0L, 3L, 6L, 9L, 
    12L), drop = TRUE, group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L)

0 个答案:

没有答案