kmeans模型的输出不符合“预期”

时间:2019-04-26 14:32:25

标签: r

我的kmeans模型在哪里出问题?

我试图对多个tibble(嵌套的数据框)分别应用kmeans模型,但在我看来,图中的结果并不“正确”,这让我觉得自己一定在某个地方做错了。

我期望得到类似下图的结果(暂时忽略簇的数量):[图片:expected results];但实际上(在完整的示例中)我得到的是类似下图的结果:[图片:enter image description here]。在我看来,kmeans模型似乎只在value_mgnt变量上进行了聚类。最终的模型将包含更多变量,我只是想在扩展到更多变量之前确保代码能正常工作。我执行的步骤正确吗?

代码:

# Question code (kept as posted): nest the data per
# (compare_year_mgnt, compare_year_risk) group, fit one k-means model per
# group, and plot the cluster assignments of the "2009_2010" group.
# Number of clusters requested from kmeans().
k_means_centers = 4
kclust <- x %>%
  as_tibble() %>% 
  group_by(compare_year_mgnt, compare_year_risk) %>%
  # One row per group; the remaining columns are collected in the list-column
  # "value".
  nest(.key = "value") %>%
  # kmeans_data is `value` without the id_key column.
  mutate(kmeans_data = map(value, ~select(.x, -"id_key")),
         # NOTE(review): `.x[[1]]` extracts only the FIRST column of
         # kmeans_data as a plain vector, so kmeans() is fitted on a single
         # variable here rather than on all columns of kmeans_data.
         kmeans = map(kmeans_data, ~kmeans(.x[[1]],
                                           centers = k_means_centers, iter.max = 10, nstart = 1)),
         # broom summaries of each fitted model.
         tidied = map(kmeans, tidy),
         glanced = map(kmeans, glance),
         # Attach the cluster assignment of each row to kmeans_data.
         augmented = map2(kmeans, kmeans_data, augment))

kclust


# Flatten the per-group results into one table of points with their cluster.
assignments <- kclust %>% 
  unnest(augmented, value) %>%
  # NOTE(review): `-compare_year_risk` is listed twice. value_mgnt1 and
  # value_risk1 are presumably the duplicate columns produced by unnesting
  # both `augmented` and `value` - confirm against the installed tidyr.
  select(-compare_year_risk, -compare_year_risk, -value_mgnt1, -value_risk1) %>%
  filter(compare_year_mgnt == "2009_2010") %>%
  select(value_mgnt, value_risk, `.cluster`, id_key) %>%
  # Convert the cluster label to a number.
  mutate(`.cluster` = as.numeric(as.character(`.cluster`))) %>%
  setNames(c("value_mgnt", "value_risk", "cluster", "id_key"))
assignments

# `cluster` is numeric at this point, so it is mapped to colour as a number
# rather than as a category.
ggplot(assignments, aes(x = value_mgnt, y = value_risk, color = cluster))+
  geom_point()

数据

x <- structure(list(value_mgnt = c(0.0110388174791035, 0.898946927517024, 
0.94115393661753, 0.924209762589329, 0.000059393860742526, 0.897723429847656, 
0.88579128904482, 0.943152925646893, 0.746491608725375, 0.982409435758668, 
0.98989065500874, 0.257986142528283, 0.889982876670799, 0.922857426360599, 
0.722023482802919, 0.978590927957757, 0.844466847783071, 0.945688565344339, 
0.884928093922649, 0.864221848521698, 0.775056472237581, 0.723246039803769, 
0.93059626028564, 0.953608394249242, 0.884763629201837, 0.96163116036515, 
0.988733563254601, 0.801168388781437, 0.789105185281443, 1, 1, 
0.939523291609511, 0.913340843868584, 0.0106370220766829, 0.953319964691787, 
1, 0.959317139090706, 0.933774536362949, 0.00650398684544899, 
0.857378551165802, 1, 0.47052696853382, 0.220610345508807, 0.980510694839292, 
0.585091473825237, 0.667255144349022, 0.944366049490067, 0.00812864555510591, 
0.796075398833235, 0.860926103408849, 1, 0.852684128125634, 0.453195933425619, 
1, 0.966954913948055, 0.857889126227409, 0.95302299135438, 0.810878876015034, 
0.862216500723706, 0.997760895711359, 0.854034756147371, 0.917313239938199, 
0.916787187093396, 0.863438628728223, 0.779528122930639, 0.9533727522704, 
0.97121460806916, 0.941196864550177, 0.865135616732165, 0.0894832963033601, 
0.91678883739486, 0.854551664704795, 0.453195933425619, 0.98989065500874, 
0.935301147412811, 0.0894832963033601, 0.565141052115378, 0.906695935064591, 
0.934191644498168, 0.97034454648928, 0.773178946365605, 0.985676250948248, 
0.921868983734365, 0.852101626300682, 0.818080548271926, 0.858826311409112, 
0.664676422804121, 0.950852373994365, 1, 0.988222695150481, 0.628624961278305, 
0.964818503727056, 0.842135160975889, 0.961598647037449, 0.900164163525953, 
0.910323427024138, 1, 0.923946767628493, 0.790227374389228, 0.985539123519253, 
0.792894090141254, 0.938557381769876, 0.938601420188532, 0.974455450953519, 
0.83815667874572, 0.935976357004196, 0.928584155851895, 0.974455450953519, 
0.963766392103854, 0.939351456717306, 0.987705872084036, 0.998936148352861, 
0.955865837464406, 0.918254262497922, 0.995083124321476, 0.935375101967953, 
0.984375067499917, 0.942946309551421, 0.940392372008284, 0.975272837407882, 
0.925423725693359, 0.902406271118155, 0.666006602061257, 0.943486143445243, 
0.942946309551421, 0.966348753053509, 0.844301413469089, 0.974455450953519, 
0.908332227867959, 0.762950596610165, 1, 1, 0.935736511064939, 
0.978554692144823, 0.694857647075684, 0.0106300287294934, 1, 
0.981203070263654, 0.9313802743807, 0.942946309551421, 0.9675890080408, 
0.904661380040416, 0.857681770220483, 0.933365611304007, 0.927259464010254, 
1, 0.885904610620244, 0.797973508114398, 0.93769817305342, 0.955005618512994, 
1, 0.943759257798692, 0.894241772837151, 0.971756348540894, 0.951076607057108, 
0.908332227867959, 0.780954996796012, 0.923892185446328, 1, 0.966954604702246, 
0.993576555558297, 0.871643369946504, 0.827961682890824, 0.854853469633853, 
0.930679755459162, 1, 1, 0.969111672067112, 0.964288319046871, 
0.966524773789151, 0.975550295958069, 0.35374908527329, 0.908177483501598, 
0.867457159188591, 0.975807530363443, 1, 0.891685600595667, 0.982568394222027, 
0.784895632676499, 0.945482702171043, 0.583089628005855, 0.936200689322239, 
0.82993942561058, 0.787690956966503, 1, 0.93600722049079, 0.7958844906993, 
0.967375169195596, 0.981457763792911, 0.857365406681746, 0.926588517078563, 
0.629961597930074, 0.987596655901052, 0.872475575659895, 0.929535458449966, 
0.64229475190462, 0.836554542942252, 0.932263578890796, 1, 0.00178738987442098
), compare_year_mgnt = c("2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011"), value_risk = c(0.946546306337345, 
0.912174068173779, 0.949660470539855, 0.997512157438295, 0.266333556522027, 
0.908207052708363, 0.902032323436553, 0.959682238828097, 0.813956648692671, 
0.937763162127675, 0.826919429835034, 0.187925704407133, 0.98385727412313, 
0.90066110667982, 0.308438342257892, 0.960372341528122, 0.977408085282025, 
0.78639467425056, 0.925977733727168, 0.929125088930013, 0.927046920387734, 
0.931567073460975, 0.912158662971236, 0.853702625994998, 0.956249952311555, 
0.953654630763891, 0.999920099677485, 0.825206070981134, 0.392306647852379, 
0.97838113724459, 0.981701627136152, 0.99511523832232, 0.976113572660343, 
0.956501620994099, 0.999102220666347, 0.942815375649731, 0.896179844365992, 
0.902288539796489, 0.59944034446177, 0.944128259093776, 1, 0.819803792899473, 
0.246328623366836, 0.648734630460053, 0.799487501305065, 0.907010272663048, 
0.9025322138624, 0.00395926715342627, 0.869928631148843, 0.834075430915017, 
0.986539396273446, 0.795224865006846, 0.990680161008128, 0.970814679862398, 
0.939143015156724, 0.938238413074579, 0.996271249424252, 0.922169153415158, 
0.94796724659962, 0.89511929636525, 0.483886438516458, 0.969071501528402, 
0.966885399077624, 0.856974516244115, 0.916520797550305, 0.987427143063365, 
0.980795169271579, 0.98141510748386, 0.803281988410246, 0.0691376771621042, 
0.89807956226392, 0.978295805864714, 0.990680161008128, 0.826919429835034, 
0.987627636914074, 0.0691376771621042, 0.921147439837532, 0.966557071830578, 
0.896497307072369, 0.911832319922501, 0.260621905916568, 0.991071097245202, 
0.864322758390591, 0.544681203578209, 0.960039938783663, 0.932214621830412, 
0.967135055725683, 0.955377191165227, 0.837154459311194, 0.926821618837857, 
0.553126225438248, 0.965565523010522, 0.98907905346962, 0.889060652536614, 
0.924822981531525, 0.952513587659607, 0.97126881704748, 0.932424158071947, 
0.816134359325688, 0.988123461438127, 0.842450190429406, 0.976995393278696, 
0.838876447126404, 0.99074528386509, 0.846825737106085, 0.871268503266259, 
0.970322412618742, 0.99074528386509, 0.917909277524218, 0.976111576522556, 
0.971295744067109, 0.836507914343338, 0.950878313147086, 0.874436032009303, 
0.988627590550134, 0.945283016385604, 0.826854182265903, 0.9954826027266, 
0.899316039118236, 0.933281012264474, 0.986555885077338, 0.61974389756098, 
0.653574042867885, 0.987854583972447, 0.9954826027266, 0.983227336399729, 
0.95423133823121, 0.99074528386509, 0.932326142786019, 0.894943124276984, 
0.818248183232706, 1, 0.969863777023023, 0.982014620775736, 0.990545253149014, 
0.0607051489063304, 0.989218180846633, 0.821455039376924, 0.972044819715966, 
0.9954826027266, 0.99441230766265, 0.98777111036196, 0.970846888022037, 
0.923632909483839, 0.893713863514953, 0.946694365363574, 0.846136214068607, 
0.566194004735035, 0.941176347422879, 0.878214441835681, 0.937836015792024, 
0.976786751417467, 0.830051186868787, 0.992802720670924, 0.983949209987261, 
0.932326142786019, 0.95953236124163, 0.965265152126847, 1, 0.841543484163377, 
0.995785685351229, 0.73205213273416, 0.401355055033127, 0.936765210733108, 
0.923299275822597, 0.976760690404608, 0.96264573758988, 0.975422514331076, 
0.982769905678311, 0.969353068419139, 0.991429403927868, 0.887730287752104, 
0.822406697124774, 0.597019496412545, 0.92726898193019, 0.839394490909181, 
0.738163557532819, 0.984268864412331, 0.878363900786381, 0.904532843499266, 
0.980466499576335, 0.918950603309048, 0.983493744013983, 0.753371899372633, 
0.974529555616935, 0.922845605574439, 0.682166207754687, 0.993141510252534, 
0.958722926473936, 0.835019355720056, 0.835719339155208, 0.87965218870661, 
0.961769357004018, 0.928336337484138, 0.94408084593039, 0.942295340618994, 
0.90331177434957, 0.905307560330107, 0.992883646824905, 0.8881171771733
), compare_year_risk = c("2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2009_2010", "2009_2010", "2009_2010", 
"2009_2010", "2009_2010", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011", "2010_2011", "2010_2011", "2010_2011", 
"2010_2011", "2010_2011"), id_key = c("2009_2010_62709", "2009_2010_74208", 
"2009_2010_878736", "2009_2010_1045810", "2009_2010_726513", 
"2009_2010_77551", "2009_2010_317093", "2009_2010_1004858", "2009_2010_20388", 
"2009_2010_104169", "2009_2010_1466258", "2009_2010_882095", 
"2009_2010_891024", "2009_2010_1350031", "2009_2010_1047862", 
"2009_2010_29534", "2009_2010_37785", "2009_2010_1361658", "2009_2010_864328", 
"2009_2010_1038357", "2009_2010_4447", "2009_2010_877890", "2009_2010_745732", 
"2009_2010_858339", "2009_2010_1166126", "2009_2010_1413329", 
"2009_2010_1800", "2009_2010_1174922", "2009_2010_1637459", "2009_2010_918160", 
"2009_2010_313616", "2009_2010_28917", "2009_2010_865752", "2009_2010_906107", 
"2009_2010_1156039", "2009_2010_316206", "2009_2010_1001082", 
"2009_2010_100893", "2009_2010_812074", "2009_2010_203077", "2009_2010_949039", 
"2009_2010_793952", "2009_2010_310158", "2009_2010_55785", "2009_2010_857005", 
"2009_2010_73124", "2009_2010_352915", "2009_2010_1115222", "2009_2010_58492", 
"2009_2010_216228", "2009_2010_765880", "2009_2010_78814", "2009_2010_929887", 
"2009_2010_773910", "2009_2010_822416", "2009_2010_77449", "2009_2010_66740", 
"2009_2010_1067701", "2009_2010_1659166", "2009_2010_794367", 
"2009_2010_35527", "2009_2010_764622", "2009_2010_769397", "2009_2010_43362", 
"2009_2010_2488", "2009_2010_38777", "2009_2010_920448", "2009_2010_1336920", 
"2009_2010_714154", "2009_2010_1458891", "2009_2010_68505", "2009_2010_98362", 
"2009_2010_929887", "2009_2010_1466258", "2009_2010_1004440", 
"2009_2010_1458891", "2009_2010_1545158", "2009_2010_1518832", 
"2009_2010_811669", "2009_2010_1124887", "2009_2010_77476", "2009_2010_1398987", 
"2009_2010_1138118", "2009_2010_354190", "2009_2010_723125", 
"2009_2010_874016", "2009_2010_1090727", "2009_2010_1110783", 
"2009_2010_50341", "2009_2010_356028", "2009_2010_106040", "2009_2010_72741", 
"2009_2010_874501", "2009_2010_70538", "2009_2010_34408", "2009_2010_849213", 
"2009_2010_764180", "2009_2010_894315", "2009_2010_50863", "2009_2010_50104", 
"2010_2011_1091667", "2010_2011_1040971", "2010_2011_896878", 
"2010_2011_1688568", "2010_2011_77551", "2010_2011_822416", "2010_2011_1370946", 
"2010_2011_1688568", "2010_2011_1018724", "2010_2011_42582", 
"2010_2011_811156", "2010_2011_40493", "2010_2011_766704", "2010_2011_1124887", 
"2010_2011_20520", "2010_2011_859737", "2010_2011_30625", "2010_2011_37785", 
"2010_2011_1595262", "2010_2011_6201", "2010_2011_1039684", "2010_2011_11199", 
"2010_2011_723527", "2010_2011_24491", "2010_2011_37785", "2010_2011_24741", 
"2010_2011_27904", "2010_2011_1688568", "2010_2011_1095073", 
"2010_2011_1001250", "2010_2011_104889", "2010_2011_835541", 
"2010_2011_352947", "2010_2011_715957", "2010_2011_46765", "2010_2011_314808", 
"2010_2011_1048911", "2010_2011_40704", "2010_2011_936340", "2010_2011_37785", 
"2010_2011_920448", "2010_2011_6281", "2010_2011_1324404", "2010_2011_791907", 
"2010_2011_72903", "2010_2011_315852", "2010_2011_796343", "2010_2011_1457543", 
"2010_2011_100517", "2010_2011_1022079", "2010_2011_859014", 
"2010_2011_8818", "2010_2011_1396009", "2010_2011_1156039", "2010_2011_91419", 
"2010_2011_1095073", "2010_2011_314132", "2010_2011_1138118", 
"2010_2011_949573", "2010_2011_788784", "2010_2011_818479", "2010_2011_1020416", 
"2010_2011_1140859", "2010_2011_93102", "2010_2011_1116521", 
"2010_2011_313616", "2010_2011_1067983", "2010_2011_794323", 
"2010_2011_14195", "2010_2011_1510295", "2010_2011_1336917", 
"2010_2011_793952", "2010_2011_33185", "2010_2011_708819", "2010_2011_814453", 
"2010_2011_84792", "2010_2011_107189", "2010_2011_1466258", "2010_2011_1163302", 
"2010_2011_47111", "2010_2011_96879", "2010_2011_100893", "2010_2011_1090727", 
"2010_2011_785161", "2010_2011_1467373", "2010_2011_205520", 
"2010_2011_90185", "2010_2011_885725", "2010_2011_1115222", "2010_2011_39911", 
"2010_2011_1410636", "2010_2011_96223", "2010_2011_1413329", 
"2010_2011_1361658", "2010_2011_70858", "2010_2011_1065088", 
"2010_2011_2488", "2010_2011_101830", "2010_2011_51143", "2010_2011_833320"
)), row.names = c(NA, -200L), class = c("grouped_df", "tbl_df", 
"tbl", "data.frame"), groups = structure(list(compare_year_risk = c("2009_2010", 
"2010_2011"), .rows = list(1:100, 101:200)), row.names = c(NA, 
-2L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))

1 个答案:

答案 0 :(得分:1)

kmeans函数只在value_mgnt变量上聚类并不奇怪,因为您只把这一个变量传给了kmeans函数(`.x[[1]]`只取出第一列)。只要把两个变量都传给它,它就会按预期工作。

# Fit one k-means model per (compare_year_mgnt, compare_year_risk) group on
# both value_mgnt and value_risk, then plot the cluster assignments of the
# "2009_2010" group. Expects the data frame `x` (columns value_mgnt,
# value_risk, compare_year_mgnt, compare_year_risk, id_key) to exist already.
library(dplyr)
library(tidyr)
library(purrr)
library(broom)
library(ggplot2)

# kmeans() chooses its starting centers at random; fixing the seed makes the
# cluster labels and the plot reproducible between runs.
set.seed(1)

# Number of clusters requested from kmeans().
k_means_centers <- 4

kclust <- x %>%
  as_tibble() %>%
  group_by(compare_year_mgnt, compare_year_risk) %>%
  # One row per group; the remaining columns are collected in the list-column
  # "value".
  nest(.key = "value") %>%
  # Some tidyr versions return a grouped result from nest(); drop the grouping
  # so later select() calls cannot re-add the grouping columns.
  ungroup() %>%
  mutate(
    # Model input: everything in `value` except the identifier column.
    kmeans_data = map(value, ~ select(.x, -"id_key")),
    # `.x[1:2]` keeps the first two columns (value_mgnt and value_risk) as a
    # data frame, so the model clusters on both variables.
    # nstart = 1 uses a single random start; a larger nstart makes the
    # solution less dependent on the initial centers.
    kmeans = map(
      kmeans_data,
      ~ kmeans(.x[1:2], centers = k_means_centers, iter.max = 10, nstart = 1)
    ),
    tidied = map(kmeans, tidy),
    glanced = map(kmeans, glance),
    # Augment the full `value` table (which still carries id_key) so that a
    # single unnest() yields the coordinates, the id and the cluster together,
    # without duplicated value_mgnt / value_risk columns.
    augmented = map2(kmeans, value, augment)
  )

kclust

# One row per point of the "2009_2010" group with its numeric cluster label.
assignments <- kclust %>%
  filter(compare_year_mgnt == "2009_2010") %>%
  unnest(augmented) %>%
  select(value_mgnt, value_risk, cluster = .cluster, id_key) %>%
  mutate(cluster = as.numeric(as.character(cluster)))
assignments

# Map the cluster as a factor so each cluster gets its own discrete colour.
ggplot(
  assignments,
  aes(x = value_mgnt, y = value_risk, color = as.factor(cluster))
) +
  geom_point()

enter image description here