假设我有以下数据集(下文代码中记为 df):
structure(list(x = structure(c(-0.640988601698674, 2.83880475590451,
1.972285329221, -0.748438401134246, -0.709535253162132, 0.549618381811837,
-0.335293922304472, -0.32480176541841, 2.97845789228857, -1.01602000461101,
2.08950041925233, 1.61861747466703, 1.97019953899666, -0.282769932109789,
-0.208029115737693, 1.19608693982774, -0.239631997924626, -0.437845275001069,
-0.859016885906324, 1.19140971326407, -0.612546007730434, 0.834865996431093,
-0.317943939983846, -0.296137951274862, -0.123681023180769, 1.40687816401458,
-0.252273150799399, -0.233311421487239, 2.28897781161625, 0.727416196995521,
-0.739210359535662, -0.519886357158347, -1.06762751122227, -0.682072348541687,
0.708454467683362, -0.299677474079799, -0.0387956816266673, -0.47636918838694,
-0.45453159679577, 0.594684091810403, -0.268580238007856, 2.42831491917844,
-1.1043184574413, -0.517737361169636, 1.98002803535679, 0.114320282569023,
0.0669159592886234, -0.18590709820684, -1.14031414025221, -0.327677627697421,
-0.701034077853847, -0.573484845347385, 0.0163513477895307, -0.334440644485425,
-0.0373735519282552, -0.618866584167821, -0.603065143074355,
-0.730740787109563, -0.264724686381051, -0.536699090481795, 0.0163513477895307,
-0.457691885014463, -0.536699090481795, -0.396698322393683, -0.258593727236786,
-0.0152515343974022, 0.074184622191618, 0.937227731834569, -0.366043526672358,
-0.226959242167666, -0.418188282280797, -0.309663984850869, -0.0942587398647345,
-0.919378390883366, -0.42134857049949, -0.489294767201396, -0.201708539300306,
0.702133891245975, -0.00893095796001564, 0.929674642991892, 0.882270319711493,
-0.631507737042594, -0.182746809988147, -0.562487042346333, -0.0173373246217397,
1.81139505600732, 3.1657997778927, 2.2917588652487, -0.511416784732249,
-0.508035276338247, 2.03656559158922, 1.04660530708354, -0.306282476456867,
-0.410287561734064, 0.743375652499923, 0.386515906845076, -0.520897649388329,
-0.21434969217508, 1.16012285989901, -0.433231254201777, -0.384815638691396,
0.809583690681547, 1.60366931139261, -0.355014120789118, 1.23992013742101,
2.92125667553022, -1.06301349042298, 1.46938866498033, -1.08052148715454,
-0.635616111726895, -1.1584857975097, -0.789522147977259, 0.524557296237599,
-0.757508428321896, 1.41882405348124, -0.830605894820271, -0.660139948303955,
-0.0821548359871392, -0.201518922007185, 2.60124589050533, -1.02730223355174,
2.13864290105301, -0.359722950234971, 3.10881978130966, -0.884330794538058,
0.972938988705803, -0.355266943846614, 2.26963684771785, 0.785375882926356,
1.06433452399041, 0.442990257313125, -0.437845275001069, 1.8133860375851,
-0.530378514044409, 0.847507149305866, 1.95992860228591, -0.155315508249889,
-0.44505073213969, -0.144633734070706, -0.752704790229482, -0.18590709820684,
-0.141663063145134, 1.65970122151004, 2.05157696062801, -0.889956107567332,
0.84118657286848, -0.328120068048038, -0.876208853816016, -0.391325832421904,
-0.772456591596315, -0.228191754572956, 2.42765125865251, 1.84615822641295,
1.73238785053999, 0.790621961369387, -0.527218225825716, -0.773720706883792,
-0.650469466354754, -0.74850160689862, 0.61048553290387, -0.106899892739508,
-0.70419436607254, -0.263049733625143, -0.834493049329264, -1.10239068162789,
-0.184706188683737, -0.222440030014934, -0.130696863026268, -0.735797248259473,
1.92943182097551, 1.17380690788595, -0.676699858569908, -0.776880995102486,
-0.555660819793955, -0.308147046505896, -0.876208853816016, -0.39764640885929,
-0.735797248259473, -0.616433162239427, -0.056335281240415, -0.492455055420089,
-0.116380757395588, -1.10567738137534, -0.39003011425224, -0.574622549106115,
-0.119541045614281, -0.697873789635153, -0.407285287926305, -0.498775631857476,
-0.539859378700489, -0.398152054974281, -0.343921509141505, -0.738957536478166,
-0.473493326107929, -0.410287561734064, -0.538974497999255, 2.67415373971059,
-0.225695126880189, -0.450644442286777, -0.337032080824753, -0.129021910270361,
-0.933852510924981, 0.800102826025467, 0.718598992865367, 0.468272563062671,
0.0384733653203839, -0.362883238453664, -0.73036155252332, 0.173449275140774,
-0.0626242547956146, -0.594848393705752, 0.477753427718751, 3.13501857064263,
1.96940946694199, 0.681339194766973, 1.08531883776254, 0.3608227636271,
0.147092471396872, -0.6757517721043, 0.105439872674495, -0.659950331010834,
-0.824285318382885, -0.527439446001024, -0.278313925721432, -0.20929323102517,
-0.543019666919182, 2.66641103357479, -0.419768426390144, 1.03014020546415,
-0.318639203391958, 2.78713404352887, -0.308621089738701, -0.875229164468221,
2.7029123625007, -0.133319902247784, -0.562708262521641, -0.751598689352939,
-0.615706295949128, -1.19087875175131, -0.761079554009019, -0.17642623355076,
-0.170105657113373, -0.179586521769453, -0.735797248259473, -0.750334574065462,
-0.565141684450035, -0.343921509141505, -0.40712727351537, -0.204868827519,
-0.353117947857902, 0.32289930500278, -0.574622549106115, 3.28408936591839,
1.67550266260351, 0.480913715937445, 2.17166791293836, -0.667377008324763,
-0.850168078893983, -0.0864528279645621, -1.08526191948258, -0.644148889917367,
3.05680143722997, -0.824285318382885, -0.811644165508112, -0.351822229688238,
-0.0604436559247162, 0.992880407365758, -0.546179955137875, -0.527218225825716,
-0.325339014415588, -0.398341672267403, 0.875949743274106, -0.41660813817145,
2.46642799509588, -0.480351151542494, -0.475800336507576, -0.486134478982703,
-0.909613100287604, -0.697873789635153, -0.767747762150462, 0.424028528000965,
0.59038609983298, 0.358231327287771, 0.604164956466483, 1.37211499360895,
2.47293818882639, 1.1129713596761, -0.921748607047386, -0.269212295651595,
-0.00482258327571425, -0.8687505736199, -0.580943125543501, 0.503035733468298,
-0.220670268612466, -0.748438401134246, -0.650469466354754, -0.865369065225898,
-1.25092422790648, -0.95385713534931, -0.757919265790326, -0.466540692026804,
-0.321799491610651, -0.565141684450035, -0.179586521769453, -0.470333037889236,
-0.0973558223190539, -0.173265945332067, -0.14798363958252, -0.247943555939789,
-0.982868581196914, -0.479813902545316, -1.23246814470931, -0.435032618486432,
-0.659950331010834, -0.201708539300306, -0.470333037889236, -0.659950331010834,
-0.340761220922811, -0.982426140846297, -0.277397442138011, -0.511005947263819,
-0.230277544797294, -0.596744566636968, -0.289943786366223, -0.428838453577793,
-0.353402373797584, -0.672591483885607, -0.378684679547131, -0.43556986748361,
-1.08342895231573, -0.277555456548945, -0.111387502010052, -0.378684679547131,
-0.887491082756751, -0.274806005798682, -0.46401246145185, -0.347081797360198,
-0.508256496513556, -0.979645087213847, -0.776880995102486, -0.367718479428265,
-0.6757517721043, 0.0100307713521441, -0.318639203391958, 0.095358553256863,
-1.0518260701288, -0.599904854855661, -0.0878749576629741, -0.46401246145185,
0.181444804334068, -0.596744566636968, -0.196020020506658, -0.754758977571633,
-0.878010218100671, 2.84796959173872, -0.571462260887422, -0.285392971331305,
-1.04234520547272, -1.03602462903534, -1.11819212272136, 3.00914429089208,
-0.688392924979073, -0.0215721108347888, -0.912773388506297,
-0.938055694255843, -1.15611558134568, -0.890651370975444, -0.495615343638783,
-0.324959779829345, -1.15611558134568, -0.647309178136061, -0.501935920076169,
0.436669680875739, 1.77031130916431, 0.996040695584451, 1.70078496835306,
-0.479813902545316, 0.790621961369387, -1.16875673422045, -0.735797248259473,
0.0068704831334509, -0.43556986748361), .Dim = c(382L, 1L), "`scaled:center`" = 2.61825994764398, "`scaled:scale`" = 3.16426835402208),
y = structure(c(-0.318543715352772, 2.47081694838525, 1.73859335545195,
-0.257920815427294, -0.287929150890406, 0.91806240496054,
-0.284493853227963, 0.000130661922180457, 2.59367935890088,
-0.882235646493226, 2.07606083170381, 1.79315396538488, 1.47397439727722,
-0.115659076935495, 0.560488333566729, 1.19551321028617,
0.166439484051089, -0.466867743837127, -1.18060135229314,
1.00960298384801, -0.449893331857988, 0.550384516912484,
-0.495764659468271, -0.0720105889891414, 0.0104365549095101,
1.80325778203913, 0.499865433641244, 0.176543300705334, 2.0030102372936,
1.28796313267252, -0.594984139012977, -0.400990859251431,
-1.69266278033039, -0.470100965166486, 0.893914283156885,
0.0350898675458732, -0.721281847191066, -0.31187519636097,
0.105816584125604, 0.23716620063082, -0.697234763553957,
2.12415499897803, -1.173124527969, -0.278128448735784, 1.78133249989942,
0.216958567322322, 0.590799783529472, -0.00532539907111466,
-1.42996354731996, 0.00295973058537025, -0.550931498400461,
-0.488489911477216, -0.177090282193311, 0.0552975008543707,
-0.116467382267833, -0.106363565613588, -0.520620048437719,
-0.413014401069989, -0.634389023964542, -0.358958981969767,
-0.490308598474976, -0.268024632081539, -0.520620048437719,
-0.805850792587116, -0.712592564868413, 0.408931083753023,
0.23928800212821, 0.894924664822314, -0.0154292157253598,
-0.31753333368735, -0.54072664357967, -0.229933243295026,
-0.510516231783473, -0.316927104688096, -0.520923162937349,
-0.328647532007025, -0.470100965166486, 1.03536771631635,
0.227062383976574, 1.12630206620457, 1.15661351616731, 0.216958567322322,
0.277581467247807, 0.279097039745945, -0.0841351689742384,
1.76284251542214, 2.77918543267287, 2.10637228166655, 0.691837950071945,
-0.377145851947406, 1.78971866772244, 1.1970287827843, -0.453934858519685,
-0.197297915501809, 1.03435733465092, 0.368212702636405,
-0.641865848288682, 0.247270017285065, 0.948070740423652,
-0.64014819945746, -0.157488877884074, 0.590799783529472,
1.47094325228094, 0.132389621926273, 1.27351467485695, 2.63975276284425,
-1.24536681704687, 1.2802842320153, -1.23394950422757, -0.371184600121403,
-0.933259920597174, -0.773215464793898, 0.154112827732908,
-2.09307703433821, 1.23178591207491, -0.783319281448147,
-0.486974338979079, 1.02920438815726, 0.204126720171433,
2.59277001540201, -1.12108987219963, 1.50115366407714, -0.672177298251425,
2.72523105173919, -0.591346765017449, 0.463087541019784,
-0.110809244941453, 2.06595701504956, 0.43631242688603, 0.825915597073805,
1.5405585490287, -0.466867743837127, 1.48266367959987, 0.0654013175086159,
1.7325310654594, 2.0962684650123, -0.0922182222976389, -0.0962597489593355,
0.182504552531344, -0.148193366562167, 0.317996733864795,
0.126024217434101, 1.89419213192736, 2.01543793177832, -0.866978883345312,
0.873706649848394, -0.328647532007025, -1.33266379293956,
0.0856089508171134, -0.171937335699646, -0.00512332273803019,
2.44990204791095, 2.16699518159203, 2.13668373162929, 1.9042959485816,
-0.33875134866127, -0.783319281448147, -0.672177298251425,
-0.619536413482801, 1.89419213192736, 0.338204367173293,
-0.490308598474976, 0.0376158217094399, -1.17211414630358,
-1.19757576427228, -0.165976083873644, 0.201903880507497,
0.0127604327399923, -0.530723865091964, 1.91935063539643,
0.938876267268284, -0.579525299531982, -0.449893331857988,
-0.0356368490338573, -0.457976385181389, -1.3308451059418,
-0.611554398325947, -0.854045998027874, -0.757958701645988,
-0.601450581671694, -0.288232265390037, -0.722696381522665,
-0.647220871115439, -0.840001692878471, -0.510516231783473,
-0.783319281448147, -0.813630731410886, -0.250848143769316,
-0.237713182118797, -0.520620048437719, -0.818177448905297,
-0.864149814682122, -0.753007831485401, -0.409478065241,
-0.409478065241, -0.495764659468271, 2.55710354261252, -0.347844783650093,
-0.596499711511114, -0.303792143037577, -0.571139131708952,
-0.333800478500689, 0.752460849997424, 0.735385399851746,
0.540280700258232, 0.23716620063082, 0.0957127674713585,
0.159973041392372, 0.423076427068965, -0.120205794429907,
-0.562550887552844, 0.671630316763448, 2.68976665528278,
1.77294633207639, 0.416812060743332, 0.866937092690046, 0.63121505014646,
0.441263297046611, -0.449893331857988, 0.399231419764947,
-0.369062798624012, -0.924772714607604, -1.32245893811878,
-0.324808081678405, -0.148496481061797, -0.399374248586755,
2.3479545378696, -0.0356368490338573, 0.679208179254133,
0.277581467247807, 2.50122943651453, -0.436758370207469,
-0.801405113259247, 2.47303978804917, 0.397008580101011,
-0.491723132806574, -1.308717747469, -1.4804826305912, -1.41985973066572,
-0.944980347916102, -0.854045998027874, -0.823734548065135,
-0.894461264644865, -0.98539561453309, -2.01578283693322,
-0.571139131708952, -0.874253631336371, -0.500412415129221,
-0.66207348159718, -0.803526914756641, 0.216958567322322,
-0.0154292157253598, 2.80949688263561, 1.52035091572021,
0.459450167024256, 1.94461017703205, -0.675006366914615,
-0.56972459737736, -0.0816092148106789, -0.953669630238752,
-0.540827681746216, 2.52901493231371, -1.1369528643468, -1.06622614776707,
-0.375125088616561, 0.754279536995191, 0.802979933268664,
-0.419581881895245, -0.126571198922078, -0.784834853946284,
-0.411397790405306, 0.621111233492215, -0.540827681746216,
1.97532577966096, -0.637117054461186, -0.178706892857994,
-0.874253631336371, -0.68228111490567, -0.83383836471938,
-0.915376165119155, 0.23716620063082, 0.296071451725083,
0.0828809203204622, 0.206854750668077, 1.21723641609279,
2.0251375957664, 0.429138717061514, -0.652575893942188, -0.261457151256276,
0.00983032591025668, -0.0237143453818447, -0.106363565613588,
0.206854750668077, -0.207401732156054, -1.46027499728271,
-1.55120934717093, -1.15716049765529, -1.97556964664931,
-0.753007831485401, -1.12684904769255, -0.918508348281971,
-1.1167452310383, -0.490308598474976, -0.975291797878841,
-0.490308598474976, -0.525267804098676, -0.884357447990616,
-0.399374248586755, -0.659951680099789, -0.704509511545019,
-0.247816998773042, -0.854652227027131, -0.912547096455965,
-0.753007831485401, -0.995499431187335, -1.20767958092653,
-1.1167452310383, -0.823734548065135, -0.409882217907169,
-0.838789234879964, -0.726434793684731, -0.441810278534588,
-0.742904014831155, -0.581242948363204, -0.954073782904921,
-0.90456508129911, -0.934876531261853, -0.631762031634437,
-0.530723865091964, -0.813630731410886, -0.631762031634437,
-0.501422796794651, -0.66207348159718, -1.12684904769255,
-0.207300693989515, -0.490308598474976, -0.83383836471938,
0.115920400779856, -1.00732089667281, -1.21778339758078,
-0.548910735069609, -0.429685698549491, -0.0356368490338573,
0.206854750668077, 0.530176883603987, -1.19757576427228,
-0.843942181373629, 0.447426625205705, 0.024986050891628,
0.552203203910245, -0.116467382267833, -0.193559503339735,
-1.06622614776707, -0.783319281448147, 2.31855243140574,
-0.571139131708952, -0.0622098868345265, -0.813630731410886,
-0.611554398325947, -0.894461264644865, 2.49031731452794,
-0.257920815427294, 0.23716620063082, -0.914668897953359,
-0.601450581671694, -0.712592564868413, -0.965187981224596,
-0.419581881895245, -0.146778832230576, -0.692384931559922,
-0.550931498400461, 0.368515817136035, 0.621111233492215,
1.66180434887967, 1.50014328241172, 1.5607661823372, -0.247816998773042,
0.833291383231406, -1.03591469780433, -0.793423098102392,
0.499865433641244, -0.530723865091964), .Dim = c(382L, 1L
), "`scaled:center`" = 32.7652706806283, "`scaled:scale`" = 0.989725006123945),
site = structure(c(5L, 9L, 9L, 2L, 3L, 7L, 4L, 3L, 9L, 3L,
8L, 7L, 8L, 5L, 3L, 8L, 4L, 3L, 2L, 8L, 6L, 8L, 7L, 3L, 3L,
8L, 3L, 4L, 9L, 8L, 5L, 5L, 1L, 4L, 7L, 2L, 6L, 5L, 2L, 7L,
4L, 7L, 1L, 5L, 8L, 3L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 4L, 3L, 3L, 4L, 4L, 4L, 3L, 5L, 8L, 2L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 3L, 9L, 3L, 7L, 7L, 7L, 7L, 7L, 7L,
8L, 8L, 8L, 8L, 5L, 9L, 9L, 4L, 4L, 7L, 8L, 4L, 2L, 8L, 4L,
5L, 8L, 8L, 7L, 8L, 9L, 1L, 8L, 2L, 5L, 2L, 5L, 9L, 1L, 8L,
1L, 2L, 9L, 7L, 8L, 1L, 8L, 3L, 9L, 2L, 9L, 3L, 8L, 7L, 7L,
9L, 3L, 7L, 2L, 8L, 8L, 5L, 4L, 5L, 5L, 5L, 5L, 7L, 7L, 2L,
8L, 1L, 1L, 5L, 5L, 5L, 7L, 7L, 8L, 8L, 5L, 1L, 1L, 5L, 9L,
7L, 5L, 3L, 3L, 1L, 5L, 2L, 3L, 1L, 8L, 9L, 3L, 2L, 3L, 4L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 4L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 5L, 7L, 5L, 5L, 5L, 5L, 5L, 9L, 8L, 7L,
7L, 7L, 7L, 7L, 7L, 4L, 8L, 8L, 8L, 8L, 9L, 9L, 3L, 4L, 7L,
5L, 2L, 4L, 3L, 5L, 5L, 9L, 7L, 8L, 8L, 7L, 7L, 5L, 7L, 9L,
4L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 1L, 4L, 4L, 4L, 5L, 6L,
7L, 7L, 8L, 8L, 9L, 8L, 5L, 1L, 7L, 2L, 1L, 8L, 1L, 1L, 5L,
9L, 8L, 5L, 7L, 4L, 5L, 9L, 1L, 8L, 5L, 2L, 2L, 1L, 4L, 5L,
9L, 9L, 8L, 8L, 8L, 8L, 8L, 1L, 7L, 7L, 7L, 7L, 7L, 7L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 6L, 6L, 6L, 6L,
3L, 3L, 3L, 4L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 1L, 5L, 6L, 6L, 1L, 4L, 4L, 1L, 2L, 1L, 4L,
4L, 3L, 6L, 2L, 2L, 3L, 4L, 2L, 3L, 2L, 3L, 5L, 1L, 2L, 9L,
4L, 2L, 3L, 4L, 1L, 9L, 2L, 7L, 1L, 3L, 4L, 3L, 5L, 4L, 4L,
1L, 8L, 7L, 8L, 9L, 8L, 3L, 9L, 1L, 4L, 2L, 5L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9"), class = "factor")), class = "data.frame", row.names = c(NA,
-382L))
它看起来像这样:
我想以定量的方式将相似的站点归为一组。目测数据,似乎可以分为三类——a) 站点 7-9,b) 站点 2-6,c) 站点 1——但我想采用一种客观的方法。`kmeans`(例如 `kmeans(df %>% select(x, y), 3)`)用处不大,因为它是对相似的观测点(points)而不是相似的站点(sites)进行分组。同样,树状图(例如 `hclust(dist(df %>% select(x, y))) %>% plot`)也只会把各个观测值聚在一起。我该如何对相似的站点、而不是相似的观测值进行聚类?
编辑:
这是一个思路——使用均值和标准差进行聚类。
首先,按站点计算 `x` 和 `y` 的均值和标准差,并在这些统计量上做聚类:
# Cluster sites (not individual observations): describe every site by the
# mean and standard deviation of x and y, run k-means on those four per-site
# features, and keep only the site -> cluster lookup table.
set.seed(42) # k-means starts from random centres; fix the seed so labels are reproducible
df_stats <- df %>%
  group_by(site) %>%
  # across() replaces the deprecated summarise_all(funs(...)); the summary
  # columns are still named x_mean, x_sd, y_mean and y_sd.
  summarise(across(everything(), list(mean = mean, sd = sd))) %>%
  mutate(
    # nstart > 1 tries several random starts and keeps the best partition
    cluster = kmeans(
      select(., x_mean, y_mean, x_sd, y_sd),
      centers = 3,
      nstart = 25
    )$cluster
  ) %>%
  select(site, cluster)
然后,将群集ID与原始数据帧连接起来:
# Attach each observation's site-level cluster id (matched on `site`)
df <- left_join(df, df_stats, by = "site")
得到如下结果:
(对我来说)这些都是直观的结果,但是这种方法有效吗?有更好的方法吗?
答案 0 :(得分:1)
所以,如果我是你,我将像这样计算组平均值:
# Site centroids: the mean of every non-grouping column (x and y) per site.
# across() replaces the deprecated summarise_all(funs(mean)); because a single
# unnamed function is applied, the output columns keep their names (x, y).
site_centr <- df %>%
  group_by(site) %>%
  summarise(across(everything(), mean))
head(site_centr)
#> # A tibble: 6 x 3
#> site x y
#> <fct> <dbl> <dbl>
#> 1 1 -0.846 -1.08
#> 2 2 -0.488 -0.416
#> 3 3 -0.463 -0.413
#> 4 4 -0.509 -0.585
#> 5 5 -0.452 -0.378
#> 6 6 -0.300 -0.570
然后,您可以像这样以图形方式显示网站的平均值:
# Scatter plot: raw observations drawn faintly, site centroids drawn large.
# Both layers share the same aesthetics, so they are declared once up front.
ggplot(mapping = aes(x = x, y = y, colour = site)) +
  geom_point(data = df, alpha = 0.3) +
  geom_point(data = site_centr, size = 5) +
  theme_classic()
然后,您可以直接用层次聚类分析对这些站点均值进行聚类。与 `kmeans` 相比,我更喜欢 `hclust`,因为您可以在查看树状图之后再决定簇的数量:
# Single-linkage hierarchical clustering of the site centroids.
# Distances are computed on every column except the `site` identifier.
dists <- dist(site_centr[, names(site_centr) != "site"])
hcl <- hclust(dists, method = "single")
plot(hcl) # dendrogram used to choose the number of clusters
接下来,根据树状图确定要把站点划分为多少个簇。乍看之下,似乎有四个簇。因此,我们可以像下面这样把各站点分配到各自的簇中:
# Cut the dendrogram into four groups; one cluster id is returned per site
cutree(hcl, k = 4)
#> [1] 1 2 2 2 2 2 3 4 4