我正在尝试编写一个脚本来分析我的数据。如果你可以帮助我,那将是很棒的。
让我们从我的数据开始:
> dput(tbl_alles[1:100,])
structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), `20` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), `52.5` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0.7306675, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), `81` = c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0.91538769,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0), `110` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0.85441768, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0.61947266, 0, 0, 0), `140.5` = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.5664111,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.6800275, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.6300494, 0, 0, 0), `189` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0.6234154, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0.987181, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.6194727,
0, 0, 0), `222.5` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0.8632862, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0.3456471, 1, 0, 0), `278` = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0.61502309,
0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0.44036184, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.42691496, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0.32234716, 0, 0, 0, 0.08890253, 0.67918373,
0, 0), `340` = c(0, 0, 0, 0, 0, 0, 0.583163048, 0, 0, 0, 0, 0,
1, 1, 0, 0, 0, 0, 1, 0.218194067, 0.325932107, 1, 0, 0, 1, 0,
0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.663889907, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0.789927058, 0, 0.44201215,
0, 0, 0, 0, 0.134339392, 0, 0.668372028, 0, 0, 0, 0, 0, 0, 0,
0.387740087, 0, 0, 0, 0, 0, 0, 0), `397` = c(0, 1, 0, 0, 0, 0,
1, 0, 1, 0, 0, 0, 0, 0.63953839, 0, 1, 0, 0, 0, 1, 1, 0.81888525,
0.89884151, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0.51459601, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0.75141988, 0, 0, 0, 0, 0, 0, 0, 0, 0.65763553, 1, 0, 0,
0, 0, 1, 0, 1, 0.67607045, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0,
0, 0.80260185, 0, 0, 0, 0, 0, 0, 1), `453.5` = c(0, 0.66069369,
0, 0, 0, 1, 0.57541627, 1, 1, 0, 0, 0, 1, 0.64615661, 0, 0.45209671,
0, 0, 0, 0.17022498, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0.02056466, 0.08765142, 1, 0, 0, 0, 0, 0, 1, 0.53107365,
0, 0, 1, 0, 0.47454662, 1, 0.58701126, 1, 0.83194495, 0, 0, 1,
0, 0, 0, 0, 0.04550448, 0, 1, 0, 0.65763553, 1, 1, 0.05581525,
0, 0, 0.78992706, 1, 0.80859968, 0.80441503, 1, 1, 0, 0.5866736,
0, 0, 0.75623688, 0.36602167, 0, 0, 1, 0, 0.31053744, 1, 0.52549512,
1, 0, 0, 0, 0, 0.51742419), `529` = c(0, 0.521435654, 0, 0, 1,
0, 0.175996209, 0, 0, 0, 1, 0, 0, 0, 0, 0.886059888, 0, 0, 0,
0.17022498, 0, 0, 0.643526362, 0, 0, 0, 0, 0, 0.438237074, 0.01594858,
0, 0.383182117, 0, 0, 0, 0, 0, 0, 1, 0.101027654, 1, 0, 0, 0,
0, 0, 0, 0.733614607, 0.89648659, 0, 0, 0.703255538, 0, 1, 0.383200069,
0.868653232, 1, 0.065540432, 0, 0, 0.221275397, 0.229618981,
0, 0, 0, 0.14131076, 0, 0.665883882, 0, 0.399096177, 0.570186116,
0.917797708, 0.349222091, 0, 0, 0.872987981, 0, 0, 1, 1, 0.58275186,
0, 0.211497647, 0, 0, 0.929066091, 0.826799766, 0, 0, 0, 0, 0.148043509,
0.802601847, 1, 0.780383116, 0, 0, 0, 0, 0.340224249), `580` = c(0,
0.437291195, 0, 0, 1, 0, 0.20731698, 0, 0, 0, 1, 0, 0, 0, 0,
0.719755907, 0, 0, 0, 0.033248127, 0, 0, 0, 0, 0, 0, 0, 0, 0.443305568,
0, 0, 0.558877749, 0, 0, 0, 0, 0, 0, 1, 0.171621995, 1, 0, 0,
0, 0, 0, 0, 0.28952456, 1, 1, 0, 0.470920245, 0, 0.690299657,
0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0.111377617, 0.740623952, 0,
0, 0.53029633, 0, 0.917797708, 0.395559317, 0, 0, 0.484708125,
0, 0, 0.174273053, 0, 0.693355663, 0, 0.050471201, 1, 0, 0, 1,
0, 0, 0, 0, 0.698359908, 0.298609118, 0.702753583, 0.926794372,
0, 0, 0, 0, 0.320673115), `630.5` = c(0, 0.52204783, 0, 0, 0,
0, 0.48815538, 0, 0, 0, 0, 1, 0, 0, 0, 0.82709638, 0, 0, 0, 0.09539534,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0.45656691,
0.74836669, 0, 0, 0, 0, 0, 0, 0, 0.95701562, 1, 0, 0.67884433,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0.65763553,
0, 0, 1, 0, 0, 0.77875219, 0, 0, 0.25002477, 0, 1, 0, 0, 0, 0,
0, 0.82679977, 0, 0, 0, 0, 0.84843874, 0.38138487, 0.79820877,
0, 0, 0, 0, 0, 0.51876177), `683.5` = c(0, 0.52429838, 0, 0,
0, 0, 0.59605685, 0, 0, 0, 0, 0, 0, 0, 0, 0.27845748, 0.28224351,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0.94942976, 0, 0, 0, 0, 0, 0.74467188, 0, 0, 0.31501511,
0, 0, 0, 0, 0, 0, 0, 0.73190143, 0, 0, 0, 0, 0, 0.90254266, 0.42921624,
0, 0, 0.56841245, 0, 0, 0.48306937, 0.54177946, 0, 0.70689046,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0.72875619, 0, 0, 0, 0, 1, 0.26676304,
0.77778861, 0, 0, 0, 0, 0, 0.17064527), `735.5` = c(1, 0.3768651,
0, 1, 0, 0, 0.51381348, 0, 0, 0, 0, 0, 0, 0, 0, 0.39914361, 0.22206677,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0.42663351, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.61727598,
0, 0, 0, 0, 0, 0, 0.70045244, 0, 0, 0, 0, 1, 0.62631217, 0, 0,
0, 0, 0, 0, 0.14653411, 1, 1, 0.27513455, 0, 0, 0, 0, 0.75025613,
0, 0, 0, 0, 0, 0.92484335, 0, 1, 0, 1, 0.84843874, 0.12198269,
0.58556836, 0, 0, 0, 0, 0, 0.09714178), `784` = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.45656691, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0.61727598, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 0, 0, 0.84843874, 0, 0, 0, 0, 0, 0, 0, 0
), `832` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0.16189002, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0.2511846, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0.23427262, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0.45750616,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0.86974453,
1, 0, 0, 0.48180864, 0, 0, 0, 0.93083267, 0, 0, 0, 0), `882.5` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.3111616,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.63931007, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0.73948906, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0), `926.5` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.63485498, 0, 0, 0, 0, 0, 0, 1, 0,
0.68547559, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.35567368,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0.84973396, 0, 0, 1, 0), `973` = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.86100786, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0.68128251, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0.30811206, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.79599822, 0,
0, 0, 0), `1108` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0.75365872, 0, 0, 1, 0, 0, 0, 0, 0, 0.52862914,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.20061435, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0.93083267, 0, 0, 0, 0), `1200` = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0.67241551, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0.55638877, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("10",
"20", "52.5", "81", "110", "140.5", "189", "222.5", "278", "340",
"397", "453.5", "529", "580", "630.5", "683.5", "735.5", "784",
"832", "882.5", "926.5", "973", "1108", "1200"), row.names = c("at1g01050.1",
"at1g01080.1", "at1g01090.1", "at1g01220.1", "at1g01420.1", "at1g01470.1",
"at1g01800.1", "at1g01910.5", "at1g01920.2", "at1g01980.1", "at1g02020.2",
"at1g02100.2", "at1g02130.1", "at1g02140.1", "at1g02150.1", "at1g02500.2",
"at1g02560.1", "at1g02780.1", "at1g02880.3", "at1g02920.1", "at1g02930.2",
"at1g03030.1", "at1g03110.1", "at1g03130.1", "at1g03210.1", "at1g03220.1",
"at1g03230.1", "at1g03310.2", "at1g03330.1", "at1g03475.1", "at1g03630.2",
"at1g03680.1", "at1g03870.1", "at1g03900.1", "at1g04130.1", "at1g04170.1",
"at1g04190.1", "at1g04270.2", "at1g04350.1", "at1g04410.1", "at1g04420.1",
"at1g04530.1", "at1g04640.2", "at1g04690.1", "at1g04750.2", "at1g04810.1",
"at1g04850.1", "at1g04870.2", "at1g05010.1", "at1g05180.1", "at1g05320.3",
"at1g05350.1", "at1g05520.1", "at1g05560.1", "at1g05620.2", "at1g06000.1",
"at1g06110.1", "at1g06130.2", "at1g06290.1", "at1g06410.1", "at1g06550.1",
"at1g06560.1", "at1g06570.1", "at1g06620.1", "at1g06650.2", "at1g06680.2",
"at1g06900.1", "at1g07080.1", "at1g07110.1", "at1g07140.1", "at1g07240.1",
"at1g07250.1", "at1g07440.1", "at1g07750.1", "at1g07780.4", "at1g07890.8",
"at1g07990.1", "at1g08110.3", "at1g08200.1", "at1g08360.1", "at1g08490.1",
"at1g08520.1", "at1g08550.2", "at1g08820.2", "at1g08830.2", "at1g08980.1",
"at1g09010.1", "at1g09020.1", "at1g09130.2", "at1g09210.1", "at1g09300.2",
"at1g09310.1", "at1g09340.1", "at1g09430.1", "at1g09490.2", "at1g09620.1",
"at1g09640.1", "at1g09750.1", "at1g09760.1", "at1g09780.1"), class = "data.frame")
正如您所看到的,大多数单元格的值都是数字 "0"。
我想创建数据的子集,我们称之为"集群"。我想把在同一列中具有非零值的"元素"(row.names)放入同一个集群。
例如:
row.name Column1 Column2 Column3 Column4
at1g02560.1 0 0.12 0 0
at1g02020.2 1 0 0.55 0.31
at1g14560.2 0.15 0.47 0 0
如您所见,at1g02560.1 仅在第2列中有值;另外还有一行在第2列中也有值,即 at1g14560.2。这意味着它们属于同一个集群(我们称之为集群1)。
问题是 at1g02020.2 和 at1g14560.2 在第1列中的值都不为0,因此它们也应该在同一个集群中(集群2)。
总之,at1g14560.2 属于两个不同的集群(集群1和集群2)。
因此,我想在数据中添加一个名为 clusters 的列,并用集群编号填充。
示例:
row.name data data data data Clusters
at1g02560.1 1
at1g14560.2 1,2
at1g02020.2 2
这用 R 能实现吗?
答案 0 :(得分:1)
是的,可以使用 R。下面是使用 apply 的一种方式:
# Append a "Clusters" column: for each row, the comma-separated positions of
# the columns whose value is non-zero (an all-zero row yields "").
res <- cbind(
  tbl_alles,
  Clusters = apply(
    tbl_alles,
    1,
    function(row_values) {
      hit_positions <- which(row_values != 0)
      paste(hit_positions, collapse = ",")
    }
  )
)
# Show the first five data columns next to the new last column.
head(res[, c(1:5, ncol(res))])
# 10 20 52.5 81 110 Clusters
# at1g01050.1 0 0 0 0 0 17
# at1g01080.1 0 0 0 0 0 11,12,13,14,15,16,17
# at1g01090.1 0 0 0 1 0 4
# at1g01220.1 0 0 0 0 0 17
# at1g01420.1 0 0 0 0 0 13,14
# at1g01470.1 0 0 0 0 0 12
答案 1 :(得分:1)
您也可以尝试:
# Build a "Clusters" column listing, for every row, the comma-separated
# positions of the columns that hold a non-zero value.
# which(..., arr.ind = TRUE) yields one (row, column) index pair per non-zero
# cell; column 1 of the result is the row index, column 2 the column index.
indx <- which(tbl_alles != 0, arr.ind = TRUE)
# Default every row to "" so rows without any non-zero cell still get a value.
# Assigning the tapply() result directly would fail (or be recycled onto the
# wrong rows) because tapply() only returns entries for rows that have hits.
tbl_alles$Clusters <- ""
if (nrow(indx) > 0) {
  # Group the column positions by row index; the names of the result are the
  # row indices (as character), which are used to place each value.
  per_row <- tapply(indx[, 2], indx[, 1], FUN = paste, collapse = ",")
  tbl_alles$Clusters[as.integer(names(per_row))] <- as.vector(per_row)
}
tbl_alles[1:5, c(1:5, ncol(tbl_alles))]
# 10 20 52.5 81 110 Clusters
#at1g01050.1 0 0 0 0 0 17
#at1g01080.1 0 0 0 0 0 11,12,13,14,15,16,17
#at1g01090.1 0 0 0 1 0 4
#at1g01220.1 0 0 0 0 0 17
#at1g01420.1 0 0 0 0 0 13,14