一组的元素

时间:2014-10-30 13:30:26

标签: r

我有下面这个带名称的向量:每个名称都对应一个组编号。现在我想知道每个组有多少个成员,以及具体是哪些成员。

1030 3083 1900 4778 1714 3394 1253 2838 4463 3350 1334 5400  890 3048 2366 3848 1684 2918 3655 
   1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19 
5706 3592 4875  222 4307 2785 1223 5283 4249 2115 2953 1448 5639 2913 4352 1435  545 6030 4985 
  20   21   22   23   24   25   26   27   28   29   30   31   32   33   34   35   36   37   38 
5600 5250 2857 5480 2835  554 4748 2723  330 1720  859 5832 4109 3692 3958 4442 3385  263 2464 
  39   40   41   42   43   44   45   46   47   48   49   50   51   52   53   54   55   56   57 
4687   72  318  519 2639 2534 1994 4376 6159  132 4067 3729 5390 2609 3496 4134 3436 5827 3028 
  58   59   60   61   62   63   64   65   66   67   68   69   70   71   72   73   74   75   76 
 310 1255 3661 5205  858 5835 1975  949 5291  888  193 5393  113 2393 2859 3225 4110 3298 2207 
  60   77   78   79   80   81   82   83   84   85   86   70   87   88   89   90   91   92   93 
5379 4684 5356 4969 6036  587  508  212 5189  473 5197 4504 3336 3962  906   41 1096 4518 3607 
  94   95   96   97   98   99  100  101  102  103  104  105  106  107  108  109  110  111  112 
3750 3516 6075 5319 6126  902 3596  861 6119 4899 2447 5798 5881 2352 1371 6107  487 1472 5261 
 113  114  115  116  117  118  119  120  121  122  123  124  125  126  127  128  129  130  131 
2824 3314  481 2127 4661   46  184 4854 5194 2362 5167 5685 2102 5470 5934 1860 1883 1297 3829 
 132  133  134  135  136  137  138  139  140   15  141  142  143  144  145  146  147  148  149 
 952 3954 6162  376 4232 4901 5136 4374 6077 1807 1201 2726 4032 1597 2001 3208  608 5368 4949 
 150  151  152  153  154  155  156  157  121  158  159  160  161  162  163  164  165   96  166 
3884 4783 3354 4450 3319 2139 4451 3568 3940 5472 4982 1214 4005 3271 1638 5657 1260 4603 3913 
 167  168  169  170  171  172  173  174  175  176  177  178  111  179  180  181  182  183  184 
 815 5576 4091 3425 5917 3969 2705 5585  392 5628 
 185  186  187  188  189  190  191  192  193  194 

所以,例如, 组60包含“310”和“318”

结构:

structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 
52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 60L, 
77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 70L, 87L, 88L, 
89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 
101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 
112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 
123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L, 131L, 132L, 133L, 
134L, 135L, 136L, 137L, 138L, 139L, 140L, 15L, 141L, 142L, 143L, 
144L, 145L, 146L, 147L, 148L, 149L, 150L, 151L, 152L, 153L, 154L, 
155L, 156L, 157L, 121L, 158L, 159L, 160L, 161L, 162L, 163L, 164L, 
165L, 96L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L, 174L, 
175L, 176L, 177L, 178L, 111L, 179L, 180L, 181L, 182L, 183L, 184L, 
185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L, 193L, 194L), .Names = c("1030", 
"3083", "1900", "4778", "1714", "3394", "1253", "2838", "4463", 
"3350", "1334", "5400", "890", "3048", "2366", "3848", "1684", 
"2918", "3655", "5706", "3592", "4875", "222", "4307", "2785", 
"1223", "5283", "4249", "2115", "2953", "1448", "5639", "2913", 
"4352", "1435", "545", "6030", "4985", "5600", "5250", "2857", 
"5480", "2835", "554", "4748", "2723", "330", "1720", "859", 
"5832", "4109", "3692", "3958", "4442", "3385", "263", "2464", 
"4687", "72", "318", "519", "2639", "2534", "1994", "4376", "6159", 
"132", "4067", "3729", "5390", "2609", "3496", "4134", "3436", 
"5827", "3028", "310", "1255", "3661", "5205", "858", "5835", 
"1975", "949", "5291", "888", "193", "5393", "113", "2393", "2859", 
"3225", "4110", "3298", "2207", "5379", "4684", "5356", "4969", 
"6036", "587", "508", "212", "5189", "473", "5197", "4504", "3336", 
"3962", "906", "41", "1096", "4518", "3607", "3750", "3516", 
"6075", "5319", "6126", "902", "3596", "861", "6119", "4899", 
"2447", "5798", "5881", "2352", "1371", "6107", "487", "1472", 
"5261", "2824", "3314", "481", "2127", "4661", "46", "184", "4854", 
"5194", "2362", "5167", "5685", "2102", "5470", "5934", "1860", 
"1883", "1297", "3829", "952", "3954", "6162", "376", "4232", 
"4901", "5136", "4374", "6077", "1807", "1201", "2726", "4032", 
"1597", "2001", "3208", "608", "5368", "4949", "3884", "4783", 
"3354", "4450", "3319", "2139", "4451", "3568", "3940", "5472", 
"4982", "1214", "4005", "3271", "1638", "5657", "1260", "4603", 
"3913", "815", "5576", "4091", "3425", "5917", "3969", "2705", 
"5585", "392", "5628"))

2 个答案:

答案 0 :(得分:4)

这是一个更通用的解决方案,它提供了完整的结果(假设df是您的数据集)

library(data.table)

# `df` is assumed to be the named integer vector from the question.
# data.frame() keeps the vector's names as row names, so the values become the
# `Group` column and the row names are copied into a `Members` column.
df2 <- data.frame(df)
names(df2) <- "Group"
df2$Members <- row.names(df2)

# One row per group: member count plus a comma-separated list of members.
df2 <- setDT(df2)[,
  list(Size = .N, Members = paste(Members, collapse = ", ")),
  by = Group
]
df2
#    Group Size Members
#   1:   1    1    1030
#   2:   2    1    3083
#   3:   3    1    1900
#   4:   4    1    4778
#   5:   5    1    1714
# ---                 
# 190: 190    1    3969
# 191: 191    1    2705
# 192: 192    1    5585
# 193: 193    1     392
# 194: 194    1    5628

或者,正如 @KFB 所建议的,可以简化为

# keep.rownames = TRUE stores the vector's names in a column called `rn`;
# the values land in the second column, which is renamed to `Group`.
dt <- as.data.table(df, keep.rownames = TRUE)
setnames(dt, 2, "Group")  # renames by reference, so `dt` is updated in place

# One row per group: member count plus a comma-separated list of members.
dt2 <- dt[,
  list(Size = .N, Members = paste(rn, collapse = ", ")),
  by = Group
]

答案 1 :(得分:2)

由于组编号是向量的值、成员标识是向量的名称,因此应按 x 的取值对 names(x) 进行拆分:

# Group the member labels (the vector's names) by group ID (the vector's
# values); the result is a list with one character vector per group.
ss <- split(names(x), f = x)

# Look a group up by its ID as a character string.
ss[["60"]]
## [1] "318" "310"

每组的元素数量:

# Number of members in every group. lengths() always returns a named integer
# vector, whereas sapply(ss, length) returns an empty list when `ss` is empty.
lengthvec <- lengths(ss)

# Look the count up by group ID as a character string.
lengthvec["60"]
## 60 
## 2 

注意:这里是按字符(组 ID)进行索引,而不是按数字(向量或列表中的位置)进行索引。