如何按列的值对行名称进行分组

时间:2015-03-27 23:43:23

标签: r dataframe

我有以下数据框:

# Example data: 32 cars (stored as row names) with eleven numeric measurements
# and a numeric `group` column whose values 1-4 cover 13, 10, 5 and 4
# consecutive rows respectively.
df <- data.frame(
  group = rep(c(1, 2, 3, 4), times = c(13, 10, 5, 4)),
  mpg = c(
    21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4,
    22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4,
    14.7, 32.4, 30.4, 33.9, 21.5, 15.5, 15.2, 13.3,
    19.2, 27.3, 26, 30.4, 15.8, 19.7, 15, 21.4
  ),
  cyl = c(
    6, 6, 4, 6, 8, 6, 8, 4,
    4, 6, 6, 8, 8, 8, 8, 8,
    8, 4, 4, 4, 4, 8, 8, 8,
    8, 4, 4, 4, 8, 6, 8, 4
  ),
  disp = c(
    160, 160, 108, 258, 360, 225, 360, 146.7,
    140.8, 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460,
    440, 78.7, 75.7, 71.1, 120.1, 318, 304, 350,
    400, 79, 120.3, 95.1, 351, 145, 301, 121
  ),
  hp = c(
    110, 110, 93, 110, 175, 105, 245, 62,
    95, 123, 123, 180, 180, 180, 205, 215,
    230, 66, 52, 65, 97, 150, 150, 245,
    175, 66, 91, 113, 264, 175, 335, 109
  ),
  drat = c(
    3.9, 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69,
    3.92, 3.92, 3.92, 3.07, 3.07, 3.07, 2.93, 3,
    3.23, 4.08, 4.93, 4.22, 3.7, 2.76, 3.15, 3.73,
    3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11
  ),
  wt = c(
    2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19,
    3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424,
    5.345, 2.2, 1.615, 1.835, 2.465, 3.52, 3.435, 3.84,
    3.845, 1.935, 2.14, 1.513, 3.17, 2.77, 3.57, 2.78
  ),
  qsec = c(
    16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20,
    22.9, 18.3, 18.9, 17.4, 17.6, 18, 17.98, 17.82,
    17.42, 19.47, 18.52, 19.9, 20.01, 16.87, 17.3, 15.41,
    17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6
  ),
  vs = c(
    0, 0, 1, 1, 0, 1, 0, 1,
    1, 1, 1, 0, 0, 0, 0, 0,
    0, 1, 1, 1, 1, 0, 0, 0,
    0, 1, 0, 1, 0, 0, 0, 1
  ),
  am = c(
    1, 1, 1, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 1, 1, 0, 0, 0, 0,
    0, 1, 1, 1, 1, 1, 1, 1
  ),
  gear = c(
    4, 4, 4, 3, 3, 3, 3, 4,
    4, 4, 4, 3, 3, 3, 3, 3,
    3, 4, 4, 4, 3, 3, 3, 3,
    3, 4, 5, 5, 5, 5, 5, 4
  ),
  carb = c(
    4, 4, 1, 1, 2, 1, 4, 2,
    2, 4, 4, 3, 3, 3, 4, 4,
    4, 1, 2, 1, 1, 2, 2, 4,
    2, 1, 2, 2, 4, 6, 8, 2
  ),
  row.names = c(
    "Mazda RX4", "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive",
    "Hornet Sportabout", "Valiant", "Duster 360", "Merc 240D",
    "Merc 230", "Merc 280", "Merc 280C", "Merc 450SE",
    "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood",
    "Lincoln Continental", "Chrysler Imperial", "Fiat 128",
    "Honda Civic", "Toyota Corolla", "Toyota Corona",
    "Dodge Challenger", "AMC Javelin", "Camaro Z28",
    "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2", "Lotus Europa",
    "Ford Pantera L", "Ferrari Dino", "Maserati Bora", "Volvo 142E"
  )
)

看起来像这样:

> df
                    group  mpg cyl  disp  hp drat    wt  qsec vs am gear carb
Mazda RX4               1 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag           1 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
Datsun 710              1 22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive          1 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout       1 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
Valiant                 1 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
Duster 360              1 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
Merc 240D               1 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
Merc 230                1 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
Merc 280                1 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
Merc 280C               1 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
Merc 450SE              1 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
Merc 450SL              1 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
Merc 450SLC             2 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
Cadillac Fleetwood      2 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
Lincoln Continental     2 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
Chrysler Imperial       2 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
Fiat 128                2 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
Honda Civic             2 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
Toyota Corolla          2 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
Toyota Corona           2 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
Dodge Challenger        2 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
AMC Javelin             2 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
Camaro Z28              3 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
Pontiac Firebird        3 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
Fiat X1-9               3 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
Porsche 914-2           3 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
Lotus Europa            3 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
Ford Pantera L          4 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
Ferrari Dino            4 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
Maserati Bora           4 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
Volvo 142E              4 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

我想根据 group 列的值,把各行的行名称分组,并存放到一个列表中,例如:

group1 = "Mazda RX4", ..."Merc 450SL"
group2 = "Merc 450SLC", ...."AMC Javelin"

我怎样才能做到这一点?

1 个答案:

答案 0 :(得分:3)

# Partition the whole data frame into one sub-data-frame per value of `group`.
split(df, f = df[["group"]])

上面的代码会把数据框按分组变量(group)所定义的各组进行拆分。
# Keep only the row names, collected into one character vector per group.
split(row.names(df), f = df[["group"]])

如果您只想把数据框的行名(rownames)按组分开保存,请使用上面这一行代码。