按因子的每个水平将数据框拆分为子集

时间:2016-10-27 21:08:54

标签: r subset

给定下面的数据集red_wine_data,如何创建一个列表l,使其针对unique(red_wine_data$condition)中的每个取值各包含一个子集数据框(即下面四条命令生成的四个数据框)? 我希望找到一种灵活、动态的解决方案:它能得到与这些硬编码命令类似的结果,并且在因子水平发生变化时仍然有效,也适用于任何结构类似的数据框。

# Hard-coded version of the desired result: one data frame per country,
# stored in the list `l` under a "red_<country>" name.
# `l` must exist before elements can be assigned into it with `[[<-`;
# otherwise R stops with "object 'l' not found".
l <- list()
# Inside subset(), `condition` is looked up in red_wine_data itself, so the
# data frame name does not need to be repeated in the filter expression.
l[["red_usa"]] <- subset(red_wine_data, condition == "USA")
l[["red_france"]] <- subset(red_wine_data, condition == "France")
l[["red_australia"]] <- subset(red_wine_data, condition == "Australia")
l[["red_argentina"]] <- subset(red_wine_data, condition == "Argentina")

# Example data, written in the structure(list(...)) form produced by dput():
# a data frame with 400 rows (row names "1" to "400") and three columns --
#   subject   : 1:400
#   condition : factor with levels "Argentina", "Australia", "France", "USA"
#   Ratings   : integer ratings
red_wine_data <- structure(list(subject = 1:400, condition = structure(c(2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Argentina", 
"Australia", "France", "USA"), class = "factor"), Ratings = c(77L, 
82L, 75L, 92L, 83L, 75L, 84L, 86L, 85L, 79L, 92L, 84L, 77L, 65L, 
89L, 81L, 81L, 88L, 87L, 85L, 87L, 86L, 82L, 67L, 85L, 81L, 80L, 
71L, 78L, 84L, 91L, 80L, 84L, 81L, 71L, 78L, 78L, 81L, 89L, 86L, 
80L, 79L, 86L, 85L, 76L, 76L, 84L, 86L, 80L, 87L, 84L, 77L, 83L, 
73L, 91L, 95L, 78L, 74L, 85L, 80L, 98L, 81L, 86L, 81L, 76L, 82L, 
68L, 91L, 82L, 96L, 84L, 76L, 85L, 74L, 72L, 83L, 78L, 81L, 82L, 
77L, 77L, 80L, 89L, 70L, 85L, 83L, 88L, 79L, 84L, 83L, 77L, 89L, 
89L, 86L, 92L, 85L, 72L, 77L, 72L, 78L, 70L, 91L, 95L, 89L, 76L, 
87L, 75L, 86L, 73L, 85L, 73L, 79L, 82L, 73L, 80L, 84L, 93L, 91L, 
77L, 86L, 65L, 74L, 77L, 73L, 82L, 69L, 89L, 84L, 72L, 63L, 63L, 
73L, 79L, 82L, 80L, 73L, 79L, 74L, 88L, 76L, 72L, 79L, 76L, 75L, 
64L, 57L, 68L, 82L, 81L, 76L, 59L, 92L, 67L, 63L, 76L, 81L, 69L, 
73L, 86L, 75L, 74L, 70L, 76L, 66L, 69L, 68L, 77L, 69L, 92L, 78L, 
83L, 76L, 80L, 79L, 77L, 86L, 71L, 81L, 76L, 71L, 70L, 87L, 79L, 
71L, 70L, 91L, 74L, 67L, 76L, 61L, 83L, 66L, 67L, 86L, 70L, 73L, 
77L, 70L, 79L, 69L, 71L, 81L, 67L, 66L, 80L, 71L, 70L, 60L, 39L, 
65L, 64L, 75L, 77L, 58L, 73L, 63L, 89L, 69L, 89L, 69L, 86L, 72L, 
68L, 72L, 91L, 60L, 60L, 93L, 79L, 50L, 89L, 83L, 55L, 63L, 86L, 
77L, 81L, 64L, 71L, 77L, 76L, 65L, 75L, 69L, 79L, 50L, 65L, 75L, 
75L, 65L, 84L, 68L, 78L, 71L, 83L, 78L, 63L, 65L, 56L, 80L, 78L, 
73L, 52L, 60L, 69L, 60L, 67L, 90L, 76L, 54L, 56L, 83L, 81L, 67L, 
73L, 79L, 40L, 78L, 98L, 65L, 75L, 63L, 60L, 94L, 54L, 85L, 71L, 
62L, 79L, 39L, 80L, 89L, 66L, 65L, 57L, 80L, 76L, 72L, 65L, 71L, 
63L, 63L, 66L, 66L, 69L, 61L, 73L, 67L, 66L, 65L, 73L, 65L, 67L, 
66L, 76L, 63L, 67L, 54L, 71L, 63L, 76L, 68L, 66L, 72L, 64L, 80L, 
68L, 63L, 69L, 69L, 62L, 65L, 72L, 68L, 67L, 62L, 69L, 63L, 69L, 
67L, 63L, 57L, 63L, 69L, 76L, 66L, 62L, 60L, 62L, 64L, 76L, 64L, 
63L, 67L, 66L, 61L, 68L, 69L, 78L, 73L, 68L, 61L, 69L, 69L, 64L, 
63L, 66L, 75L, 70L, 75L, 68L, 57L, 63L, 65L, 69L, 66L, 74L, 71L, 
62L, 67L, 68L, 62L, 68L, 74L, 61L, 68L, 71L, 63L, 59L, 71L, 65L, 
63L, 62L, 71L, 65L, 66L, 64L, 71L, 60L, 69L)), .Names = c("subject", 
"condition", "Ratings"), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", 
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", 
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", 
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", 
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", 
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", 
"91", "92", "93", "94", "95", "96", "97", "98", "99", "100", 
"101", "102", "103", "104", "105", "106", "107", "108", "109", 
"110", "111", "112", "113", "114", "115", "116", "117", "118", 
"119", "120", "121", "122", "123", "124", "125", "126", "127", 
"128", "129", "130", "131", "132", "133", "134", "135", "136", 
"137", "138", "139", "140", "141", "142", "143", "144", "145", 
"146", "147", "148", "149", "150", "151", "152", "153", "154", 
"155", "156", "157", "158", "159", "160", "161", "162", "163", 
"164", "165", "166", "167", "168", "169", "170", "171", "172", 
"173", "174", "175", "176", "177", "178", "179", "180", "181", 
"182", "183", "184", "185", "186", "187", "188", "189", "190", 
"191", "192", "193", "194", "195", "196", "197", "198", "199", 
"200", "201", "202", "203", "204", "205", "206", "207", "208", 
"209", "210", "211", "212", "213", "214", "215", "216", "217", 
"218", "219", "220", "221", "222", "223", "224", "225", "226", 
"227", "228", "229", "230", "231", "232", "233", "234", "235", 
"236", "237", "238", "239", "240", "241", "242", "243", "244", 
"245", "246", "247", "248", "249", "250", "251", "252", "253", 
"254", "255", "256", "257", "258", "259", "260", "261", "262", 
"263", "264", "265", "266", "267", "268", "269", "270", "271", 
"272", "273", "274", "275", "276", "277", "278", "279", "280", 
"281", "282", "283", "284", "285", "286", "287", "288", "289", 
"290", "291", "292", "293", "294", "295", "296", "297", "298", 
"299", "300", "301", "302", "303", "304", "305", "306", "307", 
"308", "309", "310", "311", "312", "313", "314", "315", "316", 
"317", "318", "319", "320", "321", "322", "323", "324", "325", 
"326", "327", "328", "329", "330", "331", "332", "333", "334", 
"335", "336", "337", "338", "339", "340", "341", "342", "343", 
"344", "345", "346", "347", "348", "349", "350", "351", "352", 
"353", "354", "355", "356", "357", "358", "359", "360", "361", 
"362", "363", "364", "365", "366", "367", "368", "369", "370", 
"371", "372", "373", "374", "375", "376", "377", "378", "379", 
"380", "381", "382", "383", "384", "385", "386", "387", "388", 
"389", "390", "391", "392", "393", "394", "395", "396", "397", 
"398", "399", "400"))

补充说明:

请注意,示例数据来自一个Datacamp练习,该练习我已经完成。这个问题是在做该练习时受到启发而提出的,但与练习本身并不相同。

1 个答案:

答案 0 :(得分:1)

只需一行代码即可:

# Split the data frame into a named list holding one data frame per level of
# `condition`; the list names are the level labels (e.g. l[["USA"]]).
# drop = TRUE leaves out factor levels that have no rows, so the list only
# contains groups that actually occur in the data instead of also carrying
# empty data frames for unused levels.
l <- split(red_wine_data, red_wine_data$condition, drop = TRUE)