用tidyr重塑data.frame

时间:2018-06-16 09:18:48

标签: r dataframe tidyr

我有以下data.frame

# Example input: a 50-row data.frame (row.names = c(NA, -50L)).
# id, type_id and group_id are numeric; min and sec are stored as character
# strings. a1 and a2 each hold, per row, a comma-separated list of values
# packed into a single character string.
data1 <- structure(list(id = c(1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 
4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 
8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 11, 11), type_id = c(1, 
1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 
1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 
2, 2, 2, 1, 1, 2, 2), min = c("0", "0", "66", "71", "74", "81", 
"0", "0", "0", "0", "14", "17", "0", "0", "45", "60", "87", "0", 
"0", "49", "89", "0", "0", "60", "60", "75", "0", "0", "7", "47", 
"66", "75", "83", "89", "0", "0", "68", "73", "0", "0", "85", 
"0", "0", "46", "71", "87", "0", "0", "81", "90"), sec = c("0", 
"0", "37", "20", "20", "28", "0", "0", "0", "0", "22", "26", 
"0", "0", "1", "38", "38", "0", "0", "0", "53", "0", "0", "8", 
"10", "10", "0", "0", "2", "55", "33", "39", "31", "41", "0", 
"0", "18", "53", "0", "0", "47", "0", "0", "44", "36", "49", 
"0", "0", "53", "12"), group_id = c(1, 0, 1, 0, 1, 0, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 
a1 = c("11334, 98745, 81880, 111457, 38411, 156074, 153256, 84450, 59966, 37605, 50175, 41792, 44346, 48844, 20467, 153133, 69140, 56864", 
"17745, 40725, 37402, 61604, 15033, 95658, 103025, 203341, 101668, 78412, 51938, 172850, 172632, 173515, 13152, 38038, 155569, 149828", 
"11334, 81880, 98745, 41792, 38411, 111457, 37605, 84450, 44346, 50175, 59966, 48844, 20467, 153133, 69140, 56864", 
"17745, 40725, 37402, 203341, 15033, 95658, 103025, 61604, 101668, 155569, 51938, 172850, 172632, 173515, 13152, 38038, 149828", 
"11334, 81880, 98745, 84450, 38411, 111457, 20467, 41792, 44346, 37605, 59966, 48844, 153133, 69140, 56864", 
"17745, 40725, 37402, 203341, 15033, 95658, 103025, 155569, 101668, 173515, 51938, 172850, 172632, 13152, 38038, 149828", 
"121160, 58621, 100180, 97299, 17476, 57410, 61366, 27789, 37572, 205651, 20664, 33148, 103955, 57112, 182156, 165809, 14664, 209244", 
"131897, 11352, 43808, 40845, 61933, 83299, 109345, 66242, 38499, 60307, 112516, 19071, 83543, 48860, 11735, 2513, 20529, 15137", 
"60772, 41328, 82263, 19419, 41270, 102380, 17878, 116594, 94245, 47431, 135365, 20310, 88482, 167767, 171162, 135363, 194794, 181911", 
"21205, 68983, 40616, 39847, 17761, 51927, 60586, 40145, 40399, 83314, 90517, 103914, 44699, 60551, 12813, 41674, 98747, 39158", 
"60772, 41328, 82263, 41270, 102380, 17878, 116594, 94245, 47431, 135365, 20310, 88482, 167767, 171162, 135363, 194794, 181911", 
"60772, 41328, 82263, 102380, 41270, 135363, 17878, 116594, 94245, 47431, 20310, 88482, 167767, 171162, 194794, 181911", 
"20066, 55494, 74230, 173954, 19188, 201084, 66975, 19197, 54861, 126187, 82403, 11554, 49413, 60252, 75773, 86417, 94926, 218112", 
"57513, 104545, 54284, 168991, 48760, 85368, 87428, 74471, 169141, 160190, 86176, 79733, 168977, 51344, 80755, 147303, 84112, 87856", 
"20066, 55494, 74230, 173954, 19188, 201084, 126187, 19197, 54861, 60252, 82403, 11554, 49413, 75773, 86417, 94926, 218112", 
"57513, 80755, 54284, 168991, 48760, 85368, 87428, 74471, 169141, 160190, 86176, 79733, 168977, 51344, 147303, 84112, 87856", 
"57513, 80755, 54284, 168991, 48760, 85368, 84112, 74471, 87428, 86176, 51344, 79733, 168977, 147303, 87856", 
"105666, 61858, 39487, 50089, 37869, 103192, 40555, 14295, 91972, 68312, 40276, 9047, 42564, 221267, 20208, 3773, 205102, 6744", 
"111234, 177815, 12745, 7645, 19159, 106611, 42774, 80801, 108438, 13017, 109065, 26901, 56192, 87447, 10318, 173807, 194164, 219352", 
"111234, 56192, 12745, 109065, 106611, 7645, 80801, 42774, 177815, 13017, 108438, 26901, 87447, 10318, 173807, 194164, 219352", 
"111234, 56192, 12745, 173807, 106611, 7645, 80801, 42774, 26901, 13017, 177815, 87447, 10318, 194164, 219352", 
"15749, 20658, 105717, 59779, 48717, 40669, 45124, 18073, 43020, 163526, 41464, 55459, 38580, 60706, 73889, 8380, 113564, 204480", 
"51940, 20695, 58877, 74208, 197365, 76359, 43670, 62398, 66749, 57249, 176297, 148225, 55909, 109322, 42899, 59846, 41184, 40002", 
"15749, 20658, 105717, 59779, 48717, 40669, 45124, 204480, 43020, 73889, 41464, 55459, 38580, 60706, 8380, 113564", 
"15749, 20658, 105717, 59779, 48717, 40669, 45124, 204480, 43020, 73889, 41464, 55459, 38580, 60706, 8380, 113564", 
"51940, 20695, 58877, 74208, 197365, 76359, 57249, 62398, 66749, 41184, 176297, 148225, 55909, 109322, 42899, 59846, 40002", 
"19838, 109528, 106618, 153127, 77359, 58845, 56983, 50232, 104547, 168580, 104953, 101148, 114243, 155513, 149736, 195384, 59735, 128389", 
"37915, 158534, 115556, 39104, 55605, 39194, 45268, 93264, 78830, 108823, 80607, 85971, 165990, 54756, 39215, 97485, 157668, 168763", 
"19838, 109528, 58845, 50232, 101148, 77359, 56983, 153127, 104547, 168580, 104953, 114243, 155513, 149736, 195384, 59735, 128389", 
"19838, 109528, 58845, 153127, 101148, 149736, 56983, 168580, 104547, 104953, 114243, 155513, 195384, 59735, 128389", 
"37915, 158534, 115556, 39104, 55605, 39194, 80607, 93264, 78830, 108823, 85971, 165990, 54756, 39215, 97485, 157668, 168763", 
"19838, 109528, 56983, 58845, 101148, 149736, 153127, 195384, 168580, 104953, 114243, 155513, 59735, 128389", 
"37915, 158534, 115556, 54756, 55605, 39194, 80607, 93264, 78830, 39104, 85971, 165990, 39215, 97485, 157668, 168763", 
"37915, 158534, 115556, 54756, 55605, 39194, 157668, 93264, 78830, 80607, 85971, 165990, 39215, 97485, 168763", 
"37096, 49539, 28654, 15114, 57145, 149266, 49277, 11829, 80146, 173879, 93464, 57586, 61760, 42996, 59940, 106899, 96305, 169432", 
"40383, 58822, 40146, 17339, 88900, 80447, 101178, 78056, 61548, 62399, 83283, 20452, 78356, 151086, 128198, 3201, 171771, 153373", 
"40383, 58822, 40146, 17339, 88900, 80447, 101178, 78056, 61548, 62399, 83283, 20452, 78356, 151086, 128198, 3201, 171771, 153373", 
"37096, 49539, 28654, 96305, 57145, 149266, 49277, 93464, 11829, 173879, 80146, 57586, 61760, 42996, 59940, 106899, 169432", 
"18656, 52940, 40868, 121599, 37742, 52153, 43250, 89085, 20046, 44604, 61566, 73426, 212319, 41945, 54484, 16045, 38439, 56827", 
"66797, 169187, 100059, 56979, 60914, 38454, 112338, 41733, 92217, 118748, 110979, 104542, 15157, 171287, 210237, 33871, 152760, 154566", 
"66797, 169187, 100059, 56979, 60914, 38454, 15157, 112338, 152760, 110979, 41733, 104542, 171287, 210237, 33871, 154566", 
"40349, 15149, 41320, 15237, 56917, 126184, 90105, 48615, 88498, 78007, 20037, 18726, 40387, 54469, 7958, 149484, 178304, 103912", 
"9089, 67527, 19151, 55829, 60232, 77777, 50229, 49944, 44683, 195864, 63370, 49440, 57134, 12086, 54908, 232427, 173809, 215457", 
"40349, 15149, 41320, 15237, 56917, 126184, 103912, 48615, 88498, 78007, 90105, 18726, 40387, 54469, 7958, 149484, 178304", 
"9089, 67527, 19151, 55829, 60232, 77777, 44683, 232427, 49440, 49944, 63370, 57134, 12086, 54908, 173809, 215457", 
"9089, 67527, 19151, 55829, 60232, 77777, 49440, 232427, 57134, 49944, 63370, 12086, 54908, 173809, 215457", 
"40349, 54469, 41320, 15237, 56917, 126184, 90105, 48615, 88498, 78007, 103912, 18726, 40387, 20037, 120447, 7958, 149484, 178304", 
"18656, 54484, 40868, 121599, 41945, 52153, 43250, 89085, 73426, 44604, 212319, 41725, 108413, 16045, 85624, 38439, 56827, 20046", 
"18656, 85624, 40868, 121599, 41945, 52153, 54484, 38439, 73426, 89085, 43250, 41725, 108413, 16045, 56827, 20046", 
"18656, 85624, 40868, 121599, 41945, 52153, 54484, 38439, 73426, 16045, 43250, 41725, 108413, 56827, 20046"
), a2 = c("1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5", 
"1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5", 
"1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5")), class = "data.frame", .Names = c("id", "type_id", "min", "sec", "group_id", "a1", "a2"), row.names = c(NA, -50L))

现在我需要对其进行转换,使 a1 和 a2 在每一行中都只包含一个值,即:

# Desired result, shown for the first three rows only: one a1 value and one
# a2 value per row, with the remaining columns repeated. Every column is
# numeric in this target shape.
structure(list(id = c(1, 1, 1), type_id = c(1, 1, 1), min = c(0, 
0, 0), sec = c(0, 0, 0), group_id = c(1, 1, 1), a1 = c(11334, 
98745, 81880), a2 = c(1, 3, 3)), .Names = c("id", "type_id", 
"min", "sec", "group_id", "a1", "a2"), row.names = c(NA, -3L), class = 
"data.frame")

我首先尝试对 a1 和 a2 使用 separate,然后 gather,最后用 spread 展开成两列,如下所示:

# Asker's attempt; it stops with the "duplicate identifiers" error quoted below.
# Step 1: split a1 and a2 into 18 columns each. paste() joins with a space by
# default, so the new columns are named "obj 1".."obj 18" and
# "desc 1".."desc 18".
data2 <- data1 %>% 
  separate(col = a1, into = paste('obj', 1:18)) %>% 
  separate(col = a2, into = paste('desc', 1:18))

# Step 2: stack the 36 new columns into key/value pairs, then spread them back
# out. spread() must identify each value by (id, type_id, min, sec, group_id)
# plus the key, but several rows of data1 share all five of those columns
# (e.g. rows 7 and 8), so the combination is not unique and spread() fails.
data2 %>% 
  gather(key = 'obj', value = 'value', -id, -type_id, -min, -sec, -group_id) %>% 
  spread(key = 'obj', value = 'value')

但最终出现以下错误:

  

错误:行(907,908),(909,910),(913,914),(918,919),(922,923),(927,928),(935,936)的重复标识符,(939,940),(942,943),(947,948),(1357,1358),(1359,1360),(1363,1364),(1368,1369),(1372,1373),( 1377,1378),(1385,1386),(1389,1390),(1392,1393),(1397,1398),(1407,1408),(1409,1410),(1413,1414),(1418, 1419),(1422,1423),(1427,1428),(1435,1436),(1439,1440),(1442,1443),(1447,1448),(1457,1458),(1459,1460) ,(1463,1464),(1468,1469),(1472,1473),(1477,1478),(1485,1486),(1489,1490),(1492,1493),(1497,1498),( 1507,1508),(1509,1510),(1513,1514),(1518,1519),(1522,1523),(1527,1528),(1535,1536),(1539,1540),(1542, 1543),(1547,1548),(1557,1558),(1559,1560),(1563,1564),(1568,1569),(1572,1573),(1577,1578),(1585,1586) ,(1589,1590),(1592,1593),(1597,1598),(1607,1608),(1609,1610),(1613,1614),(1618,1619),(1622,1623),( 1627,1628),(1635,1636),(1639,1640),(1642,1643),(1647,1648), (...

所以我的问题是:我如何转换这个数据集(如果需要,可以使用tidyr或其他包)?

1 个答案:

答案 0 :(得分:2)

您需要 tidyr 中的 separate_rows 函数;当各嵌套列在每一行中的元素个数相同时,它可以一次接受多个列作为输入:

# Split a1 and a2 in parallel so that every element gets its own row while the
# other columns are repeated. convert = TRUE re-types the split columns, so
# a1 and a2 come back as numbers (as in the desired output) instead of strings.
data1 %>% separate_rows(a1, a2, convert = TRUE)

给出:

    id type_id min sec group_id     a1 a2
1    1       1   0   0        1  11334  1
2    1       1   0   0        1  98745  3
3    1       1   0   0        1  81880  3
4    1       1   0   0        1 111457  2
5    1       1   0   0        1  38411  2
6    1       1   0   0        1 156074  2
7    1       1   0   0        1 153256  3
8    1       1   0   0        1  84450  3
9    1       1   0   0        1  59966  4
10   1       1   0   0        1  37605  4
.....