将不同的表与不一致的列组合在一起

时间:2014-09-20 15:20:38

标签: r reshape dplyr rbind

我正在尝试合并多个列不完全相同的表。其中少数几个表有额外的列。

我在这里展示一个简单的例子。

# Example table `a`: 17 crash records with six columns, every column stored
# as character (note the non-integer "2.2" values in `To`).
a <- data.frame(
  CrashID = c(
    "3409", "3410", "6790", "1100", "1200", "5609", "6730", "1220", "1234",
    "1239", "4409", "5610", "6794", "1123", "1245", "5634", "6732"
  ),
  # From/To come in runs of 4, 2, 4, 4 and 3 identical values.
  From = rep(c("0", "1", "3", "8", "1"), times = c(4L, 2L, 4L, 4L, 3L)),
  To = rep(c("1", "3", "4", "9", "2.2"), times = c(4L, 2L, 4L, 4L, 3L)),
  Dr = c(
    "1", "6", "6", "6", "6", "4", "5", "4", "4",
    "4", "7", "7", "7", "4", "7", "7", "7"
  ),
  Mn = c(
    "4", "6", "5", "6", "3", "6", "4", "4", "4",
    "4", "9", "9", "9", "9", "3", "4", "7"
  ),
  # First ten rows belong to section 001-01, the remaining seven to 001-02.
  CSECT = rep(c("001-01", "001-02"), times = c(10L, 7L)),
  stringsAsFactors = FALSE
)
a
   CrashID From  To Dr Mn  CSECT
1     3409    0   1  1  4 001-01
2     3410    0   1  6  6 001-01
3     6790    0   1  6  5 001-01
4     1100    0   1  6  6 001-01
5     1200    1   3  6  3 001-01
6     5609    1   3  4  6 001-01
7     6730    3   4  5  4 001-01
8     1220    3   4  4  4 001-01
9     1234    3   4  4  4 001-01
10    1239    3   4  4  4 001-01
11    4409    8   9  7  9 001-02
12    5610    8   9  7  9 001-02
13    6794    8   9  7  9 001-02
14    1123    8   9  4  9 001-02
15    1245    1 2.2  7  3 001-02
16    5634    1 2.2  7  4 001-02
17    6732    1 2.2  7  7 001-02


# Example table `b`: 4 crash records; same columns as `a` except that `Mn`
# is absent. All columns are character.
b <- data.frame(
  CrashID = c("12409", "12410", "62290", "13330"),
  From = c("1", "2", "3", "4"),
  To = rep("4", times = 4L),
  Dr = c("1", "3", "3", "3"),
  CSECT = c("0301-01", "0031-01", "0301-01", "0031-01"),
  stringsAsFactors = FALSE
)
b
  CrashID From To Dr   CSECT
1   12409    1  4  1 0301-01
2   12410    2  4  3 0031-01
3   62290    3  4  3 0301-01
4   13330    4  4  3 0031-01
> 
# Example table `c`: 17 crash records with only CrashID, To and CSECT, all
# stored as character. The object is named `c`, which masks nothing for
# calls: R still resolves `c(...)` to the base function.
c <- data.frame(
  CrashID = c(
    "3569", "3430", "6660", "1400", "1700", "5979", "6530", "1320", "1334",
    "1669", "4559", "5510", "6733", "3333", "1257", "544", "6632"
  ),
  To = c(
    "1", "2", "2", "2", "7", "7", "7", "7", "7",
    "7", "7", "8", "8", "9", "5.2", "4.2", "8.2"
  ),
  CSECT = c(
    "090-01", "090-01", "090-01", "090-01", "090-01", "001-0w",
    "090-06", "001-r5", "001-44", "031-01", "031-02", "031-32",
    "031-02", "331-02", "001-we", "301-02", "031-02"
  ),
  stringsAsFactors = FALSE
)
c
   CrashID  To  CSECT
1     3569   1 090-01
2     3430   2 090-01
3     6660   2 090-01
4     1400   2 090-01
5     1700   7 090-01
6     5979   7 001-0w
7     6530   7 090-06
8     1320   7 001-r5
9     1334   7 001-44
10    1669   7 031-01
11    4559   7 031-02
12    5510   8 031-32
13    6733   8 031-02
14    3333   9 331-02
15    1257 5.2 001-we
16     544 4.2 301-02
17    6632 8.2 031-02

我尝试使用 "rbind" 函数来生成合并后的表,但没有任何进展。如果手动把所有表的列补成一致,我可以很容易地做到这一点;但在需要处理大量表格时,这样做的工作量非常大。

我希望得到如下的输出。

   CrashID From  To Dr Mn   CSECT Table
1     3409  0.0 1.0  1  4  001-01     a
2     3410  0.0 1.0  6  6  001-01     a
3     6790  0.0 1.0  6  5  001-01     a
4     1100  0.0 1.0  6  6  001-01     a
5     1200  1.0 3.0  6  3  001-01     a
6     5609  1.0 3.0  4  6  001-01     a
7     6730  3.0 4.0  5  4  001-01     a
8     1220  3.0 4.0  4  4  001-01     a
9     1234  3.0 4.0  4  4  001-01     a
10    1239  3.0 4.0  4  4  001-01     a
11    4409  8.0 9.0  7  9  001-02     a
12    5610  8.0 9.0  7  9  001-02     a
13    6794  8.0 9.0  7  9  001-02     a
14    1123  8.0 9.0  4  9  001-02     a
15    1245  1.0 2.2  7  3  001-02     a
16    5634  1.0 2.2  7  4  001-02     a
17    6732  1.0 2.2  7  7  001-02     a
18   12409  1.0 4.0  1 NA 0301-01     b
19   12410  2.0 4.0  3 NA 0031-01     b
20   62290  3.0 4.0  3 NA 0301-01     b
21   13330  4.0 4.0  3 NA 0031-01     b
22    3569  1.0  NA NA NA  090-01     c
23    3430  2.0  NA NA NA  090-01     c
24    6660  2.0  NA NA NA  090-01     c
25    1400  2.0  NA NA NA  090-01     c
26    1700  7.0  NA NA NA  090-01     c
27    5979  7.0  NA NA NA  001-0w     c
28    6530  7.0  NA NA NA  090-06     c
29    1320  7.0  NA NA NA  001-r5     c
30    1334  7.0  NA NA NA  001-44     c
31    1669  7.0  NA NA NA  031-01     c
32    4559  7.0  NA NA NA  031-02     c
33    5510  8.0  NA NA NA  031-32     c
34    6733  8.0  NA NA NA  031-02     c
35    3333  9.0  NA NA NA  331-02     c
36    1257  5.2  NA NA NA  001-we     c
37     544  4.2  NA NA NA  301-02     c
38    6632  8.2  NA NA NA  031-02     c

2 个答案:

答案 0 :(得分:4)

试试这个解决方案。在这里,我们先创建一个数据框列表,然后使用 tidyr 的 unnest() 函数将其转换为一个数据框,其中包含一个名为 Table 的列。

library('devtools')
install_github('hadley/tidyr')
library('tidyr')
dfs <- list(a = a, b = b, c = c)
combined <- unnest(dfs, Table)

请注意,在您的示例中,数据框 c 没有 From 列,但在您期望的最终数据集中,它的 From 列却有值(取自 To 列),而 To 列为 NA。

结果(缺失的值显示为空字符串,之后可以用 NA 替换这些空字符串):

   Table CrashID From  To Dr Mn   CSECT
1      a    3409    0   1  1  4  001-01
2      a    3410    0   1  6  6  001-01
3      a    6790    0   1  6  5  001-01
4      a    1100    0   1  6  6  001-01
5      a    1200    1   3  6  3  001-01
6      a    5609    1   3  4  6  001-01
7      a    6730    3   4  5  4  001-01
8      a    1220    3   4  4  4  001-01
9      a    1234    3   4  4  4  001-01
10     a    1239    3   4  4  4  001-01
11     a    4409    8   9  7  9  001-02
12     a    5610    8   9  7  9  001-02
13     a    6794    8   9  7  9  001-02
14     a    1123    8   9  4  9  001-02
15     a    1245    1 2.2  7  3  001-02
16     a    5634    1 2.2  7  4  001-02
17     a    6732    1 2.2  7  7  001-02
18     b   12409    1   4  1    0301-01
19     b   12410    2   4  3    0031-01
20     b   62290    3   4  3    0301-01
21     b   13330    4   4  3    0031-01
22     c    3569        1        090-01
23     c    3430        2        090-01
24     c    6660        2        090-01
25     c    1400        2        090-01
26     c    1700        7        090-01
27     c    5979        7        001-0w
28     c    6530        7        090-06
29     c    1320        7        001-r5
30     c    1334        7        001-44
31     c    1669        7        031-01
32     c    4559        7        031-02
33     c    5510        8        031-32
34     c    6733        8        031-02
35     c    3333        9        331-02
36     c    1257      5.2        001-we
37     c     544      4.2        301-02
38     c    6632      8.2        031-02

答案 1 :(得分:4)

或者您可以使用 plyr 中的 join_all。(初始数据集的所有列都是 character 类型。)这里使用 @Kara Woo 答案中的 dfs。

 # Tag every data frame with the name it carries in `dfs`, then full-join the
 # tagged frames; columns a table lacks show up as NA in the printed result.
 library(plyr)
 join_all(
   mapply(cbind, dfs, Table = names(dfs), SIMPLIFY = FALSE),
   type = "full"
 )

  #   CrashID From  To   Dr   Mn   CSECT Table
 #1     3409    0   1    1    4  001-01     a
 #2     3410    0   1    6    6  001-01     a
 #3     6790    0   1    6    5  001-01     a
 #4     1100    0   1    6    6  001-01     a
 #5     1200    1   3    6    3  001-01     a
 #6     5609    1   3    4    6  001-01     a
 #7     6730    3   4    5    4  001-01     a
 #8     1220    3   4    4    4  001-01     a
 #9     1234    3   4    4    4  001-01     a
 #10    1239    3   4    4    4  001-01     a
 #11    4409    8   9    7    9  001-02     a
 #12    5610    8   9    7    9  001-02     a
 #13    6794    8   9    7    9  001-02     a
 #14    1123    8   9    4    9  001-02     a
 #15    1245    1 2.2    7    3  001-02     a
 #16    5634    1 2.2    7    4  001-02     a
 #17    6732    1 2.2    7    7  001-02     a
 #18   12409    1   4    1 <NA> 0301-01     b
 #19   12410    2   4    3 <NA> 0031-01     b
 #20   62290    3   4    3 <NA> 0301-01     b
 #21   13330    4   4    3 <NA> 0031-01     b
 #22    3569 <NA>   1 <NA> <NA>  090-01     c
 #23    3430 <NA>   2 <NA> <NA>  090-01     c
 #24    6660 <NA>   2 <NA> <NA>  090-01     c
 #25    1400 <NA>   2 <NA> <NA>  090-01     c
 #26    1700 <NA>   7 <NA> <NA>  090-01     c
 #27    5979 <NA>   7 <NA> <NA>  001-0w     c
 #28    6530 <NA>   7 <NA> <NA>  090-06     c
 #29    1320 <NA>   7 <NA> <NA>  001-r5     c
 #30    1334 <NA>   7 <NA> <NA>  001-44     c
 #31    1669 <NA>   7 <NA> <NA>  031-01     c
 #32    4559 <NA>   7 <NA> <NA>  031-02     c
 #33    5510 <NA>   8 <NA> <NA>  031-32     c
 #34    6733 <NA>   8 <NA> <NA>  031-02     c
 #35    3333 <NA>   9 <NA> <NA>  331-02     c
 #36    1257 <NA> 5.2 <NA> <NA>  001-we     c
 #37     544 <NA> 4.2 <NA> <NA>  301-02     c
 #38    6632 <NA> 8.2 <NA> <NA>  031-02     c