如何将一个数据框转换后并入另一个数据框

时间:2015-10-21 09:52:31

标签: r dataframe

我又遇到了一个棘手的问题,目前还没能解决。它涉及R中的数据框。

假设我的数据框看起来像:

# Sample input: nine rows holding the male ID, song number, song type,
# start value and record file of each song.
original <- data.frame(
  Male       = rep(c(1, 2, 3), times = c(3, 4, 2)),
  SongNumber = c(1, 2, 3, 1, 2, 3, 4, 1, 2),
  SongType   = c("16a", "16b", "17a", "24a", "24b", "25d", "24f", "5e", "5e"),
  Start      = c(0.5, 16.1, 24.2, 0.9, 10.1, 18.9, 0.7, 0.6, 12.2),
  RecordFile = rep(c("A1", "B1", "B2", "C1"), times = c(3, 3, 1, 2))
)
original

以及另一个数据框,其中包含每种歌曲类型对应的音节顺序:

# Lookup table: one row per song type, with its syllables in order across
# Syll1..Syll10. NA fills the trailing slots of song types that have fewer
# than ten syllables.
additional <- data.frame(
  SongType = c("16a", "16b", "17a", "24a"),
  Syll1    = c(4, 4, 3, 16),
  Syll2    = c(4, 4, 3, 16),
  Syll3    = c(84, 84, 3, 3),
  Syll4    = c(3, 3, 3, 16),
  Syll5    = c(16, 16, 3, 3),
  Syll6    = c(16, 16, NA, 4),
  Syll7    = c(NA, 16, NA, NA),
  Syll8    = c(NA, 16, NA, NA),
  Syll9    = c(NA, 3, NA, NA),
  Syll10   = c(NA, 1, NA, NA)
)
additional

我现在想把音节顺序作为一列加入到第一个数据框中,并让每个音节各占一行。最终结果应如下所示:

# Desired result: every song of `original` repeated once per syllable of its
# song type, with the syllable in SyllOrder. Songs whose type has no entry in
# `additional` keep a single row with SyllOrder = NA.
aim <- data.frame(
  Male = rep(c(1, 2, 3), times = c(21, 9, 2)),
  SongNumber = rep(
    c(1, 2, 3, 1, 2, 3, 4, 1, 2),
    times = c(6, 10, 5, 6, 1, 1, 1, 1, 1)
  ),
  SongType = rep(
    c("16a", "16b", "17a", "24a", "24b", "25d", "24f", "5e", "5e"),
    times = c(6, 10, 5, 6, 1, 1, 1, 1, 1)
  ),
  Start = rep(
    c(0.5, 16.1, 24.2, 0.9, 10.1, 18.9, 0.7, 0.6, 12.2),
    times = c(6, 10, 5, 6, 1, 1, 1, 1, 1)
  ),
  RecordFile = rep(c("A1", "B1", "B2", "C1"), times = c(21, 8, 1, 2)),
  SyllOrder = c(
    4, 4, 84, 3, 16, 16,                 # 16a
    4, 4, 84, 3, 16, 16, 16, 16, 3, 1,   # 16b
    3, 3, 3, 3, 3,                       # 17a
    16, 16, 3, 16, 3, 4,                 # 24a
    rep(NA, 5)                           # song types without syllable data
  )
)
aim

到目前为止,我没看出merge之类的函数能有什么帮助:merge只是根据两个数据框的公共列,把dataframe2的列添加到dataframe1中,并不会让dataframe1相应地增加行!

2 个答案:

答案 0 :(得分:3)

要获得所需的输出,您可以执行以下操作:

library(data.table)
# Collapse the non-missing syllables of each song type into a single string,
# e.g. "4, 4, 84, 3, 16, 16" (toString() separates the values with ", ").
additional2 <- melt(setDT(additional), id.vars = "SongType", na.rm = TRUE)[
  , .(SyllOrder = toString(value)), by = SongType]

# Update join, then expand every song into one row per syllable.
# NOTE: setDT() and `:=` modify `original` by reference, so `original` itself
# becomes a data.table with a SyllOrder column; that is why the grouping
# columns are built with setdiff().
# as.numeric() ignores the blank that follows each comma in the toString()
# output, so SyllOrder comes back as a clean numeric column instead of
# strings such as " 84". Song types without a match stay NA.
aim2 <- setDT(original)[additional2, SyllOrder := i.SyllOrder, on = "SongType"][
  , .(SyllOrder = as.numeric(unlist(strsplit(SyllOrder, ",", fixed = TRUE)))),
  by = setdiff(names(original), "SyllOrder")]

作为最后一步的替代方案,您还可以使用:

# Alternative last step: look up every row of `original` in `additional2`
# (song types without a match get SyllOrder = NA), then expand the
# comma-separated syllable string into one row per syllable.
# as.numeric() ignores the blank that follows each comma in the string, so
# SyllOrder is returned as numbers rather than strings such as " 84".
# setdiff() keeps the grouping columns correct even if `original` already
# carries a SyllOrder column.
aim2 <- additional2[original, on = "SongType"][
  , .(SyllOrder = as.numeric(unlist(strsplit(SyllOrder, ",", fixed = TRUE)))),
  by = setdiff(names(original), "SyllOrder")]

两者都得到:

> aim2
    Male SongNumber SongType Start RecordFile SyllOrder
 1:    1          1      16a   0.5         A1         4
 2:    1          1      16a   0.5         A1         4
 3:    1          1      16a   0.5         A1        84
 4:    1          1      16a   0.5         A1         3
 5:    1          1      16a   0.5         A1        16
 6:    1          1      16a   0.5         A1        16
 7:    1          2      16b  16.1         A1         4
 8:    1          2      16b  16.1         A1         4
 9:    1          2      16b  16.1         A1        84
10:    1          2      16b  16.1         A1         3
11:    1          2      16b  16.1         A1        16
12:    1          2      16b  16.1         A1        16
13:    1          2      16b  16.1         A1        16
14:    1          2      16b  16.1         A1        16
15:    1          2      16b  16.1         A1         3
16:    1          2      16b  16.1         A1         1
17:    1          3      17a  24.2         A1         3
18:    1          3      17a  24.2         A1         3
19:    1          3      17a  24.2         A1         3
20:    1          3      17a  24.2         A1         3
21:    1          3      17a  24.2         A1         3
22:    2          1      24a   0.9         B1        16
23:    2          1      24a   0.9         B1        16
24:    2          1      24a   0.9         B1         3
25:    2          1      24a   0.9         B1        16
26:    2          1      24a   0.9         B1         3
27:    2          1      24a   0.9         B1         4
28:    2          2      24b  10.1         B1        NA
29:    2          3      25d  18.9         B1        NA
30:    2          4      24f   0.7         B2        NA
31:    3          1       5e   0.6         C1        NA
32:    3          2       5e  12.2         C1        NA

答案 1 :(得分:1)

您需要先将additional转换为长格式,然后就可以将两者合并。

library(dplyr)
library(tidyr)
# Reshape `additional` to long format (one row per song type and syllable)
# and attach it to `original`.
# - values_drop_na = TRUE removes the NA padding of song types with fewer
#   than ten syllables, so they do not produce spurious NA rows.
# - left_join() from `original` keeps the songs whose type has no entry in
#   `additional` (SyllOrder = NA); inner_join() would drop them.
# - The helper column Syllable ("Syll1", "Syll2", ...) is removed to match the
#   requested layout; delete the select() step to keep it.
# NOTE: assumes `original` has no SyllOrder column yet; drop it first if the
# data.table code was run on the same object.
original %>%
  left_join(
    additional %>%
      pivot_longer(
        cols = -SongType,
        names_to = "Syllable",
        values_to = "SyllOrder",
        values_drop_na = TRUE
      ),
    by = "SongType"
  ) %>%
  select(-Syllable)