R数据转换

时间:2012-02-24 05:06:47

标签: r

我有一个R数据框,如下所示:

z = as.data.frame(list(Col1=c("a","c","e","g"),Col2=c("b","d","f","h"),Col3=c("1,2,5","3,5,7","9,8","1")))
> z
  Col1 Col2  Col3
1    a    b 1,2,5
2    c    d 3,5,7
3    e    f   9,8
4    g    h     1

(第三列是一个逗号分隔值的文本列。)我想将它转换为这样的数据框:

a    b    1
a    b    2
a    b    5
c    d    3
c    d    5
c    d    7
e    f    9 
e    f    8
g    h    1

有人能建议如何使用apply来实现这一点吗?我用下面的命令已经很接近了,但结果不太正确:它返回的是一个由字符矩阵组成的列表,而不是单个数据框。如果有更高效的实现方法,也非常欢迎提出建议......

> apply(z,1,function(a){ids=strsplit(as.character(a[3]),",")[[1]];out<-c();for(id in ids){out<-rbind(out,c(a[1:2],id))};return(out)})
[[1]]
     Col1 Col2    
[1,] "a"  "b"  "1"
[2,] "a"  "b"  "2"
[3,] "a"  "b"  "5"

[[2]]
     Col1 Col2    
[1,] "c"  "d"  "3"
[2,] "c"  "d"  "5"
[3,] "c"  "d"  "7"

[[3]]
     Col1 Col2    
[1,] "e"  "f"  "9"
[2,] "e"  "f"  "8"

[[4]]
     Col1 Col2    
[1,] "g"  "h"  "1"

2 个答案:

答案 0 :(得分:5)

您可以使用plyr包中的ddply:

library(plyr)
ddply(z, c("Col1", "Col2"), summarize, 
  Col3=strsplit(as.character(Col3),",")[[1]]
)

答案 1 :(得分:3)

使用reshape2:

require(reshape2)
merge(cbind(z[,-3], L1=rownames(z)), melt(strsplit(as.character(z$Col3),",")))

结果如下:

  L1 Col1 Col2 value
1  1    a    b     1
2  1    a    b     2
3  1    a    b     5
4  2    c    d     3
5  2    c    d     5
6  2    c    d     7
7  3    e    f     9
8  3    e    f     8
9  4    g    h     1