重塑R中的数据

时间:2013-11-21 06:22:02

标签: r

我有一个数据集,如下方所示。该数据有四列,分别为SIC、AT95Group、AT95Mean、AT95Med。AT95Group列有四个取值:“00”、“01”、“11”和“10”。目前,每个SIC针对AT95Group的每个取值各占一行,共四行。我想重塑这个数据框,使每个SIC只占一行。之前每个(SIC, AT95Group)对有Mean和Med两列,而现在我们想创建 4 × 2 = 8 列(4 对应组“00”、“11”、“01”、“10”,2 对应“Mean”和“Med”)。这八列的名称形如“00Mean”、“11Mean”、“00Med”、“11Med”等,每个SIC在各列中有相应的值。

我觉得这很难做到。请给我任何建议。谢谢。

> dput(head(pp,20))
structure(list(SIC = c(1L, 1L, 1L, 10L, 10L, 10L, 10L, 12L, 12L, 
12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L), AT95Group = c("11", 
"10", "00", "11", "01", "00", "10", "01", "11", "10", "00", "11", 
"01", "00", "10", "11", "01", "10", "00", "01"), AT95Med = c(0.0691039117115276, 
0.0608649722972575, 0.0609974198491522, 0.215571816296268, 0.305308985848382, 
0.351312558091798, 0.352704719896703, 0.0459887720804718, 0.0304466181779069, 
0.0513875431555943, 0.0541431932578377, 0.0650920855876547, 0.143724642017362, 
0.156092793582293, 0.0976059673595903, 0.0116620184564053, 0.0188895210677074, 
0.0356836223212195, 0.0513040852859517, 0.0982448708035204), 
    AT95Mean = c(0.0691039117115276, 0.0608649722972575, 0.0609974198491522, 
    0.215571816296268, 0.305308985848382, 0.351312558091798, 
    0.352704719896703, 0.0459887720804718, 0.0304466181779069, 
    0.0513875431555943, 0.0541431932578377, 0.0650920855876547, 
    0.143724642017362, 0.156092793582293, 0.0976059673595903, 
    0.0116620184564053, 0.0188895210677074, 0.0356836223212195, 
    0.0513040852859517, 0.0982448708035204)), .Names = c("SIC", 
"AT95Group", "AT95Med", "AT95Mean"), row.names = c(241L, 236L, 
27L, 1126L, 1035L, 1030L, 664L, 1269L, 1259L, 1245L, 1244L, 3919L, 
4722L, 3329L, 3222L, 4886L, 4889L, 4951L, 4860L, 5108L), class = "data.frame")

我用下面的代码做了尝试,但彻底失败了。不确定该如何继续。

# Keep only the distinct rows of the four columns of interest.
pp <- unique(dacc1[, c("SIC", "AT95Group", "AT95Med", "AT95Mean")])
# Distinct SIC codes found in pp.
xsic <- unique(pp[["SIC"]])
# Pair the SIC codes with an equally long vector of the "AT95" label.
xlist <- list(xsic, rep("AT95", times = length(xsic)))

编辑:

按照特洛伊的方案运行后,我得到的结果如下:

> pp1 <- head(pp,20)
     SIC AT95Group    AT95Med   AT95Mean
241    1        11 0.06910391 0.06910391
236    1        10 0.06086497 0.06086497
27     1        00 0.06099742 0.06099742
1126  10        11 0.21557182 0.21557182
1035  10        01 0.30530899 0.30530899
1030  10        00 0.35131256 0.35131256
664   10        10 0.35270472 0.35270472
1269  12        01 0.04598877 0.04598877
1259  12        11 0.03044662 0.03044662
1245  12        10 0.05138754 0.05138754
1244  12        00 0.05414319 0.05414319
3919  13        11 0.06509209 0.06509209
4722  13        01 0.14372464 0.14372464
3329  13        00 0.15609279 0.15609279
3222  13        10 0.09760597 0.09760597
4886  14        11 0.01166202 0.01166202
4889  14        01 0.01888952 0.01888952
4951  14        10 0.03568362 0.03568362
4860  14        00 0.05130409 0.05130409
5108  15        01 0.09824487 0.09824487

> molten<-melt(pp);
Using AT95Group as id variables

molten$variable<-paste(gsub("[AT95]","",molten$variable),molten$AT95Group," ");
cast(molten[,c(1,3,4)], SIC ~ variable);

> cast(molten[,c(1,3,4)], SIC ~ variable);
Error in `[.data.frame`(molten, , c(1, 3, 4)) : 
  undefined columns selected

3 个答案:

答案 0 :(得分:1)

我希望这个解决方案不会太晦涩难懂:

# Build a wide table `d` with one row per distinct SIC and, for every
# AT95Group value, one Med column and one Mean column.
xsic <- unique(pp[["SIC"]])
AT <- c("00", "01", "10", "11")
d <- data.frame(xsic = xsic)
for (grp in AT) {
  # Rows of pp that belong to the current group.
  rows <- pp[pp$AT95Group == grp, ]
  # Position of each SIC within this group's rows; NA where the SIC has no
  # row for this group, which yields NA in the resulting columns.
  pos <- match(xsic, rows$SIC)
  d[[paste0(grp, "AT95Med")]] <- rows$AT95Med[pos]
  d[[paste0(grp, "AT95Mean")]] <- rows$AT95Mean[pos]
}

结果:

xsic 00AT95Med 00AT95Mean  01AT95Med 01AT95Mean  10AT95Med 10AT95Mean  11AT95Med 11AT95Mean
  1 0.06099742 0.06099742         NA         NA 0.06086497 0.06086497 0.06910391 0.06910391
 10 0.35131256 0.35131256 0.30530899 0.30530899 0.35270472 0.35270472 0.21557182 0.21557182
 12 0.05414319 0.05414319 0.04598877 0.04598877 0.05138754 0.05138754 0.03044662 0.03044662
 13 0.15609279 0.15609279 0.14372464 0.14372464 0.09760597 0.09760597 0.06509209 0.06509209
 14 0.05130409 0.05130409 0.01888952 0.01888952 0.03568362 0.03568362 0.01166202 0.01166202
 15         NA         NA 0.09824487 0.09824487         NA         NA         NA         NA

答案 1 :(得分:1)

或者你可以使用“reshape”包:

install.packages("reshape")  # only run this once if you don't have it
library(reshape)  # unlike require(), stops with an error if the package is missing
pp   #  this is what I called your table

# Stretch the table out into variable/value pairs.
# The id columns are named explicitly: when melt() is left to guess, it
# reports "Using AT95Group as id variables" for this data, so the integer SIC
# column is melted as a measure and is no longer available as a column for
# cast() below.
molten <- melt(pp, id.vars = c("SIC", "AT95Group"))

# Then relabel each measure with its group, e.g. "AT95Mean" in group "00"
# becomes "Mean 00".
# sub("^AT95", ...) removes only the literal prefix; the character class
# "[AT95]" would delete every A, T, 9 or 5 anywhere in the name.
# paste() already separates its arguments with a single space, so no extra
# " " argument is passed (it would append trailing blanks to every name).
molten$variable <- paste(sub("^AT95", "", molten$variable), molten$AT95Group)

# Then use cast (you can look up the documentation in ?reshape).
# Basically this gives you a crosstab of the SICs against the new variables.
# Columns are selected by name, so the call does not depend on the column
# positions melt() happens to produce.
cast(molten[, c("SIC", "variable", "value")], SIC ~ variable)

给你:

  SIC  Mean 00    Mean 01    Mean 10    Mean 11     Med 00     Med 01     Med 10     Med 11  
1   1 0.06099742         NA 0.06086497 0.06910391 0.06099742         NA 0.06086497 0.06910391
2  10 0.35131256 0.30530899 0.35270472 0.21557182 0.35131256 0.30530899 0.35270472 0.21557182
3  12 0.05414319 0.04598877 0.05138754 0.03044662 0.05414319 0.04598877 0.05138754 0.03044662
4  13 0.15609279 0.14372464 0.09760597 0.06509209 0.15609279 0.14372464 0.09760597 0.06509209
5  14 0.05130409 0.01888952 0.03568362 0.01166202 0.05130409 0.01888952 0.03568362 0.01166202
6  15         NA 0.09824487         NA         NA         NA 0.09824487         NA         NA

答案 2 :(得分:1)

顺便说明一下,base R 中(准确地说是 stats 包)也有一个 reshape 函数:

# Long-to-wide with stats::reshape(): one row per SIC, and each measure in
# v.names is spread into one column per AT95Group value.
reshape(
  pp,
  idvar = "SIC",
  timevar = "AT95Group",
  v.names = c("AT95Med", "AT95Mean"),
  direction = "wide"
)
#     SIC AT95Med.11 AT95Mean.11 AT95Med.10 AT95Mean.10 AT95Med.00 AT95Mean.00 AT95Med.01 AT95Mean.01
#241    1 0.06910391  0.06910391 0.06086497  0.06086497 0.06099742  0.06099742         NA          NA
#1126  10 0.21557182  0.21557182 0.35270472  0.35270472 0.35131256  0.35131256 0.30530899  0.30530899
#1269  12 0.03044662  0.03044662 0.05138754  0.05138754 0.05414319  0.05414319 0.04598877  0.04598877
#3919  13 0.06509209  0.06509209 0.09760597  0.09760597 0.15609279  0.15609279 0.14372464  0.14372464
#4886  14 0.01166202  0.01166202 0.03568362  0.03568362 0.05130409  0.05130409 0.01888952  0.01888952
#5108  15         NA          NA         NA          NA         NA          NA 0.09824487  0.09824487