如何在R中重塑这个宽表以允许成对比较?

时间:2017-03-24 12:54:39

标签: r dataframe reshape

我有以下数据:

# Sample data for the question: a data.frame with 6 rows (one per id).
# Columns: `id`, eight p* columns stored in the order p7, p4, p3, p6, p1, p2,
# p5, p8, and eight q* columns q1..q8. Values are integer 0/1 with NA for
# missing entries. The expression is not assigned to a name here.
structure(list(id = c(174L, 187L, 188L, 190L, 213L, 234L), p7 = c(1L, 
1L, NA, 1L, 0L, 1L), p4 = c(1L, NA, NA, 1L, 1L, 1L), p3 = c(1L, 
1L, 1L, 1L, NA, 1L), p6 = c(1L, NA, NA, 1L, 1L, 1L), p1 = c(1L, 
1L, 1L, 1L, NA, 1L), p2 = c(1L, 1L, 1L, 1L, 1L, 1L), p5 = c(1L, 
0L, NA, 1L, NA, 1L), p8 = c(1L, NA, NA, 1L, 1L, 1L), q1 = c(1L, 
1L, NA, NA, NA, NA), q2 = c(NA, NA, 0L, NA, NA, NA), q3 = c(1L, 
1L, 1L, NA, 0L, 0L), q4 = c(NA, NA, NA, 0L, 0L, 0L), q5 = c(NA, 
1L, 0L, 1L, 0L, 1L), q6 = c(1L, NA, 1L, 1L, 0L, NA), q7 = c(0L, 
NA, 0L, 0L, NA, 1L), q8 = c(1L, NA, 0L, 0L, 0L, 0L)), .Names = c("id", 
"p7", "p4", "p3", "p6", "p1", "p2", "p5", "p8", "q1", "q2", "q3", 
"q4", "q5", "q6", "q7", "q8"), row.names = c(NA, 6L), class = "data.frame")

我需要一个数据框:第一列是重复8次的id,第二列包含所有以p开头的列的值(按上述列顺序),第三列包含所有以q开头的列的值(按上述列顺序)。这样我就可以在对应的p和q数据点之间进行成对比较。

任何提示?

2 个答案:

答案 0 :(得分:2)

library(reshape2)

# Sample data: one row per id, eight p* columns (stored in the order
# p7, p4, p3, p6, p1, p2, p5, p8) and eight q* columns (q1..q8).
df <- structure(
  list(
    id = c(174L, 187L, 188L, 190L, 213L, 234L),
    p7 = c(1L, 1L, NA, 1L, 0L, 1L),
    p4 = c(1L, NA, NA, 1L, 1L, 1L),
    p3 = c(1L, 1L, 1L, 1L, NA, 1L),
    p6 = c(1L, NA, NA, 1L, 1L, 1L),
    p1 = c(1L, 1L, 1L, 1L, NA, 1L),
    p2 = c(1L, 1L, 1L, 1L, 1L, 1L),
    p5 = c(1L, 0L, NA, 1L, NA, 1L),
    p8 = c(1L, NA, NA, 1L, 1L, 1L),
    q1 = c(1L, 1L, NA, NA, NA, NA),
    q2 = c(NA, NA, 0L, NA, NA, NA),
    q3 = c(1L, 1L, 1L, NA, 0L, 0L),
    q4 = c(NA, NA, NA, 0L, 0L, 0L),
    q5 = c(NA, 1L, 0L, 1L, 0L, 1L),
    q6 = c(1L, NA, 1L, 1L, 0L, NA),
    q7 = c(0L, NA, 0L, 0L, NA, 1L),
    q8 = c(1L, NA, 0L, 0L, 0L, 0L)
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)

# Measure columns, taken in the order they appear in `df` so the stacking
# order matches the column order of the data.
p_cols <- grep("^p[0-9]+$", names(df), value = TRUE)
q_cols <- grep("^q[0-9]+$", names(df), value = TRUE)
stopifnot(length(p_cols) == length(q_cols))

# Stack each family of columns into long format: one row per (id, column).
p <- melt(df, id.vars = "id", measure.vars = p_cols, variable.name = "p")
q <- melt(df, id.vars = "id", measure.vars = q_cols, variable.name = "q")

# The two long tables are paired by row position (k-th p column with k-th q
# column), so the id columns must line up exactly.
stopifnot(identical(p$id, q$id))

# Build the result with three uniquely named columns. A plain cbind(p, q)
# would yield duplicated `id` and `value` columns, making `both$value`
# ambiguous.
both <- data.frame(id = p$id, p = p$value, q = q$value)

答案 1 :(得分:2)

我们可以使用data.table中的melt轻松完成此操作,它的measure参数可以接受多个patterns

library(data.table)
# Convert `df` to a data.table by reference (this modifies `df` itself).
setDT(df)

# Melt both column families in one call: each regex in patterns() selects one
# group of measure columns, and each group gets its own value column ("p" and
# "q"). "grp" holds the position of the source column within its group.
melt(
  df,
  measure.vars = patterns("^p\\d+", "^q\\d+"),
  value.name = c("p", "q"),
  variable.name = "grp"
)