我有一个数据集,如下所示:
P_ID S_ID Action A1 A2 A4 A5 A6 A8
101 1001 1 1015 0
102 1002 1 1016 0 1027 0
103 1003 1 1019 1 1030 1
104 1004 1 1017 0 1028 0
我希望输出文件看起来像这样:
P_ID S_ID Action
101 1001 1
101 1015 0
102 1002 1
102 1016 0
102 1027 0
103 1003 1
103 1019 1
103 1030 1
104 1004 1
104 1017 0
104 1028 0
我能够将列拆分成行,但是当它们之间有空白单元格时(请参阅 P_ID = 104),我的代码就无法正常工作。有人可以建议一种解决方法吗?我查看了之前发布的类似问题,但没有找到与此问题相关的内容。
以下是更新后的 dput() 输出:
structure(list(P_ID = c(10264547L, 28790961L, 25389968L, 20368912L,
12335739L, 28386707L, 29613504L, 13241475L, 7641351L, 7642873L,
30836625L, 20437550L, 29363410L, 7641147L), S_ID = c(13324763L,
13083449L, 13324759L, 13393573L, 13516358L, 13083449L, 13324763L,
13324763L, 13432404L, 13432474L, 13324763L, 13324763L, 13324763L,
13433044L), Action = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L,
1L, 0L, 0L, 1L), A1 = c(13324759L, 13324763L, NA, 13324763L,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 13324763L), A2 = c(0L, 0L,
13083466L, 0L, NA, 13432417L, 13324759L, 13432457L, NA, 13432449L,
13324759L, 13324759L, 13324759L, 0L), A4 = c(NA, 13324759L, 1L,
13324759L, NA, 1L, 1L, 1L, NA, 1L, 1L, 0L, 0L, 13324759L), A5 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), A6 = c(NA,
0L, 13432524L, 0L, NA, 13432457L, NA, NA, NA, 13432538L, NA,
NA, NA, 0L), A8 = c(NA, NA, 1L, NA, NA, 1L, NA, NA, NA, 1L, NA,
NA, NA, 13083449L), A9 = c(NA, NA, 13324763L, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA), A10 = c(NA, NA, 0L, NA, NA, 13432449L,
NA, NA, NA, 13432530L, NA, NA, NA, 0L), A12 = c(NA, NA, NA, NA,
NA, 1L, NA, NA, NA, 1L, NA, NA, NA, NA), A13 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, 13083449L, NA, NA, NA, NA), A14 = c(NA,
NA, NA, NA, NA, 13432538L, NA, NA, NA, 0L, NA, NA, NA, NA), A16 = c(NA,
NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA), A17 = c(NA,
NA, NA, NA, NA, 13324763L, NA, NA, NA, NA, NA, NA, NA, NA), A18 = c(NA,
NA, NA, NA, NA, 0L, NA, NA, NA, NA, NA, NA, NA, NA), A20 = c(NA,
NA, NA, NA, NA, 13324759L, NA, NA, NA, NA, NA, NA, NA, NA)), .Names = c("P_ID",
"S_ID", "Action", "A1", "A2", "A4", "A5", "A6", "A8", "A9", "A10",
"A12", "A13", "A14", "A16", "A17", "A18", "A20"), class = "data.frame", row.names = c(NA,
-14L))
答案 0 :(得分:1)
# Re-pair the scattered (S_ID, Action) values of every P_ID and stack them
# into one row per pair, using base R only.
# Expects `df` to hold the wide table with the id in its first column; `df`
# is overwritten with the re-parsed, left-aligned table (columns V1, V2, ...).

# Paste each row into one space-separated string and blank out the "NA"
# tokens, so the values that remain close up to the left when re-parsed.
# NOTE: this removes every literal "NA" substring, so it is only safe for
# numeric data such as the ids and flags used here.
row_text <- gsub("NA", "", do.call(paste, df))

# read.table() sizes the table from the first five lines only, so a longer
# row further down would be wrapped onto extra lines. Count the fields of
# every row instead and pass explicit column names.
n_fields <- max(lengths(strsplit(trimws(row_text), "[[:space:]]+")))
# Keep the id plus an even number of value columns so they split into pairs.
n_cols <- max(3L, n_fields + (n_fields + 1L) %% 2L)

df <- read.table(
  text = row_text,
  header = FALSE,
  fill = TRUE,
  col.names = paste0("V", seq_len(n_cols))
)

# Rows 1 and 2 of the index matrix list the columns that are stacked into
# V2 and V3 respectively: (2, 4, 6, ...) and (3, 5, 7, ...).
# na.omit() then discards the padding rows of the shorter records.
b <- na.omit(
  reshape(
    df,
    varying = matrix(2:ncol(df), nrow = 2),
    idvar = "V1",
    direction = "long"
  )
)

# Sort by id, drop the second column (the `time` index added by reshape())
# and reset the row names for display.
`rownames<-`(b[order(b$V1), ][-2], NULL)
V1 V2 V3
1 101 1001 1
2 101 1015 0
3 102 1002 1
4 102 1016 0
5 102 1027 0
6 103 1003 1
7 103 1019 1
8 103 1030 1
9 104 1004 1
10 104 1017 0
11 104 1028 0
答案 1 :(得分:1)
如果我理解正确,则有两个问题:
1. 每个 P_ID 的行中都有多对 S_ID 和 Action 值,但是由于散布的空(NA)单元格,这些值未正确对齐。
2. 需要将数据从宽格式重塑为长格式,使每一行都是 P_ID、S_ID 和 Action 的三元组。
下面的方法使用 data.table 包中的 melt()、dcast() 和 rowid():
library(data.table)

# Reshape the wide table into one (P_ID, S_ID, Action) row per value pair.
# Note that setDT() converts `df` to a data.table by reference.

# Stack all non-id columns under their P_ID and drop the empty (NA) cells.
long <- melt(setDT(df), id.vars = "P_ID", na.rm = TRUE)

# Walking through the rows P_ID by P_ID, relabel the values alternately as
# 1, 2, 1, 2, ... `variable` is a factor, so these integers are taken as
# level codes, i.e. the first two melted columns (S_ID and Action here).
# rep_len() spells the recycling out: data.table >= 1.12.2 refuses to
# recycle a length-2 right-hand side in `:=`.
long[order(P_ID), variable := rep_len(1:2, .N)]

# Spread the two labels back into columns; rowid() numbers the pairs within
# each P_ID so that every pair lands on its own row.
paired <- dcast(
  long,
  P_ID + rowid(P_ID, variable) ~ variable,
  value.var = "value"
)

# Remove the helper pair counter and print the result.
paired[, P_ID_1 := NULL][]
P_ID S_ID Action
1: 101 1001 1
2: 101 1015 0
3: 102 1002 1
4: 102 1016 0
5: 102 1027 0
6: 103 1003 1
7: 103 1019 1
8: 103 1030 1
9: 104 1004 1
10: 104 1017 0
11: 104 1028 0