我的数据如下:
mydata <- data.frame(id = c(1,1,1,2,2,3,3,3,3),
subid = c(1,2,3,1,2,1,2,3,4),
time = c(16, 18, 20, 10, 11, 7, 9, 10, 11))
id subid time
1 1 1 16
2 1 2 18
3 1 3 20
4 2 1 10
5 2 2 11
6 3 1 7
7 3 2 9
8 3 3 10
9 3 4 11
我的目标是将数据转换为:
newdata <- data.frame(id = c(1,1,1,2,3,3,3,3,3,3),
subid.1 = c(1,1,2,1,1,1,1,2,2,3),
subid.2 = c(2,3,3,2,2,3,4,3,4,4),
time.1 = c(16,16,18,10,7,7,7,9,9,10),
time.2 = c(18,20,20,11,9,10,11,10,11,11))
id subid.1 subid.2 time.1 time.2
1 1 1 2 16 18
2 1 1 3 16 20
3 1 2 3 18 20
4 2 1 2 10 11
5 3 1 2 7 9
6 3 1 3 7 10
7 3 1 4 7 11
8 3 2 3 9 10
9 3 2 4 9 11
10 3 3 4 10 11
因此,从长到宽的过程不是一个简单的重塑:这个想法是,在由id定义的组内,采取所有可能的组合 subid&s及其相应的时间值,并将其变为宽格式。
我知道我可以使用所有可能的组合,例如gtools::combinations
。第一组由3行组成,所以
gtools::combinations(n=3, r=2)
给出了组id == 1的新subid.1和subid.2对的矩阵:
[,1] [,2]
[1,] 1 2
[2,] 1 3
[3,] 2 3
但后来我不知道如何继续(既不用id==1
重塑这个格式的组,也不知道如何为每个组分别重新组织。谢谢!
答案 0 :(得分:3)
以基数R:
subset(merge(mydata, mydata, by="id", suffix=c(".1",".2")), subid.1 < subid.2)
# id subid.1 time.1 subid.2 time.2
# 1 1 1 16 2 18
# 2 1 1 16 3 20
# 3 1 2 18 3 20
# 4 2 1 10 2 11
# 5 3 1 7 2 9
# 6 3 1 7 3 10
# 7 3 1 7 4 11
# 8 3 2 9 3 10
# 9 3 2 9 4 11
# 10 3 3 10 4 11
dplyr
版本:
mydata %>% inner_join(.,.,by="id",suffix=c(".1",".2")) %>% filter(subid.1 < subid.2)
data.table
版本:
setDT(mydata)
mydata[mydata, on="id", allow.cartesian=TRUE][subid < i.subid]
# id subid time i.subid i.time
# 1: 1 1 16 2 18
# 2: 1 1 16 3 20
# 3: 1 2 18 3 20
# 4: 2 1 10 2 11
# 5: 3 1 7 2 9
# 6: 3 1 7 3 10
# 7: 3 2 9 3 10
# 8: 3 1 7 4 11
# 9: 3 2 9 4 11
# 10: 3 3 10 4 11
或者让你的专栏名称正确,但它会破坏简短解决方案的乐趣:)。
merge(mydata, mydata, by="id", suffix=c(".1",".2"), allow.cartesian=TRUE)[subid.1 < subid.2]
答案 1 :(得分:1)
使用data.table
- 包:
library(data.table)
setDT(mydata)[, .(subid = c(t(combn(subid, 2)))), by = id
][, grp := rep(1:2, each = .N/2), by = id
][mydata, on = .(id, subid), time := time
][, dcast(.SD, id + rowid(grp) ~ grp, value.var = list('subid','time'), sep = '.')]
给你:
id grp subid.1 subid.2 time.1 time.2 1: 1 1 1 2 16 18 2: 1 2 1 3 16 20 3: 1 3 2 3 18 20 4: 2 4 1 2 10 11 5: 3 5 1 2 7 9 6: 3 6 1 3 7 10 7: 3 7 1 4 7 11 8: 3 8 2 3 9 10 9: 3 9 2 4 9 11 10: 3 10 3 4 10 11
答案 2 :(得分:1)
忘了声明我提出了这个相当蹩脚的4步解决方案:
step1 <- lapply(unique(mydata$id), function(x) {
nrows <- nrow(mydata[which(mydata$id == x), ])
combos <- gtools::combinations(n=nrows, r=2)
return(as.data.frame(cbind(x, combos)))
})
step2 <- dplyr::bind_rows(step1)
step3a <- merge(step2, mydata, by.x = c("x", "V2"), by.y = c("id", "subid"))
step3b <- merge(step3a, mydata, by.x = c("x", "V3"), by.y = c("id", "subid"))
step4 <- step3b[, c(1, 3, 2, 4, 5)]
names(step4) <- c("id", "subid.1", "subid.2", "time.1", "time.2")
它丑陋但有效。