我有一个 data.table,其中一些观测值包含 data.frame。例如:
# Example table: colB is a list column that stores one data.frame per row;
# the first of those data.frames is empty.
data.table(
  colA = c("A1", "A2", "A3"),
  colB = list(
    data.frame(),
    data.frame(colsubB1 = c("B2a", "B2b"), colsubB2 = c("B2c", "B2d")),
    data.frame(colsubB1 = c("A3a", "A3b"), colsubB2 = c("A3c", "A3d"))
  ),
  colC = c("C1", "C2", "C3"),
  colD = c("D1", "D2", "D3")
)
返回
colA colB colC colD
1: A1 <data.frame> C1 D1
2: A2 <data.frame> C2 D2
3: A3 <data.frame> C3 D3
我希望得到以下结果:
colA colsubB1 colsubB2 colC colD
1: A1 <NA> <NA> C1 D1
2: A2 B2a B2c C2 D2
3: A2 B2b B2d C2 D2
4: A3 A3a A3c C3 D3
5: A3 A3b A3d C3 D3
能给我一些建议吗?
答案 0 :(得分:3)
使用 by:
# Unnest the list column colB: every nested data.frame is expanded into rows,
# and the remaining columns (used as grouping keys) are repeated alongside.
DT[,
  # Bind every frame of the group, not only the first one, so that rows
  # sharing identical key values do not silently lose their nested data.
  rbindlist(lapply(colB, function(sub) {
    if (nrow(sub) > 0L) {
      sub
    } else {
      # Empty frame: emit a single all-NA row so the parent row is kept.
      data.frame(colsubB1 = NA_character_, colsubB2 = NA_character_)
    }
  })),
  by = setdiff(names(DT), "colB")]
输出:
colA colC colD colsubB1 colsubB2
1: A1 C1 D1 <NA> <NA>
2: A2 C2 D2 B2a B2c
3: A2 C2 D2 B2b B2d
4: A3 C3 D3 A3a A3c
5: A3 C3 D3 A3b A3d
数据:
library(data.table)
# Sample data: colB is a list column holding one data.frame per row; the
# first element is an empty data.frame.
DT <- data.table(
  colA = c("A1", "A2", "A3"),
  colB = list(
    data.frame(),
    data.frame(colsubB1 = c("B2a", "B2b"), colsubB2 = c("B2c", "B2d")),
    data.frame(colsubB1 = c("A3a", "A3b"), colsubB2 = c("A3c", "A3d"))
  ),
  colC = c("C1", "C2", "C3"),
  colD = c("D1", "D2", "D3")
)
答案 1 :(得分:2)
一种方法:
# Unnest the list column colB in a single ungrouped pass while keeping the
# original column order (the nested columns take the place of colB).
DT[, {
  # Row count of every nested frame; vapply() guarantees an integer vector,
  # whereas sapply() may silently return a list for unexpected input.
  n_rows <- vapply(colB, nrow, integer(1L))
  # One all-NA row shaped like the first non-empty frame; it stands in for
  # the empty frames. NOTE: if every frame is empty, which.max() returns 1
  # and the placeholder has no columns, so write it out by hand in that case.
  placeholder <- data.table(colB[[which.max(n_rows > 0L)]])[NA_integer_]
  nested_rows <- rbindlist(replace(colB, n_rows == 0L, list(placeholder)))
  # Repeat each outer row once per nested row (and at least once).
  outer_rows <- .SD[rep(.I, pmax(n_rows, 1L))]
  # Column positions: outer columns keep their position in DT, and every
  # nested column is assigned colB's position, so order() slots them in
  # where colB used to be.
  col_order <- order(c(
    match(names(outer_rows), names(DT)),
    rep(match("colB", names(DT)), ncol(nested_rows))
  ))
  setcolorder(cbind(outer_rows, nested_rows), col_order)
}, .SDcols = !"colB"]
colA colsubB1 colsubB2 colC colD
1: A1 <NA> <NA> C1 D1
2: A2 B2a B2c C2 D2
3: A2 B2b B2d C2 D2
4: A3 A3a A3c C3 D3
5: A3 A3b A3d C3 D3
如果 colB 的所有元素都不包含任何内容,则所需的输出并不明确。我想在这种情况下,您应该像 @chinsoon 的答案那样手动写出 empty 的值。
答案 2 :(得分:1)
您实际上可以使用 tidyr::unnest():
library(data.table)
library(tidyr)
# Sample data, adjusted so that the code below runs without warnings:
# strings stay character (stringsAsFactors = FALSE) and the "empty" element
# of colB is a zero-column data.frame that already has one row.
DT <- data.table(
  colA = c("A1", "A2", "A3"),
  colB = list(
    data.frame(row.names = 1),
    data.frame(
      colsubB1 = c("B2a", "B2b"),
      colsubB2 = c("B2c", "B2d"),
      stringsAsFactors = FALSE
    ),
    data.frame(
      colsubB1 = c("A3a", "A3b"),
      colsubB2 = c("A3c", "A3d"),
      stringsAsFactors = FALSE
    )
  ),
  colC = c("C1", "C2", "C3"),
  colD = c("D1", "D2", "D3")
)
# The tidyr release current at the time of writing can drop rows whose nested
# data.frame is empty (reportedly fixed in the following release), so every
# zero-column element of colB is first replaced by a one-row, zero-column
# data.frame.
# which() supplies explicit row numbers; presumably it is needed because
# data.table treats a leading `!` in `i` as its own negation (not-join)
# operator rather than as plain logical negation -- TODO confirm.
DT[which(lengths(colB) == 0L), colB := list(list(data.frame(row.names = 1)))]
# With the placeholders in place, a plain unnest() of the list column suffices.
DT[, unnest(.SD, colB)]
#> colA colC colD colsubB1 colsubB2
#> 1: A1 C1 D1 <NA> <NA>
#> 2: A2 C2 D2 B2a B2c
#> 3: A2 C2 D2 B2b B2d
#> 4: A3 C3 D3 A3a A3c
#> 5: A3 C3 D3 A3b A3d
# or: call unnest() on the data.table directly instead of going through `[`
unnest(DT, colB)
#> colA colC colD colsubB1 colsubB2
#> 1: A1 C1 D1 <NA> <NA>
#> 2: A2 C2 D2 B2a B2c
#> 3: A2 C2 D2 B2b B2d
#> 4: A3 C3 D3 A3a A3c
#> 5: A3 C3 D3 A3b A3d
由reprex package(v0.3.0)于2019-07-11创建