我有一个包含30000多个元素的列表,其中各个向量的长度不同。我想将该列表转换为数据框,其中每个向量代表一行,其值分布在多列中。下面是该列表的一个模拟示例:
# Mock example: a list of three numeric vectors with lengths 5, 3 and 2.
# Note that two elements share the name `c`, so the element names are not unique.
lst <- list(a = c(1,2,4,5,6), c = c(7,8,9), c = c(10,11))
我想要的输出如下:
# [,1] [,2] [,3] [,4] [,5] [,6]
#a 1 2 3 4 5 6
#b 7 8 9 NA NA NA
#c 10 11 NA NA NA NA
答案 0 :(得分:2)
您可以这样做:
# Pad every vector with NA up to the length of the longest one, combine the
# padded vectors as the columns of a data frame, then transpose so that each
# original vector ends up as one row.
longest <- max(lengths(lst))
padded <- lapply(lst, function(v) `length<-`(v, longest))
t(as.data.frame(padded))
# [,1] [,2] [,3] [,4] [,5]
#a 1 2 4 5 6
#c 7 8 9 NA NA
#c.1 10 11 NA NA NA
或者如@Andrew所指出的,您可以执行:
# Pad every vector with NA up to the length of the longest one and bind the
# padded vectors together as rows (row names come from the list names).
# lapply() + rbind() is used instead of t(sapply(...)) because sapply()
# simplifies its result to a plain vector when the longest vector has length 1,
# in which case t() would return a single row instead of one row per vector.
do.call(rbind, lapply(lst, "length<-", max(lengths(lst))))
# [,1] [,2] [,3] [,4] [,5]
#a 1 2 4 5 6
#c 7 8 9 NA NA
#c 10 11 NA NA NA
答案 1 :(得分:2)
这是一个基于 base R 的方案:
# For each vector, count how many NA values are missing to reach the length
# of the longest vector
na_nums <- max(lengths(lst)) - lengths(lst)
# Build the NA filler for every vector, append it to the vector with mapply,
# then transpose so that each original vector becomes one row
na_fill <- sapply(na_nums, function(k) rep(NA, k))
t(mapply(c, lst, na_fill))
[,1] [,2] [,3] [,4] [,5]
a 1 2 4 5 6
c 7 8 9 NA NA
c 10 11 NA NA NA
答案 2 :(得分:2)
这是我的第一反应。
# Length of the longest vector in the list
max_len <- max(vapply(lst, FUN = length, FUN.VALUE = numeric(1)))
# Append to each vector as many NA values as it is short of max_len
lst <- lapply(
  lst,
  function(x, max_len) {
    n_missing <- max_len - length(x)
    c(x, rep(NA, n_missing))
  },
  max_len
)
# Stack the padded vectors row-wise to form a matrix
do.call("rbind", lst)
这有点冗长,而其他一些解决方案则相当优雅。既然您说您的列表超过了30,000个元素,那么我很好奇它们在长度为30,000的列表中的表现如何。
如果这是您经常需要做的事情,则可能要采用安德鲁的方法。
lst <- list(a = c(1,2,4,5,6), c = c(7,8,9), c = c(10,11))
# Build out a list of 30,000 elements by resampling the three vectors.
lst <- lst[sample(1:3, 30000, replace = TRUE)]
library(microbenchmark)
# The expressions below are evaluated in the calling environment, so none of
# them may overwrite `lst`: every approach must be timed on the same ragged
# input rather than on a list that an earlier run has already padded.
microbenchmark(
  benjamin = {
    max_len <- max(vapply(lst,
                          FUN = length,
                          FUN.VALUE = numeric(1)))
    # Keep the padded copy in its own variable instead of reassigning `lst`.
    padded <- lapply(lst,
                     function(x, max_len) c(x, rep(NA, max_len - length(x))),
                     max_len)
    # Form a matrix
    do.call("rbind", padded)
  },
  slava = {
    # Pad to the overall maximum before reducing. Padding pairwise inside
    # Reduce() breaks once the accumulator is a matrix: rbind() then cuts a
    # later, longer vector down to the matrix's column count.
    n <- max(lengths(lst))
    Reduce(function(x, y) rbind(x, y, deparse.level = 0),
           lapply(lst, "length<-", n))
  },
  andrew = {
    na_nums <- max(lengths(lst)) - lengths(lst)
    # Transpose results after padding NA's using mapply
    t(mapply(c, lst, sapply(na_nums, rep, x = NA)))
  },
  matt = {
    t(as.data.frame(lapply(lst, "length<-", max(lengths(lst)))))
  },
  # Five evaluations per expression, matching the neval column of the results.
  times = 5
)
Unit: milliseconds
expr min lq mean median uq max neval
benjamin 77.08337 91.42793 117.9376 106.97656 122.53898 191.6612 5
slava 32383.10840 32962.57589 32976.6662 33071.40314 33180.70634 33285.5372 5
andrew 60.91803 66.74401 87.1645 71.92043 77.78805 158.4520 5
matt 1685.09158 1702.19796 1759.2741 1737.01949 1760.86237 1911.1993 5
答案 3 :(得分:1)
诀窍是制作相等长度的向量。另外,似乎您想在输出端使用矩阵。
lst <- list(a = c(1,2,4,5,6), c = c(7,8,9), c = c(10,11))
# Pad every vector to the length of the longest one *before* combining.
# Padding pairwise inside Reduce() is not enough: once the accumulator has
# become a matrix, a later vector that is longer than the matrix is wide is
# cut down by rbind() or the matrix loses its shape, so values go missing.
n <- max(lengths(lst))
padded <- lapply(lst, "length<-", n)
# Stack the equal-length vectors row by row; deparse.level = 0 keeps the
# result free of row names.
res <- Reduce(function(x, y) rbind(x, y, deparse.level = 0), padded)
res
输出
# [,1] [,2] [,3] [,4] [,5]
# [1,] 1 2 4 5 6
# [2,] 7 8 9 NA NA
# [3,] 10 11 NA NA NA
您可以在此时重置行名。
更新:供感兴趣的读者参考的计时结果:
# Small mock list (three numeric vectors of lengths 5, 3 and 2) used as the
# input for the timing comparison of the convert functions.
lst <- list(a = c(1,2,4,5,6), c = c(7,8,9), c = c(10,11))
# Convert a list of vectors of differing lengths into a matrix with one row
# per vector; shorter vectors are padded on the right with NA.
#
# lst: a list of atomic vectors.
# Returns a matrix without row names. As with any Reduce() call without an
# initial value, a one-element list yields that (padded) vector itself and an
# empty list yields NULL.
convert <- function(lst) {
  # Pad to the overall maximum up front. Padding pairwise inside Reduce() is
  # wrong once the accumulator is a matrix: a later, longer vector is then
  # cut down by rbind() or the matrix loses its shape.
  # The extra 0L avoids the max() warning for an empty list.
  n <- max(lengths(lst), 0L)
  padded <- lapply(lst, "length<-", n)
  # deparse.level = 0 keeps rbind() from inventing row names.
  Reduce(function(x, y) rbind(x, y, deparse.level = 0), padded)
}
# Convert a list of vectors of differing lengths into a matrix with one row
# per vector; shorter vectors are padded on the right with NA.
#
# lst: a list of atomic vectors (optionally named).
# Returns a matrix whose row names are the names of `lst`, or NULL for an
# empty list.
convert2 <- function(lst) {
  # The extra 0L avoids the max() warning for an empty list.
  n <- max(lengths(lst), 0L)
  padded <- lapply(lst, "length<-", n)
  # Bind rows directly instead of t(sapply(...)): sapply() simplifies to a
  # plain vector when n is 1, and t() would then return a single row rather
  # than one row per vector.
  do.call(rbind, padded)
}
# Convert a list of vectors of differing lengths into a matrix with one row
# per vector by going through a data frame: the vectors are padded with NA to
# a common length, combined as data frame columns, and then transposed.
#
# lst: a named list of atomic vectors.
# Returns the transposed data frame, i.e. a matrix with one row per vector.
convert3 <- function(lst) {
  target_len <- max(lengths(lst))
  padded <- lapply(lst, function(v) {
    length(v) <- target_len
    v
  })
  t(as.data.frame(padded))
}
# Time the three implementations on the small mock list. No `times` argument
# is given; the results below report neval = 100 for each expression.
microbenchmark::microbenchmark(convert(lst),
convert2(lst),
convert3(lst))
#Unit: microseconds
# expr min lq mean median uq max neval
# convert(lst) 41.962 50.0725 106.47314 62.2375 68.408 4392.895 100
# convert2(lst) 28.209 33.6755 69.93855 40.7280 45.136 2298.002 100
# convert3(lst) 292.673 306.6005 381.59504 319.1180 333.399 2887.929 100