将列表中的多个数据框合并为一个宽格式数据框

时间:2018-05-30 09:23:34

标签: r dplyr

我在列表中有很多数据框(这个例子只有2个)

# Reproducible example data: two data frames with the same columns (v1, V2)
# but different row counts (5 and 10).
set.seed(1)

# Draw order is v1 then V2 for the 5-row frame, followed by v1 then V2 for the
# 10-row frame, so the random values match a direct data.frame(rnorm(), rnorm()).
dflist <- lapply(c(5, 10), function(n) {
  v1 <- rnorm(n)
  V2 <- rnorm(n)
  data.frame(v1 = v1, V2 = V2)
})
df1 <- dflist[[1]]
df2 <- dflist[[2]]

如何把每个数据框中的变量 v1 提取出来,组合成一个新的数据框,使新数据框的每一行对应一个数据框的 v1 列?

请注意,它们的长度不同,缺少的位置应填充为 NA,而不是像下面这个手动方案那样循环重复前面的值:

# Manual attempt: bind the two v1 vectors as matrix rows. rbind() recycles the
# shorter vector up to the longer length, so the short row repeats its values
# instead of being padded with NA.
df <- rbind(dflist[[1]][["v1"]], dflist[[2]][["v1"]])

解决方案应如下所示:

       [,1]      [,2]       [,3]      [,4]      [,5]        [,6]        [,7]      [,8]      [,9]     [,10]
[1,] -0.6264538 0.1836433 -0.8356286  1.595281 0.3295078          NA          NA        NA        NA        NA
[2,]  1.5117812 0.3898432 -0.6212406 -2.214700 1.1249309 -0.04493361 -0.01619026 0.9438362 0.8212212 0.5939013

4 个答案:

答案 0 :(得分:3)

使用基础 R(base R):

# Row count of the longest data frame; every result row is padded to this length.
maxn <- max(vapply(dflist, nrow, integer(1)))
# Indexing past the end of a vector yields NA, so x$v1[seq_len(maxn)] pads each
# v1 to exactly maxn values. Binding the padded vectors with rbind() always
# gives one row per data frame, whereas t(sapply(...)) flips the orientation
# when maxn is 1 because sapply() then returns a plain vector, not a matrix.
do.call(rbind, lapply(dflist, function(x) x$v1[seq_len(maxn)]))
# [,1]       [,2]       [,3]      [,4]       [,5]      [,6]     [,7]      [,8]       [,9]     [,10]
# [1,]  0.09196323 0.09557415  0.3929732 0.2369672 -0.8581784        NA       NA        NA         NA        NA
# [2,] -0.95637695 0.44242010 -1.3525504 1.1507461 -0.8572686 0.1472487 1.196955 0.7803178 -0.6583661 -0.914915

答案 1 :(得分:0)

library(data.table)

# Example data: two data.tables with the same columns but 5 and 10 rows.
df1 <- data.table(v1 = rnorm(5), V2 = rnorm(5))
df2 <- data.table(v1 = rnorm(10), V2 = rnorm(10))

dflist <- list(df1, df2)

# transpose() turns each column into a row; row 1 of the transposed table is
# the first column (v1) laid out horizontally as V1, V2, ..., Vn.
# lapply() builds the list directly, so no placeholder list or 1:length() index
# loop is needed.
df_new <- lapply(dflist, function(d) transpose(d)[1, ])

# Stack the one-row tables; fill = TRUE pads the shorter rows with NA.
df <- rbindlist(df_new, use.names = TRUE, fill = TRUE)

答案 2 :(得分:0)

# Build two example data frames with 5 and 10 rows (no seed is set here, so the
# values differ on every run).
df1 <- data.frame(v1 = rnorm(5), V2 = rnorm(5))
df2 <- data.frame(v1 = rnorm(10), V2 = rnorm(10))

dflist <- list(df1, df2)

# Reference result: the manual rbind() of the two v1 vectors.
df <- rbind(dflist[[1]]$v1, dflist[[2]]$v1)

# Take the first column of each data frame as a one-column data frame.
first_column <- lapply(dflist, function(x) as.data.frame(x[,1]))
# Column-bind them and transpose so each original first column becomes one row.
result <- t(do.call(cbind, first_column))
# Check element by element that this matches the manual rbind() result.
# NOTE(review): because it equals the rbind() result, the shorter v1 appears to
# be recycled rather than padded with NA -- confirm this is the intended output.
all(df == result)

答案 3 :(得分:0)

一种方法是使用 dplyr::bind_rows 合并列表中的数据框。还可以通过 bind_rows 的 .id 参数,在合并后的数据中标记并跟踪每一行来自哪个数据框。

library(tidyverse)

# As mentioned by OP, the result contains only the v1 column.
# Stack all data frames (the .id column "name" records which list element each
# row came from), number the rows within each data frame, then widen so every
# row number becomes its own column. Data frames shorter than the longest one
# get NA in the columns they do not reach.
# pivot_wider() is used in place of the superseded spread().
bind_rows(dflist, .id = "name") %>%
  select(-V2) %>%
  group_by(name) %>%
  mutate(rn = row_number()) %>%
  pivot_wider(names_from = rn, values_from = v1)

# # A tibble: 2 x 11
# # Groups: name [2]
#  name     `1`   `2`    `3`   `4`   `5`      `6`      `7`    `8`    `9`   `10`
# * <chr>  <dbl> <dbl>  <dbl> <dbl> <dbl>    <dbl>    <dbl>  <dbl>  <dbl>  <dbl>
# 1 1     -0.626 0.184 -0.836  1.60 0.330  NA       NA      NA     NA     NA    
# 2 2      1.51  0.390 -0.621 -2.21 1.12  - 0.0449 - 0.0162  0.944  0.821  0.594

# The result containing data from both the v1 and V2 columns.
# Reshape to long form (one row per name/key/value), number the values within
# each (name, key) pair, then widen so every position becomes its own column.
# pivot_longer()/pivot_wider() are used in place of the superseded
# gather()/spread().
bind_rows(dflist, .id = "name") %>%
  pivot_longer(-name, names_to = "key", values_to = "value") %>%
  group_by(name, key) %>%
  mutate(rn = row_number()) %>%
  pivot_wider(names_from = rn, values_from = value)

# # A tibble: 4 x 12
# # Groups: name, key [4]
#  name  key      `1`   `2`     `3`    `4`    `5`      `6`      `7`     `8`     `9`   `10`
# * <chr> <chr>  <dbl> <dbl>   <dbl>  <dbl>  <dbl>    <dbl>    <dbl>   <dbl>   <dbl>  <dbl>
# 1 1     v1    -0.626 0.184 -0.836   1.60   0.330  NA       NA       NA      NA     NA    
# 2 1     V2    -0.820 0.487  0.738   0.576 -0.305  NA       NA       NA      NA     NA    
# 3 2     v1     1.51  0.390 -0.621  -2.21   1.12  - 0.0449 - 0.0162   0.944   0.821  0.594
# 4 2     V2     0.919 0.782  0.0746 -1.99   0.620 - 0.0561 - 0.156  - 1.47  - 0.478  0.418