我有一个包含许多数据框的列表(本例中只有 2 个):
# Reproducible example data: a list of two data frames with 5 and 10 rows,
# each holding the columns v1 and V2.
set.seed(1)
dflist <- lapply(
  c(5, 10),
  function(n) data.frame(v1 = rnorm(n), V2 = rnorm(n))
)
df1 <- dflist[[1]]
df2 <- dflist[[2]]
如何把每个数据框中的变量 v1 提取出来,组成一个新的数据框,使其中每一行对应一个数据框的 v1 列?
请注意,各个 v1 的长度不同,缺失的位置应填充为 NA,而不是像下面这个手动方案那样循环重复已有的值:
# Naive manual approach: rbind() recycles the shorter v1 up to the longer
# length instead of padding it with NA.
df <- do.call(rbind, list(dflist[[1]][["v1"]], dflist[[2]][["v1"]]))
解决方案应如下所示:
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] -0.6264538 0.1836433 -0.8356286 1.595281 0.3295078 NA NA NA NA NA
[2,] 1.5117812 0.3898432 -0.6212406 -2.214700 1.1249309 -0.04493361 -0.01619026 0.9438362 0.8212212 0.5939013
答案 0 :(得分:3)
使用 base R:
# Build a matrix with one row per data frame, holding that data frame's v1
# padded with NA up to the length of the longest one.
maxn <- max(vapply(dflist, nrow, integer(1)))
# Indexing past the end of a vector yields NA, so x$v1[seq_len(maxn)] is v1
# right-padded with NA. vapply() (unlike sapply()) always returns a
# maxn x length(dflist) numeric matrix here; t() turns its columns into rows.
t(vapply(dflist, function(x) x$v1[seq_len(maxn)], numeric(maxn)))
# Output for the question's data (set.seed(1)):
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
# [1,] -0.6264538 0.1836433 -0.8356286 1.595281 0.3295078 NA NA NA NA NA
# [2,] 1.5117812 0.3898432 -0.6212406 -2.214700 1.1249309 -0.04493361 -0.01619026 0.9438362 0.8212212 0.5939013
答案 1 :(得分:0)
library(data.table)
df1 <- data.table(v1 = rnorm(5), V2 = rnorm(5))
df2 <- data.table(v1 = rnorm(10), V2 = rnorm(10))
dflist <- list(df1, df2)
# Turn each table's v1 column into a one-row data.table (columns V1, V2, ...).
# lapply() replaces the index loop, so no result list has to be initialised
# by hand and an empty dflist is handled without a 1:0 iteration.
df_new <- lapply(dflist, function(dt) transpose(dt[, .(v1)]))
# fill = TRUE pads the rows coming from shorter tables with NA.
df <- rbindlist(df_new, use.names = TRUE, fill = TRUE)
答案 2 :(得分:0)
df1 <- data.frame(v1 = rnorm(5), V2 = rnorm(5))
df2 <- data.frame(v1 = rnorm(10), V2 = rnorm(10))
dflist <- list(df1, df2)
# Manual version from the question, kept for comparison only: rbind()
# recycles the shorter v1 instead of padding it with NA.
df <- rbind(dflist[[1]]$v1, dflist[[2]]$v1)
# Extract the first column of every data frame as a plain vector. cbind()-ing
# one-column data frames fails when their row counts differ.
first_column <- lapply(dflist, function(x) x[[1]])
max_len <- max(lengths(first_column))
# Indexing past the end of a vector yields NA, which pads the shorter columns;
# t() turns the max_len x length(dflist) matrix into one row per data frame.
result <- t(vapply(
  first_column,
  function(v) v[seq_len(max_len)],
  numeric(max_len)
))
# The filled cells agree with the manual version; the padded cells are NA and
# are skipped by na.rm = TRUE.
all(df == result, na.rm = TRUE)
答案 3 :(得分:0)
一个选项是使用 dplyr::bind_rows 合并列表中的数据框。还可以使用 bind_rows 的 .id 参数,在合并后的数据中标记每一行来自第几个数据框。
library(tidyverse)
# As requested by the OP: the result contains only the v1 column.
# Stack the data frames (name = position in the list), number the rows within
# each data frame, then spread those row numbers out into columns. Positions
# missing from a shorter data frame become NA.
bind_rows(dflist, .id = "name") %>%
  select(-V2) %>%
  group_by(name) %>%
  mutate(rn = row_number()) %>%
  # Drop the grouping so the reshaped result is a plain tibble.
  ungroup() %>%
  # pivot_wider() is the current replacement for the superseded spread().
  pivot_wider(names_from = rn, values_from = v1)
# # A tibble: 2 x 11
# name `1` `2` `3` `4` `5` `6` `7` `8` `9` `10`
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 -0.626 0.184 -0.836 1.60 0.330 NA NA NA NA NA
# 2 2 1.51 0.390 -0.621 -2.21 1.12 -0.0449 -0.0162 0.944 0.821 0.594
# The result containing data from both the v1 and V2 columns.
# Reshape to long form first (one row per data frame / column / position),
# number the values within each data frame and column, then widen so that
# every (name, key) pair becomes one row padded with NA.
bind_rows(dflist, .id = "name") %>%
  # pivot_longer() / pivot_wider() are the current replacements for the
  # superseded gather() / spread().
  pivot_longer(-name, names_to = "key", values_to = "value") %>%
  group_by(name, key) %>%
  mutate(rn = row_number()) %>%
  # Drop the grouping so the reshaped result is a plain tibble.
  ungroup() %>%
  pivot_wider(names_from = rn, values_from = value)
# # A tibble: 4 x 12
# name key `1` `2` `3` `4` `5` `6` `7` `8` `9` `10`
# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 v1 -0.626 0.184 -0.836 1.60 0.330 NA NA NA NA NA
# 2 1 V2 -0.820 0.487 0.738 0.576 -0.305 NA NA NA NA NA
# 3 2 v1 1.51 0.390 -0.621 -2.21 1.12 -0.0449 -0.0162 0.944 0.821 0.594
# 4 2 V2 0.919 0.782 0.0746 -1.99 0.620 -0.0561 -0.156 -1.47 -0.478 0.418