将数据框列表转换为数据框

时间:2019-12-24 19:12:47

标签: r list dataframe dplyr lapply

我通过查询 SQL 数据库得到了一个由数据框组成的列表。`dput(list_df[1:5])` 的输出如下:

数据

# Example input: a list mixing "NOT FOUND" placeholder strings with data
# frames that have five columns and either one or two rows.
list_df <- list(
  # Element 1: query returned nothing.
  "NOT FOUND",
  # Element 2: one row, no associated gene.
  structure(
    list(
      associated_gene = NA,
      refsnp_id = "rs778387354",
      allele = "GTGCCCTGGACGTGGCCTCGAACCGCGTGCCCTGG/GTGCCCTGG",
      chrom_start = 42692955L,
      chrom_end = 42692989L
    ),
    row.names = 2L,
    class = "data.frame"
  ),
  # Element 3: one row, empty-string gene.
  structure(
    list(
      associated_gene = "",
      refsnp_id = "rs771507737",
      allele = "AT/-",
      chrom_start = 42693404L,
      chrom_end = 42693405L
    ),
    row.names = 2L,
    class = "data.frame"
  ),
  # Element 4: query returned nothing.
  "NOT FOUND",
  # Element 5: one row with a gene name.
  structure(
    list(
      associated_gene = "A4GALT",
      refsnp_id = "rs387906280",
      allele = "GGGG/GGGGG",
      chrom_start = 42692923L,
      chrom_end = 42692926L
    ),
    row.names = 1L,
    class = "data.frame"
  ),
  # Element 6: two rows.
  structure(
    list(
      associated_gene = c(NA, NA),
      refsnp_id = c("rs1296945362", "rs775626055"),
      allele = c(
        "GGGTGGGGTGGGG/GGGTGGGGTGGGGTGGGG",
        "GGGGGG/GGGG/GGGGGGG"
      ),
      chrom_start = c(42693742L, 42693751L),
      chrom_end = c(42693754L, 42693756L)
    ),
    row.names = c(1L, 4L),
    class = "data.frame"
  )
)

list_df 中的元素有 3 种维度(`dim()`)情况:

1 5
2 5
NULL

我想把 list_df 中恰好为 1 行 5 列的数据框按行绑定到一个新的数据框 new_df 中,使得 new_df[i, ] 与 list_df[[i]] 一一对应。

对于 0 行或多于 1 行的元素,对应的行用 NA 填充即可。

3 个答案:

答案 0 :(得分:2)

对于不符合条件的元素,我们可以创建一个只含 NA 的 data.frame 作为占位,然后把所有元素按行绑定

library(purrr)
# Row-bind the list, keeping only elements that are data frames with exactly
# one row; every other element ("NOT FOUND" strings, multi-row data frames)
# is replaced by a one-row placeholder that bind-fills to NA in all columns.
# `&&` short-circuits, so nrow() is only evaluated on real data frames and the
# `if` condition is guaranteed to be a single TRUE/FALSE.
map_dfr(list_df, ~ if (is.data.frame(.x) && nrow(.x) == 1) {
  .x
} else {
  data.frame(associated_gene = NA)
})
#  associated_gene   refsnp_id                                        allele chrom_start chrom_end
#1            <NA>        <NA>                                          <NA>          NA        NA
#2            <NA> rs778387354 GTGCCCTGGACGTGGCCTCGAACCGCGTGCCCTGG/GTGCCCTGG    42692955  42692989
#3                 rs771507737                                          AT/-    42693404  42693405
#4            <NA>        <NA>                                          <NA>          NA        NA
#5          A4GALT rs387906280                                    GGGG/GGGGG    42692923  42692926
#6            <NA>        <NA>                                          <NA>          NA        NA

如果我们只想保留(keep)那些是 data.frame 且只有一行的元素

library(dplyr)
# Discard everything except one-row data frames, then stack what remains
# into a single data frame.
list_df %>%
  keep(function(el) is.data.frame(el) && nrow(el) == 1) %>%
  bind_rows()

答案 1 :(得分:2)

plyr::rbind.fill也可以:

library(plyr)

# Coerce every element to a data frame. Called through lapply(), a bare
# "NOT FOUND" string becomes a one-column data frame whose column is
# named "X[[i]]".
list_df <- lapply(list_df, as.data.frame)
# Stack all pieces; rbind.fill() pads columns missing from a piece with NA.
df <- rbind.fill(list_df)
# Drop the placeholder column by name rather than by position, so that no
# real column is lost when the list contains no "NOT FOUND" entry or when the
# placeholder column is not the first one. drop = FALSE keeps a data frame
# even if a single column remains.
df <- df[, names(df) != "X[[i]]", drop = FALSE]

答案 2 :(得分:0)

如果您只想保留具有指定维度的列表元素,则可以使用 Filter

# Keep only the data frames whose shape is exactly 1 row x 5 columns and
# row-bind the survivors with base R.
do.call(
  rbind,
  Filter(
    function(el) is.data.frame(el) && all(dim(el) == c(1L, 5L)),
    list_df
  )
)

#   associated_gene   refsnp_id                                        allele chrom_start chrom_end
#2             <NA> rs778387354 GTGCCCTGGACGTGGCCTCGAACCGCGTGCCCTGG/GTGCCCTGG    42692955  42692989
#21                 rs771507737                                          AT/-    42693404  42693405
#1           A4GALT rs387906280                                    GGGG/GGGGG    42692923  42692926

如果您希望不符合条件的元素在结果中对应一行 NA,可以使用 lapply

# Substitute a one-row placeholder for every element that is not a
# 1 x 5 data frame, then row-bind; the placeholder's missing columns are
# filled with NA by bind_rows().
dplyr::bind_rows(lapply(list_df, function(el) {
  is_wanted <- is.data.frame(el) && nrow(el) == 1 && ncol(el) == 5
  if (!is_wanted) {
    return(data.frame(associated_gene = NA))
  }
  el
}))

#  associated_gene   refsnp_id                                        allele chrom_start chrom_end
#1            <NA>        <NA>                                          <NA>          NA        NA
#2            <NA> rs778387354 GTGCCCTGGACGTGGCCTCGAACCGCGTGCCCTGG/GTGCCCTGG    42692955  42692989
#3                 rs771507737                                          AT/-    42693404  42693405
#4            <NA>        <NA>                                          <NA>          NA        NA
#5          A4GALT rs387906280                                    GGGG/GGGGG    42692923  42692926
#6            <NA>        <NA>                                          <NA>          NA        NA