如何使用DF /列表索引将数据帧列表连接到一个数据帧?

时间:2018-06-29 21:46:11

标签: r dataframe join dplyr

library(dplyr); library(tibble)

这是我的示例数据。我想把一个由小型数据框组成的列表(listOfDFs)连接到单个数据框(points)上。

listOfDFs包含5个小数据框,总共7行;而points是一个有5行的数据框,它的第i行对应listOfDFs的第i个元素:

# Example event table: one row per event, five events in total.
# Built by hand as a tibble-classed data frame (base R only), so it can be
# recreated without calling any tibble constructor.
points <- structure(
  list(
    EVENT_ID_CNTY = c("LBY1243", "LBY3389", "LBY3393", "LBY3506", "LBY3822"),
    year = c(2013, rep(2015, 4L)),
    COUNTRY = rep("Libya", 5L)
  ),
  # c(NA, -n) is R's compact encoding for automatic row names 1..n.
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Example list of five small tibble-classed data frames, named "1" to "5".
# Elements 1 and 5 hold the same two rows; elements 2, 3 and 4 hold the same
# single row. Everything is assembled inside local() so that the helper and
# the intermediate tables do not leak into the calling environment.
listOfDFs <- local({
  # Build one table from its CELL_ID and POP values; gwno and prio_country
  # are constant across every row of the example data.
  make_cells <- function(cell_id, pop) {
    n <- length(cell_id)
    structure(
      list(
        CELL_ID = cell_id,
        gwno = rep(620L, n),
        POP = pop,
        prio_country = rep("Libya", n)
      ),
      # c(NA, -n) is R's compact encoding for automatic row names 1..n.
      row.names = c(NA, -n),
      class = c("tbl_df", "tbl", "data.frame")
    )
  }

  two_cells <- make_cells(c(165267, 164547), c(751.0737, 754.5745))
  one_cell <- make_cells(172475, 11676)

  list(
    `1` = two_cells,
    `2` = one_cell,
    `3` = one_cell,
    `4` = one_cell,
    `5` = two_cells
  )
})

这些数据如下所示:

points
#> # A tibble: 5 x 3
#>   EVENT_ID_CNTY  year COUNTRY
#>   <chr>         <dbl> <chr>  
#> 1 LBY1243       2013. Libya  # Corresponds to the two items in listOfDFs[[1]]
#> 2 LBY3389       2015. Libya  # Corresponds to the one item in listOfDFs[[2]]
#> 3 LBY3393       2015. Libya  
#> 4 LBY3506       2015. Libya  
#> 5 LBY3822       2015. Libya


listOfDFs
#> $`1`
#> # A tibble: 2 x 4
#>   CELL_ID  gwno   POP prio_country
#>     <dbl> <int> <dbl> <chr>       
#> 1 165267.   620  751. Libya       
#> 2 164547.   620  755. Libya       
#> 
#> $`2`
#> # A tibble: 1 x 4
#>   CELL_ID  gwno    POP prio_country
#>     <dbl> <int>  <dbl> <chr>       
#> 1 172475.   620 11676. Libya       
#> 
#> $`3`
#> # A tibble: 1 x 4
#>   CELL_ID  gwno    POP prio_country
#>     <dbl> <int>  <dbl> <chr>       
#> 1 172475.   620 11676. Libya       
#> 
#> $`4`
#> # A tibble: 1 x 4
#>   CELL_ID  gwno    POP prio_country
#>     <dbl> <int>  <dbl> <chr>       
#> 1 172475.   620 11676. Libya       
#> 
#> $`5`
#> # A tibble: 2 x 4
#>   CELL_ID  gwno   POP prio_country
#>     <dbl> <int> <dbl> <chr>       
#> 1 165267.   620  751. Libya       
#> 2 164547.   620  755. Libya

我如何结合这两个,使它们遵循以下模式?我是否必须找到一种方法将它们绑定到数据框的行索引上,还是有一种更优雅的方法?:

#>   EVENT_ID_CNTY  year COUNTRY  CELL_ID  gwno   POP prio_country
#>   <chr>         <dbl> <chr>     <dbl> <int> <dbl> <chr>  
#>   LBY1243       2013. Libya     165267.   620  751. Libya 
#>   LBY1243       2013. Libya     164547.   620  755. Libya 
#>   LBY3389       2015. Libya     172475.   620 11676. Libya
#>   LBY3393       2015. Libya     172475.   620 11676. Libya
#>   LBY3506       2015. Libya     172475.   620 11676. Libya
#>   LBY3822       2015. Libya     165267.   620  751. Libya 
#>   LBY3822       2015. Libya     164547.   620  755. Libya
.........

3 个答案:

答案 0 :(得分:2)

library(tidyr)
# Store each element of listOfDFs next to its row of points as a list-column
# (mm), then expand that list-column so every nested row becomes its own row
# and the columns of points are repeated alongside it.
# `cols` is named explicitly: tidyr >= 1.0.0 emits a warning when unnest()
# is called without it and has to guess which list-columns to expand.
points %>%
  mutate(mm = listOfDFs) %>%
  unnest(cols = mm)

# A tibble: 7 x 7
  EVENT_ID_CNTY  year COUNTRY CELL_ID  gwno    POP prio_country
  <chr>         <dbl> <chr>     <dbl> <int>  <dbl> <chr>       
1 LBY1243        2013 Libya    165267   620   751. Libya       
2 LBY1243        2013 Libya    164547   620   755. Libya       
3 LBY3389        2015 Libya    172475   620 11676  Libya       
4 LBY3393        2015 Libya    172475   620 11676  Libya       
5 LBY3506        2015 Libya    172475   620 11676  Libya       
6 LBY3822        2015 Libya    165267   620   751. Libya       
7 LBY3822        2015 Libya    164547   620   755. Libya  

答案 1 :(得分:1)

根据示例,我们按照 listOfDFs(一个 list)中每个元素的行数来复制 points 数据集的对应行,然后把两者的列绑定在一起。既可以像下面这样直接嵌套调用,也可以在链中使用:

library(tidyverse)
bind_cols(
  # Repeat row i of points once for every row of listOfDFs[[i]] ...
  slice(
    points,
    rep(seq_len(nrow(points)), times = map_dbl(listOfDFs, nrow))
  ),
  # ... so the repeated rows line up one-to-one with the stacked list rows.
  bind_rows(listOfDFs)
)
# A tibble: 7 x 7
#EVENT_ID_CNTY  year COUNTRY CELL_ID  gwno    POP prio_country
#   <chr>         <dbl> <chr>     <dbl> <int>  <dbl> <chr>       
#1 LBY1243        2013 Libya    165267   620   751. Libya       
#2 LBY1243        2013 Libya    164547   620   755. Libya       
#3 LBY3389        2015 Libya    172475   620 11676  Libya       
#4 LBY3393        2015 Libya    172475   620 11676  Libya       
#5 LBY3506        2015 Libya    172475   620 11676  Libya       
#6 LBY3822        2015 Libya    165267   620   751. Libya       
#7 LBY3822        2015 Libya    164547   620   755. Libya    

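同样的操作在链中可以写成:

map_dbl(listOfDFs, nrow) %>%
  rep(seq_len(nrow(points)), .) %>%
  magrittr::extract(points, ., ) %>%
  bind_cols(bind_rows(listOfDFs))

或者用 'EVENT_ID_CNTY' 设置 list 的名称,以创建一个 id 列,再与 'points' 连接起来(原代码在抓取时丢失,以下是按这一描述重建的写法):

listOfDFs %>%
  set_names(points$EVENT_ID_CNTY) %>%
  bind_rows(.id = "EVENT_ID_CNTY") %>%
  left_join(points, by = "EVENT_ID_CNTY")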

答案 2 :(得分:1)

使用dplyr::bind_rows和dplyr::inner_join的一种做法是:

library(dplyr)

# Tag every row of points with its row number as a string (rn), stack
# listOfDFs with the list names recorded in a "Name" column, match the two
# on rn == Name, and finally drop the helper key.
points %>%
  mutate(rn = as.character(row_number())) %>%
  inner_join(
    bind_rows(listOfDFs, .id = "Name"),
    by = c("rn" = "Name")
  ) %>%
  select(-rn)

# # A tibble: 7 x 7
# EVENT_ID_CNTY  year COUNTRY CELL_ID  gwno   POP prio_country
# <chr>         <dbl> <chr>     <dbl> <int> <dbl> <chr>       
# 1 LBY1243        2013 Libya    165267   620   751 Libya       
# 2 LBY1243        2013 Libya    164547   620   755 Libya       
# 3 LBY3389        2015 Libya    172475   620 11676 Libya       
# 4 LBY3393        2015 Libya    172475   620 11676 Libya       
# 5 LBY3506        2015 Libya    172475   620 11676 Libya       
# 6 LBY3822        2015 Libya    165267   620   751 Libya       
# 7 LBY3822        2015 Libya    164547   620   755 Libya