如何通过使用名称将列表内的data.frame连接起来?

时间:2018-11-04 05:59:04

标签: r list dataframe merge mapply

我必须导入1,000个以上的excel文件,每个excel包含多张工作表(有些具有相同的工作表名称,有些具有不同的工作表名称)。

下面举一个小例子

# Workbook 1: sheets "games", "weather" and "cars".
games <- data.frame(
  index = c(1, 2, 3),
  player = c("John", "Sam", "Mary")
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("hot", "cold", "rainy")
)
cars <- data.frame(
  index = c(1, 2, 3),
  car = c("honda", "toyota", "bmw")
)
list1 <- list(games = games, weather = weather, cars = cars)

# Workbook 2: same "games"/"weather" sheet names with different rows, plus a
# "sport" sheet that workbook 1 does not have.
games <- data.frame(
  index = c(1, 2, 3),
  player = c("AA", "BB", "CC")
)
weather <- data.frame(
  index = c(1, 2, 3),
  temperature = c("cold", "rainy", "hot")
)
sport <- data.frame(
  index = c(1, 2, 3),
  interest = c("swim", "soccer", "rugby")
)
list2 <- list(games = games, weather = weather, sport = sport)

# Workbook 3: reuses workbook 2's "games" and "weather" sheets unchanged.
list3 <- list(games = games, weather = weather)

# Remove the temporary data frames so only the lists are kept around.
rm(games, sport, weather, cars)

我正在寻找通过使用列表名称来组合列表的方法。我尝试使用merge()和mapply(),但它们没有返回我想要的内容

我想要的结果如下:

   $`games`
# A tibble: 6 x 2
  index player
  <dbl> <chr> 
1     1 John  
2     2 Sam   
3     3 Mary  
4     1 AA    
5     2 BB    
6     3 CC    

$weather
# A tibble: 6 x 2
  index temperature
  <dbl> <chr>      
1     1 hot        
2     2 cold       
3     3 rainy      
4     1 cold       
5     2 rainy      
6     3 hot        

$cars
# A tibble: 3 x 2
  index car   
  <dbl> <chr> 
1     1 honda 
2     2 toyota
3     3 bmw   

$sport
  index interest
1     1     swim
2     2   soccer
3     3    rugby

编辑:我遇到过这样的情况:list2中有一个data.frame sport(不在list1中)

2 个答案:

答案 0 :(得分:0)

您可以使用purrr来帮助操作列表。我仅添加dplyr以便绑定data.frame。如果您已经使用tibble,就不会有问题。

  • 我创建列表的列表。
  • transpose更改列表以按名称将元素重新分组。基本上,x[[1]][[2]]等效于transpose(x)[[2]][[1]]
  • 我使用map遍历该列表,并使用dplyr::bind_rows绑定各个data.frame来获得最终的tibble。
options(stringsAsFactors = FALSE)
games <- data.frame(index = c(1,2,3), player = c('John', 'Sam', 'Mary'))
weather <- data.frame(index = c(1,2,3), temperature = c('hot', 'cold', 'rainy'))
cars <- data.frame(index = c(1,2,3), car = c('honda', 'toyota','bmw'))
list1 <- list(games, weather, cars)
names(list1) <- c('games', 'weather', 'cars')
games <- data.frame(index = c(1,2,3), player = c('AA', 'BB', 'CC'))
weather <- data.frame(index = c(1,2,3), temperature = c('cold', 'rainy', 'hot'))
list2 <- list(games, weather)
names(list2) <- c('games', 'weather')
library(purrr)
list(list1, list2) %>%
  # regroup named element together
  transpose() %>%
  # bind the df together
  map(dplyr::bind_rows)
#> $games
#>   index player
#> 1     1   John
#> 2     2    Sam
#> 3     3   Mary
#> 4     1     AA
#> 5     2     BB
#> 6     3     CC
#>
#> $weather
#>   index temperature
#> 1     1         hot
#> 2     2        cold
#> 3     3       rainy
#> 4     1        cold
#> 5     2       rainy
#> 6     3         hot
#>
#> $cars
#>   index    car
#> 1     1  honda
#> 2     2 toyota
#> 3     3    bmw

reprex package(v0.2.1)于2018-11-04创建

如果第一个列表未包含所需的所有元素,则需要在transpose中提供.names参数。参见purrr中transpose的帮助文档(命令见下)。我为此建立一个例子。

help("transpose", package = "purrr")

reprex package(v0.2.1)于2018-11-04创建

答案 1 :(得分:0)

使用lapply()有一个简单的方法。

# For every sheet name found in any global object whose name contains "list",
# stack the same-named data frames from list1, list2 and list3 row-wise and
# drop duplicated rows. A list that lacks the sheet contributes NULL, which
# rbind() ignores. The result is an unnamed list, one element per sheet name.
lapply(
  unique(unlist(lapply(mget(ls(pattern="list")), names))),
  function(sheet) {
    stacked <- rbind(list1[[sheet]], list2[[sheet]], list3[[sheet]])
    unique(stacked)
  }
)

使用setNames()和dplyr::as_tibble来获得列表名称和tibble。

像这样:

# Every distinct sheet name that occurs in any of the lists held in `Lol`.
nms <- unique(unlist(lapply(Lol, names)))

# Per sheet name: row-bind the matching data frames, drop duplicated rows,
# convert the result to a tibble, and label each element with its sheet name.
setNames(
  lapply(nms, function(sheet) {
    stacked <- unique(rbind(list1[[sheet]], list2[[sheet]], list3[[sheet]]))
    dplyr::as_tibble(stacked)
  }),
  nms
)

输出结果

$`games`
# A tibble: 6 x 2
  index player
* <dbl> <fct> 
1     1 John  
2     2 Sam   
3     3 Mary  
4     1 AA    
5     2 BB    
6     3 CC    

$weather
# A tibble: 6 x 2
  index temperature
* <dbl> <fct>      
1     1 hot        
2     2 cold       
3     3 rainy      
4     1 cold       
5     2 rainy      
6     3 hot        

$cars
# A tibble: 3 x 2
  index car   
* <dbl> <fct> 
1     1 honda 
2     2 toyota
3     3 bmw   

$sport
# A tibble: 3 x 2
  index interest
* <dbl> <fct>   
1     1 swim    
2     2 soccer  
3     3 rugby  

但是,如果列表的数量未知,假设全局环境中所有名称符合“list”模式的对象都是需要合并的列表,则可以采用以下方法。

# Collect every object in the current environment whose name starts with
# "list" into one named list; each element is expected to be a named list of
# data frames (not checked here).
Lol <- mget(ls(pattern="^list+"))  # list of lists

# Combine same-named data frames spread over several named lists.
#
# z: a list of named lists of data frames (e.g. one inner list per workbook,
#    one data frame per sheet). Inner lists may be empty.
#
# Returns a named list with one data frame per distinct inner name. Each one
# holds the rows of every data frame that carried that name, with columns that
# belong only to other names removed and row names reset. Rows come back in
# the order produced by merge(), and rows that agree on all shared columns are
# matched by merge() rather than repeated. An input without any data frame
# yields an empty list.
mergeFun <- function(z) {
  # Tag each data frame with its own name in a helper column called "list".
  # seq_along() keeps empty inner lists harmless (1:length(y) would yield 1:0).
  tagged <- lapply(z, function(y) {
    lapply(seq_along(y), function(i) cbind(y[[i]], list = names(y)[i]))
  })
  flat <- unlist(tagged, recursive = FALSE)  # one flat list of data frames
  if (length(flat) == 0L) {
    return(list())
  }

  # Full outer merge over all shared columns; the "list" tag keeps rows from
  # differently named data frames apart.
  merged <- Reduce(function(...) merge(..., all = TRUE), flat)
  by_name <- split(merged, merged$list)

  cleaned <- lapply(by_name, function(w) {
    # Drop the helper column by name: its position depends on how many columns
    # the merged frames share, so a fixed index can remove a real column.
    w <- w[, names(w) != "list", drop = FALSE]
    # Columns that are entirely NA belong to data frames with another name.
    Filter(function(v) !all(is.na(v)), w)
  })
  lapply(cleaned, function(u) `rownames<-`(u, NULL))
}

根据需要执行lapply(mergeFun(Lol), dplyr::as_tibble)以获得tibble,否则只需执行mergeFun(Lol)。

输出结果

> lapply(mergeFun(Lol), dplyr::as_tibble)
$`games`
# A tibble: 6 x 2
  index player
  <dbl> <fct> 
1     1 John  
2     1 AA    
3     2 Sam   
4     2 BB    
5     3 Mary  
6     3 CC    

$weather
# A tibble: 6 x 2
  index temperature
  <dbl> <fct>      
1     1 cold       
2     1 hot        
3     2 cold       
4     2 rainy      
5     3 hot        
6     3 rainy      

$cars
# A tibble: 3 x 2
  index car   
  <dbl> <fct> 
1     1 honda 
2     2 toyota
3     3 bmw   

$sport
# A tibble: 3 x 2
  index interest
  <dbl> <fct>   
1     1 swim    
2     2 soccer  
3     3 rugby   

数据

# Example data written out as structure() literals (the text columns are
# stored as factors with explicit .Label levels).
# list1: data frames "games", "weather" and "cars", three rows each.
list1 <- list(games = structure(list(index = c(1, 2, 3), player = structure(c(1L, 
3L, 2L), .Label = c("John", "Mary", "Sam"), class = "factor")), class = "data.frame", row.names = c(NA, 
-3L)), weather = structure(list(index = c(1, 2, 3), temperature = structure(c(2L, 
1L, 3L), .Label = c("cold", "hot", "rainy"), class = "factor")), class = "data.frame", row.names = c(NA, 
-3L)), cars = structure(list(index = c(1, 2, 3), car = structure(c(2L, 
3L, 1L), .Label = c("bmw", "honda", "toyota"), class = "factor")), class = "data.frame", row.names = c(NA, 
-3L)))
# list2: "games" and "weather" with different values, plus a "sport" data
# frame that list1 does not contain.
list2 <- list(games = structure(list(index = c(1, 2, 3), player = structure(1:3, .Label = c("AA", 
"BB", "CC"), class = "factor")), class = "data.frame", row.names = c(NA, 
-3L)), weather = structure(list(index = c(1, 2, 3), temperature = structure(c(1L, 
3L, 2L), .Label = c("cold", "hot", "rainy"), class = "factor")), class = "data.frame", row.names = c(NA, 
-3L)), sport = structure(list(index = c(1, 2, 3), interest = structure(3:1, .Label = c("rugby", 
"soccer", "swim"), class = "factor")), class = "data.frame", row.names = c(NA, 
-3L)))
# list3: the same "games" and "weather" literals as in list2 (exact duplicates).
list3 <- list(games = structure(list(index = c(1, 2, 3), player = structure(1:3, .Label = c("AA", 
"BB", "CC"), class = "factor")), class = "data.frame", row.names = c(NA, 
-3L)), weather = structure(list(index = c(1, 2, 3), temperature = structure(c(1L, 
3L, 2L), .Label = c("cold", "hot", "rainy"), class = "factor")), class = "data.frame", row.names = c(NA, 
-3L)))