如何在dplyr中展平tibble?

时间:2018-02-11 17:22:29

标签: r dplyr

我有以下内容:

structure(list(label = list(list(structure(list(id = 431676528L, 
    url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori", 
    name = "per sviluppatori", color = "d4c5f9", default = FALSE), .Names = c("id", 
"url", "name", "color", "default")), structure(list(id = 442034204L, 
    url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale", 
    name = "sito principale", color = "5319e7", default = FALSE), .Names = c("id", 
"url", "name", "color", "default"))), list(structure(list(id = 442051239L, 
    url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/mappa", 
    name = "mappa", color = "0052cc", default = FALSE), .Names = c("id", 
"url", "name", "color", "default")), structure(list(id = 431676528L, 
    url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori", 
    name = "per sviluppatori", color = "d4c5f9", default = FALSE), .Names = c("id", 
"url", "name", "color", "default")), structure(list(id = 442034204L, 
    url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale", 
    name = "sito principale", color = "5319e7", default = FALSE), .Names = c("id", 
"url", "name", "color", "default")))), mainId = c("216226960", 
"215647494")), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-2L), .Names = c("label", "mainId")) -> dt

# A tibble: 2 x 2
  label      mainId   
  <list>     <chr>    
1 <list [2]> 216226960
2 <list [3]> 215647494

我希望将label与mainId配对的值展平(以便我可以将label中的每个子元素与其主ID相关联)。因此,我希望得到的列名为:label、url、name、color、mainId。

我原以为可以使用类似flatten_dfr()的函数,但我不知道如何保留mainId,主要是因为我不确定flatten_dfr()的参数.id(即flatten_dfr(.id = ''))是如何工作的。

例如

dt %>% flatten_dfr(.id= 'mainId')
  

bind_rows_(x,.id)出错:参数1必须具有名称

“参数1”指的是什么?我猜它是.x,但我对这种不寻常的命名感到困惑。此外,rownames也已被弃用,因此我对这个函数所说的“名称”究竟指什么(colnames?rownames?)感到非常困惑。

更新:

这是traceback()的输出:

> traceback()
13: stop(list(message = "Argument 1 must have names", call = bind_rows_(x, 
        .id), cppstack = list(file = "", line = -1L, stack = c("1   dplyr.so                            0x000000010a9ce464 _ZN4Rcpp9exceptionC2EPKcb + 276", 
    "2   dplyr.so                            0x000000010a9ce2a0 _ZN4Rcpp4stopERKNSt3__112basic_stringIcNS0_11char_traitsIcEENS0_9allocatorIcEEEE + 48", 
    "3   dplyr.so                            0x000000010a9e5ae6 _ZN5dplyr11bad_pos_argIPKcEEviT_ + 918", 
    "4   dplyr.so                            0x000000010a9fd43a _Z11rbind__implN4Rcpp6VectorILi19ENS_15PreserveStorageEEERKN5dplyr12SymbolStringE + 4634", 
    "5   dplyr.so                            0x000000010a9ffcce _Z10bind_rows_N4Rcpp6VectorILi19ENS_15PreserveStorageEEEP7SEXPREC + 286", 
    "6   dplyr.so                            0x000000010a9c28bc _dplyr_bind_rows_ + 140", 
    "7   libR.dylib                          0x0000000101201b90 R_doDotCall + 64", 
    "8   libR.dylib                          0x0000000101203db3 do_dotcall + 355", 
    "9   libR.dylib                          0x0000000101232249 Rf_eval + 1657", 
    "10  libR.dylib                          0x0000000101244a40 do_begin + 656", 
    "11  libR.dylib                          0x0000000101231f78 Rf_eval + 936", 
    "12  libR.dylib                          0x00000001012428e2 R_execClosure + 898", 
    "13  libR.dylib                          0x00000001012320f6 Rf_eval + 1318", 
    "14  libR.dylib                          0x0000000101244a40 do_begin + 656", 
    "15  libR.dylib                          0x0000000101231f78 Rf_eval + 936", 
    "16  libR.dylib                          0x00000001012428e2 R_execClosure + 898", 
    "17  libR.dylib                          0x00000001012320f6 Rf_eval + 1318", 
    "18  libR.dylib                          0x0000000101244a40 do_begin + 656", 
    "19  libR.dylib                          0x0000000101231f78 Rf_eval + 936", 
    "20  libR.dylib                          0x00000001012428e2 R_execClosure + 898", 
    "21  libR.dylib                          0x00000001012320f6 Rf_eval + 1318", 
    "22  libR.dylib                          0x00000001012428e2 R_execClosure + 898", 
    "23  libR.dylib                          0x0000000101236384 bcEval + 16148", 
    "24  libR.dylib                          0x0000000101231e11 Rf_eval + 577", 
    "25  libR.dylib                          0x0000000101240949 forcePromise + 169", 
    "26  libR.dylib                          0x0000000101232071 Rf_eval + 1185", 
    "27  libR.dylib                          0x0000000101246761 do_withVisible + 49", 
    "28  libR.dylib                          0x0000000101281e00 do_internal + 336", 
    "29  libR.dylib                          0x0000000101236959 bcEval + 17641", 
    "30  libR.dylib                          0x0000000101231e11 Rf_eval + 577", 
    "31  libR.dylib                          0x00000001012428e2 R_execClosure + 898", 
    "32  libR.dylib                          0x0000000101236384 bcEval + 16148", 
    "33  libR.dylib                          0x0000000101231e11 Rf_eval + 577", 
    "34  libR.dylib                          0x00000001012428e2 R_execClosure + 898", 
    "35  libR.dylib                          0x00000001012320f6 Rf_eval + 1318", 
    "36  libR.dylib                          0x00000001012428e2 R_execClosure + 898", 
    "37  libR.dylib                          0x00000001012320f6 Rf_eval + 1318", 
    "38  libR.dylib                          0x00000001012460fe do_eval + 1534", 
    "39  libR.dylib                          0x00000001012367ac bcEval + 17212", 
    "40  libR.dylib                          0x0000000101231e11 Rf_eval + 577", 
    "41  libR.dylib                          0x00000001012428e2 R_execClosure + 898", 
    "42  libR.dylib                          0x0000000101236384 bcEval + 16148", 
    "43  libR.dylib                          0x0000000101231e11 Rf_eval + 577", 
    "44  libR.dylib                          0x0000000101240949 forcePromise + 169", 
    "45  libR.dylib                          0x0000000101232071 Rf_eval + 1185", 
    "46  libR.dylib                          0x0000000101246761 do_withVisible + 49", 
    "47  libR.dylib                          0x0000000101281e00 do_internal + 336", 
    "48  libR.dylib                          0x0000000101236959 bcEval + 17641", 
    "49  libR.dylib                          0x0000000101231e11 Rf_eval + 577", 
    "50  libR.dylib                          0x00000001012428e2 R_execClosure + 898", 
    "51  libR.dylib                          0x0000000101236384 bcEval + 16148", 
    "52  libR.dylib                          0x0000000101231e11 Rf_eval + 577", 
    "53  libR.dylib                          0x00000001012428e2 R_execClosure + 898", 
    "54  libR.dylib                          0x00000001012320f6 Rf_eval + 1318", 
    "55  libR.dylib                          0x00000001012729b8 Rf_ReplIteration + 904", 
    "56  libR.dylib                          0x0000000101273eaf run_Rmainloop + 207", 
    "57  rsession                            0x000000010084e259 _ZN7rstudio1r7session12runEmbeddedRERKNS_4core8FilePathES5_bb7SA_TYPERKNS1_9CallbacksEPNS1_17InternalCallbacksE + 425", 
    "58  rsession                            0x000000010082d25c _ZN7rstudio1r7session3runERKNS1_8ROptionsERKNS1_10RCallbacksE + 3164", 
    "59  rsession                            0x0000000100108603 main + 28963", 
    "60  rsession                            0x0000000100003264 start + 52", 
    "61  ???                                 0x000000000000000b 0x0 + 11"
    ))))
12: .Call(`_dplyr_bind_rows_`, dots, id)
11: bind_rows_(x, .id)
10: dplyr::bind_rows(res, .id = .id)
9: flatten_dfr(., .id = "mainId")
8: function_list[[k]](value)
7: withVisible(function_list[[k]](value))
6: freduce(value, `_function_list`)
5: `_fseq`(`_lhs`)
4: eval(quote(`_fseq`(`_lhs`)), env, env)
3: eval(quote(`_fseq`(`_lhs`)), env, env)
2: withVisible(eval(quote(`_fseq`(`_lhs`)), env, env))
1: dt %>% flatten_dfr(.id = "mainId")

更新2:

到目前为止给出的解决方案适用于我最初提供的示例,但无法处理NA。例如:

h <- structure(list(label = list(list(NA_character_)), mainId = "242390063"), .Names = c("label", 
"mainId"), row.names = c(NA, -1L), class = c("tbl_df", "tbl", 
"data.frame"))

map_df(h, flatten_dfr)
  

bind_rows_(x,.id)出错:参数1必须具有名称

很抱歉我在原始示例中没有包含NA,但也许有人可以扩展自己原来的答案来解决这个问题?

3 个答案:

答案 0 :(得分:8)

首先将label列中的数据转换为tibble,然后使用unnest提取嵌套label列中的信息。

library(tidyverse)

dt %>% 
  mutate(label = map(label, bind_rows)) %>% 
  unnest()

# A tibble: 5 x 6
#      mainId       id  url                                                                                  name             color  default
#       <chr>     <int> <chr>                                                                                <chr>            <chr>  <lgl>  
# 1 216226960 431676528 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori per sviluppatori d4c5f9 F      
# 2 216226960 442034204 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale  sito principale  5319e7 F      
# 3 215647494 442051239 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/mappa              mappa            0052cc F      
# 4 215647494 431676528 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori per sviluppatori d4c5f9 F      
# 5 215647494 442034204 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale  sito principale  5319e7 F    

答案 1 :(得分:3)

我们可以将每个列表元素转换为列表列中的tibble,然后使用unnest

library(tidyverse)

dt2 <- dt %>%
  mutate(label = map(label, ~map_dfr(., as.tibble))) %>%
  unnest()
dt2
# # A tibble: 5 x 6
#   mainId           id url                                             name        color default
#   <chr>         <int> <chr>                                           <chr>       <chr> <lgl>  
# 1 216226960 431676528 https://api.github.com/repos/emergenzeHack/ter~ per svilup~ d4c5~ F      
# 2 216226960 442034204 https://api.github.com/repos/emergenzeHack/ter~ sito princ~ 5319~ F      
# 3 215647494 442051239 https://api.github.com/repos/emergenzeHack/ter~ mappa       0052~ F      
# 4 215647494 431676528 https://api.github.com/repos/emergenzeHack/ter~ per svilup~ d4c5~ F      
# 5 215647494 442034204 https://api.github.com/repos/emergenzeHack/ter~ sito princ~ 5319~ F

答案 2 :(得分:1)

我认为具体问题(“参数1”是什么)的答案是:"label"列中的列表嵌套太深,而且它们在最顶层是未命名的。按照traceback()的输出,bind_rows调用中的“x”会被逐个传递给bind_rows_,并在那里卡住:

#Notice: 
#list(list(structure(list(id = 431676528L, ....


names( dt[["label"]][1] )
NULL

dt[["label"]][1]
#----------------
 dt[["label"]][1]
[[1]]
[[1]][[1]]
[[1]][[1]]$id
[1] 431676528

[[1]][[1]]$url
[1] "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori"

[[1]][[1]]$name
[1] "per sviluppatori"

[[1]][[1]]$color
[1] "d4c5f9"

[[1]][[1]]$default
[1] FALSE


[[1]][[2]]
[[1]][[2]]$id
[1] 442034204

[[1]][[2]]$url
[1] "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale"

[[1]][[2]]$name
[1] "sito principale"

[[1]][[2]]$color
[1] "5319e7"

[[1]][[2]]$default
[1] FALSE