R中的多级列表有问题。这是我的数据结构的一个小例子。
library(purrr)
> example
[[1]]
[[1]][[1]]
id.value id.name value
1 2 Tim -1.68956
2 4 Jack 1.23950
3 5 Mary -0.10897
4 3 Joseph -0.11724
5 1 Kermit 0.18308
[[1]][[2]]
id.value id.name value
1 6 Tim 0.50381
2 2 Jack 2.52834
3 1 Mary 0.54910
4 4 Joseph 0.23821
5 5 Kermit -1.04889
6 3 Red 1.29476
[[1]][[3]]
id.value id.name value
1 4 Tim -0.47279
2 1 Jack -1.06782
3 2 Mary -0.21797
4 3 Joseph -1.02600
5 5 Kermit -0.72889
[[1]]$main.id
[1] 123
[[2]]
[[2]][[1]]
id.value id.name value
1 2 Tim -1.16554
2 4 Jack -0.81852
3 1 Mary 0.68494
4 3 Joseph -0.32006
5 5 Kermit -1.31152
[[2]][[2]]
id.value id.name value
1 2 Tim 0.821581
2 4 Jack 0.688640
3 5 Mary 0.553918
4 3 Joseph -0.061912
5 1 Kermit -0.305963
[[2]][[3]]
id.value id.name value
1 2 Tim 0.80018
2 1 Jack -0.16393
3 4 Mary 1.24292
4 5 Joseph -0.93439
5 3 Kermit 0.39371
[[2]]$main.id
[1] 234
所以按我的理解,这个结构是“列表的列表的列表”,其中既包含data.frame,也包含普通向量。通常,我用purrr map
就能从这类结构中取出需要的内容,但是这次我无法深入到足够的层级。最终结果应该看起来像result
(很抱歉,如果某些值对不上——手动整理容易出错)。
> head(result, 2)
# A tibble: 2 x 5
list.id sub.list.id id.value id.name value
<dbl> <dbl> <dbl> <chr> <dbl>
1 123 1 2 Tim -0.333
2 123 1 4 Jack -1.02
> tail(result, 2)
# A tibble: 2 x 5
list.id sub.list.id id.value id.name value
<dbl> <dbl> <dbl> <chr> <dbl>
1 234 3 5 Joseph 0.548
2 234 3 3 Kermit 0.239
list.id = main.id
,我可以自己通过map_dbl(example, c("main.id"))得到它。
sub.list.id
=各data.frame在其所属主列表中的位置序号(即最后一级列表的索引)。在此示例中,每个主列表内它都是从1到3。
[[1]]
[[1]]**[[1]]**
id.value id.name value
1 2 Tim -1.6895557
2 4 Jack 1.2394959
3 5 Mary -0.1089660
4 3 Joseph -0.1172420
5 1 Kermit 0.1830826
其他变量应该可以自我解释。
我通常使用purrr map
,但是如果有其他好的方法来处理这种复杂的列表,我也乐于接受。我尝试过unlist,但它完全破坏了结构,这不是我想要的。我目前正在尝试先用bind_cols
(合并data.frame和向量),然后再用bind_rows
并配合.id
参数,但是还没有做出任何有意义的结果。
数据:
# Sample input: a list of two sub-lists. Each sub-list holds three unnamed data
# frames followed by a numeric `main.id` element (123 and 234). Every data
# frame has a nested data-frame column `id` (with columns `value` and `name`)
# and a numeric column `value`; the second data frame of the first sub-list has
# 6 rows, all others have 5.
example <- list(list(structure(list(id = structure(list(value = c(2L, 4L,
5L, 3L, 1L), name = c("Tim", "Jack", "Mary", "Joseph", "Kermit"
)), class = "data.frame", row.names = c(NA, 5L)), value = c(-1.6895556640288,
1.23949588599841, -0.108965972315484, -0.117241961787958, 0.183082613838439
)), class = "data.frame", row.names = c(NA, 5L)), structure(list(
id = structure(list(value = c(6L, 2L, 1L, 4L, 5L, 3L), name = c("Tim",
"Jack", "Mary", "Joseph", "Kermit", "Red")), class = "data.frame", row.names = c(NA,
6L)), value = c(0.503812447155119, 2.52833655070411, 0.549096735635542,
0.238212920794043, -1.04889314358654, 1.29476325458416)), class = "data.frame", row.names = c(NA,
6L)), structure(list(id = structure(list(value = c(4L, 1L, 2L,
3L, 5L), name = c("Tim", "Jack", "Mary", "Joseph", "Kermit")), class = "data.frame", row.names = c(NA,
5L)), value = c(-0.472791407727934, -1.06782370598685, -0.217974914658295,
-1.02600444830724, -0.72889122929114)), class = "data.frame", row.names = c(NA,
5L)), main.id = 123), list(structure(list(id = structure(list(
value = c(2L, 4L, 1L, 3L, 5L), name = c("Tim", "Jack", "Mary",
"Joseph", "Kermit")), class = "data.frame", row.names = c(NA,
5L)), value = c(-1.16554484788995, -0.818515722513129, 0.684936077925063,
-0.320056419276819, -1.31152241139676)), class = "data.frame", row.names = c(NA,
5L)), structure(list(id = structure(list(value = c(2L, 4L, 5L,
3L, 1L), name = c("Tim", "Jack", "Mary", "Joseph", "Kermit")), class = "data.frame", row.names = c(NA,
5L)), value = c(0.821581081637487, 0.688640254100091, 0.553917653537589,
-0.0619117105767217, -0.305962663739917)), class = "data.frame", row.names = c(NA,
5L)), structure(list(id = structure(list(value = c(2L, 1L, 4L,
5L, 3L), name = c("Tim", "Jack", "Mary", "Joseph", "Kermit")), class = "data.frame", row.names = c(NA,
5L)), value = c(0.800176865835429, -0.163930968642975, 1.24291877493732,
-0.93438505805516, 0.393708652215792)), class = "data.frame", row.names = c(NA,
5L)), main.id = 234))
预期输出:
# Expected output: a 31-row tibble with columns `list.id` (the main.id of the
# sub-list), `sub.list.id` (position of the data frame inside its sub-list),
# `id.value`, `id.name` and `value`.
# NOTE: the numbers in the `value` column below are not the ones stored in
# `example`; only the shape and the ID columns of the target are meaningful.
result <- structure(list(list.id = c(123, 123, 123, 123, 123, 123, 123,
123, 123, 123, 123, 123, 123, 123, 123, 123, 234, 234, 234, 234,
234, 234, 234, 234, 234, 234, 234, 234, 234, 234, 234), sub.list.id = c(1,
1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 2,
2, 2, 2, 2, 3, 3, 3, 3, 3), id.value = c(2, 4, 5, 3, 1, 6, 2,
1, 4, 5, 3, 4, 1, 2, 3, 5, 2, 4, 1, 3, 5, 2, 4, 5, 3, 1, 2, 1,
4, 5, 3), id.name = c("Tim", "Jack", "Mary", "Joseph", "Kermit",
"Tim", "Jack", "Mary", "Joseph", "Kermit", "Red", "Tim", "Jack",
"Mary", "Joseph", "Kermit", "Tim", "Jack", "Mary", "Joseph",
"Kermit", "Tim", "Jack", "Mary", "Joseph", "Kermit", "Tim", "Jack",
"Mary", "Joseph", "Kermit"), value = c(-0.33320738366942, -1.01857538310709,
-1.07179122647558, 0.303528641404258, 0.448209778629426, 0.0530042267305041,
0.922267467879737, 2.05008468562714, -0.491031166056535, -2.30916887564081,
1.00573852446226, -0.709200762582393, -0.688008616467358, 1.0255713696967,
-0.284773007051009, -1.22071771225454, 0.18130347974915, -0.138891362439045,
0.00576418589988693, 0.38528040112633, -0.370660031792409, 0.644376548518833,
-0.220486561818751, 0.331781963915697, 1.09683901314935, 0.435181490833803,
-0.325931585531227, 1.14880761845109, 0.993503855962119, 0.54839695950807,
0.238731735111441)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
31L))
答案 0 :(得分:2)
这有点棘手,而且tidyr::unnest
似乎由于某种原因无法正常工作。尽管如此,可以这样做:
library(purrr)
# Flatten the nested `example` list into a single data frame: one row per
# record, tagged with the main-list ID and the ID of the sub-data-frame.
example %>%
  # Store each `main.id` as the element name so that `.id` can recover it below.
  # `main.id` is a double, so it is extracted with map_dbl() and converted
  # explicitly; letting map_chr() coerce a double to character is deprecated
  # since purrr 1.0.0.
  set_names(as.character(map_dbl(., "main.id"))) %>%
  map(keep, is.data.frame) %>% # drop the now superfluous `main.id` elements
  map_dfr( # for each sublist
    function(sublist) {
      map_dfr( # for each data frame in the sublist
        sublist,
        # unnest: prefix the columns of the nested `id` data frame with "id."
        # and bind them to the second column (`value`)
        ~ dplyr::bind_cols(set_names(.x$id, ~ paste0("id.", .x)), .x[2]),
        .id = "sublist.id" # simplify sublist to data frame, adding element ID column
      )
    },
    .id = "list.id" # simplify list to data frame, adding element ID column
  ) %>%
  readr::type_convert() # the ID columns arrive as character; re-guess their types
#> list.id sublist.id id.value id.name value
#> 1 123 1 2 Tim -1.68955566
#> 2 123 1 4 Jack 1.23949589
#> 3 123 1 5 Mary -0.10896597
#> 4 123 1 3 Joseph -0.11724196
#> 5 123 1 1 Kermit 0.18308261
#> 6 123 2 6 Tim 0.50381245
#> 7 123 2 2 Jack 2.52833655
#> 8 123 2 1 Mary 0.54909674
#> 9 123 2 4 Joseph 0.23821292
#> 10 123 2 5 Kermit -1.04889314
#> 11 123 2 3 Red 1.29476325
#> 12 123 3 4 Tim -0.47279141
#> 13 123 3 1 Jack -1.06782371
#> 14 123 3 2 Mary -0.21797491
#> 15 123 3 3 Joseph -1.02600445
#> 16 123 3 5 Kermit -0.72889123
#> 17 234 1 2 Tim -1.16554485
#> 18 234 1 4 Jack -0.81851572
#> 19 234 1 1 Mary 0.68493608
#> 20 234 1 3 Joseph -0.32005642
#> 21 234 1 5 Kermit -1.31152241
#> 22 234 2 2 Tim 0.82158108
#> 23 234 2 4 Jack 0.68864025
#> 24 234 2 5 Mary 0.55391765
#> 25 234 2 3 Joseph -0.06191171
#> 26 234 2 1 Kermit -0.30596266
#> 27 234 3 2 Tim 0.80017687
#> 28 234 3 1 Jack -0.16393097
#> 29 234 3 4 Mary 1.24291877
#> 30 234 3 5 Joseph -0.93438506
#> 31 234 3 3 Kermit 0.39370865
答案 1 :(得分:1)
我对tidyverse解决方案的看法:
library(tidyverse)
# For each main list: row-bind its data frames (flattening the nested `id`
# data-frame column into `id.*` columns) and prepend the list's `main.id`.
map_dfr(
  example,
  ~ cbind(
    .["main.id"], # put main.id as our first column
    map_dfr(
      .[-length(.)], # build the rest of the table from the preceding elements
      ~ bind_cols(
        # for each of them, take out the nested `id` columns manually and rename
        rename_all(.$id, ~ paste0("id.", .)),
        .["value"]
      ),
      .id = "sub.list.id" # the .id parameter will do the required indexing
    )
  )
)
# main.id sub.list.id id.value id.name value
# 1 123 1 2 Tim -1.68955566
# 2 123 1 4 Jack 1.23949589
# 3 123 1 5 Mary -0.10896597
# 4 123 1 3 Joseph -0.11724196
# 5 123 1 1 Kermit 0.18308261
# 6 123 2 6 Tim 0.50381245
# 7 123 2 2 Jack 2.52833655
# 8 123 2 1 Mary 0.54909674
# 9 123 2 4 Joseph 0.23821292
# 10 123 2 5 Kermit -1.04889314
# 11 123 2 3 Red 1.29476325
# 12 123 3 4 Tim -0.47279141
# 13 123 3 1 Jack -1.06782371
# 14 123 3 2 Mary -0.21797491
# 15 123 3 3 Joseph -1.02600445
# 16 123 3 5 Kermit -0.72889123
# 17 234 1 2 Tim -1.16554485
# 18 234 1 4 Jack -0.81851572
# 19 234 1 1 Mary 0.68493608
# 20 234 1 3 Joseph -0.32005642
# 21 234 1 5 Kermit -1.31152241
# 22 234 2 2 Tim 0.82158108
# 23 234 2 4 Jack 0.68864025
# 24 234 2 5 Mary 0.55391765
# 25 234 2 3 Joseph -0.06191171
# 26 234 2 1 Kermit -0.30596266
# 27 234 3 2 Tim 0.80017687
# 28 234 3 1 Jack -0.16393097
# 29 234 3 4 Mary 1.24291877
# 30 234 3 5 Joseph -0.93438506
# 31 234 3 3 Kermit 0.39370865
答案 2 :(得分:0)
您可以如下循环遍历嵌套项目
# Flatten `example` with explicit loops in base R.
# Output columns are accumulated in five parallel vectors; `r` counts the rows
# written so far. Typed empty vectors are used so that an empty `example`
# still yields a data frame with the five expected columns.
list.id <- numeric(0); sub.list.id <- numeric(0); id.value <- numeric(0)
id.name <- character(0); value <- numeric(0); r <- 0
for (i in seq_along(example)) {
  list.id.value <- as.numeric(example[[i]]$main.id)
  # Every element except the last one (`main.id`) is a data frame.
  # seq_len() with max(..., 0) keeps the loop from running on an empty sub-list,
  # where `1:(length - 1)` would count backwards.
  for (j in seq_len(max(length(example[[i]]) - 1, 0))) {
    sub.list.id.value <- j
    sub.df <- example[[i]][[j]]
    n <- nrow(sub.df)
    # Positions this data frame occupies in the output vectors; writing them in
    # one vectorised step replaces the former row-by-row inner loop.
    rows <- r + seq_len(n)
    list.id[rows] <- rep(list.id.value, n)
    sub.list.id[rows] <- rep(as.numeric(sub.list.id.value), n)
    id.value[rows] <- as.numeric(sub.df[[1]][["value"]]) # column 1 is the nested `id` data frame
    id.name[rows] <- sub.df[[1]][["name"]]
    value[rows] <- as.numeric(sub.df[[2]]) # column 2 is the numeric `value`
    r <- r + n
  }
}
result <- data.frame(list.id, sub.list.id, id.value, id.name, value) # %>% as.tibble()
result