Question

我已经详尽地研究了如何将复杂列表转换为数据框 - 但似乎我的情况相当独特：

问题：我有一个包含多个变量列表的大型列表，其观察结果（数据点）进一步列为列表。

以下是多个主要清单中代表性清单1的结构;

:List of 5
 ..$ type : chr "time"
 ..$ data :List of 134
 .. ..$ : int 5624
 .. ..$ : int 5625
 .. ..$ : int 5627
 .. .. [list output truncated]
:List of 5
 ..$ type : chr "dist"
 ..$ data :List of 134
 .. ..$ : num 22321
 .. ..$ : num 22313
 .. .. [list output truncated]
:List of 5
 ..$ type : chr "cad"
 ..$ data :List of 134
 .. ..$ : num 0.4
 .. ..$ : num 0.6
 .. .. [list output truncated]

因此，这一结构在大量列表中重复出现——目标是提取所有主要列表中由“type”定义的各个变量的“data”部分，从而将整个列表转换为数据框。

注意：其他主要列表可能包含更多变量（$ type），其子列表中的观测数也可能更多或更少——也就是说，134 这个长度并不适用于大型列表中的其他列表。

先行致谢。进一步更新：

为清楚起见，以下是数据的输出结果（dput 格式）：

`

list(structure(list(type = "time", data = list(2881L, 2885L, 
    2892L, 2893L, 2898L, 2899L, 2900L, 2901L, 2904L, 2907L, 2911L, 
    2912L, 2914L, 2918L), series_type = "distance", original_size = 14L, 
    resolution = "high"), .Names = c("type", "data", "series_type", 
"original_size", "resolution")), structure(list(type = "distance", 
    data = list(22512.4, 22548.3, 22605.5, 22615.1, 22670.3, 
        22692.2, 22705.8, 22719, 22752.3, 22771.7, 22815.6, 22827.9, 
        22851.6, 22892.5), series_type = "distance", original_size = 14L, 
    resolution = "high"), .Names = c("type", "data", "series_type", 
"original_size", "resolution")), structure(list(type = "grade_smooth", 
    data = list(-1.7, -3.1, -3.7, -3.2, -3, -3, -1.4, -2.5, -3.2, 
        -3.6, -3.7, -3.6, -2.7, -2.5),  # [paste truncated here; stray tail of a lost series: 9.7, 10.3, 10.5, 10]
    series_type = "distance", original_size = 14L, resolution = "high"), .Names = c("type", 
"data", "series_type", "original_size", "resolution")), structure(list(
    type = "time", data = list(665L, 668L, 671L, 674L, 677L, 
        680L, 683L, 686L, 689L, 692L, 695L, 698L, 701L, 704L), 
    series_type = "distance", original_size = 14L, resolution = "high"), .Names = c("type", 
"data", "series_type", "original_size", "resolution")))

` 进一步更新：

在实施解决方案后，我最终得到了这样的数据框：

Current Structure

Needed Structure

df问题：

str(unlisting) 的输出：

List of 11106 $ :List of 5 ..$ type : chr "time" ..$ data : int [1:180] 2426 2429 2432 2435 2438 2441 2445 2448 2451 2454 ... ..$ series_type : chr "distance" ..$ original_size: int 180 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "distance" ..$ data : num [1:180] 8802 8815 8826 8834 8844 ... ..$ series_type : chr "distance" ..$ original_size: int 180 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "grade_smooth" ..$ data : num [1:180] -1 -0.7 -0.6 -0.4 -0.2 -0.1 0 0.2 0.4 0.5 ... ..$ series_type : chr "distance" ..$ original_size: int 180 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "velocity_smooth" ..$ data : num [1:180] 2.7 3.9 3.9 3.2 3.1 3.6 3.6 4.4 4.5 3 ... ..$ series_type : chr "distance" ..$ original_size: int 180 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "time" ..$ data : int [1:74] 2999 3008 3009 3016 3020 3026 3027 3029 3030 3036 ... ..$ series_type : chr "distance" ..$ original_size: int 74 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "distance" ..$ data : num [1:74] 23661 23719 23735 23790 23825 ... ..$ series_type : chr "distance" ..$ original_size: int 74 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "grade_smooth" ..$ data : num [1:74] -1.3 -0.5 -0.5 0.6 0.7 1 1 0.5 1 1.9 ... ..$ series_type : chr "distance" ..$ original_size: int 74 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "velocity_smooth" ..$ data : num [1:74] 5.7 6.2 7.4 8.9 8.2 8.6 8.2 8.3 8.2 9.2 ... ..$ series_type : chr "distance" ..$ original_size: int 74 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "time" ..$ data : int [1:60] 396 403 410 416 418 424 429 437 447 455 ... ..$ series_type : chr "distance" ..$ original_size: int 60 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "distance" ..$ data : num [1:60] 935 964 992 1014 1020 ... 
..$ series_type : chr "distance" ..$ original_size: int 60 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "heartrate" ..$ data : int [1:60] 121 117 117 111 108 107 109 112 116 121 ... ..$ series_type : chr "distance" ..$ original_size: int 60 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "grade_smooth" ..$ data : num [1:60] -0.7 -0.5 -0.3 0.1 0.4 0.8 1 0.9 1.4 1.6 ... ..$ series_type : chr "distance" ..$ original_size: int 60 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "velocity_smooth" ..$ data : num [1:60] 3 4 4.1 3.8 3.4 2.8 3.4 5.2 6.2 6.7 ... ..$ series_type : chr "distance" ..$ original_size: int 60 ..$ resolution : chr "high" [list output truncated]

Answer 1

运行部分代码，让我知道数据框中的不同之处，我会编辑更多内容以提供帮助：

# fake data: three series, each a list holding a `type` label plus four
# list-valued fields of 134 placeholder (NULL) entries
a <- list(type = "time", data = vector("list", 134), foo3 = vector("list", 134), foo4 = vector("list",134), foo5 = vector("list",134))
b <- list(type = "dist", data = vector("list", 134), foo3 = vector("list", 134), foo4 = vector("list",134), foo5 = vector("list",134))
# inspect the field names; this has to run after `b` has been created
names(b)

c <- list(type = "cad", data = vector("list", 134), foo3 = vector("list", 134), foo4 = vector("list",134), foo5 = vector("list",134))
example <- list(a, b, c)

# populate some fake data
# seq_along() is used instead of 1:length() so an empty list is skipped
for (i in seq_along(example)) {
  for (j in seq_along(example[[i]])) {
    if (j == 1) {
      # first field is the series label
      example[[i]][[j]] <- c("time", "dist", "cad")[i]
    } else {
      # every other field becomes a list of 134 identical numbers (i + j)
      example[[i]][[j]] <- rep(list(as.numeric(i + j)), 134)
    }
  }
}

# flatten one level down: within every series, each field is passed through
# unlist(), so list-valued fields become plain atomic vectors
unlisting <- lapply(example, function(series) {
  lapply(series, unlist)
})
str(unlisting)

# convert each flattened series into its own data frame
listsofDFs <- lapply(unlisting, as.data.frame)
str(listsofDFs)

listsofDFs

修改

# Stack the per-series data frames, then regroup the observations by type.
df <- do.call(rbind, listsofDFs)
df.split <- split(df$data, df$type)

# Pad every series with NA up to the longest one so they can sit side by side.
maxLength <- max(lengths(df.split))
df2.split <- vector("list", length(df.split))
for (i in seq_along(df.split)) {
  z <- df.split[[i]]
  length(z) <- maxLength  # extending a vector's length fills the tail with NA
  df2.split[[i]] <- z
}
df2 <- do.call(cbind.data.frame, df2.split)
colnames(df2) <- names(df.split)

# Re-attach the remaining fields by looking up each time value in the stacked
# data column.
# NOTE(review): match() returns the first row whose `data` equals the time
# value, whatever its `type` -- confirm this cannot hit a non-time row.
copyrows <- match(df2$time, df$data)
# A logical mask (rather than -which(...)) still keeps every column when
# neither "type" nor "data" is present; drop = FALSE keeps a data frame even
# if only one column remains.
df2.final <- cbind.data.frame(
  df2,
  df[copyrows, !names(df) %in% c("type", "data"), drop = FALSE]
)
df2.final

head(df2.final)
#   time distance grade_smooth velocity_smooth heartrate series_type original_size resolution
# 1 2881 22512.4 -1.7 7.6 110 distance 14 high
# 2 2885 22548.3 -3.1 8.5 114 distance 14 high
# 3 2892 22605.5 -3.7 8.5 118 distance 14 high
# 4 2893 22615.1 -3.2 8.3 122 distance 14 high
# 5 2898 22670.3 -3.0 10.8 132 distance 14 high
# 6 2899 22692.2 -3.0 10.8 139 distance 14 high

tail(df2.final)
#     time distance grade_smooth velocity_smooth heartrate series_type original_size resolution
# 135 689 NA NA NA NA distance 14 high
# 136 692 NA NA NA NA distance 14 high
# 137 695 NA NA NA NA distance 14 high
# 138 698 NA NA NA NA distance 14 high
# 139 701 NA NA NA NA distance 14 high
# 140 704 NA NA NA NA distance 14 high

将具有“列表中列出的数据点”的大型列表列表转换为r中的数据帧

1 个答案: