在 R 中将嵌套的大型列表(数据点本身也以列表形式存储)转换为数据框

时间:2017-06-24 19:41:50

标签: r list

我已经详尽地研究了如何将复杂列表转换为数据框 - 但似乎我的情况相当独特:

问题: 我有一个包含多个变量列表的大型列表,其观察结果(数据点)进一步列为列表。

以下是多个主列表中具有代表性的列表 1 的结构:

:List of 5 ..$ type : chr "time" ..$ data :List of 134 .. ..$ : int 5624 .. ..$ : int 5625 .. ..$ : int 5627 .. .. [list output truncated] :List of 5 ..$ type : chr "dist" ..$ data :List of 134 .. ..$ : num 22321 .. ..$ : num 22313 .. .. [list output truncated] :List of 5 ..$ type : chr "cad" ..$ data :List of 134 .. ..$ : num 0.4 .. ..$ : num 0.6 .. .. [list output truncated]

因此,该结构被复制用于大量列表 - 目的是通过拉动所有主要列表中“类型”定义的所有变量的“数据”部分将该列表转换为数据帧

注意:其他主要列表可能包含更多变量($ type)和更高/更低的子列表观察数 - 即134列表不适用于Large列表中的其他列表。

提前感谢。 进一步更新:

为清楚起见,以下是输出结果:

`

list(structure(list(type = "time", data = list(2881L, 2885L, 
    2892L, 2893L, 2898L, 2899L, 2900L, 2901L, 2904L, 2907L, 2911L, 
    2912L, 2914L, 2918L), series_type = "distance", original_size = 14L, 
    resolution = "high"), .Names = c("type", "data", "series_type", 
"original_size", "resolution")), structure(list(type = "distance", 
    data = list(22512.4, 22548.3, 22605.5, 22615.1, 22670.3, 
        22692.2, 22705.8, 22719, 22752.3, 22771.7, 22815.6, 22827.9, 
        22851.6, 22892.5), series_type = "distance", original_size = 14L, 
    resolution = "high"), .Names = c("type", "data", "series_type", 
"original_size", "resolution")), structure(list(type = "grade_smooth", 
    data = list(-1.7, -3.1, -3.7, -3.2, -3, -3, -1.4, -2.5, -3.2, 
        -3.6, -3.7, -3.6, -2.7, -2.5)9.7, 10.3, 10.5, 10), 
    series_type = "distance", original_size = 14L, resolution = "high"), .Names = c("type", 
"data", "series_type", "original_size", "resolution")), structure(list(
    type = "time", data = list(665L, 668L, 671L, 674L, 677L, 
        680L, 683L, 686L, 689L, 692L, 695L, 698L, 701L, 704L), 
    series_type = "distance", original_size = 14L, resolution = "high"), .Names = c("type", 
"data", "series_type", "original_size", "resolution")))

` 进一步更新:

在实施解决方案后,我最终得到了这样的数据框:

Current Structure

Needed Structure

df问题:

str(unlisting)

List of 11106 $ :List of 5 ..$ type : chr "time" ..$ data : int [1:180] 2426 2429 2432 2435 2438 2441 2445 2448 2451 2454 ... ..$ series_type : chr "distance" ..$ original_size: int 180 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "distance" ..$ data : num [1:180] 8802 8815 8826 8834 8844 ... ..$ series_type : chr "distance" ..$ original_size: int 180 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "grade_smooth" ..$ data : num [1:180] -1 -0.7 -0.6 -0.4 -0.2 -0.1 0 0.2 0.4 0.5 ... ..$ series_type : chr "distance" ..$ original_size: int 180 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "velocity_smooth" ..$ data : num [1:180] 2.7 3.9 3.9 3.2 3.1 3.6 3.6 4.4 4.5 3 ... ..$ series_type : chr "distance" ..$ original_size: int 180 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "time" ..$ data : int [1:74] 2999 3008 3009 3016 3020 3026 3027 3029 3030 3036 ... ..$ series_type : chr "distance" ..$ original_size: int 74 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "distance" ..$ data : num [1:74] 23661 23719 23735 23790 23825 ... ..$ series_type : chr "distance" ..$ original_size: int 74 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "grade_smooth" ..$ data : num [1:74] -1.3 -0.5 -0.5 0.6 0.7 1 1 0.5 1 1.9 ... ..$ series_type : chr "distance" ..$ original_size: int 74 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "velocity_smooth" ..$ data : num [1:74] 5.7 6.2 7.4 8.9 8.2 8.6 8.2 8.3 8.2 9.2 ... ..$ series_type : chr "distance" ..$ original_size: int 74 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "time" ..$ data : int [1:60] 396 403 410 416 418 424 429 437 447 455 ... ..$ series_type : chr "distance" ..$ original_size: int 60 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "distance" ..$ data : num [1:60] 935 964 992 1014 1020 ... 
..$ series_type : chr "distance" ..$ original_size: int 60 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "heartrate" ..$ data : int [1:60] 121 117 117 111 108 107 109 112 116 121 ... ..$ series_type : chr "distance" ..$ original_size: int 60 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "grade_smooth" ..$ data : num [1:60] -0.7 -0.5 -0.3 0.1 0.4 0.8 1 0.9 1.4 1.6 ... ..$ series_type : chr "distance" ..$ original_size: int 60 ..$ resolution : chr "high" $ :List of 5 ..$ type : chr "velocity_smooth" ..$ data : num [1:60] 3 4 4.1 3.8 3.4 2.8 3.4 5.2 6.2 6.7 ... ..$ series_type : chr "distance" ..$ original_size: int 60 ..$ resolution : chr "high" [list output truncated]

1 个答案:

答案 0 :(得分:0)

请运行下面这段代码,并告诉我得到的数据框与您期望的结果有何不同,我会据此进一步修改答案:

# Fake data: three series shaped like the elements of the real list -- a
# `type` label followed by four list-valued fields of 134 empty slots each.
make_fake_series <- function(type, n = 134) {
  list(
    type = type,
    data = vector("list", n),
    foo3 = vector("list", n),
    foo4 = vector("list", n),
    foo5 = vector("list", n)
  )
}

a <- make_fake_series("time")
b <- make_fake_series("dist")
# Inspect the field names. This call must come after `b` is defined;
# in a fresh session, calling it earlier stops the script with an error.
names(b)

c <- make_fake_series("cad")
example <- list(a, b, c)

# Populate some fake data: the first field of each series is its type label,
# every other field becomes a list of 134 identical numbers (i + j).
type_labels <- c("time", "dist", "cad")
for (i in seq_along(example)) {
  for (j in seq_along(example[[i]])) {
    if (j == 1) {
      example[[i]][[j]] <- type_labels[i]
    } else {
      example[[i]][[j]] <- rep(list(as.numeric(i + j)), 134)
    }
  }
}

# Unlist at the right nesting level: each field of each series is flattened
# from a list of scalars into an atomic vector, while the outer structure
# (list of series, each a named list of fields) is kept.
unlisting <- lapply(example, function(series) lapply(series, unlist))
str(unlisting)

# One data frame per series; the length-1 `type` is recycled down the rows.
listsofDFs <- lapply(unlisting, as.data.frame)
str(listsofDFs)

listsofDFs

修改

# Reshape the per-series data frames into one wide data frame with one column
# per `type`. Relies on `listsofDFs` built by the earlier snippet.
df <- do.call(rbind, listsofDFs)

# One vector of observations per series type.
df.split <- split(df$data, df$type)

# Series can differ in length; pad the shorter ones with NA so they can be
# bound column-wise (`length<-` extends a vector with NA).
maxLength <- max(lengths(df.split))
df2.split <- lapply(unname(df.split), function(z) {
  length(z) <- maxLength
  z
})
df2 <- do.call(cbind.data.frame, df2.split)
colnames(df2) <- names(df.split)

# For each time value, find the row of `df` it came from so the remaining
# descriptive columns can be carried over. The lookup is restricted to the
# "time" rows: searching all of `df$data` could match a value that happens
# to occur in another series (e.g. a distance equal to a time stamp).
time_rows <- which(df$type == "time")
copyrows <- time_rows[match(df2$time, df$data[time_rows])]

# Keep every column except `type` and `data`. A logical mask with
# `drop = FALSE` stays a data frame (and keeps its column name) even when
# only one such column exists.
extra_cols <- !(names(df) %in% c("type", "data"))
df2.final <- cbind.data.frame(df2, df[copyrows, extra_cols, drop = FALSE])
df2.final

# First rows of the result. The commented output below is reproduced as
# posted; its column names (distance, grade_smooth, series_type, ...) suggest
# it was produced from the asker's real data rather than the fake `example`
# list -- TODO confirm.
head(df2.final)
  #time distance grade_smooth velocity_smooth heartrate series_type original_size resolution
#1 2881  22512.4         -1.7             7.6       110    distance            14       high
#2 2885  22548.3         -3.1             8.5       114    distance            14       high
#3 2892  22605.5         -3.7             8.5       118    distance            14       high
#4 2893  22615.1         -3.2             8.3       122    distance            14       high
#5 2898  22670.3         -3.0            10.8       132    distance            14       high
#6 2899  22692.2         -3.0            10.8       139    distance            14       high
# Last rows of the result. In the posted output the series other than `time`
# are NA here, i.e. they were shorter and got padded.
tail(df2.final)
    #time distance grade_smooth velocity_smooth heartrate series_type original_size resolution
#135  689       NA           NA              NA        NA    distance            14       high
#136  692       NA           NA              NA        NA    distance            14       high
#137  695       NA           NA              NA        NA    distance            14       high
#138  698       NA           NA              NA        NA    distance            14       high
#139  701       NA           NA              NA        NA    distance            14       high
#140  704       NA           NA              NA        NA    distance            14       high