我已经详尽地研究了如何将复杂列表转换为数据框 - 但似乎我的情况相当独特:
问题: 我有一个包含多个变量列表的大型列表,其观察结果(数据点)进一步列为列表。
以下是大型列表中一个具有代表性的主列表(列表 1)的结构:
:List of 5
..$ type : chr "time"
..$ data :List of 134
.. ..$ : int 5624
.. ..$ : int 5625
.. ..$ : int 5627
.. .. [list output truncated]
:List of 5
..$ type : chr "dist"
..$ data :List of 134
.. ..$ : num 22321
.. ..$ : num 22313
.. .. [list output truncated]
:List of 5
..$ type : chr "cad"
..$ data :List of 134
.. ..$ : num 0.4
.. ..$ : num 0.6
.. .. [list output truncated]
该结构在大量列表中重复出现。我的目标是:从所有主列表中提取由“type”标识的每个变量的“data”部分,从而将整个大型列表转换为数据框。
注意:其他主列表可能包含更多变量($ type),子列表中的观测数也可能更多或更少 - 即“134 个观测值”并不适用于大型列表中的其他列表。
提前致谢。 进一步更新:
为清楚起见,以下是 dput 的输出结果:
`
list(structure(list(type = "time", data = list(2881L, 2885L,
2892L, 2893L, 2898L, 2899L, 2900L, 2901L, 2904L, 2907L, 2911L,
2912L, 2914L, 2918L), series_type = "distance", original_size = 14L,
resolution = "high"), .Names = c("type", "data", "series_type",
"original_size", "resolution")), structure(list(type = "distance",
data = list(22512.4, 22548.3, 22605.5, 22615.1, 22670.3,
22692.2, 22705.8, 22719, 22752.3, 22771.7, 22815.6, 22827.9,
22851.6, 22892.5), series_type = "distance", original_size = 14L,
resolution = "high"), .Names = c("type", "data", "series_type",
"original_size", "resolution")), structure(list(type = "grade_smooth",
data = list(-1.7, -3.1, -3.7, -3.2, -3, -3, -1.4, -2.5, -3.2,
-3.6, -3.7, -3.6, -2.7, -2.5), # truncated remnant of another series: 9.7, 10.3, 10.5, 10)
series_type = "distance", original_size = 14L, resolution = "high"), .Names = c("type",
"data", "series_type", "original_size", "resolution")), structure(list(
type = "time", data = list(665L, 668L, 671L, 674L, 677L,
680L, 683L, 686L, 689L, 692L, 695L, 698L, 701L, 704L),
series_type = "distance", original_size = 14L, resolution = "high"), .Names = c("type",
"data", "series_type", "original_size", "resolution")))
` 进一步更新:
在实施解决方案后,我最终得到了这样的数据框:
df问题:
str(unlisting)
List of 11106
$ :List of 5
..$ type : chr "time"
..$ data : int [1:180] 2426 2429 2432 2435 2438 2441 2445 2448 2451 2454 ...
..$ series_type : chr "distance"
..$ original_size: int 180
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "distance"
..$ data : num [1:180] 8802 8815 8826 8834 8844 ...
..$ series_type : chr "distance"
..$ original_size: int 180
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "grade_smooth"
..$ data : num [1:180] -1 -0.7 -0.6 -0.4 -0.2 -0.1 0 0.2 0.4 0.5 ...
..$ series_type : chr "distance"
..$ original_size: int 180
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "velocity_smooth"
..$ data : num [1:180] 2.7 3.9 3.9 3.2 3.1 3.6 3.6 4.4 4.5 3 ...
..$ series_type : chr "distance"
..$ original_size: int 180
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "time"
..$ data : int [1:74] 2999 3008 3009 3016 3020 3026 3027 3029 3030 3036 ...
..$ series_type : chr "distance"
..$ original_size: int 74
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "distance"
..$ data : num [1:74] 23661 23719 23735 23790 23825 ...
..$ series_type : chr "distance"
..$ original_size: int 74
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "grade_smooth"
..$ data : num [1:74] -1.3 -0.5 -0.5 0.6 0.7 1 1 0.5 1 1.9 ...
..$ series_type : chr "distance"
..$ original_size: int 74
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "velocity_smooth"
..$ data : num [1:74] 5.7 6.2 7.4 8.9 8.2 8.6 8.2 8.3 8.2 9.2 ...
..$ series_type : chr "distance"
..$ original_size: int 74
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "time"
..$ data : int [1:60] 396 403 410 416 418 424 429 437 447 455 ...
..$ series_type : chr "distance"
..$ original_size: int 60
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "distance"
..$ data : num [1:60] 935 964 992 1014 1020 ...
..$ series_type : chr "distance"
..$ original_size: int 60
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "heartrate"
..$ data : int [1:60] 121 117 117 111 108 107 109 112 116 121 ...
..$ series_type : chr "distance"
..$ original_size: int 60
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "grade_smooth"
..$ data : num [1:60] -0.7 -0.5 -0.3 0.1 0.4 0.8 1 0.9 1.4 1.6 ...
..$ series_type : chr "distance"
..$ original_size: int 60
..$ resolution : chr "high"
$ :List of 5
..$ type : chr "velocity_smooth"
..$ data : num [1:60] 3 4 4.1 3.8 3.4 2.8 3.4 5.2 6.2 6.7 ...
..$ series_type : chr "distance"
..$ original_size: int 60
..$ resolution : chr "high"
[list output truncated]
答案 0 :(得分:0)
请分段运行下面的代码,并告诉我你的数据框与此有何不同,我会进一步编辑答案以提供帮助:
# Fake data: three series ("time", "dist", "cad").  Each series is a list of
# five fields; every field except `type` is a list of 134 single observations,
# mirroring the nested structure shown in the question.
a <- list(type = "time", data = vector("list", 134), foo3 = vector("list", 134), foo4 = vector("list", 134), foo5 = vector("list", 134))
b <- list(type = "dist", data = vector("list", 134), foo3 = vector("list", 134), foo4 = vector("list", 134), foo5 = vector("list", 134))
# Inspect the field names (must come after `b` exists, otherwise R stops with
# "object 'b' not found").
names(b)
c <- list(type = "cad", data = vector("list", 134), foo3 = vector("list", 134), foo4 = vector("list", 134), foo5 = vector("list", 134))
example <- list(a, b, c)

# Populate some fake data: field 1 holds the series label, every other field
# becomes a list of 134 copies of the number i + j.
types <- c("time", "dist", "cad")
for (i in seq_along(example)) {
  for (j in seq_along(example[[i]])) {
    if (j == 1) {
      example[[i]][[j]] <- types[i]
    } else {
      example[[i]][[j]] <- rep(list(as.numeric(i + j)), 134)
    }
  }
}

# Apply unlist() one level down, so each field collapses from a list of
# scalars to an atomic vector while the outer list-of-series shape is kept.
unlisting <- lapply(example, function(L1) lapply(L1, unlist))
str(unlisting)

# One data frame per series; the length-1 `type` is recycled to 134 rows.
listsofDFs <- lapply(unlisting, function(L1) as.data.frame(L1))
str(listsofDFs)
listsofDFs
修改:
# Stack the per-series data frames into one long table, one row per
# observation.
df <- do.call(rbind, listsofDFs)

# Group the observations by series type; the groups may differ in length.
df.split <- split(df$data, df$type)
maxLength <- max(lengths(df.split))

# Pad every series with NA up to the longest one so they can sit side by side.
df2.split <- lapply(df.split, function(series) {
  length(series) <- maxLength
  series
})
# unname() keeps series labels from being read as data.frame() arguments
# (e.g. a series called "row.names"); the labels are restored on the next line.
df2 <- do.call(cbind.data.frame, unname(df2.split))
colnames(df2) <- names(df.split)

# Metadata is taken from the rows of `df` that hold the "time" series.
# split() keeps row order inside each group, so the k-th "time" row of `df`
# is the k-th entry of df2$time.  Looking rows up by value with match() would
# return the first equal value of *any* series and would map repeated time
# stamps to a single row, copying the wrong metadata.
copyrows <- which(df$type == "time")
length(copyrows) <- maxLength  # NA-pad; NA indices yield all-NA metadata rows
meta_cols <- !(names(df) %in% c("type", "data"))
df2.final <- cbind.data.frame(df2, df[copyrows, meta_cols, drop = FALSE])
df2.final
head(df2.final)
#time distance grade_smooth velocity_smooth heartrate series_type original_size resolution
#1 2881 22512.4 -1.7 7.6 110 distance 14 high
#2 2885 22548.3 -3.1 8.5 114 distance 14 high
#3 2892 22605.5 -3.7 8.5 118 distance 14 high
#4 2893 22615.1 -3.2 8.3 122 distance 14 high
#5 2898 22670.3 -3.0 10.8 132 distance 14 high
#6 2899 22692.2 -3.0 10.8 139 distance 14 high
tail(df2.final)
#time distance grade_smooth velocity_smooth heartrate series_type original_size resolution
#135 689 NA NA NA NA distance 14 high
#136 692 NA NA NA NA distance 14 high
#137 695 NA NA NA NA distance 14 high
#138 698 NA NA NA NA distance 14 high
#139 701 NA NA NA NA distance 14 high
#140 704 NA NA NA NA distance 14 high