有没有什么方法可以在rapply()
之上使用或构建,以便为通过以下函数获得的任意结构化列表导出相同类型的结构信息。
定义:
getRawStructure_2 <- function(input) {
.getRawStructure <- function(x, level = 0) {
level <- level + 1
out <- lapply(seq(along = x), function(el) {
name <- names(x[el])
value <- x[[el]]
cls <- class(value)
.dim <- if (any(cls %in% c("data.frame", "matrix"))) {
paste(dim(value), collapse = " ")
} else {
length(value)
}
out <- data.frame(
level = level,
name = if (is.null(name) || name == "") NA else name,
class = cls,
dim = .dim,
stringsAsFactors = FALSE
)
if (any(cls == "list")) {
deep <- .getRawStructure(x = value, level = level)
c(list(out), unlist(deep, recursive = FALSE))
} else {
list(out)
}
})
}
tmp <- do.call("rbind", unlist(.getRawStructure(x = input), recursive = FALSE))
subs <- tmp$level
subs_2 <- lapply(1:subs[which.max(subs)], function(ii) {
out <- subs == ii
out[out] <- 1
out
})
names(subs_2) <- 1:length(subs_2)
data.frame(subs_2, tmp, stringsAsFactors = FALSE)
}
输出:
> getRawStructure_2(input)
X1 X2 X3 level name class dim
1 1 0 0 1 x1 list 2
2 0 1 0 2 x11 list 2
3 0 0 1 3 x111 numeric 1
4 0 0 1 3 x112 numeric 1
5 0 1 0 2 x12 list 2
6 0 0 1 3 x121 numeric 1
7 0 0 1 3 x122 numeric 1
8 1 0 0 1 x2 list 1
9 0 1 0 2 x21 character 1
10 1 0 0 1 x3 list 1
11 0 1 0 2 <NA> character 1
12 1 0 0 1 x4 character 1
13 1 0 0 1 x5 list 1
14 0 1 0 2 <NA> integer 3
15 1 0 0 1 <NA> list 2
16 0 1 0 2 <NA> list 2
17 0 0 1 3 <NA> numeric 1
18 0 0 1 3 <NA> numeric 1
19 0 1 0 2 <NA> list 2
20 0 0 1 3 <NA> numeric 1
21 0 0 1 3 <NA> numeric 1
22 1 0 0 1 <NA> list 1
23 0 1 0 2 <NA> integer 3
24 1 0 0 1 <NA> character 1
25 1 0 0 1 <NA> character 1
26 1 0 0 1 <NA> character 3
27 1 0 0 1 <NA> numeric 3
28 1 0 0 1 <NA> logical 1
29 1 0 0 1 <NA> environment 0
30 1 0 0 1 <NA> data.frame 3 2
对于相当小的列表来说很好,但是对于较大的列表需要一些时间(参见下面的示例)。
考虑以下列表:
input <- list(
x1 = list(x11 = list(x111 = 1, x112 = 1), x12 = list(x121 = 1, x122 = 1)),
x2 = list(x21 = "x21"),
x3 = list("x31"),
x4 = "x4",
x5 = list(1:3),
list(list(1, 2), list(3, 4)),
list(1:3),
"char 1",
"char 2",
letters[1:3],
c(1,3,5),
TRUE,
new.env(),
data.frame(x = 1:3, y = 1:3)
)
最后,我希望能够以二维形式表示列表,例如listr::getStructure()
返回的列表:
> listr::getStructure(input)
X1 X2 X3 type index oindex path ppath level name class dim
1 1 NA NA 1 [[1]] [1] x1 1 1 x1 list 2
2 1 1 NA 2 [[1]][[1]] [[1]][1] x1/x11 1/1 2 x11 list 2
3 1 1 1 2 [[1]][[1]][[1]] [[1]][[1]][1] x1/x11/x111 1/1/1 3 x111 numeric 1
4 1 1 2 2 [[1]][[1]][[2]] [[1]][[1]][2] x1/x11/x112 1/1/2 3 x112 numeric 1
5 1 2 NA 2 [[1]][[2]] [[1]][2] x1/x12 1/2 2 x12 list 2
6 1 2 1 2 [[1]][[2]][[1]] [[1]][[2]][1] x1/x12/x111 1/2/1 3 x121 numeric 1
7 1 2 2 2 [[1]][[2]][[2]] [[1]][[2]][2] x1/x12/x112 1/2/2 3 x122 numeric 1
8 2 NA NA 1 [[2]] [2] x2 2 1 x2 list 1
9 2 1 NA 2 [[2]][[1]] [[2]][1] x2/x21 2/1 2 x21 character 1
10 3 NA NA 1 [[3]] [3] x3 3 1 x3 list 1
11 3 1 NA 2 [[3]][[1]] [[3]][1] x3/1 3/1 2 <NA> character 1
12 4 NA NA 3 [[4]] [4] x4 4 1 x4 character 1
13 5 NA NA 1 [[5]] [5] x5 5 1 x5 list 1
14 5 1 NA 2 [[5]][[1]] [[5]][1] x5/1 5/1 2 <NA> integer 3
15 6 NA NA 1 [[6]] [6] 6 6 1 <NA> list 2
16 6 1 NA 2 [[6]][[1]] [[6]][1] 6/1 6/1 2 <NA> list 2
17 6 1 1 2 [[6]][[1]][[1]] [[6]][[1]][1] 6/1/1 6/1/1 3 <NA> numeric 1
18 6 1 2 2 [[6]][[1]][[2]] [[6]][[1]][2] 6/1/2 6/1/2 3 <NA> numeric 1
19 6 2 NA 2 [[6]][[2]] [[6]][2] 6/2 6/2 2 <NA> list 2
20 6 2 1 2 [[6]][[2]][[1]] [[6]][[2]][1] 6/2/1 6/2/1 3 <NA> numeric 1
21 6 2 2 2 [[6]][[2]][[2]] [[6]][[2]][2] 6/2/2 6/2/2 3 <NA> numeric 1
22 7 NA NA 1 [[7]] [7] 7 7 1 <NA> list 1
23 7 1 NA 2 [[7]][[1]] [[7]][1] 7/1 7/1 2 <NA> integer 3
24 8 NA NA 3 [[8]] [8] 8 8 1 <NA> character 1
25 9 NA NA 3 [[9]] [9] 9 9 1 <NA> character 1
26 10 NA NA 3 [[10]] [10] 10 10 1 <NA> character 3
27 11 NA NA 3 [[11]] [11] 11 11 1 <NA> numeric 3
28 12 NA NA 3 [[12]] [12] 12 12 1 <NA> logical 1
29 13 NA NA 3 [[13]] [13] 13 13 1 <NA> environment 0
30 14 NA NA 3 [[14]] [14] 14 14 1 <NA> data.frame 3 2
为了做到这一点,我尝试了几种不同的方法。 rapply()
的速度似乎非常有趣(参见下面的基准),但AFAICT我无法真正将其用于我的目的,因为f
专门应用于叶子/底部值,因此我&#39 ; m&#34;失去任何&#34;中间&#34;关于分支机构或列表结构本身的信息,对吗?
如果有任何方法可以使用rapply()
- 或者调整版本 - 不知何故,我真的很感激任何指针。
说明:
require("stringr")
getRawStructure_1 <- function(input) {
struc <- capture.output(str(input, list.len = length(input)))
struc <- unlist(strsplit(struc, split = "\n"))
tops <- str_count(struc, "\\s\\$\\s")
subs <- str_count(struc, "((\\.\\.)(\\s|\\$))")
## Clean //
idx_out <- which(tops == 0 & subs == 0)
if (length(idx_out)) {
tops <- tops[-idx_out]
subs <- subs[-idx_out]
struc <- struc[-idx_out]
}
## Types //
types <- gsub(".*\\$(\\s*:|\\s*\\w+:)", "", struc)
types <- gsub("(?<=\\w)\\s.*|\\:.*", "", types, perl = TRUE)
types <- tolower(gsub("\\s|<|'", "", types))
## Names //
idx_names <- str_detect(struc, "\\$\\s\\w+.*:\\s")
nms <- if (length(idx_names)) {
gsub("\\$\\s", "", str_extract(struc, "\\$\\s\\w+"))
} else {
NA
}
## Levels //
subs_2 <- lapply(0:subs[which.max(subs)], function(ii) {
out <- subs == ii
out[out] <- 1
out
})
names(subs_2) <- 1:length(subs_2)
data.frame(
subs_2,
name = nms,
class = types,
str = struc,
stringsAsFactors = FALSE
)
}
getRawStructure_3 <- function(input) {
.getRawStructure <- function(x) {
list(level = length(x), class = class(x))
}
rapply(input, .getRawStructure, how = "list")
}
getRawStructure_1(input)
getRawStructure_2(input)
getRawStructure_3(input)
基准:
require("microbenchmark")
res <- microbenchmark(
"0.1" = str(input, list.len = length(input)),
"0.2" = capture.output(str(input, list.len = length(input))),
"1" = getRawStructure_1(input),
"2" = getRawStructure_2(input),
"3" = getRawStructure_3(input),
unit = "ms"
)
> res
Unit: milliseconds
expr min lq mean median uq max neval
0.1 22.526810 24.431485 25.3388115 25.04150 26.160263 29.466200 100
0.2 15.678034 17.549247 17.9725577 17.79237 18.328352 22.353281 100
1 17.977740 20.286033 21.1265184 20.59993 21.396797 35.075989 100
2 17.647265 19.622121 20.2261508 20.03640 20.860219 25.066969 100
3 0.076993 0.092095 0.1055568 0.09861 0.112527 0.251706 100
更大的名单:
scope <- 1000
input <- lapply(1:scope, function(ii) {
list(name = paste(sample(letters, 4), collapse = ""), value = Sys.time())
})
head(input)
res <- microbenchmark(
"0.1" = str(input, list.len = length(input)),
"0.2" = capture.output(str(input, list.len = length(input))),
"1" = getRawStructure_1(input),
"2" = getRawStructure_2(input),
"3" = getRawStructure_3(input),
unit = "ms",
times = 3
)
> res
Unit: milliseconds
expr min lq mean median uq max neval
0.1 2449.80358 2481.302677 2494.523763 2512.801773 2516.883854 2520.965936 3
0.2 1591.30210 1662.759397 1696.973466 1734.216693 1749.809148 1765.401602 3
1 1600.41742 1647.050567 1669.736494 1693.683716 1704.396033 1715.108349 3
2 2051.56185 2052.655737 2077.485806 2053.749622 2090.447782 2127.145943 3
3 7.98708 8.559489 8.765691 9.131898 9.154996 9.178094 3