考虑以下列表:
> l1 <- list(NULL,1,2,list(NULL,3,list(NULL,4)))
> str(l1)
List of 4
$ : NULL
$ : num 1
$ : num 2
$ :List of 3
..$ : NULL
..$ : num 3
..$ :List of 2
.. ..$ : NULL
.. ..$ : num 4
要从第一级删除NULL
值,只需调用
# Logical indexing selects the NULL entries of l1; assigning NULL deletes them.
l1[vapply(l1,is.null,logical(1L))] <- NULL
现在我想删除所有级别的所有NULL
值,我想出了以下代码。
# Remove the elements of a list for which `fun` returns TRUE.
#
# .data     : the list to clean.
# fun       : predicate called on each element; it must return a single
#             logical value (vapply enforces this). Defaults to is.null.
# recursive : when TRUE, elements that are themselves lists are cleaned
#             first, using the same `fun`.
#
# Returns `.data` with the matching elements deleted.
list.clean <- function(.data, fun = is.null, recursive = FALSE) {
  if (recursive) {
    # Descend first so nested lists are cleaned before this level is filtered.
    clean_child <- function(.item) {
      if (!is.list(.item)) {
        return(.item)
      }
      list.clean(.item, fun, TRUE)
    }
    .data <- lapply(.data, clean_child)
  }
  # Assigning NULL to the flagged positions deletes them from the list.
  drop <- vapply(.data, fun, logical(1L))
  .data[drop] <- NULL
  .data
}
并调用
> list.clean(l1, recursive = TRUE)
[[1]]
[1] 1
[[2]]
[1] 2
[[3]]
[[3]][[1]]
[1] 3
[[3]][[2]]
[[3]][[2]][[1]]
[1] 4
虽然它现在可行,但有更好或更快的方法吗?
答案 0 :(得分:17)
这可以递归地完成:
# Remove NULL elements from a list at every nesting level.
#
# x : the list to clean.
#
# Returns a list holding the non-NULL elements of `x`, with every element
# that is itself a list cleaned the same way.
rmNull <- function(x) {
  # Keep only the entries of this level that are not NULL.
  kept <- Filter(function(el) !is.null(el), x)
  # Lists are cleaned recursively; anything else passes through unchanged.
  descend <- function(el) {
    if (is.list(el)) {
      return(rmNull(el))
    }
    el
  }
  lapply(kept, descend)
}
# Strip the NULLs from every nesting level of l1 and keep the result as l2.
l2 <- rmNull(l1)
结果如下:
> str(l2)
List of 3
$ : num 1
$ : num 2
$ :List of 2
..$ : num 3
..$ :List of 1
.. ..$ : num 4
答案 1 :(得分:0)
使用外部软件包,现在也可以用rrapply
软件包(基础rapply
的修订版)中的rrapply
函数来完成此操作。设置how = "prune"
来修剪所有不满足condition
参数中定义的函数的列表元素:
# Load the rrapply package so that rrapply() is available.
library(rrapply)
# Nested example list with a NULL at each of its three levels.
l1 <- list(NULL,1,2,list(NULL,3,list(NULL,4)))
# Prune every element that fails the condition, i.e. drop every NULL.
rrapply(l1, condition = Negate(is.null), how = "prune")
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 2
#>
#> [[3]]
#> [[3]][[1]]
#> [1] 3
#>
#> [[3]][[2]]
#> [[3]][[2]][[1]]
#> [1] 4
我们可以对照OP的list.clean
函数和G. Grothendieck的rmNull
函数对大型列表对象的计算时间进行基准测试:
## benchmark recursion functions
## Remove NULL elements from a (possibly nested) list.
## x: the list to clean. Returns the list produced by lapply() after filtering.
rmNull <- function(x) {
## keep only the elements of this level that are not NULL
x <- Filter(Negate(is.null), x)
## recurse into elements that are themselves lists; others pass through unchanged
lapply(x, function(x) if (is.list(x)) rmNull(x) else x)
}
## Remove the elements of a list for which `fun` returns TRUE.
##   .data     : the list to clean
##   fun       : predicate applied to each element; must return a single
##               logical value (enforced by vapply). Defaults to is.null.
##   recursive : if TRUE, list elements are cleaned first with the same `fun`
## Returns .data with the matching elements deleted.
list.clean <- function(.data, fun = is.null, recursive = FALSE) {
if(recursive) {
## clean nested lists before filtering this level
.data <- lapply(.data, function(.item) {
if(is.list(.item)) list.clean(.item, fun, TRUE)
else .item
})
}
## assigning NULL to the flagged positions deletes them from the list
.data[vapply(.data,fun,logical(1L))] <- NULL
.data
}
## Recursively build a nested test list in which 50% of the leaf entries
## are NULL.
##   len  : number of elements in every list produced by a call to f
##   d    : depth of the current call (the benchmarks start at 1)
##   dmax : depth at which recursion stops and list(1, NULL) leaves are placed
## Returns a list of length `len` whose elements are either deeper nested
## lists or, at the last level, list(1, NULL).
f <- function(len, d, dmax) {
  layer <- vector(mode = "list", length = len)
  for (pos in seq_along(layer)) {
    layer[[pos]] <- if (d + 1 >= dmax) {
      # Last level: one value and one NULL per leaf list.
      list(1, NULL)
    } else {
      Recall(len, d + 1, dmax)
    }
  }
  layer
}
## long shallow list (3 layers, total 5e5 nodes)
## f(500, 1, 3): 500 sub-lists, each holding 500 list(1, NULL) pairs
x_long <- f(len = 500, d = 1, dmax = 3)
## time the three approaches on the same input, 5 evaluations each;
## check = "equal" presumably makes microbenchmark verify that all three
## expressions return equal results -- confirm against the package docs
microbenchmark::microbenchmark(
rmNull = rmNull(x_long),
list.clean = list.clean(x_long, recursive = TRUE),
rrapply = rrapply(x_long, condition = Negate(is.null), how = "prune"),
check = "equal",
times = 5L
)
#> Unit: milliseconds
#> expr min lq mean median uq max
#> rmNull 2381.5536 2535.6871 2559.4045 2546.0375 2571.9462 2761.7982
#> list.clean 1954.4046 1973.7983 2012.2158 2010.7334 2049.8020 2072.3409
#> rrapply 288.5784 297.9041 382.3111 301.3147 460.5107 563.2475
## deeply nested list (18 layers, total 2^18 nodes)
## f(2, 1, 18): every list has 2 children down to the list(1, NULL) leaves
x_deep <- f(len = 2, d = 1, dmax = 18)
## time the three approaches on the same input, 5 evaluations each;
## check = "equal" presumably makes microbenchmark verify that all three
## expressions return equal results -- confirm against the package docs
microbenchmark::microbenchmark(
rmNull = rmNull(x_deep),
list.clean = list.clean(x_deep, recursive = TRUE),
rrapply = rrapply(x_deep, condition = Negate(is.null), how = "prune"),
check = "equal",
times = 5L
)
#> Unit: milliseconds
#> expr min lq mean median uq max
#> rmNull 2306.5788 2360.2663 2422.2578 2367.9296 2530.201 2546.3135
#> list.clean 1708.1192 1829.1303 2014.2162 2157.2148 2180.023 2196.5937
#> rrapply 174.5385 187.9491 271.4967 200.9263 206.739 587.3306