我有一个名为 `filter_vals` 的列表,其中包含 iris 数据集(的一个子集)每一列的过滤条件。此列表中的某些条目可能是 `character(0)`。如果是这种情况,我不想过滤相应的列;但是,当条目是一个实际的过滤条件而不是 `character(0)` 时,我希望应用该过滤条件。
library(dplyr)
# One filter specification per column of the subset below; character(0)
# means "do not filter this column".
filter_vals <- list(c(3, 5), character(0), "setosa", character(0))

iris %>%
  as_tibble() %>%
  select(Sepal.Length, Sepal.Width, Species) %>%
  mutate(x = "texttext") %>%
  filter(
    between(Sepal.Length, filter_vals[[1]][1], filter_vals[[1]][2]),
    # filter_vals[[2]] is character(0), so Sepal.Width must not be filtered
    # here; it could, however, hold a range such as c(3, 2). A filter that
    # removes no rows might be written as Sepal.Width %in% Sepal.Width.
    Species %in% filter_vals[[3]]
    # filter_vals[[4]] is character(0), so column x must not be filtered
    # here; it could, however, hold a value such as "textext".
  )
对于上面给定的 `filter_vals`,预期输出应为:
# A tibble: 28 x 4
Sepal.Length Sepal.Width Species x
<dbl> <dbl> <fct> <chr>
1 4.9 3 setosa texttext
2 4.7 3.2 setosa texttext
3 4.6 3.1 setosa texttext
4 5 3.6 setosa texttext
5 4.6 3.4 setosa texttext
6 5 3.4 setosa texttext
7 4.4 2.9 setosa texttext
8 4.9 3.1 setosa texttext
9 4.8 3.4 setosa texttext
10 4.8 3 setosa texttext
# … with 18 more rows
对于其他的 `filter_vals`,输出应有所不同,请参见下文:
# Same specification as before, but now Sepal.Width also has a range.
filter_vals <- list(c(3, 5), c(1, 3), "setosa", character(0))

iris %>%
  as_tibble() %>%
  select(Sepal.Length, Sepal.Width, Species) %>%
  mutate(x = "texttext") %>%
  filter(
    between(Sepal.Length, filter_vals[[1]][1], filter_vals[[1]][2]),
    between(Sepal.Width, filter_vals[[2]][1], filter_vals[[2]][2]),
    Species %in% filter_vals[[3]]
    # filter_vals[[4]] is character(0), so column x must not be filtered
    # here; it could, however, hold a value such as "textext".
  )
# A tibble: 8 x 4
Sepal.Length Sepal.Width Species x
<dbl> <dbl> <fct> <chr>
1 4.9 3 setosa texttext
2 4.4 2.9 setosa texttext
3 4.8 3 setosa texttext
4 4.3 3 setosa texttext
5 5 3 setosa texttext
6 4.4 3 setosa texttext
7 4.5 2.3 setosa texttext
8 4.8 3 setosa texttext
答案 0 :(得分:1)
一种选择是使用pmap
library(tidyverse)
# Build one logical mask per (column, range, species values) triple and AND
# them together. A zero-length filter entry (e.g. character(0)) contributes
# an all-TRUE mask, i.e. that condition does not filter anything.
pmap(
  list(c("Sepal.Length", "Sepal.Width"), filter_vals[1:2], filter_vals[3:4]),
  function(col, bounds, allowed) {
    in_range <- if (length(bounds) > 0) {
      between(iris[[col]], bounds[1], bounds[2])
    } else {
      rep(TRUE, nrow(iris))
    }
    in_set <- if (length(allowed) > 0) {
      iris$Species %in% allowed
    } else {
      rep(TRUE, nrow(iris))
    }
    in_range & in_set
  }
) %>%
  # The masks are plain logical vectors, so the reduction is a logical vector
  # too. ANDing one-column data frames would instead give a one-column
  # matrix, which newer dplyr versions deprecate as a filter() condition.
  reduce(`&`) %>%
  filter(iris, .)
它可以包装在一个函数中
#' Filter a data frame by optional range and membership conditions
#'
#' For each position `i`, a row is kept when `data[[varLst[i]]]` lies between
#' `filterLst1[[i]][1]` and `filterLst1[[i]][2]` (via `dplyr::between()`) and
#' `data[[otherCol]]` is one of `filterLst2[[i]]`. A zero-length filter entry
#' (for example `character(0)` or `numeric(0)`) means "do not filter on this
#' condition". The conditions of all positions are combined with logical AND.
#'
#' @param data A data frame.
#' @param filterLst1 List of `c(lower, upper)` vectors, one per entry of
#'   `varLst`; zero-length entries are skipped.
#' @param filterLst2 List of vectors of allowed values for `otherCol`, one per
#'   entry of `varLst`; zero-length entries are skipped.
#' @param varLst Character vector of column names to range-filter.
#' @param otherCol Name of the column checked against `filterLst2`.
#' @return A tibble holding the rows of `data` that satisfy every condition.
f1 <- function(data, filterLst1, filterLst2, varLst, otherCol) {
  # Fail early on unknown columns instead of producing an empty mask later.
  unknown_cols <- setdiff(c(unlist(varLst), otherCol), names(data))
  if (length(unknown_cols) > 0) {
    stop(
      "Unknown column(s): ", paste(unknown_cols, collapse = ", "),
      call. = FALSE
    )
  }

  keep_all <- rep(TRUE, nrow(data))

  # Range condition; an empty filter is an explicit no-op. Checking the
  # length (rather than catching errors from between()) also covers
  # numeric(0), whose NA bounds would otherwise turn every row into NA.
  in_range <- function(values, bounds) {
    if (length(bounds) == 0) {
      return(keep_all)
    }
    dplyr::between(values, bounds[1], bounds[2])
  }

  # Membership condition; an empty filter is an explicit no-op.
  in_set <- function(values, allowed) {
    if (length(allowed) == 0) {
      return(keep_all)
    }
    values %in% allowed
  }

  masks <- purrr::pmap(
    list(varLst, filterLst1, filterLst2),
    function(var, bounds, allowed) {
      in_range(data[[var]], bounds) & in_set(data[[otherCol]], allowed)
    }
  )

  # .init keeps the result a full-length logical vector even when there are
  # no conditions at all.
  keep <- purrr::reduce(masks, `&`, .init = keep_all)

  # Inject the mask with !! so a column of `data` that happens to be called
  # `keep` cannot shadow it.
  dplyr::as_tibble(dplyr::filter(data, !!keep))
}
# Range filters come from entries 1-2, Species filters from entries 3-4.
f1(
  data = iris,
  filterLst1 = filter_vals[1:2],
  filterLst2 = filter_vals[3:4],
  varLst = c("Sepal.Length", "Sepal.Width"),
  otherCol = "Species"
)
# A tibble: 28 x 5
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# <dbl> <dbl> <dbl> <dbl> <fct>
# 1 4.9 3 1.4 0.2 setosa
# 2 4.7 3.2 1.3 0.2 setosa
# 3 4.6 3.1 1.5 0.2 setosa
# 4 5 3.6 1.4 0.2 setosa
# 5 4.6 3.4 1.4 0.3 setosa
# 6 5 3.4 1.5 0.2 setosa
# 7 4.4 2.9 1.4 0.2 setosa
# 8 4.9 3.1 1.5 0.1 setosa
# 9 4.8 3.4 1.6 0.2 setosa
#10 4.8 3 1.4 0.1 setosa
# … with 18 more rows
更改 `filter_vals` 之后:
# Sepal.Width now has a range as well, so fewer rows remain.
filter_vals <- list(c(3, 5), c(1, 3), "setosa", character(0))
f1(
  data = iris,
  filterLst1 = filter_vals[1:2],
  filterLst2 = filter_vals[3:4],
  varLst = c("Sepal.Length", "Sepal.Width"),
  otherCol = "Species"
)
# A tibble: 8 x 5
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# <dbl> <dbl> <dbl> <dbl> <fct>
#1 4.9 3 1.4 0.2 setosa
#2 4.4 2.9 1.4 0.2 setosa
#3 4.8 3 1.4 0.1 setosa
#4 4.3 3 1.1 0.1 setosa
#5 5 3 1.6 0.2 setosa
#6 4.4 3 1.3 0.2 setosa
#7 4.5 2.3 1.3 0.3 setosa
#8 4.8 3 1.4 0.3 setosa