根据变量值过滤行

时间:2019-06-07 19:38:30

标签: r filter dplyr

我有一个名为filter_vals的列表,其中包含iris数据集(的一个子集)每一列的过滤条件。列表中的某些条目可能是character(0)。在这种情况下,我不想对相应的列进行过滤;但当某个条目是实际的过滤条件而不是character(0)时,过滤仍应正常生效。

library(dplyr)
# Filter conditions, positionally matched to the columns used below
# (Sepal.Length, Sepal.Width, Species, x). An entry of character(0) means
# "do not filter on this column".
filter_vals = list(c(3,5), character(0), "setosa", character(0))

iris %>%
  as_tibble() %>%
  select(Sepal.Length, Sepal.Width, Species) %>% 
  mutate(x = "texttext") %>%
  filter(between(Sepal.Length, filter_vals[[1]][1], filter_vals[[1]][2]),
         # filter_vals[[2]] is character(0), so no filter should be applied to Sepal.Width here; it could, however, hold a range such as c(3,2).
         # A filter that removes no rows could perhaps be written as filter(Sepal.Width %in% Sepal.Width).
         Species %in% filter_vals[[3]],
         # filter_vals[[4]] is character(0), so no filter should be applied to column x here; it could, however, hold a value such as "textext".
         )

给定filter_vals的预期输出应为

# A tibble: 28 x 4
   Sepal.Length Sepal.Width Species x       
          <dbl>       <dbl> <fct>   <chr>   
 1          4.9         3   setosa  texttext
 2          4.7         3.2 setosa  texttext
 3          4.6         3.1 setosa  texttext
 4          5           3.6 setosa  texttext
 5          4.6         3.4 setosa  texttext
 6          5           3.4 setosa  texttext
 7          4.4         2.9 setosa  texttext
 8          4.9         3.1 setosa  texttext
 9          4.8         3.4 setosa  texttext
10          4.8         3   setosa  texttext
# … with 18 more rows

对于其他的filter_vals,输出应相应地不同,例如:

  # Same layout as before, but now the second entry holds a range for
  # Sepal.Width; the fourth entry is still character(0) (no filter on x).
  filter_vals = list(c(3,5), c(1,3), "setosa", character(0))

    iris %>%
      as_tibble() %>%
      select(Sepal.Length, Sepal.Width, Species) %>% 
      mutate(x = "texttext") %>%
      filter(between(Sepal.Length, filter_vals[[1]][1], filter_vals[[1]][2]),
             between(Sepal.Width, filter_vals[[2]][1], filter_vals[[2]][2]),
             Species %in% filter_vals[[3]],
             # filter_vals[[4]] is character(0), so no filter should be applied to column x here; it could, however, hold a value such as "textext".
             )

# A tibble: 8 x 4
  Sepal.Length Sepal.Width Species x       
         <dbl>       <dbl> <fct>   <chr>   
1          4.9         3   setosa  texttext
2          4.4         2.9 setosa  texttext
3          4.8         3   setosa  texttext
4          4.3         3   setosa  texttext
5          5           3   setosa  texttext
6          4.4         3   setosa  texttext
7          4.5         2.3 setosa  texttext
8          4.8         3   setosa  texttext

1 个答案:

答案 0 :(得分:1)

一种选择是使用pmap,对相应的列逐一求出过滤条件:

library(tidyverse)
# Build one logical mask per (numeric column, range filter, Species filter)
# triple. A filter of length 0 (e.g. character(0)) contributes an all-TRUE
# mask, i.e. it does not filter anything.
#
# The masks are plain logical vectors rather than one-column data frames:
# reducing data frames with `&` yields a one-column logical matrix, and
# passing such a matrix to filter() is deprecated in dplyr >= 1.1.0.
pmap(
  list(c("Sepal.Length", "Sepal.Width"), filter_vals[1:2], filter_vals[3:4]),
  function(col, bounds, species) {
    keep_all <- rep(TRUE, nrow(iris))
    in_range <- if (length(bounds) > 0) {
      between(iris[[col]], bounds[1], bounds[2])
    } else {
      keep_all
    }
    in_species <- if (length(species) > 0) {
      iris$Species %in% species
    } else {
      keep_all
    }
    in_range & in_species
  }
) %>%
  # Combine the per-column masks: a row is kept only if every mask keeps it.
  reduce(`&`) %>%
  filter(iris, .)

它可以包装在一个函数中

#' Filter rows by optional range and membership conditions
#'
#' For every position `i`, a row is kept when `data[[varLst[i]]]` lies between
#' `filterLst1[[i]][1]` and `filterLst1[[i]][2]` (inclusive, via
#' `dplyr::between()`) and `data[[otherCol]]` is one of `filterLst2[[i]]`.
#' The conditions of all positions are combined with a logical AND.
#' A filter of length 0 (for example `character(0)` or `numeric(0)`) means
#' "do not filter" and keeps every row for that condition.
#'
#' @param data A data frame.
#' @param filterLst1 List of range filters, one per entry of `varLst`; each is
#'   either a length-0 vector or a vector whose first two elements are the
#'   lower and upper bound.
#' @param filterLst2 List of membership filters, one per entry of `varLst`;
#'   each is either a length-0 vector or the set of allowed `otherCol` values.
#' @param varLst Character vector with the names of the columns the range
#'   filters apply to.
#' @param otherCol Name (string) of the column the membership filters apply to.
#' @return A tibble with the rows of `data` that satisfy every condition.
f1 <- function(data, filterLst1, filterLst2, varLst, otherCol) {
  keep_all <- rep(TRUE, nrow(data))

  # Range condition. The emptiness check is explicit instead of relying on
  # between() failing, so genuine errors (e.g. a misspelled column name)
  # are no longer swallowed and numeric(0) is also treated as "no filter".
  in_range <- function(x, bounds) {
    if (length(bounds) == 0) {
      return(keep_all)
    }
    dplyr::between(x, bounds[1], bounds[2])
  }

  # Membership condition; an empty set of allowed values keeps every row.
  in_set <- function(x, allowed) {
    if (length(allowed) == 0) {
      return(keep_all)
    }
    x %in% allowed
  }

  # One plain logical vector per (column, range, set) triple. Working with
  # vectors avoids handing filter() a one-column matrix, which is deprecated
  # in dplyr >= 1.1.0.
  masks <- purrr::pmap(
    list(varLst, filterLst1, filterLst2),
    function(var, bounds, allowed) {
      in_range(data[[var]], bounds) & in_set(data[[otherCol]], allowed)
    }
  )

  # `.init` makes the reduction well defined even when `varLst` is empty.
  keep <- purrr::reduce(masks, `&`, .init = keep_all)

  # `!!` inlines the mask so that a column of `data` that happens to be
  # called `keep` cannot shadow it inside filter().
  tibble::as_tibble(dplyr::filter(data, !!keep))
}


# Range filters come from entries 1-2, membership filters from entries 3-4.
f1(
  data = iris,
  filterLst1 = filter_vals[1:2],
  filterLst2 = filter_vals[3:4],
  varLst = c("Sepal.Length", "Sepal.Width"),
  otherCol = "Species"
)
# A tibble: 28 x 5
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#          <dbl>       <dbl>        <dbl>       <dbl> <fct>  
# 1          4.9         3            1.4         0.2 setosa 
# 2          4.7         3.2          1.3         0.2 setosa 
# 3          4.6         3.1          1.5         0.2 setosa 
# 4          5           3.6          1.4         0.2 setosa 
# 5          4.6         3.4          1.4         0.3 setosa 
# 6          5           3.4          1.5         0.2 setosa 
# 7          4.4         2.9          1.4         0.2 setosa 
# 8          4.9         3.1          1.5         0.1 setosa 
# 9          4.8         3.4          1.6         0.2 setosa 
#10          4.8         3            1.4         0.1 setosa 
# … with 18 more rows

更改filter_vals之后:

# Second scenario: Sepal.Width is now restricted to the range [1, 3].
filter_vals <- list(c(3, 5), c(1, 3), "setosa", character(0))
f1(
  data = iris,
  filterLst1 = filter_vals[1:2],
  filterLst2 = filter_vals[3:4],
  varLst = c("Sepal.Length", "Sepal.Width"),
  otherCol = "Species"
)
# A tibble: 8 x 5
#  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#         <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#1          4.9         3            1.4         0.2 setosa 
#2          4.4         2.9          1.4         0.2 setosa 
#3          4.8         3            1.4         0.1 setosa 
#4          4.3         3            1.1         0.1 setosa 
#5          5           3            1.6         0.2 setosa 
#6          4.4         3            1.3         0.2 setosa 
#7          4.5         2.3          1.3         0.3 setosa 
#8          4.8         3            1.4         0.3 setosa