Question

我正在尝试测试一系列列中的条件。数据看起来像这样

      Name DPD_1 DPD_2 DPD_3 Default_flag
 1:    A    46    63   138         TRUE
 2:    B    12    82    33        FALSE
 3:    C    95    71    55         TRUE
 4:    D    57   133   116         TRUE
 5:    E    48    27   137         TRUE

我需要在代码中测试DPD_1、DPD_2或DPD_3中是否有任何一个大于90；如果有，则将Default_flag设置为TRUE。

我正在使用的代码如下所示

# Example table: names A-J plus three randomly sampled DPD columns.
df1 <- data.table(
  Name  = LETTERS[1:10],
  DPD_1 = sample(1:100, 10),
  DPD_2 = sample(1:200, 10),
  DPD_3 = sample(1:200, 10)
)
# Flag a row when any of the three DPD columns is at least 90; every column
# has to be written out by hand in the ifelse() condition.
df1[, Default_flag := ifelse(
  DPD_1 >= 90 | DPD_2 >= 90 | DPD_3 >= 90,
  TRUE,
  FALSE
)]

现在的问题是，对于某些数据集，我需要把DPD检查从DPD_1一直扩展到DPD_24（即检查24列，而不是当前示例中的3列）。有没有办法避免在ifelse语句中逐个指定每个DPD编号？我不介意弃用ifelse语句；如果某个版本的apply可行，我也很乐意使用。

Answer 1

在.SDcols中指定感兴趣的列后，我们可以将Reduce与|一起使用

# List the columns to test in .SDcols, compare each one with 90, and OR the
# per-column results together row by row.
df1[, Default_flag := Reduce(`|`, lapply(.SD, function(dpd) dpd >= 90)),
    .SDcols = DPD_1:DPD_3]

更新

根据提问者的评论，如果需要创建一个自动检测列名的函数，可以使用grep按模式获取列名。下面的函数接受数据集、模式（'pat'）、要比较的值（'val'）以及'n'（要检查的、匹配该模式的列数）

# Add a logical `Default_flag` column that is TRUE for rows where any of the
# selected columns is >= `val`.
#
# dat: a data.frame or data.table; it is converted with as.data.table() and
#      the converted table is what gets returned.
# pat: regular expression matched against the column names with grep().
# val: threshold each selected column is compared against with `>=`.
# n:   keep only the first `n` matching columns (in column order).
#
# Stops with an error when no column is selected.
f1 <- function(dat, pat, val, n){
  tmp <- as.data.table(dat)
  nm1 <- head(grep(pat, names(tmp), value = TRUE), n)
  # With nothing selected, Reduce() over an empty list returns NULL, and `:=`
  # treats a NULL right-hand side as a column removal rather than a flag.
  if (length(nm1) == 0L) {
    stop("no columns matching '", pat, "' to test", call. = FALSE)
  }
  # Compare every selected column with `val`, then OR the results row-wise.
  tmp[, Default_flag := Reduce(`|`,lapply(.SD, `>=`, val)), .SDcols = nm1][]
}

# Flag rows using the first two, then all three, DPD columns.
f1(df1, pat = "DPD", val = 90, n = 2)
f1(df1, pat = "DPD", val = 90, n = 3)

应@aelwan的请求，下面是使用tidyverse的方案

library(tidyverse)
# Tidyverse counterpart: add a logical `Default_flag` column that is TRUE for
# rows where any of the selected columns is >= `val`.
#
# dat: data frame (tibble, data.frame or data.table) to extend.
# pat: column-name pattern, passed unquoted (e.g. DPD); it is captured with
#      enquo() and turned into a string for grep().
# val: threshold compared against each selected column with `>=`.
# n:   keep only the first `n` matching columns (in column order).
f2 <- function(dat, pat, val, n){
  pat <- quo_name(enquo(pat))
  nm1 <- head(grep(pat, names(dat), value = TRUE), n)

  # Build one logical vector per selected column and OR them together.
  # Plain lapply() over the column names replaces the deprecated funs()
  # helper and the superseded mutate_at()/select_at() verbs.
  nm1 %>%
      lapply(function(nm) dat[[nm]] >= val) %>%
      reduce(`|`) %>%
      mutate(dat, Default_flag = .)

}

# Same two checks with the tidyverse version; the pattern is passed unquoted.
f2(df1, pat = DPD, val = 90, n = 2)
f2(df1, pat = DPD, val = 90, n = 3)

# Compare the data.table and tidyverse results for both column counts.
identical(
  f1(df1, pat = "DPD", val = 90, n = 2),
  as.data.table(f2(df1, pat = DPD, val = 90, n = 2))
)
#[1] TRUE
identical(
  f1(df1, pat = "DPD", val = 90, n = 3),
  as.data.table(f2(df1, pat = DPD, val = 90, n = 3))
)
#[1] TRUE

Answer 2

尝试：

# Rebuild the example data from the question as a data frame.
df <- read.table(header = TRUE, text = " Name DPD_1 DPD_2 DPD_3 Default_flag    
 1:    A    46    63   138         TRUE
             2:    B    12    82    33        FALSE
             3:    C    95    71    55         TRUE
             4:    D    57   133   116         TRUE
             5:    E    48    27   137         TRUE")
df

# Pick the DPD columns with list-style indexing, which keeps a data frame even
# when only one column matches, then compare each column with 90 and OR the
# results row by row. Starting from an all-FALSE vector gives FALSE for every
# row when no column matches, and it avoids apply() converting the data frame
# to a matrix.
df$Df.flag <- Reduce(
  `|`,
  lapply(df[colnames(df) %in% paste0("DPD_", 1:3)], function(x) x > 90),
  rep(FALSE, nrow(df))
)
df

Answer 3

除了@akrun被采纳的答案之外，另一种方案是使用tidyverse中的tidyr::gather（正如@r2evans在评论中所建议的那样）。

使用dplyr::starts_with而非grep检测相关变量名称。

suppressPackageStartupMessages(library(tidyverse))  

# Seeded example data: ten named rows with three sampled DPD columns.
set.seed(12345)
df1 <- tibble(
  Name = LETTERS[1:10],
  DPD_1 = sample(1:100, 10),
  DPD_2 = sample(1:200, 10),
  DPD_3 = sample(1:200, 10)
)

# Stack the DPD columns into long form, work out per Name whether any value
# exceeds 90, then join that flag back onto the original wide table.
df1 %>%
  select(Name, starts_with("DPD_")) %>%
  gather(key = DPD_name, value = DPD_value, -Name) %>%
  group_by(Name) %>%
  summarise(Default_Flag = any(DPD_value > 90)) %>%
  left_join(df1, ., by = "Name")

#> # A tibble: 10 x 5
#>     Name DPD_1 DPD_2 DPD_3 Default_Flag
#>    <chr> <int> <int> <int>        <lgl>
#>  1     A    73     7    91         TRUE
#>  2     B    87    31    66        FALSE
#>  3     C    75   146   192         TRUE
#>  4     D    86     1   140         TRUE
#>  5     E    44    77   127         TRUE
#>  6     F    16    91    77         TRUE
#>  7     G    31    76   136         TRUE
#>  8     H    48    78   106         TRUE
#>  9     I    67    35    44        FALSE
#> 10     J    91   182    93         TRUE

如何在R中的一系列列中测试条件

3 个答案:

更新