使用从另一个data.table变量动态创建的i语句进行子集筛选

时间:2017-11-28 17:40:46

标签: r data.table

我的数据类似于以下内容:

# data.table() below is not available until the package is attached.
library(data.table)

# Fix the RNG seed so the rnorm() draws below are reproducible.
set.seed(1)
dt <- data.table(
  ID = 1:10,
  Status = rep(c("OUT", "OUT", "IN", "IN", "ON"), times = 2),
  # t1..t8 are drawn in this order; reordering them changes the values.
  t1 = round(rnorm(10), 1),
  t2 = round(rnorm(10), 1),
  t3 = round(rnorm(10), 1),
  t4 = round(rnorm(10), 1),
  t5 = round(rnorm(10), 1),
  t6 = round(rnorm(10), 1),
  t7 = round(rnorm(10), 1),
  t8 = round(rnorm(10), 1)
)

    ID Status   t1   t2   t3   t4   t5   t6   t7   t8
 1:  1    OUT -0.6  1.5  0.9  1.4 -0.2  0.4  2.4  0.5
 2:  2    OUT  0.2  0.4  0.8 -0.1 -0.3 -0.6  0.0 -0.7
 3:  3     IN -0.8 -0.6  0.1  0.4  0.7  0.3  0.7  0.6
 4:  4     IN  1.6 -2.2 -2.0 -0.1  0.6 -1.1  0.0 -0.9
 5:  5     ON  0.3  1.1  0.6 -1.4 -0.7  1.4 -0.7 -1.3
 6:  6    OUT -0.8  0.0 -0.1 -0.4 -0.7  2.0  0.2  0.3
 7:  7    OUT  0.5  0.0 -0.2 -0.4  0.4 -0.4 -1.8 -0.4
 8:  8     IN  0.7  0.9 -1.5 -0.1  0.8 -1.0  1.5  0.0
 9:  9     IN  0.6  0.8 -0.5  1.1 -0.1  0.6  0.2  0.1
10: 10     ON -0.3  0.6  0.4  0.8  0.9 -0.1  2.2 -0.6

我需要将约束应用于dt,类似于以下内容(使用fread从csv读入):

# Constraint table: each row reads as "<columns> <operator> <values>".
dt_constraints <- data.table(
  columns = c("t1", "t3", "t7", "t8"),
  operator = rep(c(">=", "<="), each = 2),
  values = c(-0.6, -0.5, 2.4, 0.5)
)

    columns operator    values
1   t1       >=         -0.6
2   t3       >=         -0.5
3   t7       <=          2.4
4   t8       <=          0.5

我可以通过在i语句中直接写出各个约束来轻松地对dt取子集:

# Hard-coded version of the four constraints, written directly in `i`.
dt_sub <- dt[t1 >= -0.6 & t3 >= -0.5 & t7 <= 2.4 & t8 <= 0.5]

    ID  Status  t1  t2  t3   t4  t5   t6    t7   t8
1   1   OUT   -0.6  1.5 0.9  1.4 -0.2  0.4  2.4  0.5
2   2   OUT    0.2  0.4 0.8 -0.1 -0.3 -0.6  0   -0.7
3   5   ON     0.3  1.1 0.6 -1.4 -0.7  1.4 -0.7 -1.3
4   7   OUT    0.5  0  -0.2 -0.4  0.4 -0.4 -1.8 -0.4
5   9   IN     0.6  0.8 -0.5  1.1 -0.1  0.6  0.2  0.1
6   10  ON    -0.3  0.6 0.4  0.8  0.9 -0.1  2.2 -0.6

但是,由于约束不断变化(每次都会读取一个新的约束csv),我正在寻找一种高效的方法,直接根据dt_constraints以编程方式对dt取子集。实际数据非常大,约束的数量也很多,因此效率是关键。

非常感谢。

2 个答案:

答案 0 :(得分:3)

另一种方法是使用非等值连接(non-equi join)进行子集筛选:

# Right-hand sides of the constraints, in the row order of dt_constraints.
thresholds <- dt_constraints[, values]
# One join condition per constraint, e.g. "t1>=V1"; V1, V2, ... are the column
# names data.table gives to the unnamed list used as `i` below.
cond <- dt_constraints[, paste0(columns, operator, "V", .I)]
# which = TRUE returns the row numbers of dt that satisfy every condition.
# nomatch = NULL makes that an empty vector when nothing qualifies; with the
# default nomatch = NA the join returns NA and dt[NA] is a spurious all-NA row.
dt[dt[as.list(thresholds), on = cond, which = TRUE, nomatch = NULL]]
   ID Status   t1  t2   t3   t4   t5   t6   t7   t8
1:  1    OUT -0.6 1.5  0.9  1.4 -0.2  0.4  2.4  0.5
2:  2    OUT  0.2 0.4  0.8 -0.1 -0.3 -0.6  0.0 -0.7
3:  5     ON  0.3 1.1  0.6 -1.4 -0.7  1.4 -0.7 -1.3
4:  7    OUT  0.5 0.0 -0.2 -0.4  0.4 -0.4 -1.8 -0.4
5:  9     IN  0.6 0.8 -0.5  1.1 -0.1  0.6  0.2  0.1
6: 10     ON -0.3 0.6  0.4  0.8  0.9 -0.1  2.2 -0.6

答案 1 :(得分:2)

我们可以用paste将约束拼接成单个字符串,然后对其执行eval:

# Paste each constraint row into "column operator value" and join the rows
# with " & " so the whole filter is a single string.
filter_text <- do.call(paste, c(dt_constraints, collapse = " & "))
# NOTE(review): eval(parse()) runs whatever text the constraints contain;
# confirm the constraints file is trusted before using this on external input.
dt[eval(parse(text = filter_text))]
#   ID Status   t1  t2   t3   t4   t5   t6   t7   t8
#1:  1    OUT -0.6 1.5  0.9  1.4 -0.2  0.4  2.4  0.5
#2:  2    OUT  0.2 0.4  0.8 -0.1 -0.3 -0.6  0.0 -0.7
#3:  5     ON  0.3 1.1  0.6 -1.4 -0.7  1.4 -0.7 -1.3
#4:  7    OUT  0.5 0.0 -0.2 -0.4  0.4 -0.4 -1.8 -0.4
#5:  9     IN  0.6 0.8 -0.5  1.1 -0.1  0.6  0.2  0.1
#6: 10     ON -0.3 0.6  0.4  0.8  0.9 -0.1  2.2 -0.6

如果我们使用tidyverse,那么

library(dplyr)
# Build the filter as one string: "column operator value" per row, joined
# with " & ".
# NOTE(review): the string is parsed as R code; confirm the constraints source
# is trusted.
constraint_text <- do.call(paste, c(dt_constraints, collapse = " & "))
# Parse the string into an expression and unquote it inside filter().
dt %>%
  filter(!!rlang::parse_expr(constraint_text))
#  ID Status   t1  t2   t3   t4   t5   t6   t7   t8
#1  1    OUT -0.6 1.5  0.9  1.4 -0.2  0.4  2.4  0.5
#2  2    OUT  0.2 0.4  0.8 -0.1 -0.3 -0.6  0.0 -0.7
#3  5     ON  0.3 1.1  0.6 -1.4 -0.7  1.4 -0.7 -1.3
#4  7    OUT  0.5 0.0 -0.2 -0.4  0.4 -0.4 -1.8 -0.4
#5  9     IN  0.6 0.8 -0.5  1.1 -0.1  0.6  0.2  0.1
#6 10     ON -0.3 0.6  0.4  0.8  0.9 -0.1  2.2 -0.6