R:将一列与作为列表对象存在的另一列中的值进行比较

时间:2019-07-05 12:54:16

标签: r data.table tidyverse

我通过 for 循环创建了 DF1(或者是从别人那里拿到的)。我想检查 DF2 中的 no 是否出现在对应日期的 nos 值之中(nos 以列表或用 | 连接的字符串形式存在)。我经常遇到这种情况。这是代码。

library(dplyr)
library(magrittr)
# Sample data: each date in DF1 carries a pipe-delimited string of numbers,
# and DF2 holds the single number to look up for each date.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4")
) %>% print
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1, 3)
) %>% print
# Attach `nos` to every DF2 row by date, then split it into a list column.
# as.character() keeps strsplit() working when `nos` is a factor (R < 4.0
# defaults), since strsplit() rejects non-character input.
DF1 <- left_join(DF2, DF1, by = c("det1" = "det")) %>%
  mutate(list_column = strsplit(as.character(nos), split = "\\|")) %>%
  print
# DF1
# det1 no   nos
# 1 2013-02-02  1   1|3
# 2 2018-01-11  3 4|2|1
# Preallocate the result column instead of growing it inside the loop.
DF1$present <- logical(nrow(DF1))
# seq_len() yields an empty sequence for a zero-row DF1, unlike 1:nrow(DF1),
# which would iterate over c(1, 0).
for (i_ in seq_len(nrow(DF1))) {
  # i_ = 1
  temp <- DF1[i_, ]
  # Flatten this row's list cell to a numeric vector (printed for inspection).
  list_vals <- temp$list_column %>% unlist() %>% as.numeric() %>% print
  DF1$present[i_] <- temp$no %in% list_vals
}
#R>DF1
#        det1 no   nos list_column present
#1 2013-02-02  1   1|3        1, 3    TRUE
#2 2018-01-11  3 4|2|1     4, 2, 1   FALSE

创建另一个逻辑列(表示 no 是否为 nos 中的某一个值)的最佳方法是什么?如何实现我想要做的事情,或者有没有更好的方式达到我的最终目的? 我欢迎任何解决方法:base、tidyverse 或 data.table 均可。

EDIT-1

我正在寻找消除for循环的方法。

5 个答案:

答案 0 :(得分:2)

library(data.table)
# Convert DF1 to a data.table in place.
setDT(DF1)
# Group by row number so that, within each group, `no` is a single value and
# list_column[[1]] is that row's vector of split tokens. The tokens are
# converted to numeric before matching: comparing as strings would miss values
# whose character form is not the plain digits (as.character(100000) is
# "1e+05"). The trailing [] prints the updated table.
DF1[, present := no %in% as.numeric(list_column[[1]]), by = seq_len(nrow(DF1))][]

         det1 no   nos list_column present
1: 2013-02-02  1   1|3         1,3    TRUE
2: 2018-01-11  3 4|2|1       4,2,1   FALSE

数据(只需将as.character()添加到一个位置)

# Lookup table: one pipe-delimited string of numbers per date.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4")
)
print(DF1)
# Query table: the number to search for on each date.
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1, 3)
)
print(DF2)
# Join by date and split `nos` into a list column; as.character() guards
# against `nos` having been read in as a factor.
DF1 <- DF2 %>%
  left_join(DF1, by = c("det1" = "det")) %>%
  mutate(list_column = strsplit(as.character(nos), split = "\\|"))

答案 1 :(得分:2)

**代码的第一部分**

library(dplyr)
# Lookup table: one pipe-delimited string of numbers per date.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4")
)
print(DF1)
# Query table: the number to search for on each date.
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1, 3)
)
print(DF2)
# Plain join by date; `nos` stays a delimited string (no need to strsplit()).
DF1 <- DF2 %>% left_join(DF1, by = c("det1" = "det"))

将 grepl 与 apply 结合使用,只需一行:

# For each row, test whether `no` occurs in `nos` as a whole number, i.e. not
# preceded or followed by another digit.
# Columns are read by name rather than through apply(): apply() first coerces
# the data frame to a character matrix, which runs numeric columns through
# format() and space-pads them to a common width (" 1" next to "10"), so the
# pattern built from the padded value would no longer match.
DF1$present <- vapply(
  seq_len(nrow(DF1)),
  function(i) {
    # scientific = FALSE keeps large values as plain digits in the pattern.
    pattern <- paste0(
      "(?<!\\d)", format(DF1$no[i], scientific = FALSE), "(?!\\d)"
    )
    grepl(pattern, as.character(DF1$nos[i]), perl = TRUE)
  },
  logical(1)
)

结果:

         det1 no   nos  present
1: 2013-02-02  1   1|3  TRUE
2: 2018-01-11  3 4|2|1 FALSE

例如,该解决方案可“移植”到data.table

library(data.table)

data.table::setDT(DF1) # into data.table

# Same whole-number regex test as the data.frame version, evaluated inside the
# data.table so `no` and `nos` refer to its columns. Indexing the columns
# directly avoids apply(), which would coerce the table to a character matrix
# and space-pad numeric values of differing widths.
DF1[, present := vapply(
  seq_len(.N),
  function(i) {
    # scientific = FALSE keeps large values as plain digits in the pattern.
    pattern <- paste0("(?<!\\d)", format(no[i], scientific = FALSE), "(?!\\d)")
    grepl(pattern, as.character(nos[i]), perl = TRUE)
  },
  logical(1)
)] # the := is a symbol for assignment

答案 2 :(得分:2)

我们可以使用 map2 同时遍历 list_column 和 no,检查两者 intersect(交集)结果的 length 是否大于 0

library(tidyverse)
# Pair each row's split tokens with that same row's `no` and flag rows whose
# intersection is non-empty. The joined table's own `no` column is used rather
# than DF2$no, so the result does not depend on DF2 still lining up row-for-row
# with DF1. map2_int() returns an integer vector, so `> 0` is an ordinary
# vector comparison instead of a comparison against a list.
DF1 %>%
  mutate(present = map2_int(list_column, no, ~ length(intersect(.x, .y))) > 0)
#        det1 no   nos list_column present
#1 2013-02-02  1   1|3        1, 3    TRUE
#2 2018-01-11  3 4|2|1     4, 2, 1   FALSE

或者更紧凑,没有匿名函数调用

# Compact form: intersect each row's tokens with that row's `no`, then test
# the length of each result. Uses the joined table's own `no` column instead of
# DF2$no so rows cannot get out of step with DF2.
DF1 %>%
  mutate(present = lengths(map2(list_column, no, intersect)) > 0)

答案 3 :(得分:0)

我发现grepl在这种情况下很有用。

# Attach the pipe-delimited `nos` string to each DF2 row by date.
DF3 <- left_join(DF2, DF1, by = c("det1" = "det"))
# Preallocate the flag column; seq_len() keeps the loop empty for zero rows.
DF3$present <- logical(nrow(DF3))
for (i in seq_len(nrow(DF3))) {
  # Require `no` to appear as a whole number: a bare grepl(no, nos) would also
  # report 1 as present in "10|12" because it matches substrings.
  pattern <- paste0(
    "(?<!\\d)", format(DF3$no[i], scientific = FALSE), "(?!\\d)"
  )
  DF3$present[i] <- grepl(pattern, DF3$nos[i], perl = TRUE)
}

> DF3
        det1 no   nos present
1 2013-02-02  1   1|3    TRUE
2 2018-01-11  3 4|2|1   FALSE

数据(创建时添加了 stringsAsFactors 参数,使 nos 保持为字符串):

# Sample data with `nos` kept as character. FALSE is spelled out because the
# alias `F` is an ordinary variable that can be reassigned.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4"),
  stringsAsFactors = FALSE
)
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1, 3),
  stringsAsFactors = FALSE
)

答案 4 :(得分:0)

另一个使用data.table::tstrsplit的选项:

library(data.table)
# Expand DF1 to long form: for each date `det`, split `nos` on "|" and keep
# one integer `no` per resulting row. setDT() turns DF1 into a data.table.
df1 <- setDT(DF1)[, .(no=as.integer(unlist(tstrsplit(nos, "\\|")))), by=.(det)]
# Start with present = FALSE for every DF2 row, then run an update join against
# df1 on (det1 == det, no == no): DF2 rows that find a match are overwritten
# with !is.na(i.no), where i.no is the `no` value supplied by df1.
setDT(DF2)[, present := FALSE][
    df1, on=c("det1"="det", "no"), present := !is.na(i.no)]

输出:

         det1 no present
1: 2013-02-02  1    TRUE
2: 2018-01-11  3   FALSE

数据:

# Lookup table: one pipe-delimited string of numbers per date.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4")
)
# Query table: `no` is stored as integer so it joins against the integer
# values parsed out of `nos`.
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1L, 3L)
)