我通过 for 循环创建了 `DF1`(或者是从别人那里拿到的)。我想检查 `DF2` 中每个日期对应的 `no` 值,是否出现在 `DF1` 同一日期的 `nos` 中(`nos` 以列表或用 `|` 拼接的字符串形式保存)。我经常遇到这种情况。
这是代码。
library(dplyr)
library(magrittr)
# Sample data: DF1 stores pipe-delimited values in `nos` per date;
# DF2 stores the single value `no` to look up for each date.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4")
) %>% print
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1, 3)
) %>% print
# Attach the matching `nos` to every DF2 row and split it into a list column.
# as.character() keeps strsplit() working when `nos` is a factor
# (the data.frame() default before R 4.0).
DF1 <- left_join(DF2, DF1, by = c("det1" = "det")) %>%
  mutate(list_column = strsplit(as.character(nos), split = "\\|")) %>%
  print
# DF1 after the join:
#         det1 no   nos list_column
# 1 2013-02-02  1   1|3        1, 3
# 2 2018-01-11  3 4|2|1     4, 2, 1
# Flag, row by row, whether `no` is one of the values split out of `nos`.
# Preallocating means the column exists even when DF1 has no rows, and
# seq_len() makes the loop a no-op there (1:nrow() would iterate over 1, 0).
DF1$present <- rep(NA, nrow(DF1))
for (i_ in seq_len(nrow(DF1))) {
  # i_ = 1
  temp <- DF1[i_, ]
  # Flatten the one-element list column into a numeric vector (printed as
  # a progress trace, like the original loop did).
  list_vals <- temp$list_column %>% unlist() %>% as.numeric() %>% print
  DF1$present[i_] <- temp$no %in% list_vals
}
#R>DF1
# det1 no nos list_column present
#1 2013-02-02 1 1|3 1, 3 TRUE
#2 2018-01-11 3 4|2|1 4, 2, 1 FALSE
创建一个逻辑列(表示 `no` 是否是 `nos` 中的某个值)的最佳方法是什么?怎样才能实现这一点,或者有没有更好的方式达到我的最终目的?
无论是 base、tidyverse 还是 data.table 的解决方案,我都欢迎。
EDIT-1
我正在寻找消除 `for` 循环的方法。
答案 0 :(得分:2)
library(data.table)
# Convert DF1 to a data.table, then add `present` in place: for each row,
# test whether `no` (as text) is among that row's split `nos` values.
setDT(DF1)
DF1[, present := vapply(
  seq_len(.N),
  function(row) as.character(no[row]) %in% list_column[[row]],
  logical(1)
)][]
det1 no nos list_column present
1: 2013-02-02 1 1|3 1,3 TRUE
2: 2018-01-11 3 4|2|1 4,2,1 FALSE
数据(只需将as.character()添加到一个位置)
# Sample data; both frames are printed as they are created.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4")
)
print(DF1)
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1, 3)
)
print(DF2)
# Join DF1 onto DF2 by date, then split `nos` (coerced to character) on "|"
# into a list column.
DF1 <- mutate(
  left_join(DF2, DF1, by = c("det1" = "det")),
  list_column = strsplit(as.character(nos), split = "\\|")
)
答案 1 :(得分:2)
**代码的第一部分**
library(dplyr)
# Sample data; both frames are printed as they are created.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4")
)
print(DF1)
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1, 3)
)
print(DF2)
# Plain join by date; `nos` stays a delimited string (no strsplit() needed).
DF1 <- left_join(DF2, DF1, by = c("det1" = "det"))
将 `grepl` 逐行应用于数据,只需一条语句:
# Match `no` as a whole number inside `nos`; the digit look-arounds stop
# 1 from matching 11. Iterating over the typed columns avoids apply(),
# which first turns the data frame into a character matrix and can pad
# numbers with spaces (" 1", "10"), so the pattern would stop matching.
DF1$present <- vapply(
  seq_len(nrow(DF1)),
  function(i) {
    pattern <- paste0("(?<!\\d)", DF1$no[i], "(?!\\d)")
    grepl(pattern, as.character(DF1$nos[i]), perl = TRUE)
  },
  logical(1)
)
结果:
det1 no nos present
1: 2013-02-02 1 1|3 TRUE
2: 2018-01-11 3 4|2|1 FALSE
该解决方案也可以“移植”到其他框架,例如 `data.table`:
library(data.table)
data.table::setDT(DF1) # convert DF1 to a data.table
# Same whole-number match as the data-frame version, evaluated on the typed
# columns `no` and `nos`. apply() is avoided because it coerces the table to
# a character matrix and can pad numbers with spaces, breaking the pattern.
DF1[, present := vapply(
  seq_len(.N),
  function(i) {
    pattern <- paste0("(?<!\\d)", no[i], "(?!\\d)")
    grepl(pattern, as.character(nos[i]), perl = TRUE)
  },
  logical(1)
)] # `:=` assigns the new column
答案 2 :(得分:2)
我们可以使用 `map2` 遍历 “list_column”,检查每个元素与对应的 no 值的交集(`intersect`)的长度(`length`)是否大于 0
library(tidyverse)
# Flag rows whose `no` appears among the split `nos` values.
# Uses the row's own `no` column rather than DF2$no, which only lines up
# while both frames keep the same row order. map2_lgl() returns a plain
# logical vector, so there is no list to compare against 0.
DF1 %>%
  mutate(present = map2_lgl(list_column, no, ~ .y %in% .x))
# det1 no nos list_column present
#1 2013-02-02 1 1|3 1, 3 TRUE
#2 2018-01-11 3 4|2|1 4, 2, 1 FALSE
或者更紧凑,没有匿名函数调用
# Compact form: intersect each split `nos` vector with the row's own `no`
# and keep rows where the intersection is non-empty. `no` is taken from
# DF1 itself, so the result does not depend on DF2's row order.
DF1 %>%
  mutate(present = lengths(map2(list_column, no, intersect)) > 0)
答案 3 :(得分:0)
我发现 `grepl` 在这种情况下很有用。
DF3 <- left_join(DF2, DF1, by = c("det1" = "det"))
# Look for `no` as a complete "|"-delimited field of `nos`. A bare
# grepl(no, nos) is a substring search, so 1 would also match "11|3".
# seq_len() keeps this safe for a 0-row join result.
DF3$present <- vapply(
  seq_len(nrow(DF3)),
  function(i) {
    grepl(paste0("(^|\\|)", DF3$no[i], "(\\||$)"), DF3$nos[i])
  },
  logical(1)
)
> DF3
det1 no nos present
1 2013-02-02 1 1|3 TRUE
2 2018-01-11 3 4|2|1 FALSE
数据(在 `data.frame()` 中加上了 `stringsAsFactors` 参数,使字符串不会被转换为因子):
# Sample data; strings are explicitly kept as character rather than factor.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4"),
  stringsAsFactors = FALSE
)
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1, 3),
  stringsAsFactors = FALSE
)
答案 4 :(得分:0)
另一个使用 `data.table::tstrsplit` 的选项:
library(data.table)
# Expand DF1 to long form: for each `det`, split `nos` on "|" and emit one
# integer `no` per row.
df1 <- setDT(DF1)[, .(no=as.integer(unlist(tstrsplit(nos, "\\|")))), by=.(det)]
# Start every DF2 row as not present, then join the long table on
# (det1 = det, no) and overwrite `present` for the DF2 rows that match;
# `i.no` is the `no` value coming from df1.
setDT(DF2)[, present := FALSE][
df1, on=c("det1"="det", "no"), present := !is.na(i.no)]
输出:
det1 no present
1: 2013-02-02 1 TRUE
2: 2018-01-11 3 FALSE
数据:
# Sample data; `no` is stored as integer so it can be joined against the
# integer values parsed out of `nos`.
DF1 <- data.frame(
  det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
  nos = c("1|3", "4|2|1", "3|4")
)
DF2 <- data.frame(
  det1 = as.Date(c("2013-02-02", "2018-01-11")),
  no = c(1L, 3L)
)