我想统计满足以下条件的交易数量:以 AK 开头、中间再次出现 AK,并且不以 AK 结尾。
示例:
排除的示例:AK->se(中间没有 AK)
排除的示例:AK->gg->se->ll(交易中间不再包含 AK)
包含的示例:AK->SE->AK->GG
示例数据:
# Sample transactions: one row per (id, Mode) pair. Steps inside a Mode
# string are separated by "->".
f <- data.frame(
  id = c(rep("A", 4), rep("C", 2), rep("D", 2), "E"),
  Mode = c(
    "AK->se",
    "se->AK->gg, bishan->K",
    "AK->se",
    "se->gr->gg, bishan->AK",
    "AK->se",
    "se->gr->gg, bishan->AK",
    "AK->se",
    "se->gr->gg, bishan->AK",
    "se->AK->df, hg->pp->sk"
  )
)
我需要处理大量数据,因此优化至关重要。
提前致谢。
编辑(更新后的示例数据):
# Revised sample transactions: one row per (id, Mode) pair. Steps inside a
# Mode string are separated by "->".
f <- data.frame(
  id = c(rep("A", 4), rep("C", 2), rep("D", 2), "E"),
  Mode = c(
    "AK->se",
    "se->AK->gg, bishan->K",
    "AK->se",
    "se->gr->gg, bishan->AK",
    "AK->se",
    "AK->AK->gg, bishan->AK",
    "AK->se->Ak->gg",
    "se->gr->gg, bishan->AK",
    "AK->AK->df, hg->pp->sk"
  )
)
答案 0 :(得分:0)
使用正则表达式
f <- data.frame(
  id = c("A", "A", "A", "A", "C", "C", "D", "D", "E"),
  Mode = c(
    "AK->se",
    "se->AK->gg, bishan->K",
    "AK->se",
    "se->gr->gg, bishan->AK",
    "AK->se",
    "se->gr->gg, bishan->AK",
    "AK->se->AK->gg",
    "se->gr->gg, bishan->AK",
    "se->AK->df, hg->pp->sk"
  ),
  stringsAsFactors = FALSE
)

# Flag transactions that start at AK, pass through AK again later, and do
# not end at AK. A "step" is one "->"-separated piece of the string.
#
# mode: character vector (or factor) of transaction strings.
# Returns a logical vector the same length as `mode`; NA and "" give FALSE.
matches_ak_pattern <- function(mode) {
  mode <- as.character(mode)
  # "^AK->"         : the first step is exactly AK.
  # "(?:.*->)?AK->" : a later step is exactly AK and another step follows it.
  # Anchoring on "->" keeps steps that merely end in AK (e.g. "xAK") out.
  revisits_ak <- grepl("^AK->(?:.*->)?AK->", mode, perl = TRUE)
  # The revisit check alone still accepts strings such as "AK->AK->AK",
  # so the "must not end with AK" rule is tested explicitly.
  ends_with_ak <- grepl("->AK$", mode)
  revisits_ak & !ends_with_ak
}

selection <- matches_ak_pattern(f$Mode)
f$Mode[selection]
f$id[selection]
使用 strsplit 按 "->" 拆分后逐条判断(如果有很多字符串,可能会慢一点)
f <- data.frame(
  id = c("A", "A", "A", "A", "C", "C", "D", "D", "E"),
  Mode = c(
    "AK->se",
    "se->AK->gg, bishan->K",
    "AK->se",
    "se->gr->gg, bishan->AK",
    "AK->se",
    "se->gr->gg, bishan->AK",
    "AK->se->AK->gg",
    "se->gr->gg, bishan->AK",
    "se->AK->df, hg->pp->sk"
  ),
  stringsAsFactors = FALSE
)

# Flag transactions whose first step is AK, whose last step is not AK, and
# which contain AK more than once. Steps are the "->"-separated pieces.
#
# mode: character vector (or factor) of transaction strings.
# Returns a logical vector the same length as `mode`; NA and "" give FALSE.
is_ak_transaction <- function(mode) {
  # as.character() lets factor columns through; strsplit() rejects factors.
  steps_per_mode <- strsplit(as.character(mode), split = "->", fixed = TRUE)
  # vapply() guarantees a logical vector even for empty input, where
  # sapply() would return a list and break the subsetting below.
  vapply(
    steps_per_mode,
    function(steps) {
      n_steps <- length(steps)
      # "" splits to character(0) and NA stays NA; neither can qualify.
      if (n_steps == 0L || anyNA(steps)) {
        return(FALSE)
      }
      steps[1L] == "AK" && steps[n_steps] != "AK" && sum(steps == "AK") > 1L
    },
    logical(1)
  )
}

selection <- is_ak_transaction(f$Mode)
f$Mode[selection]
f$id[selection]