我有一张字符串替换表,需要把其中所有的替换规则应用到目标数据框上。一个单元格中可能包含多个待替换的字符串;凡是不在替换表中的字符串都应转换为 NA。我目前是用嵌套循环实现的——又慢又难看。希望能得到一些更好的实现思路,谢谢。下面是一个例子:
library(tibble)

# Replacement table: each row maps a token (to.replace) to the text that
# should stand in for it (replace.with).
rt <- tribble(
  ~to.replace, ~replace.with,
  "abc",       "xyz",
  "def",       "qwe",
  "lkj",       "dffg",
  "cvb",       "mnb"
)

# Sample data frame. Every cell holds two tokens joined by " ; ". The pools
# contain all table tokens plus a few that are absent from the table.
set.seed(1)
pool_a <- c(rt$to.replace, "jhg", "ert", "ytr")
pool_b <- c(rt$to.replace, "vfe", "thn", "mjh")
# The four sample() calls stay in their original order so the seeded
# random stream produces the same data.
df <- data.frame(
  a = paste0(
    sample(pool_a, 10, replace = TRUE), " ; ",
    sample(pool_a, 10, replace = TRUE)
  ),
  b = paste0(
    sample(pool_b, 10, replace = TRUE), " ; ",
    sample(pool_b, 10, replace = TRUE)
  )
)
> df
a b
1 def ; def mjh ; cvb
2 lkj ; def def ; vfe
3 jhg ; jhg vfe ; cvb
4 ytr ; lkj abc ; def
5 def ; ert def ; thn
6 ytr ; cvb lkj ; vfe
7 ytr ; ert abc ; thn
8 jhg ; ytr lkj ; abc
9 jhg ; lkj mjh ; thn
10 abc ; ert lkj ; lkj
# Here is what df is supposed to look like after applying all the replacements
> df
a b
1 qwe ; qwe NA ; mnb
2 dffg ; qwe qwe ; NA
3 NA ; NA NA ; mnb
4 NA ; dffg xyz ; qwe
5 qwe ; NA qwe ; NA
6 NA ; mnb dffg ; NA
7 NA ; NA xyz ; NA
8 NA ; NA dffg ; xyz
9 NA ; dffg NA ; NA
10 xyz ; NA dffg ; dffg
答案 0 :(得分:2)
使用 base R 的一种做法是:先将每一列中的字符串按 " ; " 拆分,再用 match 在 'rt' 中查找对应项并进行替换:
# For every column: split each cell into tokens on the literal " ; ",
# look each token up in rt$to.replace, and re-join the replacements.
# match() yields NA for tokens missing from the table, and paste() renders
# those as the text "NA" inside the re-joined cell.
df[] <- lapply(df, function(column) {
  # as.character() so factor columns are split by their labels;
  # fixed = TRUE treats the separator as plain text, not a regex.
  tokens_per_cell <- strsplit(as.character(column), " ; ", fixed = TRUE)
  # vapply() pins the result to one string per cell, so a zero-row column
  # comes back as character(0) rather than the empty list sapply() gives.
  vapply(
    tokens_per_cell,
    function(tokens) {
      paste(rt$replace.with[match(tokens, rt$to.replace)], collapse = " ; ")
    },
    character(1)
  )
})
df
# a b
#1 qwe ; qwe NA ; mnb
#2 dffg ; qwe qwe ; NA
#3 NA ; NA NA ; mnb
#4 NA ; dffg xyz ; qwe
#5 qwe ; NA qwe ; NA
#6 NA ; mnb dffg ; NA
#7 NA ; NA xyz ; NA
#8 NA ; NA dffg ; xyz
#9 NA ; dffg NA ; NA
#10 xyz ; NA dffg ; dffg