我有以下输入
#mydata
ID variable1 variable2
1 a,b,c,d c,a
2 g,f,h h
3 p,l,m,n,c c,l
我希望从variable1中减去varible2的字符串,我希望得到以下输出?
#Output
ID Output
1 b,d
2 g,f
3 p,m,n
#dput
structure(list(ID = 1:3, variable1 = structure(1:3, .Label = c("a,b,c,d",
"g,f,h", "p,l,m,n,c"), class = "factor"), variable2 = structure(c(1L,
3L, 2L), .Label = c("c,a", "c,l", "h"), class = "factor")), .Names = c("ID",
"variable1", "variable2"), class = "data.frame", row.names = c(NA,
-3L))
答案 0 :(得分:5)
你可以尝试,
Map(setdiff, strsplit(as.character(df$variable1), ',')), strsplit(as.character(df$variable2), ','))
答案 1 :(得分:4)
我们可以在按Map
分割每个列后,
使用setdiff
,paste
将它们放在一起,设置list
输出的名称用' ID'列,stack
到' data.frame'并set
名称为' ID'和'输出'对于列。
setNames(stack(setNames(Map(function(x,y) toString(setdiff(x,y)),
strsplit(as.character(df1$variable1), ","),
strsplit(as.character(df1$variable2), ",")),
df1$ID))[2:1], c("ID", "Output"))
# ID Output
#1 1 b, d
#2 2 g, f
#3 3 p, m, n
或者紧凑的选项是
library(splitstackshape)
cSplit(df1, 2:3, ",", "long")[, .(Output = toString(setdiff(variable1, variable2))) , ID]
# ID Output
#1: 1 b, d
#2: 2 g, f
#3: 3 p, m, n
答案 2 :(得分:2)
使用grepl
代替setdiff
library(stringr)
a1 <- str_split(d$variable1, ",")
a2 <- str_split(d$variable2, ",")
do.call("rbind",Map(function(x,y) paste(x[!grepl(paste(y, collapse="|"), x)], collapse=","), a1, a2))
[,1]
[1,] "b,d"
[2,] "g,f"
[3,] "p,m,n"
答案 3 :(得分:2)
使用 Dplyr
mydata %>%
rowwise() %>%
mutate(output = paste0(setdiff(strsplit(as.character(variable1),split = ",")[[1]], strsplit(as.character(variable2),",")[[1]] ),collapse = ","))
%>% select(ID,output)
输出:
ID output
(int) (chr)
1 1 b,d
2 2 g,f
3 3 p,m,n