相互减去两个字符串

时间:2016-08-04 07:19:35

标签: r

我有以下输入

#mydata

ID  variable1  variable2
1    a,b,c,d      c,a 
2    g,f,h        h
3    p,l,m,n,c    c,l

我希望从variable1中减去variable2中的字符串,并得到以下输出:

#Output
ID  Output 
1    b,d      
2    g,f        
3    p,m,n    

#dput

# dput() representation of the example data: a 3-row data.frame with an
# integer ID column and two factor columns (variable1, variable2) whose
# values are comma-separated tokens. The expression is not assigned to a
# name here; the answers below refer to it as df, df1, d or mydata, so
# assign it accordingly before running them.
structure(list(ID = 1:3, variable1 = structure(1:3, .Label = c("a,b,c,d", 
"g,f,h", "p,l,m,n,c"), class = "factor"), variable2 = structure(c(1L, 
 3L, 2L), .Label = c("c,a", "c,l", "h"), class = "factor")), .Names =    c("ID", 
 "variable1", "variable2"), class = "data.frame", row.names = c(NA, 
-3L))

4 个答案:

答案 0 :(得分:5)

你可以尝试,

# Split both comma-separated columns into per-row token vectors, then take
# the row-wise set difference (tokens of variable1 not found in variable2).
# as.character() is needed because the columns are factors.
Map(
  setdiff,
  strsplit(as.character(df$variable1), ","),
  strsplit(as.character(df$variable2), ",")
)

答案 1 :(得分:4)

我们可以先用strsplit按","拆分每一列,再用Map逐行取setdiff,并把剩余元素paste在一起(此处用toString);然后用'ID'列设置list输出的名称,用stack将其转换为'data.frame',最后用setNames把列名设为'ID'和'Output'。

# Evaluated inside local() so the intermediate objects do not leak into the
# workspace; the value of the last expression is still auto-printed.
local({
  # Row-wise: tokens of variable1 that are absent from variable2, joined
  # into a single ", "-separated string.
  kept <- mapply(
    function(x, y) toString(setdiff(x, y)),
    strsplit(as.character(df1$variable1), ","),
    strsplit(as.character(df1$variable2), ","),
    SIMPLIFY = FALSE
  )
  # Name each list element by its ID so stack() carries the ID along.
  names(kept) <- df1$ID
  stacked <- stack(kept)
  # stack() returns (values, ind); swap the columns and rename them.
  setNames(stacked[2:1], c("ID", "Output"))
})
 #  ID  Output
 #1  1    b, d
 #2  2    g, f
 #3  3 p, m, n

或者紧凑的选项是

library(splitstackshape)
# Split columns 2 and 3 on "," into long form (one token per row), then, for
# each ID, join the variable1 tokens that do not occur in variable2.
cSplit(df1, splitCols = 2:3, sep = ",", direction = "long")[
  ,
  list(Output = toString(setdiff(variable1, variable2))),
  by = ID
]
#   ID  Output
#1:  1    b, d
#2:  2    g, f
#3:  3 p, m, n

答案 2 :(得分:2)

使用grepl代替setdiff

library(stringr)
a1 <- str_split(d$variable1, ",")
a2 <- str_split(d$variable2, ",")
# Match whole tokens only: the alternation is anchored with ^...$ and every
# token is quoted with \Q...\E (PCRE), so removing "a" does not also drop
# "ab", and regex metacharacters inside tokens are taken literally.
do.call("rbind", Map(function(x, y) {
  pattern <- paste0("^(?:", paste0("\\Q", y, "\\E", collapse = "|"), ")$")
  paste(x[!grepl(pattern, x, perl = TRUE)], collapse = ",")
}, a1, a2))
     [,1]   
[1,] "b,d"  
[2,] "g,f"  
[3,] "p,m,n"

答案 3 :(得分:2)

使用 dplyr

library(dplyr)

# For each row, split both comma-separated columns and keep the tokens of
# variable1 that do not occur in variable2, re-joined with ",".
# Each pipe must end its line: a line that starts with %>% is a parse error
# because the preceding expression is already complete.
mydata %>%
  rowwise() %>%
  mutate(
    output = paste0(
      setdiff(
        strsplit(as.character(variable1), split = ",")[[1]],
        strsplit(as.character(variable2), split = ",")[[1]]
      ),
      collapse = ","
    )
  ) %>%
  # Drop the row-wise grouping so later verbs work on the whole table again.
  ungroup() %>%
  select(ID, output)

输出:

    ID output
   (int)  (chr)
1     1    b,d
2     2    g,f
3     3  p,m,n