在字符串中查找模式并在R中连接唯一的部分

时间:2016-03-25 12:13:15

标签: regex r string

假设有两个字符串 s1 和 s2。如何只把这两个字符串中不同(唯一)的部分连接起来,同时保留相同(非唯一)的部分?用 / 作为被连接部分之间的分隔符。期望的结果如下所示:

s1 <- "very big house"
s2 <- "very small house"

some_function(s1,s2)
"very big/small house" #// desired result.

通常,两个字符串的前导和尾部字符相同,差异仅出现在中间。更多期望结果的例子:

# a
s1 <- "1b"; s2 <- "2b"
"1/2b" # <- new string

# b
s1 <- "a_1_b"; s2 <- "a_2_b"
"a_1/2_b" # <- new string

# c
s1 <- "a"; s2 <- "b"
"a/b" # <- new string

1 个答案:

答案 0 :(得分:1)

这是一个解决方案:

## Merge pairs of strings by emitting their common prefix and common suffix
## once, and joining only the differing middle parts with `sep`.
##
## Arguments:
##   vs1, vs2  character vectors; the shorter one is recycled to the length
##             of the longer one
##   sep       string placed between the two differing middles (default ' ')
##
## Returns a character vector with one merged string per pair:
##   * identical pairs are returned unchanged (no separator is inserted);
##   * a pair containing NA yields NA_character_;
##   * zero-length input yields character(0).
##
## When the common prefix and common suffix would overlap (e.g. 'aa' vs
## 'aaa'), the prefix takes priority and the suffix is shortened, so that no
## character of the shorter string is emitted twice ('aa/a', not 'aa/aa').
pasteMergePrefixAndSuffix <- function(vs1,vs2,sep=' ') {
    ## cycle string vectors to same length
    vsl <- max(length(vs1),length(vs2))
    vs1 <- rep(vs1,length.out=vsl)
    vs2 <- rep(vs2,length.out=vsl)
    ## precompute character splits
    ss1 <- strsplit(vs1,'')
    ss2 <- strsplit(vs2,'')
    ## length of the leading run of TRUEs in a logical vector
    ## (0 for an empty vector, length(eq) when everything matches)
    leadingRun <- function(eq) {
        firstDiff <- which(!eq)[1L]
        if (is.na(firstDiff)) length(eq) else firstDiff-1L
    }
    ## iterate over each pair of strings; vapply guarantees a character
    ## vector result even when there are no pairs at all
    vapply(seq_len(vsl),function(si) {
        s1 <- vs1[si]
        s2 <- vs2[si]
        ## a missing string cannot be compared or merged
        if (is.na(s1) || is.na(s2)) return(NA_character_)
        ## handle trivial case of exact equality
        if (s1==s2) return(s1)
        c1 <- ss1[[si]]
        c2 <- ss2[[si]]
        s1l <- length(c1)
        s2l <- length(c2)
        slmin <- min(s1l,s2l)
        ## get prefix and suffix lengths; only the first slmin positions
        ## (from either end) can possibly be shared
        idx <- seq_len(slmin)
        pl <- leadingRun(c1[idx]==c2[idx])
        sl <- leadingRun(rev(c1)[idx]==rev(c2)[idx])
        ## prefix and suffix must not overlap inside the shorter string,
        ## otherwise the overlapping characters would be output twice
        sl <- min(sl,slmin-pl)
        ## paste together prefix, sep-pasted middles, and suffix
        m1 <- substr(s1,pl+1L,s1l-sl)
        m2 <- substr(s2,pl+1L,s2l-sl)
        paste0(substr(s1,1L,pl),paste(m1,m2,sep=sep),substr(s1,s1l-sl+1L,s1l))
    },character(1L))
}

演示:

## Demonstration calls; each `## [1] ...` line shows the printed result.

## default separator (a single space)
pasteMergePrefixAndSuffix("a", "b")
## [1] "a b"

## explicit separator
pasteMergePrefixAndSuffix("a", "b", "/")
## [1] "a/b"

## examples from the question
s1 <- "very big house"
s2 <- "very small house"
pasteMergePrefixAndSuffix(s1, s2, "/")
## [1] "very big/small house"
s1 <- "1b"
s2 <- "2b"
pasteMergePrefixAndSuffix(s1, s2, "/")
## [1] "1/2b"
s1 <- "a_1_b"
s2 <- "a_2_b"
pasteMergePrefixAndSuffix(s1, s2, "/")
## [1] "a_1/2_b"

## identical strings are returned as-is
s1 <- "ab"
s2 <- "ab"
pasteMergePrefixAndSuffix(s1, s2, "/")
## [1] "ab"

## one string is a suffix or a prefix of the other
s1 <- "xab"
s2 <- "ab"
pasteMergePrefixAndSuffix(s1, s2, "/")
## [1] "x/ab"
s1 <- "ab"
s2 <- "abx"
pasteMergePrefixAndSuffix(s1, s2, "/")
## [1] "ab/x"
s1 <- "abx"
s2 <- "ab"
pasteMergePrefixAndSuffix(s1, s2, "/")
## [1] "abx/"
s1 <- "ab"
s2 <- "xab"
pasteMergePrefixAndSuffix(s1, s2, "/")
## [1] "/xab"

## empty strings
s1 <- ""
s2 <- "x"
pasteMergePrefixAndSuffix(s1, s2, "/")
## [1] "/x"
s1 <- "x"
s2 <- ""
pasteMergePrefixAndSuffix(s1, s2, "/")
## [1] "x/"