通过删除重复的子串来组合两个字符串

时间:2014-05-18 04:41:47

标签: c regex

我想要组合两个字符串,删除重复的子字符串。请注意,每两个连续的数字构成一个子字符串。考虑字符串str1和str2:

str1 = "#100#123#100#678"
str2 = "#100#678#100#56"

我想生成一个组合字符串:

comboStr = "#100#123#100#678#100#56" (i.e. I removed the duplicate #100#678)

最简单的方法是什么?有没有办法可以使用正则表达式实现这一点?

1 个答案:

答案 0 :(得分:1)

我认为正则表达式不是解决此问题的好方法。正则表达式在查找 #123 这样的标记时可能有用,但这个问题需要在字符串自身中回溯查找,而正则表达式的反向引用并不是为这种用途设计的。

我也不认为有一种简单的方法(如三行代码)来解决这个问题。

我假设字符串始终遵循模式 (#\d+)*,并且连接两个字符串时在接缝处形成的数对不需要特殊处理,即该数对也可能被视为重复。这意味着我们可以将连接与重复对的删除分开进行。

将字符串转换为整数列表,对这些列表进行操作,然后再把它们连接回字符串。这需要一些工作量,但它能让真正用于删除重复项的代码更简单(这部分本身已经足够复杂),并且当你需要经常处理类似的字符串时也会派上用场。

#include <stdlib.h>
#include <stdio.h>

/*
 *      Parse a string of the form "#<int>#<int>..." into an integer
 *      list. At most max values are stored in ilist; further values
 *      are still parsed and counted but not stored.
 *
 *      Returns the number of integers found in the string (which
 *      may be greater than max!) or -1 if the string is invalid.
 */
int ilist_split(int *ilist, int max, const char *str)
{
    const char *cursor;
    int count = 0;
    int value;
    int consumed;

    /* Each pass reads one "#<int>" token and steps over it. */
    for (cursor = str; *cursor != '\0'; cursor += consumed) {
        if (sscanf(cursor, "#%d %n", &value, &consumed) < 1) {
            return -1;
        }

        /* Keep counting past max so the caller can detect overflow. */
        if (count < max) {
            ilist[count] = value;
        }
        count++;
    }

    return count;
}

/*
 *      Convert a list of integers back to a string of the form
 *      "#<int>#<int>...". The string is at most nbuf - 1 characters
 *      long and is assured to be zero-terminated if nbuf isn't 0,
 *      even when the list is empty. It is legal to pass NULL as
 *      char buffer if nbuf is 0.
 *
 *      Returns the number of characters that would have been
 *      written had the buffer been long enough, snprintf-style,
 *      or -1 if snprintf reports an encoding error.
 */
int ilist_join(const int *ilist, int n, char *buf, int nbuf)
{
    int len = 0;
    int i;

    /* An empty list must still yield a valid (empty) string. */
    if (nbuf > 0) buf[0] = '\0';

    for (i = 0; i < n; i++) {
        int room = nbuf > len ? nbuf - len : 0;
        int written;

        /*
         * Once the buffer is full (or was never provided), only
         * measure. Passing NULL avoids forming a pointer beyond
         * the end of buf.
         */
        written = snprintf(room > 0 ? buf + len : NULL,
            (size_t) room, "#%d", ilist[i]);
        if (written < 0) return -1;

        len += written;
    }

    return len;
}

/*
 *      Auxiliary function: look for the adjacent pair (a1, a2) in
 *      the first n entries of an integer list. Returns the index
 *      of the first element of the earliest match, or -1 if the
 *      pair does not occur.
 */
int ilist_find_pair(int *ilist, int n, int a1, int a2)
{
    int pos = 0;

    /* pos is the left element of the candidate pair. */
    while (pos + 1 < n) {
        if (ilist[pos] == a1 && ilist[pos + 1] == a2) {
            return pos;
        }
        pos++;
    }

    return -1;
}

/*
 *      Remove duplicate pairs from an integer list, in place. A
 *      pair is two adjacent entries (ilist[i - 1], ilist[i]). The
 *      first occurrence of a pair is kept; when a pair is found
 *      again, ilist[i] and the entry after it are dropped.
 *
 *      Returns the new length of the array, which is 0 for an
 *      empty (or negative-length) list.
 */
int ilist_remove_dup_pairs(int *ilist, int n)
{
    int i, j;

    /* Nothing to keep: without this, j = 1 would be returned. */
    if (n <= 0) return 0;

    /* ilist[0] always stays; j is the next write position. */
    j = 1;
    for (i = 1; i < n; i++) {
        int a1 = ilist[i - 1];
        int a2 = ilist[i];

        /* Search only the entries that precede the current pair. */
        if (ilist_find_pair(ilist, i - 1, a1, a2) < 0) {
            ilist[j++] = ilist[i];
        } else {
            /* Skip ilist[i] and, with the loop's i++, ilist[i + 1]. */
            i++;
        }
    }

    return j;
}



/* Capacity of the integer list used by the demo below. */
#define MAX 40

/*
 *      Demo: split two "#<int>..." strings into one integer list,
 *      remove duplicate pairs and print the recombined string.
 *      Returns EXIT_FAILURE if either input string is malformed.
 */
int main(void)
{
    const char *str1 = "#100#123#100#678";
    const char *str2 = "#100#678#100#56";
    char res[80] = "";      /* valid string even if nothing is written */

    int ilist[MAX];
    int nlist;
    int nmore;
    int len;

    /* convert str1 */
    nlist = ilist_split(ilist, MAX, str1);
    if (nlist < 0) {
        fprintf(stderr, "invalid list: %s\n", str1);
        return EXIT_FAILURE;
    }
    if (nlist > MAX) nlist = MAX;

    /* convert and concatenate str2 */
    nmore = ilist_split(ilist + nlist, MAX - nlist, str2);
    if (nmore < 0) {
        /* Don't let the -1 error code be added to the length. */
        fprintf(stderr, "invalid list: %s\n", str2);
        return EXIT_FAILURE;
    }
    nlist += nmore;
    if (nlist > MAX) nlist = MAX;

    /* remove duplicate pairs */
    nlist = ilist_remove_dup_pairs(ilist, nlist);

    /* convert back to string */
    len = ilist_join(ilist, nlist, res, sizeof(res));
    if (len < 0 || len >= (int) sizeof(res)) {
        fprintf(stderr, "warning: result truncated\n");
    }
    printf("%s\n", res);

    return 0;
}