使用Suffix Automata的最长公共子串

时间:2014-03-16 20:18:03

标签: c++ string algorithm lcs suffix-array

我曾经使用动态编程计算最长公共子串 O(m * n),后缀树 O(m + n),后缀数组 O( nlog ^ 2 n)根据我的需要。最近我学会了 后缀自动机 ,它在 O(n)中表现非常令人印象深刻。

我可以编写代码来轻松计算最长公共子字符串的长度。例如:

Input:
abcdef
xyzabc

Output:
3

这是代码:

#include <bits/stdc++.h>
using namespace std;

const int maxN = 250500;
const int maxState = maxN << 1;

struct State {
    State *go[26], *suffix;
    int depth, id;
    long long cnt;
};
State pool[maxState], *point, *root, *sink;
int size;

State *newState(int dep) {
    point->id = size++;
    point->depth = dep;
    return point++;
}

void init() {
    point = pool;
    size = 0;
    root = sink = newState(0);
}

void insert(int a) {
    State *p = newState(sink->depth+1);
    State *cur = sink, *sufState;
    while (cur && !cur->go[a]) {
        cur->go[a] = p;
        cur = cur->suffix;
    }
    if (!cur)
        sufState = root;
    else {
        State *q = cur->go[a];
        if (q->depth == cur->depth + 1)
            sufState = q;
        else {
            State *r = newState(cur->depth+1);
            memcpy(r->go, q->go, sizeof(q->go));
            r->suffix = q->suffix;
            q->suffix = r;
            sufState = r;
            while (cur && cur->go[a] == q) {
                cur->go[a] = r;
                cur = cur->suffix;
            }
        }
    }
    p->suffix = sufState;
    sink = p;
}

int work(char buf[]) {
    //printf("%s", buf);
    int len = strlen(buf);
    int tmp = 0, ans = 0;
    State *cur = root;
    for (int i = 0; i < len; i++) {
        if (cur->go[buf[i]-'a']) {
            tmp++;
            cur = cur->go[buf[i]-'a'];
        } else {
            while (cur && !cur->go[buf[i]-'a'])
                cur = cur->suffix;
            if (!cur) {
                cur = root;
                tmp = 0;
            } else {
                tmp = cur->depth + 1;
                cur = cur->go[buf[i]-'a'];
            }
        }
        ans = max(ans, tmp);

    }
    return ans;
}

char ch[maxN];

int main() {
    scanf("%s", ch);
    init();
    int len = strlen(ch);
    for (int i = 0; i < len; i++)
        insert(ch[i]-'a');
    scanf("%s", ch);
    printf("%d\n", work(ch));
    return 0;
}

但是现在我需要打印最长的Common Substring本身,而不是长度。但是我无法修改我的代码:(如何修改此代码以打印最长的公共子串?

1 个答案:

答案 0 :(得分:3)

当你在这条线上时:

ans = max(ans, tmp);

buf中达到深度tmp的起始位置为i - tmp + 1。现在您知道第二个字符串中所有最长公共子串的位置。只需选择任何并输出结果。