我有一个作业:通过控制台读入一组字符串模式T(每个最大长度80)和一组文本(每个最大长度250)。目标是使用Aho-Corasick算法检查哪些文本至少包含一个给定的模式,然后按照输入的顺序输出所有这些文本。所有用到的字符串(模式和文本)仅包含可打印的ASCII字符,因此字母表大小为95。
用户首先输入一个整数n(模式的数量,最多1000),然后输入n个模式,之后再输入任意数量的文本字符串。
课程辅导员为我们提供了一个Trie实现,其中包含一些已经写好的方法(例如,在Aho-Corasick DFA的节点之间进行转移的方法,以及计算节点v的失败链接的方法)。我添加了一个construct_slinks()方法,在所有模式都加入Trie之后调用,它遍历Trie并通过上述函数计算失败链接。我还添加了一个search_string()函数,它使用上述DFA转移方法沿着文本的各个字符移动(从S开始),如果在文本中的某个位置匹配了某个模式,就返回true。
到目前为止,一切顺利。我检查过的所有测试用例用我的代码都能通过,但是我提交代码的自动评测系统仍然显示“WRONG ANSWER”,所以我一定是哪里做错了。问题是我对Aho-Corasick还不是很熟悉,也看不出哪里出了问题。如果有人可以查看我的代码,甚至找到一个让它返回错误结果的测试用例,我将感到非常高兴。我已尽可能添加了注释。
如果能用伪代码把整个过程讲解一遍,也会大有帮助!
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <cstring>
#include <queue>
using namespace std;

// Problem limits.
const int MAXN = 1000;  // maximum number of patterns
const int NMAX = 80000; // maximum number of trie states: 80 (pattern length) * 1000 patterns
const int K = 95;       // alphabet size: the 95 printable ASCII characters

// Number of patterns. Note: main() declares its own local `n`, which shadows this one.
int n;

// Work queue of trie node indices; filled by add_string() and main(),
// drained by construct_slinks().
queue<int> vq;
// One node of the pattern trie / Aho-Corasick automaton.
// All node references are indices into the global node array; -1 is used as
// the "absent / not yet computed" marker for next[], link and go[].
// Characters are stored as alphabet indices (ASCII code minus 32).
struct vertex {
int next[K]; // child links: next[c] is the child reached via alphabet index c, or -1 if none
bool leaf; // true if a pattern ends here (construct_slinks() also sets it when the suffix-link target is a leaf)
int p; // index of the parent node (-1 for the root)
char pch; // alphabet index of the edge from the parent: t[p].next[pch] == this node
int link; // suffix (failure) link, computed lazily by get_link(); -1 = not computed yet
int go[K]; // cached DFA transitions, computed lazily by go(); -1 = not computed yet
};
// Node storage: index 0 is the root, the remaining NMAX slots hold pattern nodes.
// As a global the array is zero-initialized; add_string() never clears `leaf`
// on a new node, so it relies on that.
vertex t[NMAX+1]; // array nodes are stored in
int sz; // index of next free space for node (set to 1 by init())
void init() { //initialize root node (0)
t[0].p = t[0].link = -1;
memset(t[0].next, 255, sizeof t[0].next);
memset(t[0].go, 255, sizeof t[0].go);
sz = 1;
}
// Forward declaration: returns the suffix (failure) link of node v; defined below.
int get_link(int v);
void add_string(const string &s) { //Add string to trie
int v = 0;
for (int i = 0; i < s.length(); i++) {
char c = s[i]-32; //-32 to skip first 31 unprintable ascii chars -> So space (ASCII 32) is 0 in array
if (t[v].next[c] == -1) {
memset(t[sz].next, 255, sizeof t[sz].next);
memset(t[sz].go, 255, sizeof t[sz].go);
t[sz].link = -1;
t[sz].p = v;
t[sz].pch = c;
vq.push(sz); //Add child to queue for suffix link construction
t[v].next[c] = sz++;
}
v = t[v].next[c]; //Pick newly created child as current node
}
t[v].leaf = true; //For the last node: Pattern ends here, so leaf = true.
}
// Forward declaration: DFA transition from node v on character c (raw ASCII); defined below.
int go(int v, char c);
// Returns the suffix (failure) link of node `v`, computing and caching it in
// t[v].link on first use.
// The root and its direct children link to the root. Any other node links to
// the state reached by following its parent's suffix link and then taking the
// transition for the character on the edge parent -> v.
int get_link(int v) {
    int &link = t[v].link;
    if (link != -1)
        return link; // already computed

    if (v == 0 || t[v].p == 0) {
        link = 0;
    } else {
        // pch is stored as an alphabet index; go() expects the raw ASCII
        // character, hence the +32.
        link = go(get_link(t[v].p), t[v].pch + 32);
    }
    return link;
}
// Returns the DFA state reached from node `v` on character `c` (raw ASCII),
// computing and caching the transition in t[v].go on first use.
// If `v` has a trie child for the character, that child is the target.
// Otherwise the root stays at the root, and any other node defers to the
// transition of its suffix link.
int go(int v, char c) {
    const char idx = c - 32; // alphabet index: space (ASCII 32) maps to 0
    int &cached = t[v].go[idx];
    if (cached != -1)
        return cached; // transition already known

    const int child = t[v].next[idx];
    if (child != -1)
        cached = child;
    else if (v == 0)
        cached = 0;
    else
        cached = go(get_link(v), idx + 32); // +32 restores the raw character for the recursive call
    return cached;
}
bool search_string(const string &s) { //Follow along the "go"-links
int v = 0;
for (char c : s) {
v = go(v,c);
if (t[v].leaf) { //Pattern ends at current node
return true;
}
}
return false;
}
void construct_slinks() { //Construct failure (suffix) links with a top-down approach using the queue we stacked
while (!vq.empty()) { //Run DFS on the trie, recursively calculate links
int curr = vq.front();
vq.pop();
int link = get_link(curr);
if (t[get_link(curr)].leaf)
t[curr].leaf = true; //If failure link of current node points to a leaf node (where a pattern ends), make current node a leaf too
}
}
int main() {
int n; cin >> n; cin.ignore(); //Read number of patterns
vector<string> trueones; //For output
init();
vq.push(0);
for (int i = 0; i < n; i++){
string s;
getline(cin, s);
add_string(s); //Build tree out of patterns
}
construct_slinks(); //Construct suffix links
string temp;
getline(cin, temp);
while ( temp != "") { //Read unknown number of strings and run search on them
if (search_string(temp))
trueones.push_back(temp); //if a pattern is in string "temp", push temp to trueones
getline(cin, temp);
}
for (auto x: trueones) { //Output all strings in trueones
cout << x << endl;
}
}