这个问题是关于LightOj问题(1255 - 子串频率): http://lightoj.com/volume_showproblem.php?problem=1255 [您必须登录才能查看问题]
基本上,问题是子字符串匹配和计数问题。
以下是KMP代码:
+----+---------+----------+--------+------------+-----------------------------+
| id | user_id | image_id | tag_id | comment_id | message |
+----+---------+----------+--------+------------+-----------------------------+
| 1 | 3 | null | null | null | I'm building an application |
+----+---------+----------+--------+------------+-----------------------------+
| 2 | null | 45 | null | null | This image is NSFW! |
+----+---------+----------+--------+------------+-----------------------------+
| 3 | null | null | 234 | null | Tag includes bad content... |
+----+---------+----------+--------+------------+-----------------------------+
| 4 | null | null | null | 125 | Spamming!!! |
+----+---------+----------+--------+------------+-----------------------------+
以下是代码(原文标注为Z算法,但下面的实现实际上是KMP算法):
#include <iostream>
#include <cstdio>
#include <cmath>
#include <cstdlib>
#include <vector>
#include <string>
#define MOD 1000000007
#define ull unsigned long long int
#define dll int
#define dl long int
#define ul unsigned long int
#define gc getchar_unlocked
#define cn int
using namespace std;
/**
 * Reads the next decimal integer from stdin into `x`.
 *
 * Every character that is neither a digit nor '-' is skipped. An optional
 * '-' followed by a run of digits is then consumed; the single character
 * that terminates the digit run is consumed as well and discarded.
 *
 * If end of input is reached before any digit is seen, `x` is set to 0 and
 * the function returns instead of looping forever.
 *
 * No overflow checking is performed.
 *
 * @tparam T integer type of the destination.
 * @param x  receives the parsed value.
 */
template <class T> void scanNum(T &x)
{
    // Hold the character in an int (not T) so EOF, which is negative, is
    // always distinguishable from real input whatever T is.
    int c = getchar_unlocked();
    x = 0;
    bool negative = false;

    // Skip leading junk, but stop at EOF so exhausted input cannot hang us.
    while (c != EOF && c != '-' && (c < '0' || c > '9'))
        c = getchar_unlocked();

    if (c == '-')
    {
        negative = true;
        c = getchar_unlocked();
    }

    for (; c >= '0' && c <= '9'; c = getchar_unlocked())
        x = x * 10 + static_cast<T>(c - '0');

    if (negative)
        x = -x;
}
/**
 * Reads one line of text from stdin and appends it to `str`.
 *
 * Leading characters with a code below 33 (spaces, newlines, other control
 * characters) are skipped. Characters are then appended to `str` up to, but
 * not including, the next '\n' or end of input. Existing contents of `str`
 * are kept; the new text is appended after them.
 *
 * A single '\0' is appended after the text. It is part of the string's
 * length: the KMP class in this file subtracts one from length() to drop it,
 * so it must stay.
 *
 * @param str destination string; receives the line followed by one '\0'.
 */
inline void scanString(std::string& str)
{
    // int, not char, so that EOF can be detected reliably.
    int c = getchar_unlocked();

    while (c != EOF && c < 33)
        c = getchar_unlocked();

    while (c != EOF && c != '\n')
    {
        // In-place append is amortized O(1); `str = str + c` would copy the
        // whole string for every character, i.e. O(n^2) per line.
        str += static_cast<char>(c);
        c = getchar_unlocked();
    }

    str += '\0';
}
class KMP
{
string txt, pat;
dll M,N,c;
dll *lps;
public:
KMP(string t,string p)
{
txt=t;
pat=p;
N=txt.length()-1;
M=pat.length()-1;
c=0;
}
void preprocess()
{
dll len=0,i=1;
lps=(dll *)malloc(M*sizeof(dll));
lps[0]=0;
while(i<M)
{
if(pat[i]==pat[len])
{
lps[i++]=++len;
}
else
{
if(len!=0)
{
len=lps[len-1];
}
else
{
lps[i++]=0;
}
}
}
}
dll KMPalgo()
{
preprocess();
dll j=0,i=0;
while(i<N)
{
if(pat[j]==txt[i])
{
i++;
j++;
}
if(j==M)
{
c++;
j=lps[j-1];
}
else if((i<N) && (pat[j]!=txt[i]))
{
if(j!=0)
j=lps[j-1];
else
i++;
}
}
free(lps);
return c;
}
};
int main()
{
cn t;
scanNum<cn>(t);
for(cn i=1;i<=t;i++)
{
string txt,pat;
scanString(txt);
scanString(pat);
KMP strMatch(txt,pat);
cn v = strMatch.KMPalgo();
printf("Case %d: %d\n",i,v);
//nl;
}
return 0;
}
两种解决方案在该网站上提交后都得到了TLE(超出时间限制)。
当有词典(多个模式串)时,可以使用Aho-Corasick算法。在最坏的情况下,Boyer-Moore的复杂度为O(mn),例如 txt = aaaaaaaaa 和 pat = aa 的情况,而这类情况恰好出现在该题的测试数据中。
有没有更好的算法可以解决问题?我找不到任何合适的解决方案,所以我不得不在这里发布。