我有以下strstr
的实现注意:此代码不是我的。
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
char *fast_strstr(const char *haystack, const char *needle)
{
if (!*needle) // Empty needle.
return (char *) haystack;
const char needle_first = *needle;
// Runs strchr() on the first section of the haystack as it has a lower
// algorithmic complexity for discarding the first non-matching characters.
haystack = strchr(haystack, needle_first);
if (!haystack) // First character of needle is not in the haystack.
return NULL;
// First characters of haystack and needle are the same now. Both are
// guaranteed to be at least one character long.
// Now computes the sum of the first needle_len characters of haystack
// minus the sum of characters values of needle.
const char *i_haystack = haystack + 1,
*i_needle = needle + 1;
unsigned int sums_diff = *haystack;
bool identical = true;
while (*i_haystack && *i_needle)
{
sums_diff += *i_haystack;
sums_diff -= *i_needle;
identical &= *i_haystack++ == *i_needle++;
}
// i_haystack now references the (needle_len + 1)-th character.
if (*i_needle) // haystack is smaller than needle.
return NULL;
else if (identical)
return (char *) haystack;
size_t needle_len = i_needle - needle;
size_t needle_len_1 = needle_len - 1;
// Loops for the remaining of the haystack, updating the sum iteratively.
const char *sub_start;
for (sub_start = haystack; *i_haystack; i_haystack++)
{
sums_diff -= *sub_start++;
sums_diff += *i_haystack;
// Since the sum of the characters is already known to be equal at that
// point, it is enough to check just needle_len-1 characters for
// equality.
if (
sums_diff == 0
&& needle_first == *sub_start // Avoids some calls to memcmp.
&& memcmp(sub_start, needle, needle_len_1) == 0
)
return (char *) sub_start;
}
return NULL;
}
int main(void)
{
char s[] = "this is a test";
char s2[] = "test";
if(fast_strstr(s, s2) != NULL)
puts("YES!");
else
puts("NOT!");
return 0;
}
这会使当前条目输出错误,其中 NOT!而非 YES!。这个问题只发生在最后一个单词,但奇怪的是它与其他字符串一起使用,任何想法为什么会发生这种情况?
答案 0 :(得分:2)
如果针的第一个char
与大海捞针中的char
匹配,则代码会失败,但其余的则没有。
试试fast_strstr("zhis is a test", "test")
代替最后一个return NULL;
,代码需要在第一个匹配的字母后尝试其余的haystack 。接下来是一个递归解决方案,但肯定会有一个函数循环。
return fast_strstr(haystack+1, needle); // --> YES!
// return NULL;
某些输入的代码可能很快,但似乎是O(n * n)
答案 1 :(得分:0)
sum_diff
应初始化为0,因为它们在第一个字符处匹配时没有初始差异。
如果你在GDB中运行它,你会发现sum_diff = 116
(这是't'
的ASCII值。),当它返回而不是0时。
unsigned int sums_diff = 0; // *haystack - *needle (which is 0)
这个错误导致它在任何干草堆中失败,其中第一个字符的针出现在字符串的前面并且没有完全匹配,因为当你进入for
循环并依赖{{ 1}}。