我需要检查字符串以查看它是否与任何前缀匹配。将来要比较的前缀数量将增加。因此,我对以下代码的性能感到担忧。有很多选项需要检查时,有哪些方法可以使其运行更快?
/*
 * Report whether `name` begins with one of the recognized prefixes.
 *
 * name: NUL-terminated string to test; may be NULL.
 * Returns 1 when `name` starts with any table entry, 0 otherwise
 * (including when `name` is NULL).
 */
int checkString(const char *name)
{
    /*
     * Each entry's length is derived from the literal itself so the length
     * can never drift out of sync with the text (a hand-written length one
     * larger than the literal also compares the terminating NUL and turns
     * the prefix test into an exact-match test).
     */
#define PREFIX_ENTRY(s) { (s), sizeof(s) - 1 }
    static const struct {
        const char *text;
        size_t len;
    } prefixes[] = {
        PREFIX_ENTRY("AE_"),
        PREFIX_ENTRY("AEDZ_"),
        PREFIX_ENTRY("EDPZ_"),
        PREFIX_ENTRY("EFAN_"),
        PREFIX_ENTRY("E_GCA"),
        PREFIX_ENTRY("EFFAN_"),
        PREFIX_ENTRY("EPDPZ_"),
        PREFIX_ENTRY("EDDPZ_"),
        PREFIX_ENTRY("ECADF_"),
        PREFIX_ENTRY("EPCEA_"),
        PREFIX_ENTRY("CFEXXX_"),
        PREFIX_ENTRY("IFEXX_"),
        PREFIX_ENTRY("EINFFAN_"),
        PREFIX_ENTRY("NXXEFAN_"),
        PREFIX_ENTRY("ENAEAZY_"),
        PREFIX_ENTRY("EYYYYYY_"),
        PREFIX_ENTRY("ENEOENUE_"),
        /* Append further prefixes here. */
    };
#undef PREFIX_ENTRY

    if (!name) {
        return 0;
    }

    for (size_t i = 0; i < sizeof prefixes / sizeof prefixes[0]; i++) {
        if (strncmp(name, prefixes[i].text, prefixes[i].len) == 0) {
            return 1;
        }
    }
    return 0;
}
答案 0 :(得分:3)
一次性,提前设置:
/* Compiled once during start-up and reused for every lookup. */
regex_t re;
/* Anchored alternation of the accepted prefixes; each alternative must be a
 * complete prefix (e.g. "AEDZ_" and "EDPZ_" are two separate entries). */
regcomp(&re, "^(AE_|AEDZ_|EDPZ_|EFAN_|E_GCA|" /*...*/ ")", REG_EXTENDED);
要检查:
/* regexec() returning 0 signals a match; nmatch is 0, so no match offsets are requested. */
return regexec(&re, name, 0, 0, 0) == 0;
在任何良好的正则表达式实现中,regcomp
会将正则表达式编译为DFA,该DFA执行的步数以最长前缀的长度为上限。
答案 1 :(得分:2)
当需要检查很多字符串时,有哪些选择可以使其运行更快?
如果将 n
个前缀排好序,则最多只需约 log2(n)
次比较。代码可以使用bsearch()
。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Prefix table searched with bsearch(); it must stay sorted in strcmp()
 * (byte-value) order. Note that '_' (0x5F) sorts after every upper-case
 * letter, which is why "AEDZ_" precedes "AE_" and "E_GCA" follows "EYYYYYY_".
 */
const char *prefix[] = {"AEDZ_", "AE_", "CFEXXX_", "ECADF_", "EDDPZ_",
"EDPZ_", "EFAN_", "EFFAN_", "EINFFAN_", "ENAEAZY_", "ENEOENUE_", "EPCEA_",
"EPDPZ_", "EYYYYYY_", "E_GCA", "IFEXX_", "NXXEFAN_"};
/*
 * bsearch() comparator for prefix lookup.
 *
 * key:     the string being searched for (a const char *).
 * element: pointer to one slot of the prefix table (a const char **).
 *
 * Compares only the first strlen(prefix) characters of the key, so a key that
 * starts with the prefix compares equal. Each call is traced to stdout.
 */
int cmp(const void *key, const void *element) {
  const char *const *slot = element;
  const char *candidate = *slot;
  const char *needle = key;
  size_t candidate_len = strlen(candidate);

  printf("strncmp(%s,%s,%zu)\n", needle, candidate, candidate_len);
  return strncmp(needle, candidate, candidate_len);
}
/*
 * Look `key` up in the prefix table with bsearch() and print the outcome:
 * the matching prefix when one is found, "Not Found" otherwise.
 */
void test(const char *key) {
  const size_t count = sizeof prefix / sizeof prefix[0];

  printf("Search for <%s>\n", key);

  const char **hit = bsearch(key, prefix, count, sizeof prefix[0], cmp);
  if (hit == NULL) {
    printf("Not Found\n");
    return;
  }
  printf("Found <%s>\n", *hit);
}
/*
 * Demo driver: probes the table with a key one character short of the
 * "E_GCA" prefix, the prefix itself, and the prefix followed by an extra
 * character.
 */
int main() {
  static const char *const probes[] = {"E_GC", "E_GCA", "E_GCA_"};

  for (size_t i = 0; i < sizeof probes / sizeof probes[0]; i++) {
    test(probes[i]);
  }
}
输出
Search for <E_GC>
strncmp(E_GC,EINFFAN_,8)
strncmp(E_GC,EYYYYYY_,8)
strncmp(E_GC,IFEXX_,6)
strncmp(E_GC,E_GCA,5)
Not Found
Search for <E_GCA>
strncmp(E_GCA,EINFFAN_,8)
strncmp(E_GCA,EYYYYYY_,8)
strncmp(E_GCA,IFEXX_,6)
strncmp(E_GCA,E_GCA,5)
Found <E_GCA>
Search for <E_GCA_>
strncmp(E_GCA_,EINFFAN_,8)
strncmp(E_GCA_,EYYYYYY_,8)
strncmp(E_GCA_,IFEXX_,6)
strncmp(E_GCA_,E_GCA,5)
Found <E_GCA>
答案 2 :(得分:-2)
如果已有前缀不会改变、只会新增前缀,并且前缀的数量是一个声明好的常量,则可以在循环中使用strstr
进行检查(注意:strstr 会匹配字符串中任意位置的子串;若只想匹配开头的前缀,应改用 strncmp):
#include "stdio.h"
#include "string.h"
/* Number of slots in the subStrings table. */
#define N_STRINGS 17
/* Forward declaration; the definition follows main(). */
int checkString(const char *name);
/* Prefix table; its slots are assigned at run time at the start of main(). */
const char *subStrings[N_STRINGS];
/*
 * Fills the prefix table, runs one sample lookup and prints the index that
 * checkString() reports for it.
 */
int main() {
    subStrings[0] = "AE_";
    subStrings[1] = "AEDZ_";
    subStrings[2] = "EDPZ_";
    subStrings[3] = "EFAN_";
    subStrings[4] = "E_GCA";
    subStrings[5] = "EFFAN_";
    subStrings[6] = "EPDPZ_";
    subStrings[7] = "EDDPZ_";
    subStrings[8] = "ECADF_";
    subStrings[9] = "EPCEA_";
    subStrings[10] = "CFEXXX_";
    subStrings[11] = "IFEXX_";
    subStrings[12] = "EINFFAN_";
    subStrings[13] = "NXXEFAN_";
    subStrings[14] = "ENAEAZY_";
    subStrings[15] = "EYYYYYY_";
    subStrings[16] = "ENEOENUE_";

    /* Run for a sample string. */
    printf("%d\n", checkString("AEDZ_value"));

    /* A non-zero exit status signals failure to the caller, so report 0. */
    return 0;
}
/*
 * Find the first entry of subStrings that `name` starts with.
 *
 * name: NUL-terminated string to test; may be NULL.
 * Returns the index of the first matching table entry, or -1 when `name` is
 * NULL or no entry is a prefix of it.
 */
int checkString(const char *name)
{
    int i;

    if (!name) {
        return -1;
    }

    for (i = 0; i < N_STRINGS; i++) {
        const char *candidate = subStrings[i];

        /* Slots that have not been assigned yet are NULL; skip them. */
        if (!candidate) {
            continue;
        }

        /*
         * strncmp() anchors the comparison at the start of `name`;
         * strstr() would also accept the text anywhere inside the string.
         */
        if (strncmp(name, candidate, strlen(candidate)) == 0) {
            return i;
        }
    }
    return -1;
}
函数checkString
将返回匹配到的前缀的索引;若没有匹配(或 name 为空指针),则返回 -1。
在这种情况下,可能还有很多更有效的实现方式。