如何优化多个strncmp?

时间:2018-10-25 21:06:49

标签: c algorithm

我需要检查字符串以查看它是否与任何前缀匹配。将来要比较的前缀数量将增加。因此,我对以下代码的性能感到担忧。有很多选项需要检查时,有哪些方法可以使其运行更快?

/*
 * Report whether `name` begins with one of the known prefixes.
 *
 * Returns 1 when `name` starts with any entry of the table below, and 0 when
 * it does not or when `name` is NULL.
 *
 * Each prefix length is derived from its literal with sizeof, so the text and
 * the compared length can never disagree (a hand-written length that is one
 * too large also compares the terminating NUL and turns the prefix test into
 * an exact-match test).
 */
int checkString(const char *name)
{
/* Pairs a string literal with its length, excluding the terminating NUL. */
#define CHECKSTRING_PREFIX(lit) { (lit), sizeof(lit) - 1 }
    static const struct {
        const char *text;
        size_t length;
    } known[] = {
        CHECKSTRING_PREFIX("AE_"),
        CHECKSTRING_PREFIX("AEDZ_"),
        CHECKSTRING_PREFIX("EDPZ_"),
        CHECKSTRING_PREFIX("EFAN_"),
        CHECKSTRING_PREFIX("E_GCA"),
        CHECKSTRING_PREFIX("EFFAN_"),
        CHECKSTRING_PREFIX("EPDPZ_"),
        CHECKSTRING_PREFIX("EDDPZ_"),
        CHECKSTRING_PREFIX("ECADF_"),
        CHECKSTRING_PREFIX("EPCEA_"),
        CHECKSTRING_PREFIX("CFEXXX_"),
        CHECKSTRING_PREFIX("IFEXX_"),
        CHECKSTRING_PREFIX("EINFFAN_"),
        CHECKSTRING_PREFIX("NXXEFAN_"),
        CHECKSTRING_PREFIX("ENAEAZY_"),
        CHECKSTRING_PREFIX("EYYYYYY_"),
        CHECKSTRING_PREFIX("ENEOENUE_"),
        /* more prefixes to be added here. */
    };
#undef CHECKSTRING_PREFIX

    if (!name) {
        return 0;
    }

    for (size_t i = 0; i < sizeof known / sizeof known[0]; i++) {
        /* strncmp stops at the NUL of a short name, so no over-read occurs. */
        if (strncmp(name, known[i].text, known[i].length) == 0) {
            return 1;
        }
    }

    return 0;
}

3 个答案:

答案 0 :(得分:3)

一次性,提前设置:

/* One-time setup: compile a start-anchored alternation of the accepted prefixes. */
regex_t re;
/* Every alternative must be spelled exactly like its prefix ("AEDZ_", "EDPZ_", ...). */
regcomp(&re, "^(AE_|AEDZ_|EDPZ_|EFAN_|E_GCA|" /*...*/ ")", REG_EXTENDED);

要检查:

/* regexec() yields 0 on a match, so its logical negation is 1 for a match and 0 otherwise. */
return !regexec(&re, name, 0, 0, 0);

在任何良好的正则表达式实现中,regcomp会将正则表达式编译为DFA,该DFA匹配时执行的步数不会超过最长前缀的长度。

答案 1 :(得分:2)

  

当需要检查很多字符串时,有哪些选择可以使其运行更快?

如果n个前缀已按字节顺序排好序,则最多需要进行约log2(n)次比较。代码可以使用bsearch():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Prefix table used for binary search. It must stay sorted in strcmp() byte
 * order, otherwise the search can step past an entry that would have matched.
 * In ASCII '_' (0x5F) sorts after every uppercase letter, which is why
 * "AEDZ_" comes before "AE_" and "E_GCA" comes after "EYYYYYY_".
 */
const char *prefix[] = {"AEDZ_", "AE_", "CFEXXX_", "ECADF_", "EDDPZ_",
    "EDPZ_", "EFAN_", "EFFAN_", "EINFFAN_", "ENAEAZY_", "ENEOENUE_", "EPCEA_",
    "EPDPZ_", "EYYYYYY_", "E_GCA",  "IFEXX_", "NXXEFAN_"};

/*
 * bsearch() comparator: `key` is the string being looked up and `element`
 * points at one slot of an array of string pointers. Only the first
 * strlen(entry) characters are compared, so a key that merely starts with the
 * entry compares equal. Each comparison is traced to stdout.
 */
int cmp(const void *key, const void *element) {
  const char *const *slot = element;
  const char *entry = *slot;
  const char *needle = key;
  size_t span = strlen(entry);

  printf("strncmp(%s,%s,%zu)\n", needle, entry, span);
  return strncmp(needle, entry, span);
}

/*
 * Look `key` up in the prefix table with bsearch() and print either the
 * entry that was found or "Not Found".
 */
void test(const char *key) {
  printf("Search for <%s>\n", key);

  const size_t count = sizeof prefix / sizeof *prefix;
  const char **hit = bsearch(key, prefix, count, sizeof *prefix, cmp);

  if (!hit) {
    printf("Not Found\n");
    return;
  }
  printf("Found <%s>\n", *hit);
}

/* Run the lookup demo for a too-short key, an exact key and a longer key. */
int main(void) {
  static const char *const samples[] = {"E_GC", "E_GCA", "E_GCA_"};

  for (size_t i = 0; i < sizeof samples / sizeof samples[0]; i++) {
    test(samples[i]);
  }
  return 0;
}

输出

Search for <E_GC>
strncmp(E_GC,EINFFAN_,8)
strncmp(E_GC,EYYYYYY_,8)
strncmp(E_GC,IFEXX_,6)
strncmp(E_GC,E_GCA,5)
Not Found
Search for <E_GCA>
strncmp(E_GCA,EINFFAN_,8)
strncmp(E_GCA,EYYYYYY_,8)
strncmp(E_GCA,IFEXX_,6)
strncmp(E_GCA,E_GCA,5)
Found <E_GCA>
Search for <E_GCA_>
strncmp(E_GCA_,EINFFAN_,8)
strncmp(E_GCA_,EYYYYYY_,8)
strncmp(E_GCA_,IFEXX_,6)
strncmp(E_GCA_,E_GCA,5)
Found <E_GCA>

答案 2 :(得分:-2)

如果前缀没有改变并且仅添加了前缀,并且您声明的前缀数量为常数,则可以使用strstr进行循环:

#include "stdio.h"
#include "string.h"

/* Number of slots in the subStrings table. */
#define N_STRINGS 17

/* Checks name against subStrings; returns the index of the first matching entry, or -1. */
int checkString(const char *name);

/* Table of strings to look for; its slots are assigned at run time in main(). */
const char *subStrings[N_STRINGS];

/*
 * Fill the subStrings table, then print the value checkString() returns for
 * one sample name.
 */
int main(void) {

    subStrings[0] = "AE_";
    subStrings[1] = "AEDZ_";
    subStrings[2] = "EDPZ_";
    subStrings[3] = "EFAN_";
    subStrings[4] = "E_GCA";
    subStrings[5] = "EFFAN_";
    subStrings[6] = "EPDPZ_";
    subStrings[7] = "EDDPZ_";
    subStrings[8] = "ECADF_";
    subStrings[9] = "EPCEA_";
    subStrings[10] = "CFEXXX_";
    subStrings[11] = "IFEXX_";
    subStrings[12] = "EINFFAN_";
    subStrings[13] = "NXXEFAN_";
    subStrings[14] = "ENAEAZY_";
    subStrings[15] = "EYYYYYY_";
    subStrings[16] = "ENEOENUE_";

    //run for a sample string
    printf("%d\n", checkString("AEDZ_value"));

    /* A non-zero exit status reports failure to the host environment; this run succeeded. */
    return 0;
}

/*
 * Return the index of the first entry of subStrings that `name` starts with,
 * or -1 when `name` is NULL or no entry matches.
 */
int checkString(const char *name)
{
    if (!name) return -1;

    for (int i = 0; i < N_STRINGS; i++) {
        const char *candidate = subStrings[i];

        /* A slot that was never assigned is NULL; handing NULL to the
         * string functions is undefined behavior, so skip it. */
        if (!candidate) continue;

        /* strstr() would also accept the entry in the middle of name;
         * strncmp() anchors the comparison at the first character and
         * never reads beyond the entry's length. */
        if (strncmp(name, candidate, strlen(candidate)) == 0) {
            return i;
        }
    }

    return -1;
}

函数checkString将返回前缀的索引。

在这种情况下,可能还有很多更有效的实现方式。