Question

我正在开发一个项目（在C中实现），我们需要维护一个功能或关键字列表。用户输入一个字符串。我们需要在存储的字符串数组中对此字符串进行不区分大小写的搜索。该列表目前包含100个字符串，可以添加新字符串（每年5个字符串）。

我想知道存储此数组的最佳方法，并使搜索更有效。

目前实现的解决方案如下所示（我还没有编译此代码，这只是一段代码片段）：

    /*
     * Sketch of the current lookup: dispatch on the first two characters of
     * the input (accepting either case) to pick a candidate index into
     * applist, then confirm the candidate with a full string comparison.
     * The asker states this fragment was never compiled.
     */
    char **applist={ asdf , adgh, eftg , egty, ...} 
    char *user_input; // this string contains user entered string
    /* NOTE(review): id is never initialized and neither switch has a default,
     * so it is still unset at the comparison below when no case matches. */
    int id;
    /* Outer switch: first character, both upper and lower case. */
    switch(user_input[0])
    {
     case 'a':
     case 'A':
     /* Inner switch: second character selects the candidate index.
      * NOTE(review): the closing parenthesis is missing on the next line. */
     switch(user_input[1]
     {
     case 's':
     case 'S':
     /* NOTE(review): the id assignments in this fragment lack a terminating semicolon. */
     id=0
     break;

     case 'd':
     case 'D':
     id=1
     break;
     }
     break;
     case'e':
     case'E':
     switch(user_input[1])
     {
     case 'f':
     case 'F':
     id=2
     break;

     case 'g':
     case 'G':
     id=3
     break;
     }
     break;
     }
    /* NOTE(review): if stricmp follows the strcmp convention (0 when the
     * strings are equal), this condition is true on a mismatch, so the two
     * return statements look swapped - confirm. */
    if(stricmp(user_input,applist[id]))
    return id;
    else 
    return -1;

在实际代码中，applist未排序。当新的字符串被添加到applist时，我需要一种有效的方法来存储这个数组。

如果我按字母顺序存储字符串，那么每次添加新字符串时我都必须手动找到新字符串的正确位置。（在编译代码之前，不在运行时将新字符串添加到applist中）

建议一种有效的方法。

编辑：我当前的方法导致代码更长但效率更高。但是这段代码不容易维护。我需要的是一种数据结构，它可以以相同的效率进行搜索，但代码更小。您建议的数据结构不应该有额外的开销。唯一的要求是高效搜索。并且可以轻松地在编译时向数据结构添加元素。在运行时进行排序不是我的要求，因为在编译时添加了新的字符串（这是为了避免限制用户将任何新字符串添加到列表中）。

Answer 1

一个好的数据结构可能是一个TRIE：

http://en.wikipedia.org/wiki/Trie

看起来这就像你在代码中开始实现的那样。

Answer 2

听起来这不是性能关键的代码，在这种情况下我建议使用strcasestr来进行字符串比较。存储像这样的关键字

/* Keyword table; an entry's index is the id used by the lookup loop. */
char *applist[] = {"abc", "def", "geh"};

然后循环遍历它们并将用户输入与strcasestr进行比较，如此

/* Equal lengths plus a case-insensitive substring hit means the two strings
 * match in full, ignoring case; id is the index of the matching entry. */
if (strlen(applist[id]) == strlen(user_input) && 
            strcasestr(applist[id], user_input) != NULL)
    return id;

与使用复杂的数据结构相比，这种方法更简洁，更易于维护。如果您确实关注性能，首先要实现这种方法，做一些时序测试，然后就可以决定是否需要更快的算法。

Answer 3

在搜索字符串时，您可以使用的最佳数据结构是BST（二叉搜索树）- http://en.wikipedia.org/wiki/Binary_search_tree。在树保持平衡的情况下，其最坏情况下的搜索时间仅为O(log n)，而数组和链表则为O(n)。

以下是使用整数的示例代码（您需要将其改为存储字符串，并使用strcmp进行比较）：

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>

/* One node of the binary search tree used by the example program. */
struct node {
    int data;            /* key stored in this node */
    struct node *left;   /* left child, or NULL */
    struct node *right;  /* right child, or NULL */
};

typedef struct node NODE;


/*
 * Allocate a tree node that stores `data` and has no children.
 *
 * Returns the new node, or NULL when the allocation fails; in that case a
 * diagnostic is written to stderr.
 */
NODE *newnode(int data)
{
    /* sizeof *n keeps the size tied to the pointer's type; no cast needed in C. */
    NODE *n = malloc(sizeof *n);

    if (n == NULL) {
        fprintf(stderr, "Error: unable to allocate memory \n");
        return NULL;
    }

    n->data = data;
    n->left = NULL;
    n->right = NULL;
    return n;
}


/*
 * Insert `data` into the binary search tree rooted at `head`.
 *
 * Keys greater than a node go to its right subtree, all others to its left.
 * A key that is already present is reported on stdout and not inserted again.
 *
 * Returns the root of the tree: `head` itself when it is non-NULL, otherwise
 * the newly created node (or NULL if that allocation failed).
 */
NODE *insert(NODE *head, int data)
{
    /*
     * Walk down with a pointer to the link that would hold the new node.
     * Iterating instead of recursing keeps stack usage constant even when
     * the tree degenerates into a list (e.g. keys inserted in sorted order).
     */
    NODE **link = &head;

    while (*link != NULL) {
        NODE *cur = *link;

        if (cur->data == data) {
            printf("Info: attempting to add duplicate element : %d\n", data);
            return head;
        }

        link = (cur->data < data) ? &cur->right : &cur->left;
    }

    /* On allocation failure the link simply stays NULL. */
    *link = newnode(data);
    return head;
}

/*
 * Print the keys of the tree rooted at `node` to stdout, each followed by a
 * space, visiting the left subtree, then the node, then the right subtree.
 * An empty tree prints nothing.
 */
void inorder(NODE *node)
{
    if (node != NULL) {
        inorder(node->left);
        printf("%d ", node->data);
        inorder(node->right);
    }
}

/*
 * Report whether `data` is stored in the tree rooted at `head`.
 *
 * Returns 1 when a node with that key exists, 0 otherwise (including for an
 * empty tree).
 */
int lookup(NODE *head, int data)
{
    const NODE *cur = head;

    while (cur != NULL) {
        if (cur->data == data)
            return 1;

        /* Larger keys live to the right, everything else to the left. */
        cur = (cur->data < data) ? cur->right : cur->left;
    }

    return 0;
}

/*
 * Look `data` up in the tree rooted at `head` and print the outcome
 * ("found" or "not found") to stdout.
 */
void search(NODE *head, int data)
{
    if (!lookup(head, data)) {
        printf("not found : %d \n", data);
        return;
    }

    printf("found : %d \n", data);
}

/* Release every node of the tree rooted at `node`; a NULL tree is a no-op. */
static void free_tree(NODE *node)
{
    if (node == NULL)
        return;

    free_tree(node->left);
    free_tree(node->right);
    free(node);
}

/*
 * Demo driver: build a tree from a fixed key list (which includes
 * duplicates), print it in order, probe a few keys, then free the tree.
 */
int main(void)
{
    static const int keys[] = { 20, 10, 22, 23, 24, 25, 10, 20, 30, 40, 50, 60 };
    static const int probes[] = { 10, 11, 13, 14 };
    NODE *root = NULL;
    size_t i;

    for (i = 0; i < sizeof keys / sizeof keys[0]; i++) {
        root = insert(root, keys[i]);
    }
    inorder(root);
    printf("\n");

    for (i = 0; i < sizeof probes / sizeof probes[0]; i++) {
        search(root, probes[i]);
    }

    free_tree(root);
    return 0;
}

另一方面，哈希表可以为您提供常数级的搜索时间O(1) - http://en.wikipedia.org/wiki/Hash_table

在C中的字符串数组中搜索字符串的有效方法。（不区分大小写）

3 个答案: