在C语言中实现字典的快速方法

时间:2010-12-08 05:08:20

标签: c data-structures dictionary

在C语言中编写程序时我想念的一件事是字典数据结构。在C中实现一个最方便的方法是什么?我不是在寻找性能,而是从头开始编写代码。我也不希望它是通用的 - 像string-> int这样的东西。但我确实希望它能够存储任意数量的项目。

这更像是一项练习。我知道有第三方库可供使用。但考虑一下,他们不存在。在这种情况下,你可以用最快的方式实现满足上述要求的字典。

11 个答案:

答案 0 :(得分:96)

The C Programming Language的6.6节提供了一个简单的字典(哈希表)数据结构。我认为有用的字典实现不会比这更简单。为方便起见,我在这里重现了代码。

struct nlist { /* table entry: */
    struct nlist *next; /* next entry in chain */
    char *name; /* defined name */
    char *defn; /* replacement text */
};

#define HASHSIZE 101
static struct nlist *hashtab[HASHSIZE]; /* pointer table */

/* hash: form hash value for string s */
unsigned hash(char *s)
{
    unsigned hashval;
    for (hashval = 0; *s != '\0'; s++)
      hashval = *s + 31 * hashval;
    return hashval % HASHSIZE;
}

/* lookup: look for s in hashtab */
struct nlist *lookup(char *s)
{
    struct nlist *np;
    for (np = hashtab[hash(s)]; np != NULL; np = np->next)
        if (strcmp(s, np->name) == 0)
          return np; /* found */
    return NULL; /* not found */
}

char *strdup(char *);
/* install: put (name, defn) in hashtab */
struct nlist *install(char *name, char *defn)
{
    struct nlist *np;
    unsigned hashval;
    if ((np = lookup(name)) == NULL) { /* not found */
        np = (struct nlist *) malloc(sizeof(*np));
        if (np == NULL || (np->name = strdup(name)) == NULL)
          return NULL;
        hashval = hash(name);
        np->next = hashtab[hashval];
        hashtab[hashval] = np;
    } else /* already there */
        free((void *) np->defn); /*free previous defn */
    if ((np->defn = strdup(defn)) == NULL)
       return NULL;
    return np;
}

char *strdup(char *s) /* make a duplicate of s */
{
    char *p;
    p = (char *) malloc(strlen(s)+1); /* +1 for ’\0’ */
    if (p != NULL)
       strcpy(p, s);
    return p;
}

请注意,如果两个字符串的哈希值发生冲突,则可能会导致O(n)查找时间。您可以通过增加HASHSIZE的值来降低冲突的可能性。有关数据结构的完整讨论,请参阅本书。

答案 1 :(得分:15)

最快方式是使用已存在的实现,例如uthash

而且,如果确实想要自己编码,可以检查并重复使用uthash中的算法。它是BSD许可的,除了传达版权声明的要求之外,你可以用它来做很多事情。

答案 2 :(得分:5)

为了便于实现,很难在天真地搜索数组。除了一些错误检查,这是一个完整的实现(未经测试)。

typedef struct dict_entry_s {
    const char *key;
    int value;
} dict_entry_s;

typedef struct dict_s {
    int len;
    int cap;
    dict_entry_s *entry;
} dict_s, *dict_t;

int dict_find_index(dict_t dict, const char *key) {
    for (int i = 0; i < dict->len; i++) {
        if (!strcmp(dict->entry[i], key)) {
            return i;
        }
    }
    return -1;
}

int dict_find(dict_t dict, const char *key, int def) {
    int idx = dict_find_index(dict, key);
    return idx == -1 ? def : dict->entry[idx].value;
}

void dict_add(dict_t dict, const char *key, int value) {
   int idx = dict_find_index(dict, key);
   if (idx != -1) {
       dict->entry[idx].value = value;
       return;
   }
   if (dict->len == dict->cap) {
       dict->cap *= 2;
       dict->entry = realloc(dict->entry, dict->cap * sizeof(dict_entry_s));
   }
   dict->entry[dict->len].key = strdup(key);
   dict->entry[dict->len].value = value;
   dict->len++;
}

dict_t dict_new(void) {
    dict_s proto = {0, 10, malloc(10 * sizeof(dict_entry_s))};
    dict_t d = malloc(sizeof(dict_s));
    *d = proto;
    return d;
}

void dict_free(dict_t dict) {
    for (int i = 0; i < dict->len; i++) {
        free(dict->entry[i].key);
    }
    free(dict->entry);
    free(dict);
}

答案 3 :(得分:3)

根据哈希值创建一个简单的哈希函数和一些链接的结构列表,分配要插入值的链表。也可以使用哈希来检索它。

我在一段时间后做了一个简单的实现:

...
#define K 16 // chaining coefficient

struct dict
{
    char *name; /* name of key */
    int val;   /*  value */
    struct dict *next; /* link field */
};

typedef struct dict dict;
dict *table[K];
int initialized = 0;


void  putval ( char *,int);

void init_dict()
{   
    initialized = 1;
    int i;  
    for(i=0;iname = (char *) malloc (strlen(key_name)+1);
    ptr->val = sval;
    strcpy (ptr->name,key_name);


    ptr->next = (struct dict *)table[hsh];
    table[hsh] = ptr;

}


int getval ( char *key_name )
{   
    int hsh = hash(key_name);   
    dict *ptr;
    for (ptr = table[hsh]; ptr != (dict *) 0;
        ptr = (dict *)ptr->next)
    if (strcmp (ptr->name,key_name) == 0)
        return ptr->val;
    return -1;
}

答案 4 :(得分:2)

这是一个快速实现,我用它从字符串中获取'Matrix'(struct)。 你可以拥有一个更大的数组并在运行中更改其值:

typedef struct  { int** lines; int isDefined; }mat;
mat matA, matB, matC, matD, matE, matF;

/* an auxilary struct to be used in a dictionary */
typedef struct  { char* str; mat *matrix; }stringToMat;

/* creating a 'dictionary' for a mat name to its mat. lower case only! */
stringToMat matCases [] =
{
    { "mat_a", &matA },
    { "mat_b", &matB },
    { "mat_c", &matC },
    { "mat_d", &matD },
    { "mat_e", &matE },
    { "mat_f", &matF },
};

mat* getMat(char * str)
{
    stringToMat* pCase;
    mat * selected = NULL;
    if (str != NULL)
    {
        /* runing on the dictionary to get the mat selected */
        for(pCase = matCases; pCase != matCases + sizeof(matCases) / sizeof(matCases[0]); pCase++ )
        {
            if(!strcmp( pCase->str, str))
                selected = (pCase->matrix);
        }
        if (selected == NULL)
            printf("%s is not a valid matrix name\n", str);
    }
    else
        printf("expected matrix name, got NULL\n");
    return selected;
}

答案 5 :(得分:2)

我很惊讶没有人提到hsearch/hcreate一组库虽然在Windows上不可用,但是由POSIX强制要求,因此可以在Linux / GNU系统中使用。

该链接有一个简单而完整的基本示例,可以很好地解释其用法。

它甚至具有线程安全的变体,易于使用且性能非常高。

答案 6 :(得分:1)

哈希表是简单“字典”的传统实现。如果您不关心速度或尺寸,请google for it。有许多免费提供的实现。

here's the first one I saw - 一眼就看出来,我看起来还不错。 (这是非常基本的。如果你真的希望它能保存无限量的数据,那么你需要添加一些逻辑来“重新分配”表内存随着它的增长。)

祝你好运!

答案 7 :(得分:1)

GLib和gnulib

如果您没有更具体的要求,这些是您最好的选择,因为它们广泛可用,便携且效率高。

另请参阅:Are there any open source C libraries with common data structures?

答案 8 :(得分:0)

哈希是关键。我认为使用查找表和散列密钥。您可以在线找到许多散列函数。

答案 9 :(得分:0)

最快的方法是使用二叉树。最坏的情况也只是O(logn)。

答案 10 :(得分:-2)

此外,您还可以使用 Google CityHash:

#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#include <byteswap.h>

#include "city.h"

void swap(uint32* a, uint32* b) {
    int temp = *a;
    *a = *b;
    *b = temp;
}

#define PERMUTE3(a, b, c) swap(&a, &b); swap(&a, &c);

// Magic numbers for 32-bit hashing.  Copied from Murmur3.
static const uint32 c1 = 0xcc9e2d51;
static const uint32 c2 = 0x1b873593;

static uint32 UNALIGNED_LOAD32(const char *p) {
  uint32 result;
  memcpy(&result, p, sizeof(result));
  return result;
}

static uint32 Fetch32(const char *p) {
  return UNALIGNED_LOAD32(p);
}

// A 32-bit to 32-bit integer hash copied from Murmur3.
static uint32 fmix(uint32 h)
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;
  return h;
}

static uint32 Rotate32(uint32 val, int shift) {
  // Avoid shifting by 32: doing so yields an undefined result.
  return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
}

static uint32 Mur(uint32 a, uint32 h) {
  // Helper from Murmur3 for combining two 32-bit values.
  a *= c1;
  a = Rotate32(a, 17);
  a *= c2;
  h ^= a;
  h = Rotate32(h, 19);
  return h * 5 + 0xe6546b64;
}

static uint32 Hash32Len13to24(const char *s, size_t len) {
  uint32 a = Fetch32(s - 4 + (len >> 1));
  uint32 b = Fetch32(s + 4);
  uint32 c = Fetch32(s + len - 8);
  uint32 d = Fetch32(s + (len >> 1));
  uint32 e = Fetch32(s);
  uint32 f = Fetch32(s + len - 4);
  uint32 h = len;

  return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h)))))));
}

static uint32 Hash32Len0to4(const char *s, size_t len) {
  uint32 b = 0;
  uint32 c = 9;
  for (size_t i = 0; i < len; i++) {
    signed char v = s[i];
    b = b * c1 + v;
    c ^= b;
  }
  return fmix(Mur(b, Mur(len, c)));
}

static uint32 Hash32Len5to12(const char *s, size_t len) {
  uint32 a = len, b = len * 5, c = 9, d = b;
  a += Fetch32(s);
  b += Fetch32(s + len - 4);
  c += Fetch32(s + ((len >> 1) & 4));
  return fmix(Mur(c, Mur(b, Mur(a, d))));
}

uint32 CityHash32(const char *s, size_t len) {
  if (len <= 24) {
    return len <= 12 ?
        (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
        Hash32Len13to24(s, len);
  }

  // len > 24
  uint32 h = len, g = c1 * len, f = g;
  uint32 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
  uint32 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
  uint32 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
  uint32 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
  uint32 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
  h ^= a0;
  h = Rotate32(h, 19);
  h = h * 5 + 0xe6546b64;
  h ^= a2;
  h = Rotate32(h, 19);
  h = h * 5 + 0xe6546b64;
  g ^= a1;
  g = Rotate32(g, 19);
  g = g * 5 + 0xe6546b64;
  g ^= a3;
  g = Rotate32(g, 19);
  g = g * 5 + 0xe6546b64;
  f += a4;
  f = Rotate32(f, 19);
  f = f * 5 + 0xe6546b64;
  size_t iters = (len - 1) / 20;
  do {
    uint32 a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
    uint32 a1 = Fetch32(s + 4);
    uint32 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
    uint32 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
    uint32 a4 = Fetch32(s + 16);
    h ^= a0;
    h = Rotate32(h, 18);
    h = h * 5 + 0xe6546b64;
    f += a1;
    f = Rotate32(f, 19);
    f = f * c1;
    g += a2;
    g = Rotate32(g, 18);
    g = g * 5 + 0xe6546b64;
    h ^= a3 + a1;
    h = Rotate32(h, 19);
    h = h * 5 + 0xe6546b64;
    g ^= a4;
    g = bswap_32(g) * 5;
    h += a4 * 5;
    h = bswap_32(h);
    f += a0;
    PERMUTE3(f, h, g);
    s += 20;
  } while (--iters != 0);
  g = Rotate32(g, 11) * c1;
  g = Rotate32(g, 17) * c1;
  f = Rotate32(f, 11) * c1;
  f = Rotate32(f, 17) * c1;
  h = Rotate32(h + g, 19);
  h = h * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  h = Rotate32(h + f, 19);
  h = h * 5 + 0xe6546b64;
  h = Rotate32(h, 17) * c1;
  return h;
}