Question

我目前正在研究C中的哈希表实现。我正在尝试实现动态调整大小，但遇到了一个问题。

如果调整哈希表的大小意味着创建一个具有双倍（或一半）大小，重新散列和删除旧散列表的新表，我该如何处理用户可能对旧表进行的旧引用？示例代码（我在此示例中省略了错误检查）：

int main(int argc, char *argv[])
{
    ht = ht_create(5) /* make hashtable with size 5 */
    ht_insert("john", "employee"); /* key-val pair "john -> employee" */
    ht_insert("alice", "employee");
    char *position = ht_get(ht, "alice"); /* get alice's position from hashtable ht */


    ht_insert("bob", "boss"); /* this insert exceeds the load factor, resizes the hash table */

    printf("%s", position); /* returns NULL because the previous hashtable that was resized was freed */

    return 0;
}

在这种情况下，position指向在哈希表中找到的alice的值。当它被调整大小时，我们释放了哈希表并丢失了它。如何解决此问题，以便用户不必担心以前定义的指针被释放？

编辑：我当前的哈希表实现

hash.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "hash.h"

#define LOADFACTOR 0.75

typedef struct tableentry /* hashtab entry */
{
    struct tableentry *next;
    char *key;
    void *val;
} tableentry_t;

typedef struct hashtable
{
    datatype_t type;
    size_t size;
    size_t load; /* number of keys filled */
    struct tableentry **tab;
} hashtable_t;

/* creates hashtable */
/* NOTE: dynamically allocated, remember to ht_free() */
hashtable_t *ht_create(size_t size, datatype_t type)
{
    hashtable_t *ht = NULL;
    if ((ht = malloc(sizeof(hashtable_t))) == NULL)
        return NULL;
    /* allocate ht's table */
    if ((ht->tab = malloc(sizeof(tableentry_t) * size)) == NULL)
        return NULL;
    /* null-initialize table */
    size_t i;
    for (i = 0; i < size; i++)
        ht->tab[i] = NULL;
    ht->size = size;
    ht->type = type;
    return ht;
}

/* creates hash for a hashtab */
static unsigned hash(char *s)
{
    unsigned hashval;
    for (hashval = 0; *s != '\0'; s++)
        hashval = *s + 31 * hashval;
    return hashval;
}

static int *intdup(int *i)
{
    int *new;
    if ((new = malloc(sizeof(int))) == NULL)
        return NULL;
    *new = *i;
    return new;
}

static void free_te(tableentry_t *te)
{
    free(te->key);
    free(te->val);
    free(te);
}

/* loops through linked list freeing */
static void free_te_list(tableentry_t *te)
{
    tableentry_t *next;
    while (te != NULL)
    {
        next = te->next;
        free_te(te);
        te = next;
    }
}

/* creates a key-val pair */
static tableentry_t *alloc_te(char *k, void *v, datatype_t type)
{
    tableentry_t *te = NULL;
    int status = 0;
    /* alloc struct */
    if ((te = calloc(1, sizeof(*te))) == NULL)
        status = -1;
    /* alloc key */
    if ((te->key = strdup(k)) == NULL)
        status = -1;
    /* alloc value */
    int *d;
    char *s;
    switch (type)
    {
        case STRING:
            s = (char *) v;
            if ((te->val = strdup(s)) == NULL)
                status = -1;
            break;
        case INTEGER:
            d = (int *) v;
            if ((te->val = intdup(d)) == NULL)
                status = -1;
            break;
        default:
            status = -1;
    }
    if (status < 0)
    {
        free_te_list(te);
        return NULL;
    }
    te->next = NULL;
    return te;
}

static tableentry_t *lookup(hashtable_t *ht, char *k)
{
    tableentry_t *te;
    /* step through linked list */
    for (te = ht->tab[hash(k) % ht->size]; te != NULL; te = te->next)
        if (strcmp(te->key, k) == 0)
            return te; /* found */
    return NULL; /* not found */
}

/* inserts the key-val pair */
hashtable_t *ht_insert(hashtable_t *ht, char *k, void *v)
{
    tableentry_t *te;
    /* unique entry */
    if ((te = lookup(ht, k)) == NULL)
    {
        te = alloc_te(k, v, ht->type);
        unsigned hashval = hash(k) % ht->size;
        /* insert at beginning of linked list */
        te->next = ht->tab[hashval]; 
        ht->tab[hashval] = te;
        ht->load++;
    }
    /* replace val of previous entry */
    else
    {
        free(te->val);
        switch (ht->type)
        {
            case STRING:
                if ((te->val = strdup(v)) == NULL)
                    return NULL;
                break;
            case INTEGER:
                if ((te->val = intdup(v)) == NULL)
                    return NULL;
                break;
            default:
                return NULL;
        }
    }
    return ht;
}

static void delete_te(hashtable_t *ht, char *k)
{
    tableentry_t *te, *prev;
    unsigned hashval = hash(k) % ht->size;
    te = ht->tab[hashval];
    /* point head to next element if deleting head */
    if (strcmp(te->key, k) == 0)
    {
        ht->tab[hashval] = te->next;
        free_te(te);
        ht->load--;
        return;
    }
    /* otherwise look through, keeping track of prev to reassign its ->next */
    for (; te != NULL; te = te->next)
    {
        if (strcmp(te->key, k) == 0)
        {
            prev->next = te->next;
            free_te(te);
            ht->load--;
            return;
        }
        prev = te;
    }   
}

hashtable_t *ht_delete(hashtable_t *ht, char *k)
{
    size_t i;
    if (lookup(ht, k) == NULL)
        return NULL;
    else
        delete_te(ht, k);

}

/* retrieve value from key */
void *ht_get(hashtable_t *ht, char *k)
{
    tableentry_t *te;
    if ((te = lookup(ht, k)) == NULL)
        return NULL;
    return te->val;
}

/* frees hashtable created from ht_create() */
void ht_free(hashtable_t *ht)
{
    size_t i;
    if (ht)
    {
        for (i = 0; i < ht->size; i++)
            if (ht->tab[i] != NULL)
                free_te_list(ht->tab[i]);
        free(ht);
    }
}

/* resizes hashtable, returns new hashtable and frees old */
static hashtable_t *resize(hashtable_t *oht, size_t size)
{
    hashtable_t *nht; /* new hashtable */
    nht = ht_create(size, oht->type);
    /* rehash */
    size_t i;
    tableentry_t *te;
    /* loop through hashtable */
    for (i = 0; i < oht->size; i++)
        /* loop through linked list */
        for (te = oht->tab[i]; te != NULL; te = te->next)
            /* insert & rehash old vals into new ht */
            if (ht_insert(nht, te->key, te->val) == NULL)
                return NULL;
    ht_free(oht);
    return nht;
}

hash.h

/* a hash-table implementation in c */
/*
hashing algorithm: hashval = *s + 31 * hashval
resolves collisions using linked lists
*/

#ifndef HASH
#define HASH

typedef struct hashtable hashtable_t;

typedef enum datatype {STRING, INTEGER} datatype_t;

/* inserts the key-val pair */
hashtable_t *ht_insert(hashtable_t *ht, char *k, void *v);

/* creates hashtable */
/* NOTE: dynamically allocated, remember to ht_free() */
hashtable_t *ht_create(size_t size, datatype_t type);

/* frees hashtable created from ht_create() */
void ht_free(hashtable_t *ht);

/* retrive value from key */
void *ht_get(hashtable_t *ht, char *k);

hashtable_t *ht_delete(hashtable_t *ht, char *k);

#endif

Answer 1

不要将哈希表用作数据的容器;只用它来引用数据，你就不会有这个问题了。

例如，假设你有键值对，使用的结构包含C99灵活数组成员中的实际数据：

struct pair {
    struct pair  *next; /* For hash chaining */
    size_t        hash; /* For the raw key hash */

    /* Payload: */
    size_t        offset; /* value starts at (data + offset) */
    char          data[]; /* key starts at (data) */
};

static inline const char *pair_key(struct pair *ref)
{
    return (const char *)(ref->data);
}

static inline const char *pair_value(struct pair *ref)
{
    return (const char *)(ref->data + ref->offset);
}

您的哈希表可以简单地

struct pair_hash_table {
    size_t        size;
    struct pair **entry;
};

如果您有struct pair_hash_table *ht，struct pair *foo foo->hash包含密钥的哈希值，则foo应该在挂起{{1}的单链接列表中}}

假设您希望调整哈希表ht->entry[foo->hash % ht->size];的大小。您选择了一个新的ht，并为那么多size分配了足够的内存。然后，遍历每个旧哈希条目中的每个单链表，将它们从旧列表中分离出来，并将它们预先添加到新哈希表中正确哈希表条目中的列表中。然后你只需释放旧的哈希表struct pair *数组，将其替换为新的数组：

entry

请注意，int resize_pair_hash_table(struct pair_hash_table *ht, const size_t new_size) { struct pair **entry, *curr, *next; size_t i, k; if (!ht || new_size < 1) return -1; /* Invalid parameters */ entry = malloc(new_size * sizeof entry[0]); if (!entry) return -1; /* Out of memory */ /* Initialize new entry array to empty. */ for (i = 0; i < new_size; i++) entry[i] = NULL; for (i = 0; i < ht->size; i++) { /* Detach the singly-linked list. */ next = ht->entry[i]; ht->entry[i] = NULL; while (next) { /* Detach the next element, as 'curr' */ curr = next; next = next->next; /* k is the index to this hash in the new array */ k = curr->hash % new_size; /* Prepend to the list in the new array */ curr->next = entry[k]; entry[k] = curr; } } /* Old array is no longer needed, */ free(ht->entry); /* so replace it with the new one. */ ht->entry = entry; ht->size = size; return 0; /* Success */ }中的hash字段不会被修改，也不会重新计算。

拥有原始哈希（而不是模数表大小），意味着即使不同的密钥使用相同的插槽，您也可以加快密钥搜索的速度：

struct pair

在C中，逻辑和运算符struct pair *find_key(struct pair_hash_table *ht, const char *key, const size_t key_hash) { struct pair *curr = ht->entry[key_hash % ht->size]; while (curr) if (curr->hash == key_hash && !strcmp(key, pair_key(next))) return curr; else curr = curr->next; return NULL; /* Not found. */ }是短路的。如果左侧不是真的，则根本不评估右侧，因为在这种情况下整个表达式永远不会为真。

在上面，这意味着比较密钥的原始哈希值，并且只有当它们匹配时，才会比较实际的字符串。如果您的哈希算法甚至是好的一半，这意味着如果密钥已经存在，通常只进行一次字符串比较;如果表中不存在密钥，通常不会进行字符串比较。

Answer 2

您可以像标准库（C ++）处理这个确切问题一样处理它们：

对容器的某些操作（例如插入，擦除，调整大小）使迭代器无效。

例如std::unordered_map基本上是用桶实现的哈希表，它们具有以下规则：

插入


unordered_ [multi] {set，map}：重新散列时所有迭代器都无效   发生，但参考不受影响[23.2.5 / 8]。不会发生重复   如果插入不会导致容器的大小超过z * B.   其中z是最大负载系数，B是当前数   桶。 [23.2.5 / 14]


擦除


unordered_ [multi] {set，map}：只有迭代器和对它的引用   删除的元素无效[23.2.5 / 13]

Iterator invalidation rules

迭代器的C ++概念是指针的泛化。所以这个概念可以应用于C.

您唯一的另一种选择是，不是将对象直接保存到容器中，而是添加另一级别的间接并保留某种代理。因此元素始终保持在内存中的相同位置。它是调整大小/插入等的代理。但是你需要分析这种情况：增加的双重间接（肯定会以负面的方式影响性能）并增加实现复杂性吗？拥有持久性指针是否重要？

如何处理对已调整大小的哈希表的旧引用？

2 个答案: