我目前正在研究C中的哈希表实现。我正在尝试实现动态调整大小,但遇到了一个问题。
如果调整哈希表的大小意味着创建一个大小为原来两倍(或一半)的新表、把所有条目重新散列到新表中并删除旧表,那么我该如何处理用户可能仍然持有的、指向旧表的引用?示例代码(我在此示例中省略了错误检查):
int main(int argc, char *argv[])
{
ht = ht_create(5) /* make hashtable with size 5 */
ht_insert("john", "employee"); /* key-val pair "john -> employee" */
ht_insert("alice", "employee");
char *position = ht_get(ht, "alice"); /* get alice's position from hashtable ht */
ht_insert("bob", "boss"); /* this insert exceeds the load factor, resizes the hash table */
printf("%s", position); /* returns NULL because the previous hashtable that was resized was freed */
return 0;
}
在这种情况下,`position` 指向在哈希表中找到的 `alice` 的值。当哈希表被调整大小时,我们释放了旧的哈希表,这个值也随之丢失。如何解决此问题,以便用户不必担心之前获得的指针被释放?
编辑:我当前的哈希表实现
hash.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hash.h"
#define LOADFACTOR 0.75
/* A single key/value node; nodes that share a bucket are chained via next. */
struct tableentry
{
    struct tableentry *next; /* following node in the same bucket, or NULL */
    char *key;               /* key string compared with strcmp() */
    void *val;               /* value associated with key */
};

typedef struct tableentry tableentry_t;
/* The table itself: an array of bucket heads plus bookkeeping. */
struct hashtable
{
    datatype_t type;         /* kind of value stored (STRING or INTEGER) */
    size_t size;             /* number of buckets in tab */
    size_t load;             /* number of keys filled */
    struct tableentry **tab; /* bucket array; each slot heads a chain */
};

typedef struct hashtable hashtable_t;
/* creates hashtable */
/* NOTE: dynamically allocated, remember to ht_free() */
/*
 * Creates an empty hashtable with `size` buckets holding values of `type`.
 *
 * Returns the new table, or NULL if `size` is 0 or an allocation fails.
 * NOTE: dynamically allocated, remember to ht_free().
 */
hashtable_t *ht_create(size_t size, datatype_t type)
{
    /* a table with no buckets would make every `hash % size` divide by zero */
    if (size == 0)
        return NULL;

    hashtable_t *ht = malloc(sizeof *ht);
    if (ht == NULL)
        return NULL;

    /* one pointer per bucket; calloc also guards the size multiplication */
    ht->tab = calloc(size, sizeof *ht->tab);
    if (ht->tab == NULL)
    {
        free(ht); /* do not leak the header when the bucket array fails */
        return NULL;
    }

    /* null-initialize table explicitly rather than relying on zero bytes */
    for (size_t i = 0; i < size; i++)
        ht->tab[i] = NULL;

    ht->size = size;
    ht->load = 0; /* previously left uninitialized, yet incremented on insert */
    ht->type = type;
    return ht;
}
/* creates hash for a hashtab */
/*
 * Computes the raw hash of the NUL-terminated string s using
 * hashval = c + 31 * hashval for each character c. The caller reduces the
 * result modulo the table size.
 */
static unsigned hash(char *s)
{
    unsigned acc = 0;

    while (*s != '\0')
    {
        acc = acc * 31 + *s;
        s++;
    }
    return acc;
}
/*
 * Returns a malloc'ed copy of the int pointed to by i, or NULL if the
 * allocation fails. The caller owns the result and must free() it.
 */
static int *intdup(int *i)
{
    int *copy = malloc(sizeof(int));

    if (copy != NULL)
        *copy = *i;
    return copy;
}
/* Releases one entry: its value, its key, and then the node itself. */
static void free_te(tableentry_t *te)
{
    char *owned_key = te->key;
    void *owned_val = te->val;

    free(owned_val);
    free(owned_key);
    free(te);
}
/* loops through linked list freeing */
/* Frees every entry of a chain, starting at te; a NULL chain is a no-op. */
static void free_te_list(tableentry_t *te)
{
    tableentry_t *node, *after;

    for (node = te; node != NULL; node = after)
    {
        /* remember the successor before the node is released */
        after = node->next;
        free_te(node);
    }
}
/* creates a key-val pair */
/*
 * Creates a detached key-val entry.
 *
 * The key is copied with strdup(); the value is copied according to `type`
 * (STRING: strdup of the C string, INTEGER: copy of the pointed-to int).
 *
 * Returns the new entry with next == NULL, or NULL if an allocation fails or
 * `type` is not a known datatype. Nothing is leaked on failure.
 */
static tableentry_t *alloc_te(char *k, void *v, datatype_t type)
{
    /* alloc struct; bail out immediately so te is never dereferenced as NULL */
    tableentry_t *te = calloc(1, sizeof(*te));
    if (te == NULL)
        return NULL;

    te->next = NULL;
    te->val = NULL;

    /* alloc key */
    if ((te->key = strdup(k)) == NULL)
        goto fail;

    /* alloc value */
    switch (type)
    {
    case STRING:
        te->val = strdup((char *) v);
        break;
    case INTEGER:
        te->val = intdup((int *) v);
        break;
    default:
        /* unknown datatype: te->val stays NULL and is rejected below */
        break;
    }
    if (te->val == NULL)
        goto fail;

    return te;

fail:
    /* free(NULL) is a no-op, so a partially built entry is safe to release */
    free_te(te);
    return NULL;
}
/*
 * Finds the entry whose key equals k.
 * Returns the entry, or NULL when the key is not in the table.
 */
static tableentry_t *lookup(hashtable_t *ht, char *k)
{
    tableentry_t *node = ht->tab[hash(k) % ht->size];

    /* walk the bucket's chain until the key matches or the chain ends */
    while (node != NULL && strcmp(node->key, k) != 0)
        node = node->next;

    return node;
}
/* inserts the key-val pair */
/*
 * Inserts the key-val pair, or replaces the value if k is already present.
 *
 * The key and the value are copied; the caller keeps ownership of k and v.
 * Returns ht on success, or NULL on failure (allocation failure or unknown
 * datatype). On failure the table is left unchanged.
 *
 * Note: this function does not resize the table itself.
 */
hashtable_t *ht_insert(hashtable_t *ht, char *k, void *v)
{
    tableentry_t *te = lookup(ht, k);

    /* unique entry */
    if (te == NULL)
    {
        te = alloc_te(k, v, ht->type);
        if (te == NULL)
            return NULL;

        size_t bucket = hash(k) % ht->size;
        /* insert at beginning of linked list */
        te->next = ht->tab[bucket];
        ht->tab[bucket] = te;
        ht->load++;
        return ht;
    }

    /*
     * Replace val of previous entry. Duplicate the new value first and free
     * the old one only afterwards: the old value survives a failed
     * allocation, and v may legitimately point at the old value itself.
     */
    void *fresh;
    switch (ht->type)
    {
    case STRING:
        fresh = strdup(v);
        break;
    case INTEGER:
        fresh = intdup(v);
        break;
    default:
        return NULL;
    }
    if (fresh == NULL)
        return NULL;

    free(te->val);
    te->val = fresh;
    return ht;
}
/*
 * Unlinks and frees the entry whose key equals k, decrementing ht->load.
 * Does nothing if the key is not present (including an empty bucket).
 */
static void delete_te(hashtable_t *ht, char *k)
{
    /*
     * `link` points at the pointer that refers to the current node: first the
     * bucket head, then each node's next field. Rewriting *link unlinks the
     * node without special-casing the head or tracking a previous node.
     */
    tableentry_t **link = &ht->tab[hash(k) % ht->size];

    while (*link != NULL)
    {
        tableentry_t *te = *link;

        if (strcmp(te->key, k) == 0)
        {
            *link = te->next;
            free_te(te);
            ht->load--;
            return;
        }
        link = &te->next;
    }
}
/*
 * Removes the entry for key k.
 * Returns ht if an entry was removed, or NULL if k was not in the table.
 */
hashtable_t *ht_delete(hashtable_t *ht, char *k)
{
    if (lookup(ht, k) == NULL)
        return NULL;

    delete_te(ht, k);
    /* the success path previously fell off the end without returning a value */
    return ht;
}
/* retrieve value from key */
/*
 * Retrieves the value stored under key k.
 * Returns the table's own value pointer (not a copy), or NULL if k is absent.
 */
void *ht_get(hashtable_t *ht, char *k)
{
    tableentry_t *hit = lookup(ht, k);

    return (hit != NULL) ? hit->val : NULL;
}
/* frees hashtable created from ht_create() */
/*
 * Frees a hashtable created from ht_create(): every entry, the bucket array,
 * and the table header. Passing NULL is a no-op.
 */
void ht_free(hashtable_t *ht)
{
    if (ht == NULL)
        return;

    /* free_te_list() accepts an empty chain, so no per-bucket NULL check */
    for (size_t i = 0; i < ht->size; i++)
        free_te_list(ht->tab[i]);

    free(ht->tab); /* the bucket array itself was previously leaked */
    free(ht);
}
/* resizes hashtable, returns new hashtable and frees old */
/*
 * Resizes a hashtable to `size` buckets.
 *
 * Returns the new hashtable and frees the old table header and bucket array.
 * The entries themselves are moved, not copied: each node is relinked into
 * its bucket in the new table, so value pointers previously obtained from
 * ht_get() remain valid after the resize.
 *
 * Returns NULL (leaving oht untouched and still owned by the caller) if
 * `size` is 0 or the new table cannot be allocated.
 */
static hashtable_t *resize(hashtable_t *oht, size_t size)
{
    if (size == 0)
        return NULL;

    hashtable_t *nht = ht_create(size, oht->type); /* new hashtable */
    if (nht == NULL)
        return NULL;

    /* rehash: loop through hashtable */
    for (size_t i = 0; i < oht->size; i++)
    {
        tableentry_t *te = oht->tab[i];

        oht->tab[i] = NULL;
        /* loop through linked list, detaching one node at a time */
        while (te != NULL)
        {
            tableentry_t *next = te->next;
            size_t bucket = hash(te->key) % nht->size;

            /* prepend the existing node to its new bucket */
            te->next = nht->tab[bucket];
            nht->tab[bucket] = te;
            te = next;
        }
    }
    nht->load = oht->load;

    /* the nodes now belong to nht; release only the old shell */
    free(oht->tab);
    free(oht);
    return nht;
}
hash.h
/* a hash-table implementation in c */
/*
hashing algorithm: hashval = *s + 31 * hashval
resolves collisions using linked lists
*/
#ifndef HASH
#define HASH
/*
 * NOTE(review): size_t is used below, but this header includes nothing that
 * defines it; callers currently have to include <stddef.h> (or <stdlib.h>)
 * before this header.
 */
/* opaque table handle; the struct layout is private to hash.c */
typedef struct hashtable hashtable_t;
/* kind of value a table stores; it selects how values are copied on insert */
typedef enum datatype {STRING, INTEGER} datatype_t;
/* inserts the key-val pair, or replaces the value if k already exists */
/* returns ht on success; a NULL return indicates failure */
hashtable_t *ht_insert(hashtable_t *ht, char *k, void *v);
/* creates hashtable with `size` buckets for values of `type` */
/* returns NULL if allocation fails */
/* NOTE: dynamically allocated, remember to ht_free() */
hashtable_t *ht_create(size_t size, datatype_t type);
/* frees hashtable created from ht_create(); NULL is accepted */
void ht_free(hashtable_t *ht);
/* retrieve value from key; returns NULL if k is not in the table */
void *ht_get(hashtable_t *ht, char *k);
/* removes the entry for key k; returns NULL if k is not in the table */
hashtable_t *ht_delete(hashtable_t *ht, char *k);
#endif
答案 0 :(得分:1)
不要将哈希表用作数据的容器;只用它来引用数据,你就不会有这个问题了。
例如,假设你有键值对,使用的结构包含C99灵活数组成员中的实际数据:
/*
 * One key/value pair, allocated as a single object: the header fields are
 * followed by the key and the value packed into the flexible array member.
 * The hash table only links these objects together; it does not own copies.
 */
struct pair {
struct pair *next; /* For hash chaining: next pair in the same slot, or NULL */
size_t hash; /* For the raw key hash (not reduced modulo the table size) */
/* Payload: */
size_t offset; /* value starts at (data + offset) */
char data[]; /* key starts at (data); compared with strcmp(), so NUL-terminated */
};
/* Returns the pair's key, which is stored at the start of the payload. */
static inline const char *pair_key(struct pair *ref)
{
    const char *key = ref->data;

    return key;
}
/* Returns the pair's value, stored `offset` bytes into the payload. */
static inline const char *pair_value(struct pair *ref)
{
    const char *value = &ref->data[ref->offset];

    return value;
}
您的哈希表可以简单地
/* Hash table that only references pairs; each slot heads a singly-linked list. */
struct pair_hash_table {
size_t size; /* number of slots in entry */
struct pair **entry; /* slot array; a pair lives in entry[pair->hash % size] */
};
如果您有 `struct pair_hash_table *ht` 和 `struct pair *foo`,并且 `foo->hash` 包含密钥的哈希值,则 `foo` 应该位于挂在 `ht->entry[foo->hash % ht->size]` 上的单链表中。
假设您希望调整哈希表 `ht` 的大小。您选择了一个新的 `size`,并为那么多个 `struct pair *` 分配了足够的内存。然后,遍历每个旧哈希条目中的每个单链表,将其中的元素从旧列表中分离出来,并把它们添加到新哈希表中正确条目的列表头部。最后只需释放旧的 `entry` 数组,并将其替换为新的数组:
请注意,int resize_pair_hash_table(struct pair_hash_table *ht, const size_t new_size)
{
struct pair **entry, *curr, *next;
size_t i, k;
if (!ht || new_size < 1)
return -1; /* Invalid parameters */
entry = malloc(new_size * sizeof entry[0]);
if (!entry)
return -1; /* Out of memory */
/* Initialize new entry array to empty. */
for (i = 0; i < new_size; i++)
entry[i] = NULL;
for (i = 0; i < ht->size; i++) {
/* Detach the singly-linked list. */
next = ht->entry[i];
ht->entry[i] = NULL;
while (next) {
/* Detach the next element, as 'curr' */
curr = next;
next = next->next;
/* k is the index to this hash in the new array */
k = curr->hash % new_size;
/* Prepend to the list in the new array */
curr->next = entry[k];
entry[k] = curr;
}
}
/* Old array is no longer needed, */
free(ht->entry);
/* so replace it with the new one. */
ht->entry = entry;
ht->size = size;
return 0; /* Success */
}
`struct pair` 中的 `hash` 字段不会被修改,也不会重新计算。
拥有原始哈希(而不是对表大小取模之后的值),意味着即使不同的密钥落在同一个插槽中,您也可以加快密钥搜索的速度:
在C中,逻辑和运算符struct pair *find_key(struct pair_hash_table *ht,
const char *key, const size_t key_hash)
{
struct pair *curr = ht->entry[key_hash % ht->size];
while (curr)
if (curr->hash == key_hash && !strcmp(key, pair_key(next)))
return curr;
else
curr = curr->next;
return NULL; /* Not found. */
}
在C中,逻辑与运算符 `&&` 是短路的。如果左侧不为真,则根本不会对右侧求值,因为在这种情况下整个表达式不可能为真。
在上面,这意味着比较密钥的原始哈希值,并且只有当它们匹配时,才会比较实际的字符串。如果您的哈希算法甚至是好的一半,这意味着如果密钥已经存在,通常只进行一次字符串比较;如果表中不存在密钥,通常不会进行字符串比较。
答案 1 :(得分:-1)
您可以像标准库(C ++)处理这个确切问题一样处理它们:
对容器的某些操作(例如插入,擦除,调整大小)使迭代器无效。
例如std::unordered_map
基本上是用桶实现的哈希表,它们具有以下规则:
- 插入
unordered_[multi]{set,map}:发生重新散列时所有迭代器都会失效,但引用不受影响 [23.2.5/8]。如果插入不会导致容器的大小超过 z * B(其中 z 是最大负载因子,B 是当前的桶数),则不会发生重新散列 [23.2.5/14]。
- 擦除
unordered_[multi]{set,map}:只有指向被删除元素的迭代器和引用会失效 [23.2.5/13]
迭代器的C ++概念是指针的泛化。所以这个概念可以应用于C.
您唯一的另一种选择是,不把对象直接保存在容器中,而是增加一层间接,在容器中只保存某种代理。这样元素本身始终保持在内存中的同一位置,被调整大小、插入等操作移动的只是代理。但是你需要权衡这种做法:额外的双重间接(肯定会对性能产生负面影响)和增加的实现复杂性是否值得?拥有持久有效的指针真的那么重要吗?