Question

嗨，我正在尝试用纯 C 语言实现一个非常简单的哈希映射（hashmap），以字符串作为键、以 void 指针作为值，因为我希望这个映射能用于多种数据类型。

到目前为止，我的代码如下：

/* One key/value entry of the map. Both members are plain pointers: the code
   that fills them in stores the caller's pointers as given and copies
   neither the key nor the value. */
struct node{
    void * value; /* caller-supplied value pointer */
    char * key;   /* caller-supplied key pointer */
};

/*
 * Hash a NUL-terminated string: start from 5381 and fold in every byte as
 * hash = hash * 33 + byte (the djb2 scheme).
 *
 * Bytes are read as unsigned char so that the result does not depend on
 * whether plain char is signed on the target platform.
 *
 * string: NUL-terminated key; must not be NULL.
 * Returns the hash value; the empty string hashes to 5381.
 */
unsigned long strhash(const char *string)
{
    const unsigned char *p = (const unsigned char *)string;
    unsigned long hash = 5381;

    while (*p != '\0') {
        /* (hash << 5) + hash is hash * 33. */
        hash = ((hash << 5) + hash) + *p++;
    }
    return hash;
}


/*
 * Allocate a map with maxSize slots, all initially empty (NULL).
 *
 * maxSize: number of slots; must be positive, because the slot index is
 *          computed modulo this value.
 * Returns the new map, or NULL if maxSize is not positive or an allocation
 * fails. The caller releases the map with map_destroy().
 */
map_t *map_create(int maxSize){

    if (maxSize <= 0) {
        return NULL;
    }

    map_t *map = malloc(sizeof *map);
    if (map == NULL) {
        return NULL;
    }

    map->curSize = 0;
    map->maxSize = maxSize;
    /* calloc zero-fills the array, so every slot starts out empty. */
    map->nodes = calloc((size_t)maxSize, sizeof(node_t *));
    if (map->nodes == NULL) {
        free(map);
        return NULL;
    }

    return map;
}


/*
 * Allocate a node that refers to the given key and value.
 *
 * Only the two pointers are stored; neither the key string nor the value is
 * copied, so both must stay valid and unchanged for as long as the node is
 * in use.
 *
 * Returns the new node, or NULL if the allocation fails.
 */
node_t *node_create(char *key, void *value){

    node_t *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }

    node->key = key;
    node->value = value;
    return node;
}

/*
 * Insert a key/value pair using open addressing with linear probing.
 *
 * The starting slot is strhash(key) modulo the table size; if it is taken,
 * the following slots are tried in order, wrapping around at the end of the
 * array. At most maxSize slots are examined, so a full table makes the call
 * return without inserting instead of probing forever.
 *
 * The key and value pointers are stored as given (no copy is made). The
 * function has no return value, so a full table, an allocation failure or a
 * NULL map/key results in a silent no-op.
 */
void map_insert(map_t *map, char *key, void *value){

    if (map == NULL || key == NULL || map->maxSize <= 0) {
        return;
    }

    int idx = (int)(strhash(key) % (unsigned long)map->maxSize);
    int probes = 0;

    while (map->nodes[idx] != NULL) {
        if (++probes >= map->maxSize) {
            return; /* every slot has been examined: the table is full */
        }
        idx = (idx + 1) % map->maxSize; /* step to the next slot, wrapping */
    }

    /* Allocate only after a free slot is known, so nothing leaks when the
       table is full. */
    node_t *node = node_create(key, value);
    if (node == NULL) {
        return;
    }

    map->nodes[idx] = node;
    map->curSize++;
}

/*
 * Print every occupied slot of the map as "index / value".
 *
 * Each stored value pointer is dereferenced as an int, so this is only
 * meaningful for maps whose values point to int objects.
 */
void map_print(map_t *map){

    int slot = 0;

    while (slot < map->maxSize) {
        if (map->nodes[slot]) {
            int shown = *(int *)map->nodes[slot]->value;
            printf("index: %d\t value: %d\n", slot, shown);
        }
        slot++;
    }
}

/*
 * Release a map: every node, the slot array and the map itself.
 *
 * Only the node structures are freed; the key and value pointers stored in
 * them are not, so the caller remains responsible for that memory.
 * Passing NULL is a no-op.
 */
void map_destroy(map_t *map){

    if (map == NULL) {
        return;
    }

    if (map->nodes != NULL) {
        for (int i = 0; i < map->maxSize; i++) {
            free(map->nodes[i]); /* free(NULL) is a no-op for empty slots */
        }
    }
    free(map->nodes);
    free(map);
}



/* Demo driver: creates a 32-slot map, inserts 30 entries, prints the map
   and destroys it. */
int main(){

    map_t *map = map_create(32);
    for(int i = 0; i < 30; i++){
        /* NOTE(review): the address of the one loop variable i is passed as
           both key and value on every iteration. The map stores these
           pointers without copying, so all entries refer to the same object.
           In addition, (char*)&i is not a NUL-terminated string, which the
           string hash function expects. */
        map_insert(map, (char*)&i, &i);
    }
    map_print(map);
    map_destroy(map);
    return 0;
}

问题在于，打印映射时，输出结果不是我期望的那样：所有索引上检索到的值都是“30”，也就是最后插入映射的那个数字。如果我把值的类型改为 int，映射就能按预期工作，因此我在指针方面一定遗漏了某些关键的东西。

我并不精通 C 语言，因此如果有人能指出问题可能出在哪里，我将不胜感激。

Answer 1

问题是您每次调用map_insert()时都使用相同的指针。它只存储指针，不复制数据。每次循环时，您都会更改该内存的内容，因此所有哈希映射元素都指向相同的值。

有两种方法可以修复它。一种方法是每次调用map_insert()之前，都先为数据创建一个动态分配的副本：

/* Give every entry its own heap-allocated copy of the loop counter, so the
   stored pointers no longer all refer to the same variable. The copies are
   owned by the caller and must be freed by it once the map is done with
   them. */
for (int i = 0; i < 30; i++) {
    int *i_copy = malloc(sizeof *i_copy);
    if (i_copy == NULL) {
        break; /* out of memory: stop inserting */
    }
    *i_copy = i;
    /* The key is still just the bytes of an int, not a NUL-terminated
       string; that separate problem is deliberately left untouched here. */
    map_insert(map, (char *)i_copy, i_copy);
}

另一个选择是将值的大小添加到map_insert()和node_create()参数中。然后node_create调用malloc()和memcpy()将值复制到动态内存中。

顺便说一句，还有另一个问题。键应该是一个以空字符（'\0'）结尾的字符串（strhash()依赖于这一点），但是您使用的是&i，它是指向整数的指针。把指向整数的指针强制转换为char*并不会得到字符串，得到的只是指向同一位置、但数据类型不同的指针。上面的代码中我并没有解决这个问题。

Answer 2

OP 存储的是指向同一个值的引用，因此所有查找当然都会得到相同的值（它甚至不是字符串，而只是变量i的值在内存中的存储表示）。

我更喜欢把哈希映射的条目串成链表（即链地址法），并在每个条目中保留一份哈希值的副本：

/* One hash map entry. Entries that fall into the same slot are linked
   through next; the key is stored inline at the end of the allocation. */
struct entry {
    struct entry *next;      /* Next entry in the same slot's chain, or NULL */
    size_t        hash;      /* Hash of name, as returned by the map's hash function */
    void         *data;      /* Heap copy of the user data, or NULL when there is none */
    size_t        data_size; /* Size of data in bytes */
    int           data_type; /* Caller-defined type tag; also compared on lookup */
    unsigned char name[];    /* NUL-terminated key (flexible array member) */
};

/* Chained hash map: an array of slots, each heading a singly linked list of
   entries, plus the user-supplied hash function. */
typedef struct {
    size_t         size;  /* Number of slots in the slot array */
    size_t         used;  /* Number of entries, total */
    struct entry **slot;  /* Array of entry pointers */
    /* Hash function; called with the key and the key's length in bytes. */
    size_t       (*hash)(const unsigned char *, size_t);
} hashmap;

/*
 * Initialize *hmap as an empty hash map.
 *
 * hmap: map structure to initialize; must not be NULL.
 * size: number of slots to allocate; must be at least 1.
 * hash: hash function applied to keys; must not be NULL.
 *
 * Returns 0 on success and -1 on failure (NULL hmap, size below 1, NULL
 * hash, or out of memory). Whenever hmap itself is non-NULL, a failure
 * leaves it cleared: no slots, size 0, used 0, no hash function.
 */
int hashmap_new(hashmap *hmap, const size_t size,
                size_t (*hash)(const unsigned char *, size_t))
{
    if (hmap == NULL) {
        return -1; /* No hashmap specified */
    }

    /* Clear everything first so each failure path below leaves an empty map. */
    hmap->slot = NULL;
    hmap->hash = NULL;
    hmap->used = 0;
    hmap->size = 0;

    if (size < 1 || hash == NULL) {
        return -1; /* Invalid size, or no hash function specified */
    }

    /* Zero-filled array: every slot starts as an empty chain. */
    hmap->slot = calloc(size, sizeof *hmap->slot);
    if (hmap->slot == NULL) {
        return -1; /* Not enough memory */
    }

    hmap->hash = hash;
    hmap->size = size;
    return 0;
}

/*
 * Release everything owned by the hash map and reset it to the empty state.
 *
 * Every entry in every chain is freed together with its data buffer, then
 * the slot array itself. The hashmap structure is not freed; afterwards it
 * has no slots, size 0, used 0 and no hash function.
 * Passing NULL is a no-op.
 */
void hashmap_free(hashmap *hmap)
{
    if (!hmap)
        return;

    if (hmap->slot) {
        size_t  i = hmap->size;
        while (i-- > 0) {
            struct entry *next = hmap->slot[i];

            while (next) {
                struct entry *curr = next;
                next = next->next; /* Step forward before curr is released. */

                free(curr->data);

                /* Poison the entry, to help detect use-after-free bugs. */
                curr->next = NULL;
                curr->data = NULL;
                curr->hash = 0;
                curr->data_size = 0;
                curr->data_type = 0;
                curr->name[0] = '\0';

                free(curr);
            }
        }
    }

    free(hmap->slot);
    hmap->size = 0;
    hmap->used = 0;
    hmap->slot = NULL;
    hmap->hash = NULL;
}

要插入键值对，函数要么直接使用调用方给定的数据指针——在这种情况下，调用方有责任确保每个键都有各自独立、且之后不会被覆盖的数据；要么由函数复制用户数据。在上面的hashmap_free()函数中，您会看到free(curr->data);，这说明我们假设数据所在的内存是动态分配的，并把用户数据复制到了那里。所以：

/*
 * Add a name/data pair to the hash map.
 *
 * The name and (when data_size > 0) the data are copied into memory owned
 * by the map. The new entry is prepended to its slot's chain; no check is
 * made for an existing entry with the same name.
 *
 * hmap:      initialized hash map.
 * name:      NUL-terminated, non-empty key.
 * data:      data to copy; may be NULL only when data_size is 0.
 * data_size: number of bytes to copy from data; 0 stores no data.
 * data_type: caller-defined type tag stored with the entry.
 *
 * Returns 0 on success, -1 on invalid arguments, an uninitialized map, or
 * out of memory.
 */
int hashmap_add(hashmap *hmap, const unsigned char *name,
                const void *data, const size_t data_size,
                const int data_type)
{
    const size_t  namelen = (name) ? strlen((const char *)name) : 0;
    struct entry *curr;
    size_t        i;

    if (!hmap || !hmap->slot || hmap->size < 1 || !hmap->hash)
        return -1; /* No hashmap specified, or it is not initialized. */

    if (namelen < 1)
        return -1; /* NULL or empty name. */

    if (data_size > 0 && !data)
        return -1; /* Nothing to copy the data from. */

    /* Allocate memory for the hashmap entry,
       including enough room for the name, and end of string '\0'. */
    curr = malloc(sizeof (struct entry) + namelen + 1);
    if (!curr)
        return -1; /* Out of memory. */

    /* Copy data, if any. */
    if (data_size > 0) {
        curr->data = malloc(data_size);
        if (!curr->data) {
            free(curr);
            return -1; /* Out of memory. */
        }
        memcpy(curr->data, data, data_size);
    } else {
        curr->data = NULL;
    }

    /* Recorded in both cases, so lookups can report the stored size. */
    curr->data_size = data_size;
    curr->data_type = data_type;

    /* Calculate the hash of the name. */
    curr->hash = hmap->hash(name, namelen);

    /* Copy name, including the trailing '\0'. */
    memcpy(curr->name, name, namelen + 1);

    /* Slot to prepend to. */
    i = curr->hash % hmap->size;

    curr->next = hmap->slot[i];
    hmap->slot[i] = curr;

    /* An additional node added. */
    hmap->used++;

    return 0;
}

data_type的含义完全取决于代码的用户。可以根据哈希和数据类型进行查找：

/*
 * Look up an entry by name and data type.
 *
 * hmap:       initialized hash map.
 * name:       NUL-terminated, non-empty key.
 * data_type:  type tag the entry must have been stored with.
 * dataptr_to: if non-NULL, receives the entry's data pointer (still owned
 *             by the map), or NULL when nothing is found.
 * size_to:    if non-NULL, receives the entry's data size, or 0 when
 *             nothing is found.
 *
 * Returns 0 if found; -1 if not found, on invalid arguments, or if the map
 * is not initialized.
 */
int hashmap_find(hashmap *hmap, const unsigned char *name,
                 const int data_type,
                 void **dataptr_to, size_t *size_to)
{
    struct entry  *curr;
    size_t         hash;

    /* Clear the outputs first so every failure path reports "no data". */
    if (size_to)
        *size_to = 0;
    if (dataptr_to)
        *dataptr_to = NULL;

    if (!hmap || !hmap->slot || hmap->size < 1 || !hmap->hash)
        return -1; /* No hashmap specified, or it is not initialized. */
    if (!name || !*name)
        return -1; /* NULL or empty name. */

    hash = hmap->hash(name, strlen((const char *)name));

    for (curr = hmap->slot[hash % hmap->size]; curr != NULL; curr = curr->next) {
        /* The stored hash is compared first as a cheap filter before strcmp. */
        if (curr->data_type == data_type && curr->hash == hash &&
            !strcmp((const char *)curr->name, (const char *)name)) {
            /* Data type and name match. Save size if requested. */
            if (size_to)
                *size_to = curr->data_size;
            if (dataptr_to)
                *dataptr_to = curr->data;
            return 0; /* Found. */
        }
    }

    return -1; /* Not found. */
}

上面的查找函数在找到时返回0，在出错或未找到时返回非零值。（这样，即使是大小为零的NULL数据也可以存储在哈希映射中。）

如果支持的数据类型数量很少（例如不超过32种），可以使用unsigned int，并把其中的每一位（1U<<0 == 1，1U<<1 == 2，1U<<2 == 4等）保留给一种特定类型；这样就可以用掩码进行查找，只匹配指定的类型。同样，data_type也可以是一个掩码，描述该值可以被解释为哪些类型（几乎总是只设置一位）。

该方案还允许动态调整哈希映射的大小：分配一个新的slot指针数组，并把每个旧条目移动到新数组中。由于原始哈希值保存在每个条目中，因此不需要重新计算键的哈希。为了提高查找效率，链（挂在每个槽位上的链表）应尽可能短。常见的“经验法则”是hashmap->size应该在hashmap->used和2 * hashmap->used之间。

Answer 3

当您调用map_insert(map, (char*)&i, &i);时，插入到hashmap中的值是指向变量i的指针，即它在内存中的地址，而不是i的值。因此，当for循环中i的值发生变化时，哈希表中的所有条目都会随之受到影响；循环结束后，您看到的只会是最后一次赋给i的值。

C 语言中以 void 指针作为值的哈希映射（hashmap）实现问题

3 个答案: