需要帮助在c中重新散列哈希表

时间:2015-08-30 10:41:24

标签: c hashtable

我希望通过为新表分配空间来遍历哈希表,遍历旧表,并为每个元素计算一个哈希表 新的哈希值,然后将其链接到新表。我将链表作为条目链接到哈希表中,因此第二个for循环遍历旧哈希表。我也想释放旧表,但首先要正确地将元素放入新表中。

我需要帮助,在穿过旧桌子时我哪里出错了?我也可以在最后将原始ht指向newht吗?我之后也需要释放旧桌子(可用),我稍后会知道。

typedef struct hashtable {
    htentry_ptr *table;         /*<< a pointer to the underlying table        */
    unsigned int size;          /*<< the current size of the underlying table */
    unsigned int num_entries;   /*<< the current number of entries            */
    float max_loadfactor;       /*<< the maximum load factor before the
                                 *   underlying table is resized              */
    unsigned short idx;         /*<< the index into the delta array           */
    unsigned int (*hash)(void *, unsigned int); /*<< a pointer to the hash function   */
    int (*cmp)(void *, void *);         /*<< a pointer to the comparison
                                         *   function                         */
} hashtable_t;

rehash功能看起来像这样

static void rehash(hashtab_ptr ht)
{
    hashtab_ptr prevtable;
    /* store reference to the old table */
    prevtable->table = ht->table;
    htentry_ptr p;
    unsigned int i;
    unsigned int newidx;
    printf("\nrehashing\n");
    ht->size = getsize(prevtable);
    printf("\nnew table size %d\n", ht->size);
    ht->table = calloc(ht->size , sizeof(htentry_t));
    for (i = 0; i < prevtable->size; i++) {
        for (p = prevtable->table[i]; p; p = p->next_ptr) {
            newidx = ht->hash(p->key, ht->size);
            if(ht->table[newidx]){
                htentry_ptr next;
                htentry_ptr prev = NULL;
                next = ht->table[newidx];
                printf("\ncollision adding to linked list\n");
                while (next) {
                    prev = next;
                    next = next->next_ptr;
                }
                prev->next_ptr = p;
                p->next_ptr = NULL;
            } else {
                ht->table[newidx] = p; 
                ht->table[newidx]->next_ptr = NULL;
                ht->num_entries++;
            }
        }
    }
}

插入哈希表。当表变得太密集时,在插入的末尾调用rehash函数。

int ht_insert(hashtab_ptr ht, void *key, void *value)
{
/* key is the id of the variable like num1 and value is number 
index = value 
*/

unsigned int N = ht->size;
unsigned int ne;
float current_loadfactor;
int k;
htentry_ptr p;
p = calloc(1,sizeof(htentry_t));
p->key = key;
p->value = value;
k = ht->hash(key, ht->size);
if(ht->table[k]){
    htentry_ptr next;
    htentry_ptr prev = NULL;
    /* theres already something in the index*/
    next = ht->table[k];
    printf("\ncollision adding to linked list");
    while (next) {
        prev = next;
        next = next->next_ptr;
    }
    ht->num_entries++;
    prev->next_ptr = p;
    p->next_ptr = NULL;
} else {
    ht->table[k] = p;
    ht->table[k]->next_ptr = NULL; 
    ht->num_entries++;
}
ne = ht->num_entries;
current_loadfactor = ne / N;
if (current_loadfactor > ht->max_loadfactor) {
    rehash(ht);
}

3 个答案:

答案 0 :(得分:1)

  

我也可以在最后将原始ht指向newht吗?

没有

指针ht是本地函数堆栈的副本。使用ht = newht;更改值只会更改副本。

最简单的解决方案是让你的rehash()函数返回指向新哈希表的指针。

static hashtab_ptr rehash(hashtab_ptr ht)
{
  [...]
  return newht;
}

然后你可以这样称呼它:

current_ht = rehash(current_ht);

第二个解决方案是更改原型以传递双指针:

static void rehash(hashtab_ptr *ht)
{
  [...]
  *ht = newht;
}

这意味着您需要在rehash()函数中的任何位置更改ht的使用,以反映它现在是双指针。

第三个解决方案是不创建新的hashtable_t,只需创建一个新的htentry_ptr *table区域并更新ht;中的值。这将是我在代码审核中最喜欢的解决方案

  

我需要帮助,在穿过旧桌子时我哪里出错?

  while (next)
  {
    prev = next;
    next = next->next_ptr;
    newht->num_entries++;
  }

newht->num_entries++;位于错误的位置。当您查找链接列表的末尾时,已存在的元素不会增加哈希表的大小。您可以将表达式newht->num_entries++;移出if / else - 无论是否发生碰撞,您的表都会增加一个。

其次,在链表循环结束时,它将如下所示:

prev = [last_element of linked list];
next = null;
prev->next_ptr = old_element;

但是...... old_element->next_ptr指向哪里?无法保证它为空。 因此,您需要添加p->next_ptr = NULL;,以便以前不在碰撞结束时并且现在处于碰撞结束的元素正确结束链接列表。

问题是你不能只做p->next_ptr = NULL;,因为那时你的循环认为它就在最后。当链表中间的链表元素被重新分配给新哈希表中的新索引时,您的概念就会被搞砸。该元素不能同时为next_ptr中的旧表和新表提供正确的值。

所以,有两种解决方案:
a)向后浏览你的碰撞列表,但由于这是一个单独的链表,这是将元素放在堆栈上的一个非常痛苦的过程。
b)通过创建新元素而不是尝试重用旧元素来重新表格。

修改

好的,使用insert函数,rehash函数看起来像这样(快速和脏):

static hashtab_ptr rehash(hashtab_ptr ht)
{
    hashtab_ptr prevtable = ht;
    hashtab_ptr newht;
    htentry_ptr p;
    unsigned int i;
    unsigned int newidx;

    printf("\nrehashing");

    newht->idx  = prevtable->idx + 1;
    newht->size = getsize(prevtable);
    newht->num_entries = 0;
    newht->hash = prevtable->hash;
    newht->cmp  = prevtable->cmp;

    newht->max_loadfactor = prevtable->max_loadfactor;

    newht->table = calloc(newht->size , sizeof(htentry_t));

    for (i = 0; i < ht->size; i++) {
      for (p = ht->table[i]; p; p = p->next_ptr) {
        ht_insert(newht, p->key, p->value);
    }

    return newht;
   }

然后你应该有一个释放哈希表的函数,所以你最终使用它:

if (current_loadfactor > ht->max_loadfactor) {
    hashtab_ptr tempht = ht;
    ht = rehash(ht);
    ht_delete(tempht);
}

答案 1 :(得分:0)

这是为了表明:

  • 您只需要重新分配table []成员,而不是信封
  • 指针指针可以简化事情
  • 将元素从旧表移动到新表时,应注意不要损坏其下一个指针

[注意:我删除了typedefines,因为我讨厌它们......]

#include <stdio.h>
#include <stdlib.h>

struct hashentry {
    struct hashentry *next;
    char *key;
    void *payload;
    };
struct hashtable {
    struct hashentry **table;   /*<< a pointer to array of pointers        */
    unsigned int size;          /*<< current size */
    unsigned int num_entries;   /*<< current number of entries    */
    float max_loadfactor;
    /* unsigned short idx;      the index into the delta array(Quoi?) */
    unsigned int (*hash)(void *, unsigned int); /*<< a pointer to the hash function   */
    int (*cmp)(void *, void *);         /*<< a pointer to the comparison function     */
    };

static void rehash(struct hashtable *ht);
// The rehash function could look like this
static void rehash(struct hashtable *ht)
{
    struct hashentry **newtab;
    struct hashentry **pp, **qq, *this;
    unsigned int newsize, oldidx, newidx;

    newsize = ht->size * 2; /* or something like (max_loadfactor*num_entries), rounded up */
    fprintf(stderr, "new table size %u\n", newsize);
    newtab = malloc(newsize * sizeof *newtab );

    for (newidx=0; newidx < newsize; newidx++) {
        newtab[newidx] = NULL;
        }

    for (oldidx = 0; oldidx < ht->size; oldidx++) {
        for (pp = &ht->table[oldidx]; *pp; ) {
            this = *pp;
            *pp = this->next; /* this is important ! */
            this->next = NULL; /* ... because ... */

            newidx = ht->hash(this->key, newsize);
            for(qq = &newtab[newidx]; *qq; qq = &(*qq)->next) {
                /* You could count the number of "collisions" here */
                }

            *qq = this;
        }
    }
    free(ht->table);
    ht->table = newtab;
    ht->size = newsize;
    /* The rest of the fields does not need to change */
}

答案 2 :(得分:0)

我认为可能是解决方案,但我不是100%肯定。

static void rehash(hashtab_ptr ht)
{
        unsigned int old_size, new_size;
        unsigned int newindex;
        unsigned int i;
        htentry_ptr q, p;
        htentry_ptr *new_table;
        old_size = ht->size;
        /*gets new size in prime table */
        new_size = getsize(ht);
        new_table = malloc(sizeof(htentry_t) * new_size);
        /* nullify the new table */
        for (i = 0; i < new_size; i++) {
            new_table[i] = NULL;
        }
        printf("\n*****rehashing******\n");
        ht->size = new_size; 
        printf("%s %d\n", "new size:", new_size);
        for (i = 0; i < old_size; i++) {
            p = ht->table[i];
            while (p) {
                q = p->next_ptr;
                newindex = ht->hash(p->key, new_size);
                /*
                temp = malloc(sizeof(htentry_t));
                temp->key = p->key;
                temp->value = p->value;
                temp->next_ptr = new_table[ht->hash(temp->key, next_size)];
                new_table[ht->hash(temp->key, next_size)] = temp;
                */
                if (new_table[newindex]) {
                    p->next_ptr = new_table[newindex];
                    new_table[newindex] = p;    
                } else {
                    new_table[newindex] = p;
                    new_table[newindex]->next_ptr = NULL;
                }
                p = q;
            }
        }

    free(ht->table);
    ht->table = new_table;  
}