在C语言的线性探测哈希表中进行重新散列(rehash)

时间:2015-01-18 16:24:11

标签: c hash

在学习散列的过程中,我尝试实现一个使用线性探测来解决冲突的哈希表。每当负载因子alpha(已填充的桶数/总桶数)达到或超过0.75时,我就增大表的大小。下面是相应的代码。但是程序在执行过程中会中途停止。令人困惑的是,有时表的扩容能连续成功好几次,有时却不行。扩容是在rehash函数中完成的。表的初始大小为31,最大可以增长到8191。

#include<math.h>
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
// Slot states stored in Cell.flag.
// null: state every slot is given when a table is first initialised.
#define null 0
// occupied: set by insert() when a key is stored in the slot.
#define occupied 1
// deleted: set by delete() on a slot whose key was removed.
#define deleted 2
// Table size at which rehash() refuses to grow; main() also uses it as the
// modulus for the random keys it generates.
#define maxLen 8191

// One slot of the open-addressing table.
typedef struct cell
{
    int key;       // stored key; unused slots are initialised to -100
    short flag;    // one of null / occupied / deleted
} Cell;

// Forward declarations of the table operations defined later in the file.
void insert(Cell *a, Cell* copy, int key);
int search(Cell* a, int key);
void delete(Cell* a, int key);
void rehash(Cell* a, Cell* copy);
int nextPrime(int n);
int isPrime(int n);

// Shared table state used by every operation.
int buckets = 31;     // current number of slots; changed by rehash()
int filled = 0;       // count maintained by insert(), delete() and rehash()
float alpha = 0.0;    // load factor, recomputed as filled / buckets

// Demo driver: allocates the initial table, inserts `total` random keys and
// then serves single-letter commands (i / s / d / e) read from stdin.
// Returns 0 on a normal exit and 1 when the initial allocation or the first
// read fails.
//
// NOTE(review): insert() receives hashTable by value and rehash() frees the
// block it is handed, so after the first resize this function still holds the
// old pointer. Curing that needs a signature change in insert()/rehash().
int main()
{
    int i, total, val;
    printf("\nHashing with linear probing\n");
    printf("_______________________________________________________________\n\n");

    // copy is only forwarded to insert(); start it at NULL so that no
    // indeterminate pointer value is ever read.
    Cell *hashTable, *copy = NULL;
    hashTable = malloc(buckets * sizeof(Cell));
    if (hashTable == NULL)
    {
        perror("Failed malloc() for hashTable");
        return 1;
    }
    for (i = 0; i < buckets; i++)
    {
        hashTable[i].key = -100;
        hashTable[i].flag = null;
    }

    printf("Initially:\n");
    printf("1. Number of buckets = %d\n",buckets);
    printf("2. Load factor (alpha) = %4.2f\n", alpha);
    printf("_______________________________________________________________\n\n");
    printf("Enter the number of values to be hashed in the table.\n");
    if (scanf("%d",&total) != 1)
    {
        printf("Invalid number of values.\n");
        free(hashTable);
        return 1;
    }

    //Peripheral routines to hash random integers into hash table
    srand(time(NULL));

    for (i = 0; i < total; i++)
    {
        int temp = rand() % maxLen;
        insert(hashTable, copy, temp);
    }
    printf("\n");
    printf("Following is a list of operation and the respective commands:\n\n");
    printf(" 1. Insert - i\n 2. Search - s\n 3. Delete - d\n 4. Exit - e\n\n");

    char option;
    // The leading blank in " %c" skips the newline left behind by earlier
    // reads; the loop also ends on EOF instead of spinning forever.
    while (scanf(" %c",&option) == 1)
    {
        if (option == 'e')
        {
            break;
        }
        else if (option == 'i')
        {
            printf("Please enter the value to be inserted : ");
            if (scanf("%d",&val) != 1) break;    // EOF or non-numeric input
            insert(hashTable, copy, val);
            printf("\n");
        }
        else if (option == 's')
        {
            printf("Please enter the value to be searched : ");
            if (scanf("%d",&val) != 1) break;    // EOF or non-numeric input
            search(hashTable, val);
            printf("\n");
        }
        else if (option == 'd')
        {
            printf("Please enter the value to be deleted : ");
            if (scanf("%d",&val) != 1) break;    // EOF or non-numeric input
            delete(hashTable, val);
            printf("\n");
        }
    }
    free(hashTable);
    return(0);
}

// Inserts key into table a using linear probing.
//
// a    - table of `buckets` slots
// copy - forwarded unchanged to rehash()
// key  - value to store; any int is accepted (negative keys are mapped into
//        the range 0..buckets-1)
//
// Prints where the key hashed and where it was placed. A key that is already
// present, or a table without any free slot, leaves the table unchanged.
//
// NOTE(review): rehash() takes `a` by value and frees it, so after a resize
// the pointer used below is stale. Fixing that needs a different signature
// (pointer-to-pointer, or returning the new table).
void insert(Cell *a, Cell* copy, int key)    // Method of linear probing
{
    int lt, rt, shift, hashKey, probes;
    int firstFree = -1;    // first reusable (deleted) slot seen while probing

    alpha = (float)(filled + 1)/buckets;
    if (alpha >= 0.75) { rehash(a, copy); }

    // C's % keeps the sign of the dividend, so fold negatives back in range.
    hashKey = key % buckets;
    if (hashKey < 0) hashKey += buckets;
    lt = hashKey;
    printf("Key = %4d  Hashed at = %4d  ", key, lt);

    // Walk the whole probe chain (up to the first never-used slot) so that a
    // key stored beyond a deleted slot is still recognised as present.
    for (probes = 0; probes < buckets && a[hashKey].flag != null; probes++)
    {
        if (a[hashKey].flag == occupied)
        {
            if (a[hashKey].key == key)
            {
                printf("Key already present in table at %4d.\n",hashKey);
                return;
            }
        }
        else if (firstFree < 0)
        {
            firstFree = hashKey;
        }
        hashKey = (hashKey + 1) % buckets;
    }

    if (firstFree < 0)
    {
        if (probes == buckets)    // every slot is occupied
        {
            printf("Table is full; key not inserted.\n");
            return;
        }
        firstFree = hashKey;      // the empty slot that ended the probe
    }

    rt = firstFree;
    a[rt].key = key;
    a[rt].flag = occupied;
    if (rt >= lt) shift = rt - lt;
    else          shift = buckets - lt + rt;

    printf("Placed at = %4d  Shift = %4d\n", rt, shift);
    filled++;
    alpha = (float)filled/buckets;
    return;
}

// Looks key up in table a by linear probing.
//
// Returns the index of the occupied slot holding key, or -1 when the key is
// not stored. A message describing the outcome is printed either way.
// Probing stops at the first never-used slot or after `buckets` probes, so a
// table without empty slots cannot cause an endless loop.
int search(Cell* a, int key)
{
    int probes;
    int hashKey = key % buckets;

    // C's % keeps the sign of the dividend, so fold negatives back in range.
    if (hashKey < 0) hashKey += buckets;

    for (probes = 0; probes < buckets && a[hashKey].flag != null; probes++)
    {
        // Deleted slots are stepped over: only a live entry counts as a hit.
        if (a[hashKey].flag == occupied && a[hashKey].key == key)
        {
            printf("Element found in table at position %d.\n", hashKey);
            return hashKey;
        }
        hashKey = (hashKey + 1)% buckets;
    }
    printf("Element not found in table.\n");
    return -1;
}

// Removes key from table a, if present.
// The slot found by search() is flagged as deleted rather than emptied, and
// the fill count and load factor are updated to match.
void delete(Cell* a, int key)
{
    const int slot = search(a, key);

    if (slot != -1)
    {
        a[slot].flag = deleted;
        filled -= 1;
        alpha = (float)filled / buckets;
        printf("Element has been deleted from the table.\n");
    }
}

// Grows the table: picks a new size with nextPrime(2 * buckets), builds a
// fresh table in `copy`, and re-inserts every occupied entry of `a` into it
// by linear probing. Updates the globals buckets, filled and alpha.
//
// NOTE(review): both parameters are passed by value, so none of the pointer
// assignments below are visible to the caller, which keeps pointing at the
// block released by free(a).
void rehash(Cell* a, Cell* copy)
{
    // NOTE(review): this only fires if the size sequence produced by
    // nextPrime(2 * buckets) lands exactly on maxLen - verify that it does.
    if (buckets == maxLen)
    {
        printf("Table size cannot exceed 8191.\n");
        return;
    }
    int i = 0, temp = 0, count = 0, num = buckets, hashKey;
    buckets = nextPrime(2*num);
    printf("_____________________________________________________________\n\n");
    printf("Due to load factor(alpha) exceeding 0.75, table resized to %d\n\n",buckets);

    // New, larger table with every slot marked unused.
    // NOTE(review): the malloc result is used without a NULL check.
    copy = malloc(buckets * sizeof(Cell));
    for (i = 0; i < buckets; i++)
    {
        copy[i].key = -100;
        copy[i].flag = null;
    }

    // Re-insert the live entries of the old table (num slots) into copy.
    for (i = 0; i < num; i++)
    {
        if(a[i].flag != occupied)
        {
            //printf("Unoccupied: \n");
            //printf("   a: index = %4d  key = %4d  flag = %4d  count = %4d\n",i,a[i].key, a[i].flag, count);
            //printf("copy: index = %4d  key = %4d  flag = %4d  count = %4d\n\n",hashKey,copy[i].key, copy[i].flag, count);
            continue;
        }
        temp = a[i].key;
        hashKey = temp % buckets;
        while (copy[hashKey].flag == occupied)
        {
            hashKey = (hashKey + 1) % buckets;
        }
        copy[hashKey].key = temp;
        copy[hashKey].flag = occupied;
        count++;
        //printf("Occupied: \n");
        //printf("   a: index = %4d  key = %4d  flag = %4d  count = %4d\n", i, a[i].key, a[i].flag, count);
        //printf("copy: index = %4d  key = %4d  flag = %4d  count = %4d\n\n", hashKey, copy[hashKey].key, copy[hashKey].flag, count);
    }
    free(a);
    // NOTE(review): this block is allocated and initialised, then becomes
    // unreachable when `a = copy` overwrites the only pointer to it.
    a = malloc(buckets * sizeof(Cell));
    for (i = 0; i < buckets; i++)
    {
        a[i].key = -100;
        a[i].flag = null;
    }
    a = copy;
    // NOTE(review): a and copy refer to the same block here, so this free
    // releases the table that was just built.
    free(copy);
    filled = count;
    alpha = (float)filled/buckets;
}

// Returns the smallest prime strictly greater than n.
// Callers in this file pass an even n, but odd and small values are handled
// too: only 2 and odd candidates are ever handed to isPrime().
int nextPrime(int n)
{
    int num;

    if (n < 2) return 2;

    num = n + 1;
    if (num % 2 == 0) num++;      // n was odd: skip the even candidate
    while(!isPrime(num)) num += 2;
    return num;
}

// Returns 1 when n is prime and 0 otherwise (including n < 2 and even
// numbers greater than 2).
int isPrime(int n)
{
    int i;

    if (n < 2) return 0;
    if (n < 4) return 1;          // 2 and 3
    if (n % 2 == 0) return 0;

    // Trial division by odd numbers; i <= n / i is i * i <= n without the
    // risk of overflow and without floating-point sqrt().
    for (i = 3; i <= n / i; i += 2)
        if (n % i == 0) return 0;
    return 1;
}

2 个答案:

答案 0 :(得分:1)

当你rehash时,你将旧的和新的数组作为指针传递。你还在函数末尾做了一些相当令人意外的事情:

// Abridged copy of the question's rehash(); the placeholder comments stand
// for the parts that were left out.
void rehash(Cell* a, Cell* copy)
{
    // determine new bucket size

    copy = malloc(buckets * sizeof(Cell));
    // copy entries

    free(a);
    a = malloc(buckets * sizeof(Cell));     // (a)
    for (i = 0; i < buckets; i++)
    {
        a[i].key = -100;
        a[i].flag = null;
    }
    // From here on a refers to the same block as copy; the block allocated
    // at (a) is no longer reachable.
    a = copy;
    free(copy);     // (b)
}

让我们先来看看结尾。在(a)中,您分配空间,初始化所有内容,然后立即用副本覆盖a,从而有效地放弃新内存的句柄。 (另外,为什么要在最后初始化?这是复制和粘贴的剩余部分吗?)跳过分配和初始化。

接下来,你把copy的句柄赋给了a,这两个指针现在指向同一个数组。当你在(b)处释放copy时,就把a所指向的数组也一并释放了。

去掉free(a);之后的所有代码。

因为您已将哈希数组作为指针传递,所以调用函数无法知道您所做的更改。在更改数组内容的函数中,传递指针就足够了。但malloc会更改指针本身,因此至少copy应该是指向指针的指针。更好的是,你可以传递原始数组:

// Sketch of rehash() taking the address of the caller's table pointer, so
// the new table can be stored back through it. The placeholder comments
// stand for the parts that were left out.
void rehash(Cell **a)
{
    // determine new bucket size

    Cell *copy = malloc(buckets * sizeof(Cell));
    // copy entries

    free(*a);
    *a = copy;      // now the new array is in effect
}

或者,您可以返回新指针。 (您可以使用malloc内存来安全地执行此操作,但不能使用本地数组。)因此,您的函数可能如下所示:

// Sketch of rehash() that hands the new table back as its return value.
// The old table a is freed here, so the parameter is a plain (non-const)
// pointer and the caller must replace its own pointer with the result.
// The placeholder comments stand for the parts that were left out.
Cell *rehash(Cell *a)
{
    // determine new bucket size

    Cell *copy = malloc(buckets * sizeof(Cell));
    // copy entries

    free(a);
    return copy;
}

并将其称为:

a = rehash(a);

在rehash函数之外不需要copy,因此你不应该把它传给其他函数(例如insert)。

答案 1 :(得分:0)

最后,我终于解决了这个问题。除了上面答案的帮助之外,还需要注意一点:对rehash函数的调用是在insert函数内部发出的,因此新的(更大的)表只返回到了insert函数,而insert函数并没有把任何东西返回给main函数。为了解决这个问题,我找到了以下三种方法:

  1. 如上所述,传递给void函数的参数应该是指向指针的指针,因为这里涉及malloc()函数。
  2. 直接从main调用rehash函数,而不经过insert函数。
  3. 让insert函数也把它从rehash函数接收到的新哈希表返回给调用者。

以下代码在main中调用rehash并接收其返回的新表(即上面的第二种方法)。

    #include<math.h>
    #include<stdio.h>
    #include<stdlib.h>
    #include<time.h>    // time(), used by main() to seed rand()
    
    // Slot states stored in Cell.flag.
    // null: state of every slot in a freshly calloc'ed table.
    #define null 0
    // occupied: set by insert() when a key is stored in the slot.
    #define occupied 1
    // deleted: set by delete() on a slot whose key was removed.
    #define deleted 2
    // Upper limit checked by rehash(); main() also uses it as the modulus for
    // the random keys it generates.
    #define maxLen 8191
    
    // One slot of the open-addressing table.
    typedef struct cell
    {
        int key;       // stored key
        short flag;    // one of null / occupied / deleted
    } Cell;
    
    // Forward declarations of the table operations defined later in the file.
    void insert(Cell *a, int key);
    int search(Cell *a, int key);
    void delete(Cell *a, int key);
    Cell* rehash(Cell *a);
    int nextPrime(int n);
    int isPrime(int n);
    
    // Shared table state used by every operation.
    int buckets = 31;     // current number of slots; changed by rehash()
    int filled = 0;       // count maintained by insert(), delete() and rehash()
    float alpha = 0.0;    // load factor, recomputed as filled / buckets
    
    // Demo driver: allocates the initial table, inserts `total` random keys
    // and then serves single-letter commands (i / s / d / e) read from stdin.
    // Before every insertion the load factor is checked and, when it would
    // reach 0.75, the table is replaced by the one rehash() returns.
    // Returns 0 on a normal exit and 1 when the initial allocation or the
    // first read fails.
    int main()
    {
        int i, total, val;
        printf("\nHashing with linear probing\n");
        printf("_______________________________________________________________\n\n");
    
        // calloc zero-fills the table, so every flag starts out as null (0).
        Cell *hashTable = calloc(buckets, sizeof *hashTable);
        if (hashTable == NULL)
        {
            perror("Failed calloc() for hashTable");
            return 1;
        }
    
        printf("Initially:\n");
        printf("1. Number of buckets = %d\n",buckets);
        printf("2. Load factor (alpha) = %4.2f\n", alpha);
        printf("_______________________________________________________________\n\n");
        printf("Enter the number of values to be hashed in the table.\n");
        if (scanf("%d",&total) != 1)
        {
            printf("Invalid number of values.\n");
            free(hashTable);
            return 1;
        }
    
        //Peripheral routines to hash random integers into hash table
        srand(time(NULL));
    
        for (i = 0; i < total; i++)
        {
            int temp = rand() % maxLen;
    
            alpha = (float)(filled + 1)/buckets;
    
            //Rehashing in case of alpha >= 0.75
            if (alpha >= 0.75) { hashTable = rehash(hashTable); }
            insert(hashTable, temp);
        }
        printf("\n");
        printf("Following is a list of operation and the respective commands:\n\n");
        printf(" 1. Insert - i\n 2. Search - s\n 3. Delete - d\n 4. Exit - e\n\n");
    
        char option;
        // The leading blank in " %c" skips the newline left behind by earlier
        // reads; the loop also ends on EOF instead of spinning forever.
        while (scanf(" %c",&option) == 1)
        {
            if (option == 'e')
            {
                break;
            }
            else if (option == 'i')
            {
                alpha = (float)(filled + 1)/buckets;
    
                //Rehashing in case of alpha >= 0.75
                if (alpha >= 0.75) { hashTable = rehash(hashTable); }
                printf("Please enter the value to be inserted : ");
                if (scanf("%d",&val) != 1) break;    // EOF or non-numeric input
                insert(hashTable, val);
                printf("\n");
            }
            else if (option == 's')
            {
                printf("Please enter the value to be searched : ");
                if (scanf("%d",&val) != 1) break;    // EOF or non-numeric input
                search(hashTable, val);
                printf("\n");
            }
            else if (option == 'd')
            {
                printf("Please enter the value to be deleted : ");
                if (scanf("%d",&val) != 1) break;    // EOF or non-numeric input
                delete(hashTable, val);
                printf("\n");
            }
        }
        free(hashTable);
        return(0);
    }
    
    // Inserts key into table a using linear probing.
    //
    // a   - table of `buckets` slots
    // key - value to store; any int is accepted (negative keys are mapped
    //       into the range 0..buckets-1)
    //
    // Prints where the key hashed and where it was placed. A key that is
    // already present, or a table without any free slot, leaves the table
    // unchanged. On success filled and alpha are updated.
    void insert(Cell *a, int key)    // Method of linear probing
    {
        int lt, rt, shift, hashKey, probes;
        int firstFree = -1;    // first reusable (deleted) slot seen while probing
    
        // C's % keeps the sign of the dividend, so fold negatives back in range.
        hashKey = key % buckets;
        if (hashKey < 0) hashKey += buckets;
        lt = hashKey;
        printf("Key = %4d  Hashed at = %4d  ", key, lt);
    
        // Walk the whole probe chain (up to the first never-used slot) so that
        // a key stored beyond a deleted slot is still recognised as present.
        for (probes = 0; probes < buckets && a[hashKey].flag != null; probes++)
        {
            if (a[hashKey].flag == occupied)
            {
                if (a[hashKey].key == key)
                {
                    printf("Key already present in table at %4d.\n",hashKey);
                    return;
                }
            }
            else if (firstFree < 0)
            {
                firstFree = hashKey;
            }
            hashKey = (hashKey + 1) % buckets;
        }
    
        if (firstFree < 0)
        {
            if (probes == buckets)    // every slot is occupied
            {
                printf("Table is full; key not inserted.\n");
                return;
            }
            firstFree = hashKey;      // the empty slot that ended the probe
        }
    
        rt = firstFree;
        a[rt].key = key;
        a[rt].flag = occupied;
        if (rt >= lt) shift = rt - lt;
        else          shift = buckets - lt + rt;
    
        printf("Placed at = %4d  Shift = %4d\n", rt, shift);
        filled++;
        alpha = (float)filled/buckets;
        return;
    }
    
    // Looks key up in table a by linear probing.
    //
    // Returns the index of the occupied slot holding key, or -1 when the key
    // is not stored. A message describing the outcome is printed either way.
    // Probing stops at the first never-used slot or after `buckets` probes,
    // so a table without empty slots cannot cause an endless loop.
    int search(Cell *a, int key)
    {
        int probes;
        int hashKey = key % buckets;
    
        // C's % keeps the sign of the dividend, so fold negatives back in range.
        if (hashKey < 0) hashKey += buckets;
    
        for (probes = 0; probes < buckets && a[hashKey].flag != null; probes++)
        {
            // Deleted slots are stepped over: only a live entry counts as a hit.
            if (a[hashKey].flag == occupied && a[hashKey].key == key)
            {
                printf("Element found in table at position %d.\n", hashKey);
                return hashKey;
            }
            hashKey = (hashKey + 1)% buckets;
        }
        printf("Element not found in table.\n");
        return -1;
    }
    
    // Removes key from table a, if present.
    // The slot found by search() is flagged as deleted rather than emptied,
    // and the fill count and load factor are updated to match.
    void delete(Cell *a, int key)
    {
        const int slot = search(a, key);
    
        if (slot != -1)
        {
            a[slot].flag = deleted;
            filled -= 1;
            alpha = (float)filled / buckets;
            printf("Element has been deleted from the table.\n");
        }
    }
    
    // Grows the table and returns the one the caller must use from now on.
    //
    // a - current table of `buckets` slots; it is freed when a larger table
    //     is built, so the caller must replace its pointer with the result.
    //
    // The new size is nextPrime(2 * buckets), capped at maxLen (8191, itself
    // a prime). When the table is already at the cap nothing changes and a is
    // returned as is. Live entries are re-inserted with insert(), which also
    // recounts `filled`. The program exits if the new table cannot be
    // allocated.
    Cell* rehash(Cell* a)
    {
        if (buckets >= maxLen)
        {
            printf("Table size cannot exceed 8191.\n");
            return a;    // keep the caller's table valid
        }
    
        int num = buckets;
        int grown = nextPrime(2*num);
        if (grown > maxLen) grown = maxLen;
    
        printf("_____________________________________________________________\n\n");
        printf("Due to load factor(alpha) exceeding 0.75, table resized to %d\n\n",grown);
    
        // calloc zero-fills the table, so every flag starts out as null (0).
        Cell *copy = calloc(grown, sizeof *copy);
        if (copy == NULL) {
            perror("Failed calloc() for copy");
            exit(1);
        }
    
        // insert() hashes with the global size and bumps `filled`, so switch
        // both over to the new table before re-inserting.
        buckets = grown;
        filled = 0;
    
        int i;
        for (i = 0; i < num; i++)
        {
            if(a[i].flag != occupied) continue;
            insert(copy, a[i].key);
        }
        free(a);
    
        alpha = (float)filled/buckets;
        return copy;
    }
    
    // Returns the smallest prime strictly greater than n.
    // Callers in this file pass an even n, but odd and small values are
    // handled too: only 2 and odd candidates are ever handed to isPrime().
    int nextPrime(int n)
    {
        int num;
    
        if (n < 2) return 2;
    
        num = n + 1;
        if (num % 2 == 0) num++;      // n was odd: skip the even candidate
        while(!isPrime(num)) num += 2;
        return num;
    }
    
    // Returns 1 when n is prime and 0 otherwise (including n < 2 and even
    // numbers greater than 2).
    int isPrime(int n)
    {
        int i;
    
        if (n < 2) return 0;
        if (n < 4) return 1;          // 2 and 3
        if (n % 2 == 0) return 0;
    
        // Trial division by odd numbers; i <= n / i is i * i <= n without the
        // risk of overflow and without floating-point sqrt().
        for (i = 3; i <= n / i; i += 2)
            if (n % i == 0) return 0;
        return 1;
    }