Question

我正在编写一个带有线性探测的哈希表，但我的程序有一个错误。我的任务是写出文本中每个单词的出现次数。例如，我的文件包含以下单词：

lol lol lol a c d

输出结果为：

lol = 3, a = 1, c = 1, d = 2.

（但是 d 不应该是 2！）这种情况在 SIZE_OF_TABLE 为 10 时出现；当 SIZE_OF_TABLE 为 2 时，程序根本无法工作。正确的结果应该是：

lol = 3, a = 1, c = 1, d = 1.

我的代码：

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>

// Factor reHT() multiplies the current table size by when it allocates.
#define MAX 10  
// Number of slots main() passes to init() for the initial table.
#define SIZE_OF_TABLE 10
// Capacity in bytes of the word buffer inside each struct HT slot.
#define MAX_STRING 256
// Initial value of the occupied-slot counter that main() hands to put().
#define THAT_OCCUP 0

// One slot of the hash table.
struct HT{
    int amount; // occurrence count: incremented by put(), returned by take()
    int occup;// occupancy flag: init() stores 1 (free), put() stores -1 once a word lives here
    char string[MAX_STRING]; // the word stored in this slot, written with strcpy() by put()
};

// Forward declarations of the hash-table routines defined below.
// hash: maps a NUL-terminated string to an unsigned long value.
unsigned int long hash(const char *str);
// init: allocates a table with `size` slots; returns NULL on failure.
struct HT* init(int size);
// reHT: called by put() when more than half of the slots are occupied;
// returns the table pointer the caller must keep using.
struct HT* reHT(struct HT* table,int* size,char* word, int* occup);
// put: records one occurrence of `word`; returns the table pointer the
// caller must keep using.
struct HT* put(struct HT* table,char* word, int* size,int* occup);
// take: returns the count stored for `word`, or 0 when no entry matches.
int take(struct HT* table,char* word, int* size);



// djb2-style string hash: starts from 5381 and folds every byte of `str`
// in as value * 33 + byte. `str` must be a NUL-terminated string.
// Returns 5381 for the empty string.
unsigned int long hash(const char *str)
{
    // Unsigned accumulator: wrap-around on overflow is well defined,
    // whereas overflow of a signed long is undefined behavior.
    unsigned long value = 5381;
    int c = 0;

    // Assign (not compare) the next byte to c; the loop ends at the NUL
    // terminator. Going through unsigned char keeps bytes >= 0x80 positive
    // regardless of whether plain char is signed on this platform.
    while ((c = (unsigned char)*str++) != 0)
        value = ((value << 5) + value) + (unsigned long)c; /* value * 33 + c */

    return value;
}

struct HT* init(int size) // create a hash table
{
    struct HT* table = (struct HT*)calloc(sizeof(struct HT*),size);
    int i = 0;

    if (size < 1)
        return NULL;

    if(NULL == table)
        return  NULL;

    for (i = 0; i < size; ++i)
    {
        table[i].amount = 0;
        table[i].occup = 1;
    }

    return table;
}
struct HT* reHT(struct HT* table,int* size,char* word, int* occup) //rehash
{
    assert(table);
    assert(word);
    assert(size);
    assert(occup);

    table = (struct HT*)calloc(sizeof(struct HT*),(*size)*MAX);
    int i = 0;

    while ( i < (*size)/MAX)
    {   
        table = put(table, table[i].string, size, occup);
        i++;
    }
    return table;
}

struct HT* put(struct HT* table,char* word, int* size,int* occup)
{
    assert(table);
    assert(word);
    assert(size);
    assert(occup);

    int i = 0;

    i = hash(word) % (*size);

    if ((*occup) > ((*size) / 2))
        table = reHT(table, size, word, occup);


    if(1 == table[i].occup ) // if free put it
    {
        strcpy(table[i].string,word);
        table[i].amount++;
        table[i].occup = -1;
        (*occup)++;
    }

    else if (-1 == table[i].occup && strstr(table[i].string,word)) // if place isnt free and it is a similar world  just increase amount
        table[i].amount++;

    else if (-1 == table[i].occup && !strstr(table[i].string,word)) // if place isnt free and  it the words arent similar then use linear probing
    {
        i++;
        while(1)
        {
            i = (i + 1) % (*size);
            if(table[i].occup == 1)
            {
                strcpy(table[i].string,word);
                table[i].amount++;
                table[i].occup = -1;
                (*occup)++;
                break;
            }
            else if ( -1 == table[i].occup && strstr(table[i].string,word))
            {
                table[i].amount++;
                break;
            }
        }

        i = 0; // go to start and do the same thing

        while(1)
        {
            if(1 == table[i].occup)
            {
                strcpy(table[i].string,word);
                table[i].amount++;
                table[i].occup = -1;
                (*occup)++;
                break;
            }

            else if (strstr(table[i].string,word) && table[i].occup == -1)
            {
                table[i].amount++;
                break;
            }

            i++;
        }
    }

    return table;
}


int take(struct HT* table,char* word, int* size) // take amount
{
    assert(size);
    assert(word);
    assert(table);

    int i = 0;

    i = hash(word) % (*size);

    while( i < (*size))
    {
        if(strstr(table[i].string, word))
            return table[i].amount;
        i++;
    }

    i = 0;

    while( i < (*size))
    {
        if(strstr(table[i].string, word))
            return table[i].amount;
        i++;
    }

    return 0;
}

// Reads every whitespace-separated word of "text.txt" into the hash table,
// then answers count queries typed on standard input until the word "END"
// or end of input is reached.
// Returns -1 when the file cannot be opened or the table cannot be created,
// 0 otherwise.
int main(int argc, char *argv[])
{
    FILE* file = fopen("text.txt","r");
    struct HT* table = NULL;
    char string[256] = {0};

    int size = SIZE_OF_TABLE;
    int occup = THAT_OCCUP;

    // The command line is not used.
    (void)argc;
    (void)argv;

    if (NULL == file)
        return -1;

    table = init(size); //create hash

    if ( NULL == table)
    {
        fclose(file);
        return - 1;
    }

    // %255s leaves room for the terminator in the 256-byte buffer; a bare %s
    // would overflow it on a long word.
    while(1 == fscanf(file, "%255s", string)) // put words to hash table
    {
        table = put(table,string,&size,&occup);
    }

    fclose(file);

    printf("HASH_TABLE IS READY!!!!11111\n");
    printf("Enter WORDS!!!1111\n");

    // Checking the scanf result ends the loop at end of input instead of
    // re-processing the last word forever.
    while(1 == scanf("%255s",string))
    {
        if(0 == strcmp(string,"END")) // if you want to stop just enter "END"
            break;
        printf(" KOLI4ESTVO!! = %d\n", take(table, string, &size)); 
    }

    free(table);
    return 0;
}

Answer 1

当我第一次编译代码时，只收到两个警告——main() 的参数 argc 和 argv 未被使用。这已经很不错了——SO 上很少有程序能如此干净地编译。

我创建了一个文件text.txt，其中包含：

lol
abracadabra
a
a
d
d
d
a
d
c

我给程序加入了一些打印语句以便输出调试信息（并修改为在遇到 EOF 时干净地终止），运行后得到：

$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [a]
Adding [d]
Adding [d]
Adding [d]
Adding [a]
Adding [d]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
c
 KOLI4ESTVO!! [c] = 9
d
 KOLI4ESTVO!! [d] = 9
a
 KOLI4ESTVO!! [a] = 9
lol
 KOLI4ESTVO!! [lol] = 1
abracadabra
 KOLI4ESTVO!! [abracadabra] = 9
$

那很奇怪;我没有创建任何单个单词的那么多条目。在valgrind下运行时，存在许多问题：

==98849== Invalid write of size 4
==98849==    at 0x100001173: init (ht.c:49)
==98849==    by 0x1000019A9: main (ht.c:188)
==98849==  Address 0x10080b588 is 120 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
==98849== Invalid write of size 4
==98849==    at 0x100001180: init (ht.c:50)
==98849==    by 0x1000019A9: main (ht.c:188)
==98849==  Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
Adding [lol]
==98849== Invalid read of size 4
==98849==    at 0x1000015C9: put (ht.c:89)
==98849==    by 0x1000019F1: main (ht.c:196)
==98849==  Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
==98849== Invalid write of size 1
==98849==    at 0x1003FE3A0: _platform_memmove$VARIANT$Nehalem (in /usr/lib/system/libsystem_platform.dylib)
==98849==    by 0x1001B4113: strcpy (in /usr/lib/system/libsystem_c.dylib)
==98849==    by 0x10000175A: put (ht.c:91)
==98849==    by 0x1000019F1: main (ht.c:196)
==98849==  Address 0x10080b590 is 128 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
==98849== Invalid write of size 4
==98849==    at 0x10000175B: put (ht.c:93)
==98849==    by 0x1000019F1: main (ht.c:196)
==98849==  Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849== 
…and a whole lot more in a similar vein…

快速查看init()会显示一些问题：

struct HT* init(int size) // create a hash table
{
    struct HT* table = (struct HT*)calloc(sizeof(struct HT*),size);
    int i = 0;

    if (size < 1)
        return NULL;

    if(NULL == table)
        return  NULL;

    for (i = 0; i < size; ++i)
    {
        table[i].amount = 0;
        table[i].occup = 1;
    }

您分配的是一个包含 size 个指针的数组，但使用它的方式却像是分配了一个包含 size 个 struct HT 条目的数组。

至少，你需要写：

struct HT *table = (struct HT *)calloc(sizeof(*table), size);

这会分配一个结构数组而不是一个指针数组。

该修复可消除 valgrind 错误。但输出仍然不正确：

$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [a]
Adding [d]
Adding [d]
Adding [d]
Adding [a]
Adding [d]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
 KOLI4ESTVO!! [a] = 9
lol
 KOLI4ESTVO!! [lol] = 1
abracadabra
 KOLI4ESTVO!! [abracadabra] = 9
b
 KOLI4ESTVO!! [b] = 9
c
 KOLI4ESTVO!! [c] = 9
d
 KOLI4ESTVO!! [d] = 9
e
 KOLI4ESTVO!! [e] = 0
antimony
 KOLI4ESTVO!! [antimony] = 0
$

我认为 9 并非巧合，因为数据文件中有 10 行。当我将数据减少到 6 行、只重复 a 时，输出为：

$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [d]
Adding [a]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
 KOLI4ESTVO!! [a] = 5
d
 KOLI4ESTVO!! [d] = 5
c
 KOLI4ESTVO!! [c] = 5
abracadabra
 KOLI4ESTVO!! [abracadabra] = 5
lol
 KOLI4ESTVO!! [lol] = 1
$

我还尝试了包含许多重复行（15 行）的文件，程序崩溃了。我认为这些数字上的规律应该能给你一些提示。重建哈希表的代码与 init() 中的代码存在类似的分配大小问题，对此我并不感到惊讶。

我认为您需要创建一个可以打印哈希表的详细信息和内容的函数，以便您可以使用它来查看正在创建的内容。

哈希表与线性探测

1 个答案: