我正在编写一个带有线性探测的哈希表,但我的程序有一个错误。我的任务是写出文本中每个单词的出现次数。例如,我的文件包含以下单词:
lol lol lol a c d
输出结果为:
lol = 3, a = 1, c = 1, d = 2.
(但是d
不应该是2!)当SIZE_OF_TABLE为10时会发生。当SIZE_OF_TABLE为2时,程序不起作用。真正的结果必须是:
lol = 3, a = 1, c = 1, d = 1.
我的代码:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#define MAX 10
#define SIZE_OF_TABLE 10
#define MAX_STRING 256
#define THAT_OCCUP 0
struct HT{
int amount;
int occup;//occupancy
char string[MAX_STRING];
};
unsigned int long hash(const char *str);
struct HT* init(int size);
struct HT* reHT(struct HT* table,int* size,char* word, int* occup);
struct HT* put(struct HT* table,char* word, int* size,int* occup);
int take(struct HT* table,char* word, int* size);
unsigned int long hash(const char *str) // hash function
{
int long hash = 5381;
int c = 0;
while (c == *str++)
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
return hash;
}
struct HT* init(int size) // create a hash table
{
struct HT* table = (struct HT*)calloc(sizeof(struct HT*),size);
int i = 0;
if (size < 1)
return NULL;
if(NULL == table)
return NULL;
for (i = 0; i < size; ++i)
{
table[i].amount = 0;
table[i].occup = 1;
}
return table;
}
struct HT* reHT(struct HT* table,int* size,char* word, int* occup) //rehash
{
assert(table);
assert(word);
assert(size);
assert(occup);
table = (struct HT*)calloc(sizeof(struct HT*),(*size)*MAX);
int i = 0;
while ( i < (*size)/MAX)
{
table = put(table, table[i].string, size, occup);
i++;
}
return table;
}
struct HT* put(struct HT* table,char* word, int* size,int* occup)
{
assert(table);
assert(word);
assert(size);
assert(occup);
int i = 0;
i = hash(word) % (*size);
if ((*occup) > ((*size) / 2))
table = reHT(table, size, word, occup);
if(1 == table[i].occup ) // if free put it
{
strcpy(table[i].string,word);
table[i].amount++;
table[i].occup = -1;
(*occup)++;
}
else if (-1 == table[i].occup && strstr(table[i].string,word)) // if place isnt free and it is a similar world just increase amount
table[i].amount++;
else if (-1 == table[i].occup && !strstr(table[i].string,word)) // if place isnt free and it the words arent similar then use linear probing
{
i++;
while(1)
{
i = (i + 1) % (*size);
if(table[i].occup == 1)
{
strcpy(table[i].string,word);
table[i].amount++;
table[i].occup = -1;
(*occup)++;
break;
}
else if ( -1 == table[i].occup && strstr(table[i].string,word))
{
table[i].amount++;
break;
}
}
i = 0; // go to start and do the same thing
while(1)
{
if(1 == table[i].occup)
{
strcpy(table[i].string,word);
table[i].amount++;
table[i].occup = -1;
(*occup)++;
break;
}
else if (strstr(table[i].string,word) && table[i].occup == -1)
{
table[i].amount++;
break;
}
i++;
}
}
return table;
}
int take(struct HT* table,char* word, int* size) // take amount
{
assert(size);
assert(word);
assert(table);
int i = 0;
i = hash(word) % (*size);
while( i < (*size))
{
if(strstr(table[i].string, word))
return table[i].amount;
i++;
}
i = 0;
while( i < (*size))
{
if(strstr(table[i].string, word))
return table[i].amount;
i++;
}
return 0;
}
int main(int argc, char *argv[])
{
FILE* file = fopen("text.txt","r");
struct HT* table = NULL;
char string[256] = {0};
int size = SIZE_OF_TABLE;
int occup = THAT_OCCUP;
if (NULL == file)
return -1;
table = init(size); //create hash
if ( NULL == table)
return - 1;
while(1 == fscanf(file, "%s", string)) // put words to hash table
{
table = put(table,string,&size,&occup);
}
printf("HASH_TABLE IS READY!!!!11111\n");
printf("Enter WORDS!!!1111\n");
while(1)
{
scanf("%s",string);
if(strstr(string,"END")) // if you want to stop just enter "END"
break;
printf(" KOLI4ESTVO!! = %d\n", take(table, string, &size));
}
free(table);
return 0;
}
答案 0 :(得分:1)
当我第一次编译代码时,我只收到两个警告 - argc
和argv
到main()
未被使用。这样做得很好 - 很少有关于SO的程序可以干净地编译。
我创建了一个文件text.txt
,其中包含:
lol
abracadabra
a
a
d
d
d
a
d
c
当我运行程序时,检测到足够的信息来打印一些信息(并修改为在EOF上干净地终止),我得到了:
$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [a]
Adding [d]
Adding [d]
Adding [d]
Adding [a]
Adding [d]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
c
KOLI4ESTVO!! [c] = 9
d
KOLI4ESTVO!! [d] = 9
a
KOLI4ESTVO!! [a] = 9
lol
KOLI4ESTVO!! [lol] = 1
abracadabra
KOLI4ESTVO!! [abracadabra] = 9
$
那很奇怪;我没有创建任何单个单词的那么多条目。在valgrind
下运行时,存在许多问题:
==98849== Invalid write of size 4
==98849== at 0x100001173: init (ht.c:49)
==98849== by 0x1000019A9: main (ht.c:188)
==98849== Address 0x10080b588 is 120 bytes inside an unallocated block of size 2,736 in arena "client"
==98849==
==98849== Invalid write of size 4
==98849== at 0x100001180: init (ht.c:50)
==98849== by 0x1000019A9: main (ht.c:188)
==98849== Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849==
Adding [lol]
==98849== Invalid read of size 4
==98849== at 0x1000015C9: put (ht.c:89)
==98849== by 0x1000019F1: main (ht.c:196)
==98849== Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849==
==98849== Invalid write of size 1
==98849== at 0x1003FE3A0: _platform_memmove$VARIANT$Nehalem (in /usr/lib/system/libsystem_platform.dylib)
==98849== by 0x1001B4113: strcpy (in /usr/lib/system/libsystem_c.dylib)
==98849== by 0x10000175A: put (ht.c:91)
==98849== by 0x1000019F1: main (ht.c:196)
==98849== Address 0x10080b590 is 128 bytes inside an unallocated block of size 2,736 in arena "client"
==98849==
==98849== Invalid write of size 4
==98849== at 0x10000175B: put (ht.c:93)
==98849== by 0x1000019F1: main (ht.c:196)
==98849== Address 0x10080b58c is 124 bytes inside an unallocated block of size 2,736 in arena "client"
==98849==
…and a whole lot more in a similar vein…
快速查看init()
会显示一些问题:
struct HT* init(int size) // create a hash table
{
struct HT* table = (struct HT*)calloc(sizeof(struct HT*),size);
int i = 0;
if (size < 1)
return NULL;
if(NULL == table)
return NULL;
for (i = 0; i < size; ++i)
{
table[i].amount = 0;
table[i].occup = 1;
}
您已经分配了一组size
指针,但您尝试使用它们,就好像您已经分配size
struct HT
个 struct HT *table = (struct HT *)calloc(sizeof(*table), size);
条目数组一样。 / p>
至少,你需要写:
valgrind
这会分配一个结构数组而不是一个指针数组。
修复可消除$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [a]
Adding [d]
Adding [d]
Adding [d]
Adding [a]
Adding [d]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
KOLI4ESTVO!! [a] = 9
lol
KOLI4ESTVO!! [lol] = 1
abracadabra
KOLI4ESTVO!! [abracadabra] = 9
b
KOLI4ESTVO!! [b] = 9
c
KOLI4ESTVO!! [c] = 9
d
KOLI4ESTVO!! [d] = 9
e
KOLI4ESTVO!! [e] = 0
antimony
KOLI4ESTVO!! [antimony] = 0
$
错误。但输出仍然不正确:
a
我认为我已经确定9并非巧合,因为数据文件中有10行。当我将数据减少到6行时,只重复$ ./ht
Adding [lol]
Adding [abracadabra]
Adding [a]
Adding [d]
Adding [a]
Adding [c]
HASH_TABLE IS READY!!!!11111
Enter WORDS!!!1111
a
KOLI4ESTVO!! [a] = 5
d
KOLI4ESTVO!! [d] = 5
c
KOLI4ESTVO!! [c] = 5
abracadabra
KOLI4ESTVO!! [abracadabra] = 5
lol
KOLI4ESTVO!! [lol] = 1
$
,输出为:
init()
我还尝试了许多重复的行(15),程序崩溃了。我认为数字命理学应该给你一些提示。我发现重建哈希表的代码与{{1}}中的代码大小相似,我并不感到惊讶。
我认为您需要创建一个可以打印哈希表的详细信息和内容的函数,以便您可以使用它来查看正在创建的内容。