Hash.c更新了评论的修订版,我仍然遇到Seg错误。我必须在这里遗漏一些你们说的话
我使用C创建了一个哈希表ADT,但是当我尝试在ADT中调用一个函数(find_hash)时遇到了分段错误。
我已经发布了我创建的所有3个文件parse.c,hash.c和hash.h,因此您可以看到所有变量。我们正在阅读gettysburg.txt文件,该文件也附有
当我调用find_hash时,parse.c中出现了seg错误。我无法弄清楚我的生活在这里发生了什么。如果您需要更多信息,我一定能提供。
很抱歉,我已经在这个问题上花费了一周的时间。提前致谢
我运行程序的方式是: gcc -o parse parse.c hash.c 那么:cat gettysburg.txt |解析
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "hash.h"
#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0
void lower_case_word(char *);
void dump_dictionary(Phash_table );
/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);
typedef struct user_data_ {
char word[WORD_SIZE];
int freq_counter;
} user_data, *Puser_data;
int main(void)
{
char c, word1[WORD_SIZE];
int char_index = 0, dictionary_size = 0, num_words = 0, i;
int total=0, largest=0;
float average = 0.0;
Phash_table t; //Pointer to main hash_table
int (*Phash_func)(char *)=NULL; //Function Pointers
int (*Pcmp_func)(void *, void *)=NULL;
Puser_data data_node; //pointer to hash table above
user_data * find;
printf("Parsing input ...\n");
Phash_func = hash_func; //Assigning Function pointers
Pcmp_func = cmp_func;
t = new_hash(1000,Phash_func,Pcmp_func);
// Read in characters until end is reached
while ((c = getchar()) != EOF) {
if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
(c == ':') || (c == '\n')) {
// End of a word
if (char_index) {
// Word is not empty
word1[char_index] = '\0';
lower_case_word(word1);
data_node = (Puser_data)malloc(sizeof(user_data));
strcpy(data_node->word,word1);
printf("%s\n", data_node->word);
//!!!!!!SEG FAULT HERE!!!!!!
if (!((user_data *)find_hash(t, data_node->word))){ //SEG FAULT!!!!
insert_hash(t,word1,(void *)data_node);
}
char_index = 0;
num_words++;
}
} else {
// Continue assembling word
word1[char_index++] = c;
}
}
printf("There were %d words; %d unique words.\n", num_words,
dictionary_size);
dump_dictionary(t); //???
}
void lower_case_word(char *w){
int i = 0;
while (w[i] != '\0') {
w[i] = tolower(w[i]);
i++;
}
}
void dump_dictionary(Phash_table t){ //???
int i;
user_data *cur, *cur2;
stat_hash(t, &(t->total), &(t->largest), &(t->average)); //Call to stat hash
printf("Number of unique words: %d\n", t->total);
printf("Largest Bucket: %d\n", t->largest);
printf("Average Bucket: %f\n", t->average);
cur = start_hash_walk(t);
printf("%s: %d\n", cur->word, cur->freq_counter);
for (i = 0; i < t->total; i++)
cur2 = next_hash_walk(t);
printf("%s: %d\n", cur2->word, cur2->freq_counter);
}
int hash_func(char *string){
int i, sum=0, temp, index;
for(i=0; i < strlen(string);i++){
sum += (int)string[i];
}
index = sum % 1000;
return (index);
}
/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){
user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;
if(cur1->freq_counter < cur2->freq_counter){
return(-1);}
else{ if(cur1->freq_counter > cur2->freq_counter){
return(1);}
else return(0);}
}
#include "hash.h"
Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
int i;
Phash_table t;
t = (Phash_table)malloc(sizeof(hash_table)); //creates the main hash table
t->buckets = (hash_entry **)malloc(sizeof(hash_entry *)*size); //creates the hash table of "size" buckets
t->size = size; //Holds the number of buckets
t->hash_func = hash_func; //assigning the pointer to the function in the user's program
t->cmp_func = cmp_func; // " "
t->total=0;
t->largest=0;
t->average=0;
t->sorted_array = NULL;
t->index=0;
t->sort_num=0;
for(i=0;i<size;i++){ //Sets all buckets in hash table to NULL
t->buckets[i] = NULL;}
return(t);
}
void free_hash(Phash_table table){
int i;
hash_entry *cur;
for(i = 0; i<(table->size);i++){
if(table->buckets[i] != NULL){
for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
free(cur->key); //Freeing memory for key and data
free(cur->data);
}
free(table->buckets[i]); //free the whole bucket
}}
free(table->sorted_array);
free(table);
}
void insert_hash(Phash_table table, char *key, void *data){
Phash_entry new_node; //pointer to a new node of type hash_entry
int index;
new_node = (Phash_entry)malloc(sizeof(hash_entry));
new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
new_node->data = data; //stores the user's data into the node
strcpy(new_node->key,key); //copies the key into the node
//calling the hash function in the user's program
index = table->hash_func(key); //index will hold the hash table value for where the new node will be placed
table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
table->total++; //increment the total (total # of buckets)
}
void *find_hash(Phash_table table, char *key){
int i;
hash_entry *cur;
printf("Inside find_hash\n"); //REMOVE
for(i = 0;i<table->size;i++){
if(table->buckets[i]!=NULL){
for(cur = table->buckets[i]; cur->next != NULL; cur = cur->next){
if(strcmp(table->buckets[i]->key, key) == 0)
return((table->buckets[i]->data));} //returns the data to the user if the key values match
} //otherwise return NULL, if no match was found.
}
return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){
int node_num[table->size]; //creates an array, same size as table->size(# of buckets)
int i,j, count = 0;
int largest_buck = 0;
hash_entry *cur;
for(i = 0; i < table->size; i ++){
if(table->buckets[i] != NULL){
for(cur=table->buckets[i]; cur->next!=NULL; cur = cur->next){
count ++;}
node_num[i] = count;
count = 0;}
}
for(j = 0; j < table->size; j ++){
if(node_num[j] > largest_buck)
largest_buck = node_num[j];}
*total = table->total;
*largest = largest_buck;
*average = (table->total) / (table->size);
}
void *start_hash_walk(Phash_table table){
Phash_table temp = table;
int i, j, k;
hash_entry *cur; //CHANGE IF NEEDED to HASH_TABLE *
if(table->sorted_array != NULL) free(table->sorted_array);
table->sorted_array = (void**)malloc(sizeof(void*)*(table->total));
for(i = 0; i < table->total; i++){
if(table->buckets[i]!=NULL){
for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
table->sorted_array[i] = table->buckets[i]->data;
}}
}
for(j = (table->total) - 1; j > 0; j --) {
for(k = 1; k <= j; k ++){
if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
temp -> buckets[0]-> data = table->sorted_array[k-1];
table->sorted_array[k-1] = table->sorted_array[k];
table->sorted_array[k] = temp->buckets[0] -> data;
}
}
}
return table->sorted_array[table->sort_num];
}
void *next_hash_walk(Phash_table table){
table->sort_num ++;
return table->sorted_array[table->sort_num];
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct hash_entry_ { //Linked List
void *data; //Generic pointer
char *key; //String-based key value
struct hash_entry_ *next; //Self-Referencing pointer
} hash_entry, *Phash_entry;
typedef struct hash_table_ {
hash_entry **buckets; //Pointer to a pointer to a Linked List of type hash_entry
int (*hash_func)(char *);
int (*cmp_func)(void *, void *);
int size;
void **sorted_array; //Array used to sort each hash entry
int index;
int total;
int largest;
float average;
int sort_num;
} hash_table, *Phash_table;
Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);
Four score and seven years ago, our fathers brought forth upon this continent a new nation: conceived in liberty, and dedicated to the proposition that all men are created equal.
Now we are engaged in a great civil war. . .testing whether that nation, or any nation so conceived and so dedicated. . . can long endure. We are met on a great battlefield of that war.
We have come to dedicate a portion of that field as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.
But, in a larger sense, we cannot dedicate. . .we cannot consecrate. . . we cannot hallow this ground. The brave men, living and dead, who struggled here have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember, what we say here, but it can never forget what they did here.
It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us. . .that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion. . . that we here highly resolve that these dead shall not have died in vain. . . that this nation, under God, shall have a new birth of freedom. . . and that government of the people. . .by the people. . .for the people. . . shall not perish from the earth.
答案 0 :(得分:2)
此代码的几个问题之一可能是循环:
for(table->buckets[i];
table->buckets[i]->next != NULL;
table->buckets[i] = table->buckets[i]->next)
...
for循环的初始化部分(table->buckets[i]
)无效。如果i
为0且table->buckets[0] == NULL
,则此循环上的条件(table->buckets[i]->next != NULL
)将取消引用空指针并崩溃。
至少,这就是你的代码似乎在我的盒子上崩溃的地方。当我将几个循环更改为:
if (table->buckets[i] != NULL) {
for(;
table->buckets[i]->next != NULL;
table->buckets[i] = table->buckets[i]->next)
...
}
......它一直在崩溃,但在另一个地方。也许这会让你失败?
编辑:另一个潜在问题是那些for循环具有破坏性。当您致电find_hash
时,您真的希望修改所有这些存储桶吗?
我建议使用类似的东西:
hash_entry *cur;
// ...
if (table->buckets[i] != NULL) {
for (cur = table->buckets[i]; cur->next != NULL; cur = cur->next) {
// ...
}
}
当我这样做并注释掉你的dump_dictionary
函数时,代码会运行而不会崩溃。
答案 1 :(得分:0)
嗯,
这是hash.c
#include "hash.h"
Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
int i;
Phash_table t;
t = (Phash_table)calloc(1, sizeof(hash_table)); //creates the main hash table
t->buckets = (hash_entry **)calloc(size, sizeof(hash_entry *)); //creates the hash table of "size" buckets
t->size = size; //Holds the number of buckets
t->hash_func = hash_func; //assigning the pointer to the function in the user's program
t->cmp_func = cmp_func; // " "
t->total=0;
t->largest=0;
t->average=0;
for(i=0;t->buckets[i] != NULL;i++){ //Sets all buckets in hash table to NULL
t->buckets[i] = NULL;}
return(t);
}
void free_hash(Phash_table table){
int i;
for(i = 0; i<(table->size);i++){
if(table->buckets[i]!=NULL)
for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
free(table->buckets[i]->key); //Freeing memory for key and data
free(table->buckets[i]->data);
}
free(table->buckets[i]); //free the whole bucket
}
free(table->sorted_array);
free(table);
}
void insert_hash(Phash_table table, char *key, void *data){
Phash_entry new_node; //pointer to a new node of type hash_entry
int index;
new_node = (Phash_entry)calloc(1,sizeof(hash_entry));
new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
new_node->data = data; //stores the user's data into the node
strcpy(new_node->key,key); //copies the key into the node
//calling the hash function in the user's program
index = table->hash_func(key); //index will hold the hash table value for where the new node will be placed
table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
table->total++; //increment the total (total # of buckets)
}
void *find_hash(Phash_table table, char *key){
int i;
hash_entry *cur;
printf("Inside find_hash\n"); //REMOVE
for(i = 0;i<table->size;i++){
if(table->buckets[i]!=NULL){
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
if(strcmp(cur->key, key) == 0)
return((cur->data));} //returns the data to the user if the key values match
} //otherwise return NULL, if no match was found.
}
return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){
int node_num[table->size];
int i,j, count = 0;
int largest_buck = 0;
hash_entry *cur;
for(i = 0; i < table->size; i ++)
{
if(table->buckets[i]!=NULL)
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
count ++;}
node_num[i] = count;
count = 0;
}
for(j = 0; j < table->size; j ++){
if(node_num[j] > largest_buck)
largest_buck = node_num[j];}
*total = table->total;
*largest = largest_buck;
*average = (table->total) /(float) (table->size); //oook: i think you want a fp average
}
void *start_hash_walk(Phash_table table){
void* temp = 0; //oook: this was another way of overwriting your input table
int i, j, k;
int l=0; //oook: new counter for elements in your sorted_array
hash_entry *cur;
if(table->sorted_array !=NULL) free(table->sorted_array);
table->sorted_array = (void**)calloc((table->total), sizeof(void*));
for(i = 0; i < table->size; i ++){
//for(i = 0; i < table->total; i++){ //oook: i don't think you meant total ;)
if(table->buckets[i]!=NULL)
for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
//for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
table->sorted_array[l++] = cur->data;
}
}
//oook: sanity check/assert on expected values
if (l != table->total)
{
printf("oook: l[%d] != table->total[%d]\n",l,table->total);
}
for(j = (l) - 1; j > 0; j --) {
for(k = 1; k <= j; k ++){
if (table->sorted_array[k-1] && table->sorted_array[k])
{
if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
temp = table->sorted_array[k-1]; //ook. changed temp to void* see assignment
table->sorted_array[k-1] = table->sorted_array[k];
table->sorted_array[k] = temp;
}
}
else
printf("if (table->sorted_array[k-1] && table->sorted_array[k])\n");
}
}
return table->sorted_array[table->sort_num];
}
void *next_hash_walk(Phash_table table){
/*oook: this was blowing up since you were incrementing past the size of sorted_array..
NB: *you **need** to implement some bounds checking here or you will endup with more seg-faults!!*/
//table->sort_num++
return table->sorted_array[table->sort_num++];
}
这是parse.c
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <assert.h> //oook: added so you can assert ;)
#include "hash.h"
#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000
#define TRUE 1
#define FALSE 0
void lower_case_word(char *);
void dump_dictionary(Phash_table );
/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);
typedef struct user_data_ {
char word[WORD_SIZE];
int freq_counter;
} user_data, *Puser_data;
int main(void)
{
char c, word1[WORD_SIZE];
int char_index = 0, dictionary_size = 0, num_words = 0, i;
int total=0, largest=0;
float average = 0.0;
Phash_table t; //Pointer to main hash_table
int (*Phash_func)(char *)=NULL; //Function Pointers
int (*Pcmp_func)(void *, void *)=NULL;
Puser_data data_node; //pointer to hash table above
user_data * find;
printf("Parsing input ...\n");
Phash_func = hash_func; //Assigning Function pointers
Pcmp_func = cmp_func;
t = new_hash(1000,Phash_func,Pcmp_func);
// Read in characters until end is reached
while ((c = getchar()) != EOF) {
if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
(c == ':') || (c == '\n')) {
// End of a word
if (char_index) {
// Word is not empty
word1[char_index] = '\0';
lower_case_word(word1);
data_node = (Puser_data)calloc(1,sizeof(user_data));
strcpy(data_node->word,word1);
printf("%s\n", data_node->word);
//!!!!!!SEG FAULT HERE!!!!!!
if (!((user_data *)find_hash(t, data_node->word))){ //SEG FAULT!!!!
dictionary_size++;
insert_hash(t,word1,(void *)data_node);
}
char_index = 0;
num_words++;
}
} else {
// Continue assembling word
word1[char_index++] = c;
}
}
printf("There were %d words; %d unique words.\n", num_words,
dictionary_size);
dump_dictionary(t); //???
}
void lower_case_word(char *w){
int i = 0;
while (w[i] != '\0') {
w[i] = tolower(w[i]);
i++;
}
}
void dump_dictionary(Phash_table t){ //???
int i;
user_data *cur, *cur2;
stat_hash(t, &(t->total), &(t->largest), &(t->average)); //Call to stat hash
printf("Number of unique words: %d\n", t->total);
printf("Largest Bucket: %d\n", t->largest);
printf("Average Bucket: %f\n", t->average);
cur = start_hash_walk(t);
if (!cur) //ook: do test or assert for null values
{
printf("oook: null== (cur = start_hash_walk)\n");
exit(-1);
}
printf("%s: %d\n", cur->word, cur->freq_counter);
for (i = 0; i < t->total; i++)
{//oook: i think you needed these braces
cur2 = next_hash_walk(t);
if (!cur2) //ook: do test or assert for null values
{
printf("oook: null== (cur2 = next_hash_walk(t) at i[%d])\n",i);
}
else
printf("%s: %d\n", cur2->word, cur2->freq_counter);
}//oook: i think you needed these braces
}
int hash_func(char *string){
int i, sum=0, temp, index;
for(i=0; i < strlen(string);i++){
sum += (int)string[i];
}
index = sum % 1000;
return (index);
}
/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){
user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;
/* ooook: do assert on programmatic errors.
this function *requires non-null inputs. */
assert(cur1 && cur2);
if(cur1->freq_counter < cur2->freq_counter){
return(-1);}
else{ if(cur1->freq_counter > cur2->freq_counter){
return(1);}
else return(0);}
}
按照//ooks
<强>解释强>
有一两个地方会爆炸。
您的问题的快速解决方案和答案位于parse.c
,大约 L100 :
cur = start_hash_walk(t);
printf("%s: %d\n", cur->word, cur->freq_counter);
..在调用cur
之前检查null
不是printf
,修复了您的直接段错误。
但为什么cur
会null
? 〜因为这个坏孩子:
void *start_hash_walk(Phash_table table)
您的hash_func(char *string)
可以(&amp; do)返回非唯一值。除了您尚未实现链接列表链之外,这当然是可以的。因此,您最终会得到包含少于table->sorted_array
元素的table->total
〜或者如果您在所有table->size
个桶上进行迭代,那么您会这样做;)
还有一两个问题。
现在我进一步攻击Nate Kohl's for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next)
,因为你没有锁链,所以for(cur=table->buckets[i]; cur != NULL; cur=cur->next)
。但这是*你的TODO就这么说了。
最后。请注意,在next_hash_walk(Phash_table table)
中您有:
table->sort_num++
return table->sorted_array[table->sort_num];
哎哟! 执行检查那些数组边界!
备注强>
1)如果您的功能不是为改变输入而设计的,那么输入const
。这样编译器很可能会告诉你什么时候你无意中捣乱了什么。
2)对数组索引进行绑定检查。
3)在尝试使用Null指针之前,先测试/断言。
4)对每个功能进行单元测试;在编译之前永远不要写太多代码。测试
5)使用最少的测试数据;制作它以限制测试你的代码和试图以狡猾的方式打破它。
6)初始化数据结构!
7)永远不要使用埃及牙套! {
只是在开玩笑;)
}
PS 到目前为止工作顺利〜&gt;指针是棘手的小东西! &安培;一个问题很好的问题,包含所有必要的细节,所以+1和gl;)
(// oook:可能添加作业标签)