编辑：

Question

编辑：

Hash.c更新了评论的修订版，我仍然遇到Seg错误。我必须在这里遗漏一些你们说的话

我使用C创建了一个哈希表ADT，但是当我尝试在ADT中调用一个函数（find_hash）时遇到了分段错误。

我已经发布了我创建的所有3个文件parse.c，hash.c和hash.h，因此您可以看到所有变量。我们正在阅读gettysburg.txt文件，该文件也附有

当我调用find_hash时，parse.c中出现了seg错误。我无法弄清楚我的生活在这里发生了什么。如果您需要更多信息，我一定能提供。

很抱歉，我已经在这个问题上花费了一周的时间。提前致谢

我运行程序的方式是： gcc -o parse parse.c hash.c 那么：cat gettysburg.txt |解析

Parse.c

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "hash.h"

#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000

#define TRUE 1
#define FALSE 0


void lower_case_word(char *);
void dump_dictionary(Phash_table ); 

/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);

typedef struct user_data_ {   
    char word[WORD_SIZE];
    int freq_counter;
} user_data, *Puser_data;

int main(void)
{
   char c, word1[WORD_SIZE];
   int char_index = 0, dictionary_size = 0, num_words = 0, i;
   int total=0, largest=0;
   float average = 0.0;

   Phash_table t;                  //Pointer to main hash_table
   int (*Phash_func)(char *)=NULL;         //Function Pointers
   int (*Pcmp_func)(void *, void *)=NULL;
   Puser_data data_node;                   //pointer to hash table above
   user_data * find;


   printf("Parsing input ...\n");

   Phash_func = hash_func;   //Assigning Function pointers
   Pcmp_func = cmp_func;
   t = new_hash(1000,Phash_func,Pcmp_func);

  // Read in characters until end is reached 
  while ((c = getchar()) != EOF) {
    if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
        (c == ':') || (c == '\n')) {
          // End of a word 
      if (char_index) {
          // Word is not empty 
        word1[char_index] = '\0';
        lower_case_word(word1);

        data_node = (Puser_data)malloc(sizeof(user_data));  
        strcpy(data_node->word,word1);
        printf("%s\n", data_node->word);


    //!!!!!!SEG FAULT HERE!!!!!!

        if (!((user_data *)find_hash(t, data_node->word))){   //SEG FAULT!!!!
         insert_hash(t,word1,(void *)data_node); 
        }

        char_index = 0;
        num_words++;
      }
    } else {
      // Continue assembling word 
      word1[char_index++] = c;
    }
  }

  printf("There were %d words; %d unique words.\n", num_words,
     dictionary_size);
  dump_dictionary(t);  //???

  }

void lower_case_word(char *w){
  int i = 0;

  while (w[i] != '\0') {
    w[i] = tolower(w[i]);
    i++;
  }
}

void dump_dictionary(Phash_table t){  //???
  int i;
  user_data *cur, *cur2;

  stat_hash(t, &(t->total), &(t->largest), &(t->average));   //Call to stat hash
    printf("Number of unique words:  %d\n", t->total);
    printf("Largest Bucket:  %d\n", t->largest);
    printf("Average Bucket:  %f\n", t->average);  

  cur = start_hash_walk(t);
  printf("%s:  %d\n", cur->word, cur->freq_counter);

  for (i = 0; i < t->total; i++)
     cur2 = next_hash_walk(t);
     printf("%s:  %d\n", cur2->word, cur2->freq_counter);
}

int hash_func(char *string){
    int i, sum=0, temp, index;

    for(i=0; i < strlen(string);i++){
        sum += (int)string[i];  
    }
    index = sum % 1000;
return (index); 
}


/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){

user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;

    if(cur1->freq_counter < cur2->freq_counter){
        return(-1);}
        else{ if(cur1->freq_counter > cur2->freq_counter){
                return(1);}
                else return(0);}
}

hash.c

#include "hash.h"

Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
    int i;
    Phash_table t;

    t = (Phash_table)malloc(sizeof(hash_table));   //creates the main hash table
    t->buckets = (hash_entry **)malloc(sizeof(hash_entry *)*size);  //creates the hash table of "size" buckets
    t->size = size;   //Holds the number of buckets
    t->hash_func = hash_func;   //assigning the pointer to the function in the user's program
    t->cmp_func = cmp_func;     // "  "  
    t->total=0;
    t->largest=0;
    t->average=0;
    t->sorted_array = NULL;
    t->index=0;
    t->sort_num=0;

    for(i=0;i<size;i++){   //Sets all buckets in hash table to NULL
        t->buckets[i] = NULL;}

    return(t);
}

void free_hash(Phash_table table){
    int i;
    hash_entry *cur;

    for(i = 0; i<(table->size);i++){
        if(table->buckets[i] != NULL){
            for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
                free(cur->key);  //Freeing memory for key and data
                free(cur->data);
            }
      free(table->buckets[i]);    //free the whole bucket
    }}
    free(table->sorted_array);
    free(table);
}

void insert_hash(Phash_table table, char *key, void *data){
    Phash_entry new_node;   //pointer to a new node of type hash_entry
    int index;

    new_node = (Phash_entry)malloc(sizeof(hash_entry));
    new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
    new_node->data = data;       //stores the user's data into the node
    strcpy(new_node->key,key);   //copies the key into the node

                                //calling the hash function in the user's program
    index = table->hash_func(key);    //index will hold the hash table value for where the new node will be placed
    table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
    table->total++;   //increment the total (total # of buckets)
}

void *find_hash(Phash_table table, char *key){
    int i;
    hash_entry *cur;
   printf("Inside find_hash\n"); //REMOVE

    for(i = 0;i<table->size;i++){
        if(table->buckets[i]!=NULL){            
            for(cur = table->buckets[i]; cur->next != NULL; cur = cur->next){
                if(strcmp(table->buckets[i]->key, key) == 0)
                return((table->buckets[i]->data));}  //returns the data to the user if the key values match
        }    //otherwise return NULL, if no match was found.
    }   
    return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){

    int node_num[table->size];  //creates an array, same size as table->size(# of buckets)
    int i,j, count = 0;
    int largest_buck = 0;
    hash_entry *cur;

    for(i = 0; i < table->size; i ++){
        if(table->buckets[i] != NULL){
            for(cur=table->buckets[i]; cur->next!=NULL; cur = cur->next){
                count ++;}
            node_num[i] = count;
            count = 0;}
        }

    for(j = 0; j < table->size; j ++){      
        if(node_num[j] > largest_buck)
            largest_buck = node_num[j];}

    *total = table->total;
    *largest = largest_buck;
    *average = (table->total) / (table->size);
}

void *start_hash_walk(Phash_table table){
    Phash_table temp = table;
    int i, j, k;
    hash_entry *cur;  //CHANGE IF NEEDED to HASH_TABLE *

    if(table->sorted_array != NULL) free(table->sorted_array);

    table->sorted_array = (void**)malloc(sizeof(void*)*(table->total));

    for(i = 0; i < table->total; i++){
        if(table->buckets[i]!=NULL){
            for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next){
                table->sorted_array[i] = table->buckets[i]->data;
        }}
    }

    for(j = (table->total) - 1; j > 0; j --)    {
        for(k = 1; k <= j; k ++){
            if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
                temp -> buckets[0]-> data = table->sorted_array[k-1];
                table->sorted_array[k-1] = table->sorted_array[k];
                table->sorted_array[k] = temp->buckets[0] -> data;
            }
        }
    }
    return table->sorted_array[table->sort_num];
}

void *next_hash_walk(Phash_table table){ 

    table->sort_num ++;
    return table->sorted_array[table->sort_num];
}

hash.h

#include <stdio.h> 
#include <stdlib.h>
#include <string.h>

typedef struct hash_entry_ {    //Linked List
    void *data;                 //Generic pointer
    char *key;                  //String-based key value
    struct hash_entry_ *next;   //Self-Referencing pointer
} hash_entry, *Phash_entry;

typedef struct hash_table_ {
    hash_entry **buckets;           //Pointer to a pointer to a Linked List of type hash_entry
    int (*hash_func)(char *);
    int (*cmp_func)(void *, void *);
    int size;
    void **sorted_array;         //Array used to sort each hash entry
    int index;
    int total;
    int largest;
    float average;  
    int sort_num;
} hash_table, *Phash_table;


Phash_table new_hash(int size, int (*hash_func)(char *), int (*cmp_func)(void *, void *));
void free_hash(Phash_table table);
void insert_hash(Phash_table table, char *key, void *data);
void *find_hash(Phash_table table, char *key);
void stat_hash(Phash_table table, int *total, int *largest, float *average);
void *start_hash_walk(Phash_table table);
void *next_hash_walk(Phash_table table);

Gettysburg.txt

Four score and seven years ago, our fathers brought forth upon this continent a new nation: conceived in liberty, and dedicated to the proposition that all men are created equal.

Now we are engaged in a great civil war. . .testing whether that nation, or any nation so conceived and so dedicated. . . can long endure. We are met on a great battlefield of that war.

We have come to dedicate a portion of that field as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this.

But, in a larger sense, we cannot dedicate. . .we cannot consecrate. . . we cannot hallow this ground. The brave men, living and dead, who struggled here have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember, what we say here, but it can never forget what they did here.

It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. It is rather for us to be here dedicated to the great task remaining before us. . .that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion. . . that we here highly resolve that these dead shall not have died in vain. . . that this nation, under God, shall have a new birth of freedom. . . and that government of the people. . .by the people. . .for the people. . . shall not perish from the earth.

Answer 1

此代码的几个问题之一可能是循环：

for(table->buckets[i]; 
    table->buckets[i]->next != NULL; 
    table->buckets[i] = table->buckets[i]->next)
  ...

for循环的初始化部分（table->buckets[i]）无效。如果i为0且table->buckets[0] == NULL，则此循环上的条件（table->buckets[i]->next != NULL）将取消引用空指针并崩溃。

至少，这就是你的代码似乎在我的盒子上崩溃的地方。当我将几个循环更改为：

if (table->buckets[i] != NULL) {
  for(; 
      table->buckets[i]->next != NULL; 
      table->buckets[i] = table->buckets[i]->next)
    ...
}

......它一直在崩溃，但在另一个地方。也许这会让你失败？

编辑：另一个潜在问题是那些for循环具有破坏性。当您致电find_hash时，您真的希望修改所有这些存储桶吗？

我建议使用类似的东西：

hash_entry *cur;
// ...
if (table->buckets[i] != NULL) {
  for (cur = table->buckets[i]; cur->next != NULL; cur = cur->next) {
    // ...
  }
}

当我这样做并注释掉你的dump_dictionary函数时，代码会运行而不会崩溃。

Answer 2

嗯，

这是hash.c

#include "hash.h"

Phash_table new_hash (int size, int(*hash_func)(char*), int(*cmp_func)(void*, void*)){
    int i;
    Phash_table t;

    t = (Phash_table)calloc(1, sizeof(hash_table));   //creates the main hash table
    t->buckets = (hash_entry **)calloc(size, sizeof(hash_entry *));  //creates the hash table of "size" buckets
    t->size = size;   //Holds the number of buckets
    t->hash_func = hash_func;   //assigning the pointer to the function in the user's program
    t->cmp_func = cmp_func;     // "  "  
    t->total=0;
    t->largest=0;
    t->average=0;

    for(i=0;t->buckets[i] != NULL;i++){   //Sets all buckets in hash table to NULL
        t->buckets[i] = NULL;}

    return(t);
}

void free_hash(Phash_table table){
    int i;

    for(i = 0; i<(table->size);i++){
        if(table->buckets[i]!=NULL)
            for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                free(table->buckets[i]->key);  //Freeing memory for key and data
                free(table->buckets[i]->data);
            }
      free(table->buckets[i]);    //free the whole bucket
    }
    free(table->sorted_array);
    free(table);
}

void insert_hash(Phash_table table, char *key, void *data){
    Phash_entry new_node;   //pointer to a new node of type hash_entry
    int index;

    new_node = (Phash_entry)calloc(1,sizeof(hash_entry));
    new_node->key = (char *)malloc(sizeof(char)*(strlen(key)+1)); //creates the key array based on the length of the string-based key
    new_node->data = data;       //stores the user's data into the node
    strcpy(new_node->key,key);   //copies the key into the node

                                //calling the hash function in the user's program
    index = table->hash_func(key);    //index will hold the hash table value for where the new node will be placed
    table->buckets[index] = new_node; //Assigns the pointer at the index value to the new node
    table->total++;   //increment the total (total # of buckets)
}

void *find_hash(Phash_table table, char *key){
    int i;
hash_entry *cur;

   printf("Inside find_hash\n"); //REMOVE

    for(i = 0;i<table->size;i++){
        if(table->buckets[i]!=NULL){
            for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
            //for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                if(strcmp(cur->key, key) == 0)
                   return((cur->data));}  //returns the data to the user if the key values match
        }    //otherwise return NULL, if no match was found.
    }   
    return NULL;
}
void stat_hash(Phash_table table, int *total, int *largest, float *average){

    int node_num[table->size];    
    int i,j, count = 0;
    int largest_buck = 0;
    hash_entry *cur;

    for(i = 0; i < table->size; i ++)
    {
        if(table->buckets[i]!=NULL)            
            for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
            //for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                count ++;}

        node_num[i] = count;
        count = 0;
    }

    for(j = 0; j < table->size; j ++){
        if(node_num[j] > largest_buck)
            largest_buck = node_num[j];}

    *total = table->total;
    *largest = largest_buck;
    *average = (table->total) /(float) (table->size); //oook: i think you want a fp average
}

void *start_hash_walk(Phash_table table){
    void* temp = 0; //oook: this was another way of overwriting your input table 
    int i, j, k; 
    int l=0; //oook: new counter for elements in your sorted_array
    hash_entry *cur;

    if(table->sorted_array !=NULL) free(table->sorted_array);

    table->sorted_array = (void**)calloc((table->total), sizeof(void*));

    for(i = 0; i < table->size; i ++){
    //for(i = 0; i < table->total; i++){  //oook: i don't think you meant total ;)
        if(table->buckets[i]!=NULL)
            for (cur = table->buckets[i]; cur != NULL; cur = cur->next){
            //for(table->buckets[i]; table->buckets[i]->next != NULL; table->buckets[i] = table->buckets[i]->next){
                table->sorted_array[l++] = cur->data;
            }
    }

    //oook: sanity check/assert on expected values
    if (l != table->total)
    {
        printf("oook: l[%d] != table->total[%d]\n",l,table->total);
    }

    for(j = (l) - 1; j > 0; j --)    {
        for(k = 1; k <= j; k ++){
            if (table->sorted_array[k-1] && table->sorted_array[k])
            {
                if(table->cmp_func(table->sorted_array[k-1], table->sorted_array[k]) == 1){
                    temp = table->sorted_array[k-1]; //ook. changed temp to void* see assignment
                    table->sorted_array[k-1] = table->sorted_array[k];
                    table->sorted_array[k] = temp;
                }
            }
            else
                printf("if (table->sorted_array[k-1] && table->sorted_array[k])\n");
        }
    }
    return table->sorted_array[table->sort_num];
}

void *next_hash_walk(Phash_table table){ 

    /*oook: this was blowing up since you were incrementing past the size of sorted_array..
    NB: *you **need** to implement some bounds checking here or you will endup with more seg-faults!!*/
    //table->sort_num++
    return table->sorted_array[table->sort_num++];
}

这是parse.c

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <assert.h>  //oook: added so you can assert ;)
#include "hash.h"

#define WORD_SIZE 40
#define DICTIONARY_SIZE 1000

#define TRUE 1
#define FALSE 0


void lower_case_word(char *);
void dump_dictionary(Phash_table ); 

/*Hash and compare functions*/
int hash_func(char *);
int cmp_func(void *, void *);

typedef struct user_data_ {   
    char word[WORD_SIZE];
    int freq_counter;
} user_data, *Puser_data;

int main(void)
{
   char c, word1[WORD_SIZE];
   int char_index = 0, dictionary_size = 0, num_words = 0, i;
   int total=0, largest=0;
   float average = 0.0;

   Phash_table t;                  //Pointer to main hash_table
   int (*Phash_func)(char *)=NULL;         //Function Pointers
   int (*Pcmp_func)(void *, void *)=NULL;
   Puser_data data_node;                   //pointer to hash table above
   user_data * find;


   printf("Parsing input ...\n");

   Phash_func = hash_func;   //Assigning Function pointers
   Pcmp_func = cmp_func;
   t = new_hash(1000,Phash_func,Pcmp_func);

  // Read in characters until end is reached 
  while ((c = getchar()) != EOF) {
    if ((c == ' ') || (c == ',') || (c == '.') || (c == '!') || (c == '"') ||
        (c == ':') || (c == '\n')) {
          // End of a word 
      if (char_index) {
          // Word is not empty 
        word1[char_index] = '\0';
        lower_case_word(word1);

        data_node = (Puser_data)calloc(1,sizeof(user_data));  
        strcpy(data_node->word,word1);
        printf("%s\n", data_node->word);


    //!!!!!!SEG FAULT HERE!!!!!!

        if (!((user_data *)find_hash(t, data_node->word))){   //SEG FAULT!!!!
        dictionary_size++;
         insert_hash(t,word1,(void *)data_node); 
        }

        char_index = 0;
        num_words++;
      }
    } else {
      // Continue assembling word 
      word1[char_index++] = c;
    }
  }

  printf("There were %d words; %d unique words.\n", num_words,
     dictionary_size);
  dump_dictionary(t);  //???

  }

void lower_case_word(char *w){
  int i = 0;

  while (w[i] != '\0') {
    w[i] = tolower(w[i]);
    i++;
  }
}

void dump_dictionary(Phash_table t){  //???
  int i;
  user_data *cur, *cur2;

  stat_hash(t, &(t->total), &(t->largest), &(t->average));   //Call to stat hash
    printf("Number of unique words:  %d\n", t->total);
    printf("Largest Bucket:  %d\n", t->largest);
    printf("Average Bucket:  %f\n", t->average);  

  cur = start_hash_walk(t);
  if (!cur) //ook: do test or assert for null values
  {
    printf("oook: null== (cur = start_hash_walk)\n");
    exit(-1);
  }
  printf("%s:  %d\n", cur->word, cur->freq_counter);
  for (i = 0; i < t->total; i++)
  {//oook: i think you needed these braces
      cur2 = next_hash_walk(t);
      if (!cur2) //ook: do test or assert for null values
      {
        printf("oook: null== (cur2 = next_hash_walk(t) at i[%d])\n",i);
      }
      else
        printf("%s:  %d\n", cur2->word, cur2->freq_counter);
  }//oook: i think you needed these braces
}

int hash_func(char *string){
    int i, sum=0, temp, index;

    for(i=0; i < strlen(string);i++){
        sum += (int)string[i];  
    }
    index = sum % 1000;
return (index); 
}


/*array1 and array2 point to the user defined data struct defined above*/
int cmp_func(void *array1, void *array2){

user_data *cur1= array1;
user_data *cur2= array2;//(user_data *)array2;

    /* ooook: do assert on programmatic errors. 
    this function *requires non-null inputs.  */
    assert(cur1 && cur2);  
    if(cur1->freq_counter < cur2->freq_counter){
        return(-1);}
        else{ if(cur1->freq_counter > cur2->freq_counter){
                return(1);}
                else return(0);}
}

按照//ooks

<强>解释

有一两个地方会爆炸。
您的问题的快速解决方案和答案位于parse.c，大约 L100 ：

  cur = start_hash_walk(t);
  printf("%s:  %d\n", cur->word, cur->freq_counter);

..在调用cur之前检查null不是printf，修复了您的直接段错误。

但为什么cur会null？〜因为这个坏孩子：
void *start_hash_walk(Phash_table table)

您的hash_func(char *string)可以（＆amp; do）返回非唯一值。除了您尚未实现链接列表链之外，这当然是可以的。因此，您最终会得到包含少于table->sorted_array元素的table->total〜或者如果您在所有table->size个桶上进行迭代，那么您会这样做;）

还有一两个问题。现在我进一步攻击Nate Kohl's for(cur=table->buckets[i]; cur->next != NULL; cur=cur->next)，因为你没有锁链，所以for(cur=table->buckets[i]; cur != NULL; cur=cur->next)。但这是*你的TODO就这么说了。

最后。请注意，在next_hash_walk(Phash_table table)中您有：

table->sort_num++
return table->sorted_array[table->sort_num];

哎哟！执行检查那些数组边界！

备注

1）如果您的功能不是为改变输入而设计的，那么输入const。这样编译器很可能会告诉你什么时候你无意中捣乱了什么。

2）对数组索引进行绑定检查。

3）在尝试使用Null指针之前，先测试/断言。

4）对每个功能进行单元测试;在编译之前永远不要写太多代码。测试

5）使用最少的测试数据;制作它以限制测试你的代码和试图以狡猾的方式打破它。

6）初始化数据结构！

7）永远不要使用埃及牙套！ {
只是在开玩笑;）
}

PS 到目前为止工作顺利〜＆gt;指针是棘手的小东西！＆安培;一个问题很好的问题，包含所有必要的细节，所以+1和gl;）

（// oook：可能添加作业标签）

赛格。哈希表ADT中的错误 - C.

编辑：

Parse.c

hash.c

hash.h

Gettysburg.txt

2 个答案: