Question

不知是否有人愿意指点一下我在这里可能忽略的原理。我以为已经把所有情况都考虑到了，但似乎还是哪里做错了。

以下代码会产生分段错误，我想不出原因。我已经在传给fscanf的参数名前面加上了&。

// Running word count; load() below increments it once per word it reads.
int word_size = 0;

// Number of slots in global_hash.
#define HASH_SIZE 65536

// NOTE(review): the '=' becomes part of the replacement text, so LENGTH expands
// to "= 45" and the array size `LENGTH + 1` further down is not a valid expression.
#define LENGTH = 45

// Hash table: each slot holds the head of a chain of nodes (see load()).
// NOTE(review): `node` is used here before the typedef below has declared it.
node* global_hash[HASH_SIZE] = {NULL};

// One chain entry: a word buffer plus the link to the next entry in the same slot.
typedef struct node {
  char word[LENGTH + 1];
  struct node* next;
} node;

// Maps a NUL-terminated string to an index for global_hash: every character is
// folded in as h = (h << 2) ^ c and the result is reduced modulo HASH_SIZE.
// NOTE(review): h is a signed int, so the shift overflows on longer inputs (the
// sanitizer report below the listing shows exactly this) and h % HASH_SIZE can
// then be negative.
int hash_func(char* hash_val){
    int h = 0;
    for (int i = 0, j = strlen(hash_val); i < j; i++){
        h = (h << 2) ^ hash_val[i];
    }
    return h % HASH_SIZE;
}

// Reads whitespace-separated words from the file named by `dictionary` and
// links one node per word at the head of its global_hash chain, counting the
// words in word_size.
// Returns false if the file cannot be opened or a node cannot be allocated,
// true otherwise.
bool load(const char *dictionary)
{
    // Receives the buffer that the glibc "%ms" conversion allocates for each word.
    char* string;
    FILE* dic = fopen(dictionary, "r");
    if(dic == NULL){
        fprintf(stdout, "Error: File is NULL.");
        return false;
    }
    // NOTE(review): the loop only stops on EOF; it does not check that one item
    // was actually converted.
    while(fscanf(dic, "%ms", &string) != EOF){
        node* new_node = malloc(sizeof(node));
        if(new_node == NULL){
            // NOTE(review): dic is still open on this return path.
            return false;
        }
        // NOTE(review): nothing checks that string fits into new_node->word.
        strcpy(new_node->word, string);
        new_node->next = NULL;
        int hash_indx = hash_func(new_node->word);
        node* first = global_hash[hash_indx];
        // Both branches make new_node the new head of the chain.
        if(first == NULL){
            global_hash[hash_indx] = new_node;
        } else {
            new_node->next = global_hash[hash_indx];
            global_hash[hash_indx] = new_node;
        }
        word_size++;
        // NOTE(review): new_node was just stored in global_hash, so freeing it
        // leaves a dangling pointer in the table; the buffer behind `string`
        // is the allocation that is never released.
        free(new_node);
    }
    fclose(dic);
    return true;
}

dictionary.c:25:16: runtime error: left shift of 2127912344 by 2 places cannot be represented in type 'int'
dictionary.c:71:23: runtime error: index -10167 out of bounds for type 'node *[65536]'
dictionary.c:73:13: runtime error: index -10167 out of bounds for type 'node *[65536]'
dictionary.c:75:30: runtime error: index -22161 out of bounds for type 'node *[65536]'
dictionary.c:76:13: runtime error: index -22161 out of bounds for type 'node *[65536]'

Segmentation fault

Answer 1

OP发布更多代码后更新

问题是你的hash_func使用有符号整数并且它会溢出。因此，您会得到负的返回值（或者更确切地说是未定义的行为）。

这也正是下面这些信息告诉你的：

dictionary.c:25:16: runtime error: left shift of 2127912344 by 2 places cannot be represented in type 'int'

这里它告诉你有一个有符号整数溢出

dictionary.c:71:23: runtime error: index -10167 out of bounds for type 'node *[65536]'

这里它告诉你使用负数索引到数组（即global_hash）

尝试使用无符号整数

/*
 * Hash a NUL-terminated string into a slot index for the hash table.
 *
 * Characters are folded in one at a time as acc = (acc << 2) ^ c using
 * unsigned arithmetic, and the accumulator is reduced modulo HASH_SIZE,
 * so the returned index is always below HASH_SIZE.
 */
unsigned int hash_func(char* hash_val){
    unsigned int acc = 0;
    int len = strlen(hash_val);
    int pos = 0;

    while (pos < len){
        acc = (acc << 2) ^ hash_val[pos];
        pos++;
    }

    return acc % HASH_SIZE;
}

并这样调用它：

unsigned int hash_indx = hash_func(new_node->word);

原始回答

我不确定这是所有问题的根本原因，但似乎你在内存分配方面遇到了一些问题。

每次调用fscanf时，由于使用了%ms，都会为string分配新的动态内存。但是，你从未free这块内存，因此存在内存泄漏。

此外，这看起来像是一个主要问题：

        global_hash[hash_indx] = new_node;  // Here you save new_node
    } else {
        new_node->next = global_hash[hash_indx];
        global_hash[hash_indx] = new_node;  // Here you save new_node
    }
    word_size++;
    free(new_node);  // But here you free the memory

因此，您的表似乎已经存储了已经被释放的内存指针。

这是一个在使用指针时可能导致seg错误的主要问题。

也许改变这个

free(new_node);

到

free(string);

一般情况下，我建议您避开%ms并避免fscanf。请改用char string[LENGTH + 1]和fgets。

Answer 2

发布的代码中存在多个问题。以下是主要内容：

您应该使用无符号算法进行哈希码计算，以确保哈希值为正。当前实现具有未定义的行为，因为长于15个字母的单词会导致算术溢出，这可能会产生负值并导致模数也为负值，索引超出global_hash的范围。
您使用free(new_node);释放新分配的节点。它已存储到global_hash数组中：稍后为具有相同散列值的另一个单词解除引用它将导致未定义的行为。您可能想要使用free(string);释放已解析的单词。

以下是其他问题：

在使用 strcpy(new_node->word, string); 复制 string 之前，应先检查它的长度。
fscanf(dic, "%ms", &string) 不可移植。m 修饰符会让 fscanf 为单词分配内存，但它是 glibc 支持的扩展，在其他环境中可能不可用。您可能希望自己编写一个简单的函数，以获得更好的可移植性。
主循环应使用 while(fscanf(dic, "%ms", &string) == 1) 测试转换是否成功，而不是用 EOF 测试文件结尾。在这种特定情况下它可能不会引起问题，但对其他转换说明符来说，这是导致未定义行为的常见原因。
定义 #define HASH_SIZE 65536; 有一个多余的 ;，如果在表达式中使用 HASH_SIZE，可能会导致意外行为。
定义 #define LENGTH = 45; 不正确：代码按发布的样子无法编译。

以下是修改后的版本：

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>

#define HASH_SIZE 65536
#define LENGTH 45

typedef struct node {
    char word[LENGTH + 1];
    struct node *next;
} node;

int word_size = 0;
node *global_hash[HASH_SIZE];

unsigned hash_func(const char *hash_val) {
    unsigned h = 0;
    for (size_t i = 0, j = strlen(hash_val); i < j; i++) {
        h = ((h << 2) | (h >> 30)) ^ (unsigned char)hash_val[i];
    }
    return h % HASH_SIZE;
}

/* read a word from fp, skipping initial whitespace.
   return the length of the word read or EOF at end of file
   store the word into the destination array, truncating it as needed
 */
int get_word(char *buf, size_t size, FILE *fp) {
    int c;
    size_t i;

    while (isspace(c = getc(fp)))
        continue;

    if (c == EOF)
        return EOF;

    for (i = 0;; i++) {
        if (i < size)
            buf[i] = c;
        c = getc(fp);
        if (c == EOF)
            break;
        if (isspace(c)) {
            ungetc(c, fp);
            break;
        }
    }
    if (i < size)
        buf[i] = '\0';
    else if (size > 0)
        buf[size - 1] = '\0';
    return i;
}

bool load(const char *dictionary) {
    char buf[LENGTH + 1];
    FILE *dic = fopen(dictionary, "r");
    if (dic == NULL) {
        fprintf(stderr, "Error: cannot open dictionary file %s\n", dictionary);
        return false;
    }
    while (get_word(buf, sizeof buf, dic) != EOF) {
        node *new_node = malloc(sizeof(node));
        if (new_node == NULL) {
            fprintf(stderr, "Error: out of memory\n");
            fclose(dic);
            return false;
        }
        unsigned hash_indx = hash_func(buf);
        strcpy(new_node->word, buf);
        new_node->next = global_hash[hash_indx];
        global_hash[hash_indx] = new_node;
        word_size++;
    }
    fclose(dic);
    return true;
}

Answer 3

以下提议的代码：

干净地编译
功能仍有一个主要问题：hash_func()
将结构的定义与该结构的typedef分开。
正确格式化#define语句
正确处理来自fopen()和malloc()的错误
正确地限制从“字典”文件中读取的字符串的长度
假设“字典”文件中没有长度超过45个字节的单词

现在，建议的代码：

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>

/* Table geometry: number of hash slots and the longest word a node can hold. */
#define LENGTH    45
#define HASH_SIZE 65536

/* Functions defined further down in this listing. */
int  hash_func(char *hash_val);
bool load(const char *dictionary);


/* One chain entry of the hash table; the typedef is kept apart from the struct body. */
typedef struct node node;

struct node
{
    char  word[LENGTH + 1];   /* the stored word plus its terminating NUL */
    node *next;               /* next entry in the same slot, or NULL at the end */
};


/* Number of words inserted so far; load() increments it once per word. */
int word_size = 0;

/* Chain heads, indexed by the value hash_func() returns; every slot starts out NULL. */
node *global_hash[HASH_SIZE] = { NULL };

/*
 * Hash a NUL-terminated word into a slot index for global_hash.
 *
 * Every character is folded in as h = (h << 2) ^ c and the accumulator is
 * reduced modulo HASH_SIZE.
 *
 * hash_val: NUL-terminated string to hash; it is only read.
 * returns:  an index in the range [0, HASH_SIZE).
 *
 * This addresses the hash_func() problem flagged in the notes preceding this
 * listing: accumulating in a signed int overflows in the shift once a word
 * is longer than about 15 characters, which is undefined behaviour and in
 * practice produced negative indices.
 */
int hash_func(char* hash_val)
{
    /* Unsigned arithmetic wraps instead of overflowing. */
    unsigned int h = 0;

    for ( size_t i = 0, j = strlen(hash_val); i < j; i++)
    {
        /* Go through unsigned char so bytes >= 0x80 are not sign-extended. */
        h = (h << 2) ^ (unsigned char)hash_val[i];
    }

    /* h % HASH_SIZE is below HASH_SIZE, so converting back to int is lossless. */
    return (int)(h % HASH_SIZE);
}


bool load(const char *dictionary)
{
    char string[ LENGTH+1 ];
    FILE* dic = fopen(dictionary, "r");
    if(dic == NULL)
    {
        perror( "fopen failed" );
        //fprintf(stdout, "Error: File is NULL.");
        return false;
    }

    while( fscanf( dic, "%45s", string) == 1 )
    {
        node* new_node = malloc(sizeof(node));
        if(new_node == NULL)
        {
            perror( "malloc failed" );
            return false;
        }

        strcpy(new_node->word, string);
        new_node->next = NULL;

        int hash_indx = hash_func(new_node->word);

        // following statement for debug:
        printf( "index returned from hash_func(): %d\n", hash_indx );

        if( !global_hash[hash_indx] )
        {
            global_hash[hash_indx] = new_node;
        }

        else
        {
            new_node->next = global_hash[hash_indx];
            global_hash[hash_indx] = new_node;
        }

        word_size++;
    }
    fclose(dic);
    return true;
}

在c中使用fscanf时出现分段错误

3 个答案: