在C中访问struct成员时出现段错误

时间:2017-09-04 00:08:22

标签: c struct segmentation-fault

我试图在C中实现用于存储单词的trie,但是在尝试访问struct成员时我遇到了分段错误。

代码如下:

#include <stdbool.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define ALPHABET_SIZE 27
#define SIZE 45

//Trie data structure declaration
// One node of the trie. insert() selects a child by the index that toIndex()
// returns for the current character.
typedef struct _dictionary {
    bool is_word;   // set to true by insert(); nothing else in this listing writes it
    char letter;    // character written by insert() as it walks through this node
    struct _dictionary *children[ALPHABET_SIZE];   // child links; insert() treats NULL as "no child yet"
} dicto;

// Root of the trie: insert() begins every walk at this pointer. As a file-scope
// object it is NULL until something assigns it.
dicto *DICT;

//Function prototypes
void insert(char *string);   // adds one word to the trie reachable from DICT
int toIndex(char s);         // maps a character to a slot in children[]

// Reads the file "small" line by line and passes each line to insert().
// Returns 1 when the file cannot be opened, 0 when feof() reports end of file
// right after an insert, and 2 when fgets() ends the loop first.
int main() {
    FILE *fp = fopen("small", "r");
    if (fp == NULL) {
        printf("Could not open file\n");
        return 1;
    }

    char word[46];  // fgets() stores at most 45 characters plus the terminator

    while (fgets(word, sizeof(word), fp)) {
        insert(word);   // fgets() keeps the trailing '\n', so it is passed along as well
        if (feof(fp)) {
            return 0;
        }
    }
    return 2;
}

//Inserts word into trie
// Starts at DICT and, for each character, moves to the child selected by
// toIndex(), allocating that child first when the slot is NULL.
void insert(char *string) {
    dicto *trav; //Pointer to walk through the trie
    trav = DICT;
    // NOTE(review): the loop condition is the assignment `n = strlen(string)`,
    // not a comparison, so n is reset to the string length on every pass.
    for (int n = 0; n = strlen(string); n++) {
        if (trav->children[toIndex(string[n])] == NULL) {
            // NOTE(review): DICT is a dicto *, so sizeof(DICT) is the size of a
            // pointer rather than of a node, and malloc() leaves the memory uninitialised.
            trav->children[toIndex(string[n])] = malloc(sizeof(DICT));
            trav->letter = string[n];   // recorded on the node being left, not on the new child
            trav = trav->children[toIndex(string[n])];
        } else {
            trav->letter = string[n];
            trav = trav->children[toIndex(string[n])];
        }

        // Flags the node just moved to when its letter field holds '\0'.
        if (trav->letter == '\0') {
            trav->is_word = true;
        }
    }
    return;
}

/**
 * Output alphabetic index from given input
 *
 * Upper-cases s with toupper() and subtracts 65 (the ASCII code of 'A'), so
 * 'A'/'a' give 0 and 'Z'/'z' give 25 on an ASCII system. No range check is
 * made: a character that is not a letter yields a value outside 0..25
 * (for example '\n' yields -55).
*/
int toIndex(char s) {
    s = toupper(s);
    int index = s - 65;
    return index;
}

我已尝试使用Valgrind和GDB进行调试。Valgrind的输出是:

==1979== Memcheck, a memory error detector
==1979== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==1979== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==1979== Command: ./test_function1
==1979== 
==1979== Invalid read of size 4
==1979==    at 0x8048684: insert (in /home/test_function1)
==1979==    by 0x80485F7: main (in /home/test_function1)
==1979==  Address 0xffffff00 is not stack'd, malloc'd or (recently) free'd
==1979== 
==1979== 
==1979== Process terminating with default action of signal 11 (SIGSEGV)
==1979==  Access not within mapped region at address 0xFFFFFF00
==1979==    at 0x8048684: insert (in /home/test_function1)
==1979==    by 0x80485F7: main (in /home/test_function1)
==1979==  If you believe this happened as a result of a stack
==1979==  overflow in your program's main thread (unlikely but
==1979==  possible), you can try to increase the size of the
==1979==  main thread stack using the --main-stacksize= flag.
==1979==  The main thread stack size used in this run was 8388608.
==1979== 
==1979== HEAP SUMMARY:
==1979==     in use at exit: 344 bytes in 1 blocks
==1979==   total heap usage: 2 allocs, 1 frees, 4,440 bytes allocated
==1979== 
==1979== LEAK SUMMARY:
==1979==    definitely lost: 0 bytes in 0 blocks
==1979==    indirectly lost: 0 bytes in 0 blocks
==1979==      possibly lost: 0 bytes in 0 blocks
==1979==    still reachable: 344 bytes in 1 blocks
==1979==         suppressed: 0 bytes in 0 blocks
==1979== Reachable blocks (those to which a pointer was found) are not shown.
==1979== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==1979== 
==1979== For counts of detected and suppressed errors, rerun with: -v
==1979== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
Segmentation fault (core dumped)

通过运行GDB,看起来错误来自第54行:

if (trav->children[toIndex(string[n])] == NULL)

我不知道问题可能出在哪里。

2 个答案:

答案 0 :(得分:2)

这只是关于问题中代码的一个可能问题的快速答案。我没有读完整篇文章。

在以下分配之后,内存中充满了垃圾数据:

trav->children[toIndex(string[n])] = malloc(sizeof(dicto));

使用calloc(保证内存被清零)会更好:

trav->children[toIndex(string[n])] = calloc(sizeof(dicto), 1);

或者自己将数据清零:

trav->children[toIndex(string[n])] = malloc(sizeof(dicto));
 memset(trav->children[toIndex(string[n])], 0, sizeof(dicto));

如果将垃圾数据保留在内存中,则即使应该为真,下列条件也可能为false:

if(trav->children[toIndex(string[n])] == NULL)

P.S。

此外,sizeof(DICT)是指针的大小,而不是结构的大小。您可以考虑使用sizeof(*DICT)或sizeof(dicto)。

答案 1 :(得分:0)

您的代码中存在多个问题:

  • 测试feof(fp)没有按照您的想法执行,实际上没有必要,因为fgets()会在文件末尾返回NULL

  • 循环for (int n = 0; n = strlen(string); n++)永远不会结束:条件n = strlen(string)是赋值而不是比较,每次迭代n都会被重新设为字符串的长度(只要字符串非空,条件就恒为真)。请改用:

    for (int n = 0, len = strlen(string); n < len; n++) {
    
  • 分配新节点时,必须初始化结构,否则可能会有未定义的行为,因为malloc()返回的内存块未初始化。请改用calloc()

  • toIndex()函数不一定会返回0到26范围内的值。您不应该硬编码'A'的值(65),而应该先检查该字符确实是一个字母。

以下是修改后的版本:

#include <stdbool.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define ALPHABET_SIZE 27
#define SIZE 45

//Trie data structure declaration
// One node of the trie. insert() selects a child by the index that toIndex()
// returns for the current character.
typedef struct _dictionary {
    bool is_word;   // set to true by insert() on the node reached after a word's last character
    char letter;    // character associated with this node; written by insert()
    struct _dictionary *children[ALPHABET_SIZE];   // child links; insert() treats NULL as "no child yet"
} dicto;

// Root of the trie: insert() begins every walk at this pointer. As a file-scope
// object it is NULL until something assigns it.
dicto *DICT;

//Function prototypes
void insert(char *string);   // adds one word to the trie reachable from DICT
int toIndex(char s);         // maps a character to a slot in children[]

int main(void) {
    char word[SIZE + 1];
    FILE *fp = fopen("small", "r");
    if (fp == NULL) {
        printf("Could not open file\n");
        return 1;
    }

    while (fgets(word, sizeof(word), fp)) {
        insert(word);
    }
    return 0;
}

//Inserts word into trie
/*
 * Adds `string` to the trie rooted at DICT, creating nodes as needed, and
 * marks the node reached after the last character as a complete word.
 *
 *  - The root is allocated on first use if DICT is still NULL.
 *  - A line ending ('\n' or '\r', as left in place by fgets()) ends the word
 *    and is not stored.
 *  - Nodes come from calloc(), so is_word and every children[] slot start out
 *    zeroed instead of holding whatever malloc() happened to return.
 *  - Each new node records the character that leads to it in `letter`.
 *  - A NULL or empty string is ignored. If an allocation fails, a message is
 *    written to stderr and the word is left unmarked.
 */
void insert(char *string) {
    if (string == NULL) {
        return;
    }

    if (DICT == NULL) {
        DICT = calloc(1, sizeof *DICT);
        if (DICT == NULL) {
            fprintf(stderr, "insert: out of memory\n");
            return;
        }
    }

    dicto *trav = DICT; //Pointer to walk through the trie

    for (size_t n = 0; string[n] != '\0' && string[n] != '\n' && string[n] != '\r'; n++) {
        int index = toIndex(string[n]);
        if (trav->children[index] == NULL) {
            // sizeof *node is the size of a whole node; sizeof(DICT) would
            // only be the size of a pointer.
            dicto *node = calloc(1, sizeof *node);
            if (node == NULL) {
                fprintf(stderr, "insert: out of memory\n");
                return;
            }
            node->letter = string[n];
            trav->children[index] = node;
        }
        trav = trav->children[index];
    }

    // Only flag a word when at least one character was consumed; the root
    // itself never represents a word.
    if (trav != DICT) {
        trav->is_word = true;
    }
}

/**
 * Map a character to its slot in a node's children[] array (assuming ASCII).
 *
 * Letters are folded case-insensitively onto 0..25 ('a'/'A' -> 0,
 * 'z'/'Z' -> 25); every other character shares slot 26.
 */
int toIndex(char c) {
    int slot = 26;  /* fallback for anything that is not a letter */

    if (c >= 'A' && c <= 'Z') {
        slot = c - 'A';
    } else if (c >= 'a' && c <= 'z') {
        slot = c - 'a';
    }
    return slot;
}