我试图在C中实现用于存储单词的trie,但是在尝试访问struct成员时我遇到了分段错误。
代码如下:
#include <stdbool.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// Number of child slots in every trie node.
// NOTE(review): presumably 26 letters plus one extra slot -- confirm the intended alphabet.
#define ALPHABET_SIZE 27
// NOTE(review): presumably the longest supported word; it is not referenced by the
// code in this listing (main hard-codes a 46-byte buffer) -- confirm.
#define SIZE 45
// Trie node: one node per position on a path, with a fixed-size array of children.
typedef struct _dictionary {
// Flag that insert() sets to true on a node.
bool is_word;
// Character that insert() records while walking through this node.
char letter;
// Child nodes, indexed by the value returned from toIndex().
struct _dictionary *children[ALPHABET_SIZE];
} dicto;
// Pointer insert() starts walking from. Nothing in this listing assigns it, so as a
// file-scope object it starts out as a null pointer.
dicto *DICT;
// Function prototypes
void insert(char *string);
int toIndex(char s);
/*
 * Reads the file "small" line by line and hands every line to insert().
 *
 * Exit status: 1 when the file cannot be opened, 0 when the end-of-file
 * indicator is set right after a line has been inserted, 2 when fgets()
 * itself ends the loop.
 */
int main() {
    FILE *dict_file = fopen("small", "r");
    if (!dict_file) {
        printf("Could not open file\n");
        return 1;
    }

    char line[46];
    for (;;) {
        if (fgets(line, sizeof line, dict_file) == NULL) {
            break;
        }
        insert(line);
        if (feof(dict_file)) {
            return 0;
        }
    }
    return 2;
}
/*
 * Inserts word into trie: starts at DICT and, for each character looked at,
 * follows the child slot selected by toIndex(), allocating it when it is NULL.
 *
 * NOTE(review): defects visible in this listing:
 *  - nothing here ever assigns DICT, so trav starts out as a null pointer;
 *  - the loop condition assigns to n instead of comparing it (see below);
 *  - new nodes are allocated with the size of a pointer and never initialised;
 *  - toIndex() can return a value outside 0..ALPHABET_SIZE-1 (e.g. for '\0').
 */
void insert(char *string) {
dicto *trav; //Pointer to walk through the trie
trav = DICT;
// NOTE(review): `n = strlen(string)` is an assignment, so n is reset to the
// string length on every pass and string[n] is always the terminating '\0';
// the loop only stops by itself when the string is empty.
for (int n = 0; n = strlen(string); n++) {
if (trav->children[toIndex(string[n])] == NULL) {
// NOTE(review): sizeof(DICT) is the size of a `dicto *`, not of a dicto node,
// and malloc() leaves the block uninitialised; the result is not checked.
trav->children[toIndex(string[n])] = malloc(sizeof(DICT));
// The letter is stored in the current (parent) node before descending.
trav->letter = string[n];
trav = trav->children[toIndex(string[n])];
} else {
// Same two steps as the branch above, minus the allocation.
trav->letter = string[n];
trav = trav->children[toIndex(string[n])];
}
// Reads `letter` of the node just stepped into and marks it as a word when it is '\0'.
if (trav->letter == '\0') {
trav->is_word = true;
}
}
return;
}
/**
 * Maps a character to a child-slot index: the character is upper-cased with
 * toupper() and 65 is subtracted from the result.
 * No range check is performed, so characters other than letters produce
 * values outside 0..25 (for example '\0' gives -65).
 */
int toIndex(char s) {
    const char upper = toupper(s);
    return upper - 65;
}
我已尝试使用 Valgrind 和 GDB 进行调试。Valgrind 的输出是:
==1979== Memcheck, a memory error detector
==1979== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==1979== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==1979== Command: ./test_function1
==1979==
==1979== Invalid read of size 4
==1979== at 0x8048684: insert (in /home/test_function1)
==1979== by 0x80485F7: main (in /home/test_function1)
==1979== Address 0xffffff00 is not stack'd, malloc'd or (recently) free'd
==1979==
==1979==
==1979== Process terminating with default action of signal 11 (SIGSEGV)
==1979== Access not within mapped region at address 0xFFFFFF00
==1979== at 0x8048684: insert (in /home/test_function1)
==1979== by 0x80485F7: main (in /home/test_function1)
==1979== If you believe this happened as a result of a stack
==1979== overflow in your program's main thread (unlikely but
==1979== possible), you can try to increase the size of the
==1979== main thread stack using the --main-stacksize= flag.
==1979== The main thread stack size used in this run was 8388608.
==1979==
==1979== HEAP SUMMARY:
==1979== in use at exit: 344 bytes in 1 blocks
==1979== total heap usage: 2 allocs, 1 frees, 4,440 bytes allocated
==1979==
==1979== LEAK SUMMARY:
==1979== definitely lost: 0 bytes in 0 blocks
==1979== indirectly lost: 0 bytes in 0 blocks
==1979== possibly lost: 0 bytes in 0 blocks
==1979== still reachable: 344 bytes in 1 blocks
==1979== suppressed: 0 bytes in 0 blocks
==1979== Reachable blocks (those to which a pointer was found) are not shown.
==1979== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==1979==
==1979== For counts of detected and suppressed errors, rerun with: -v
==1979== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
Segmentation fault (core dumped)
通过运行GDB,看起来错误来自第54行:
if (trav->children[toIndex(string[n])] == NULL)
我不知道到底是哪里出了问题。
答案 0 :(得分:2)
这只是关于问题中代码的一个可能问题的快速答案。我没有读完整篇文章。
在以下分配之后,内存中充满了垃圾数据:
trav->children[toIndex(string[n])] = malloc(sizeof(dicto));
使用calloc(保证内存被清零)会更好:
trav->children[toIndex(string[n])] = calloc(sizeof(dicto), 1);
或者自己将数据清零:
trav->children[toIndex(string[n])] = malloc(sizeof(dicto));
memset(trav->children[toIndex(string[n])], 0, sizeof(dicto));
如果将垃圾数据保留在内存中,则即使应该为真,下列条件也可能为false:
if(trav->children[toIndex(string[n])] == NULL)
P.S. 此外,sizeof(DICT) 是指针的大小,而不是结构体的大小。您可以改用 sizeof(*DICT) 或 sizeof(dicto)。
答案 1 :(得分:0)
您的代码中存在多个问题:
对 feof(fp) 的测试并不像您想的那样工作,实际上也没有必要,因为 fgets() 在到达文件末尾时会返回 NULL。
循环 for (int n = 0; n = strlen(string); n++) 永远不会结束,因为循环条件 n = strlen(string) 是赋值而不是比较:每次迭代 n 都会被重新设为字符串的长度(只要字符串非空,条件就为真),请改用:
for (int n = 0, len = strlen(string); n < len; n++) {
分配新节点时,必须初始化结构体,否则可能出现未定义行为,因为 malloc() 返回的内存块未被初始化。请改用 calloc()。
toIndex() 函数不一定会返回 0 到 26 范围内的值。你不应该硬编码 'A' 的值,而应该先检查该字符确实是一个字母。
以下是修改后的版本:
#include <stdbool.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
// Number of child slots in every trie node: toIndex() in this listing returns
// 0..25 for letters and 26 for any other character.
#define ALPHABET_SIZE 27
// Base length of the line buffer; main() declares it as SIZE + 1 bytes.
// NOTE(review): presumably the longest supported word -- confirm.
#define SIZE 45
// Trie node: one node per position on a path, with a fixed-size array of children.
typedef struct _dictionary {
// Set to true by insert() on the node where an inserted string ends.
bool is_word;
// Character that insert() records while walking through this node.
char letter;
// Child nodes, indexed by the value returned from toIndex().
struct _dictionary *children[ALPHABET_SIZE];
} dicto;
// Pointer insert() starts walking from. Nothing in this listing assigns it, so as a
// file-scope object it starts out as a null pointer.
dicto *DICT;
// Function prototypes
void insert(char *string);
int toIndex(char s);
/*
 * Loads the word list from the file "small" into the trie, one word per line.
 *
 * Returns 0 after the whole file has been read, 1 when the file cannot be
 * opened.
 */
int main(void) {
    /* Room for a SIZE-character word plus the '\n' kept by fgets() and the
     * terminating '\0', so a maximum-length line is not split in two. */
    char word[SIZE + 2];
    FILE *fp = fopen("small", "r");
    if (fp == NULL) {
        printf("Could not open file\n");
        return 1;
    }
    while (fgets(word, sizeof(word), fp)) {
        /* fgets() keeps the line terminator; cut it off (including the CR of
         * a CRLF file) so it is not stored in the trie as part of the word. */
        word[strcspn(word, "\r\n")] = '\0';
        /* Skip blank lines instead of inserting an empty word. */
        if (word[0] != '\0') {
            insert(word);
        }
    }
    fclose(fp);
    return 0;
}
//Inserts word into trie
void insert(char *string) {
dicto *trav = DICT; //Pointer to walk through the trie
for (int n = 0, len = strlen(string); n < len; n++) {
int index = toIndex(string[n]);
if (trav->children[index] == NULL) {
trav->children[index] = malloc(sizeof(DICT));
}
trav->letter = string[n];
trav = trav->children[index];
}
trav->is_word = true;
}
/**
 * Converts a character into a trie child index (assuming ASCII):
 * letters map case-insensitively to 0..25, every other character to 26.
 */
int toIndex(char c) {
    int index = 26; /* default slot: not a letter */

    if ('A' <= c && c <= 'Z') {
        index = c - 'A';
    } else if ('a' <= c && c <= 'z') {
        index = c - 'a';
    }
    return index;
}