我刚刚遇到了一个非常奇怪的错误。我正在为一个简单的函数进行单元测试,如下所示。
更新:谢谢@Bodo,这是最小的工作示例。您可以简单地编译并运行 tokenizer.c
。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
/* ============================= BOOL =============================== */
#ifndef _BOOL_
#define _BOOL_
typedef enum {
true, false
} bool;
#endif // _BOOL_
/* ============================= STACK =============================== */
#ifndef _STACK_
#define _STACK_
typedef void (*stack_freefn)(void *elemAddr);
typedef struct {
size_t size; // number of element allowed
int ite; // point to the current last element
size_t elemSize; // size of each element (how many bytes)
void *elems; // stockage of elements
stack_freefn freefn; // free memory allocated for each element if necessary
} stack;
/* constructor */
void new_stack(stack *s, const size_t size, const size_t elemSize, stack_freefn freefn) {
s->size = size;
s->ite = 0;
s->elemSize = elemSize;
s->elems = malloc(size * elemSize);
s->freefn = freefn;
}
/* free memory */
void dispose_stack(stack *s) {
if (s->freefn != NULL) {
while (s->ite > 0) {
void *elemAddr = (char *)s->elems + --s->ite * s->elemSize;
s->freefn(elemAddr);
}
}
free(s->elems);
s->elems = NULL;
}
/* push one new element on the top */
void push_stack(stack *s, const void *value, const size_t elemSize) {
if (s->ite == s->size) {
s->size *= 2;
s->elems = realloc(s->elems, s->size * s->elemSize);
}
void *elemAddr = (char *)s->elems + s->elemSize * s->ite++;
memcpy(elemAddr, value, s->elemSize);
}
/* pop our the element on the top */
void pop_stack(stack *s, void *res) {
if (s->ite > 0) {
void *elemAddr = (char *)s->elems + ((s->ite - 1) * s->elemSize);
memcpy(res, elemAddr, s->elemSize);
s->ite--;
}
}
void clear_stack(stack *s) {
if (s->freefn != NULL) {
while (s->ite > 0) {
void *elemAddr = (char *)s->elems + --s->ite * s->elemSize;
s->freefn(elemAddr);
}
} else {
s->ite = 0;
}
}
size_t stack_size(stack *s) {
return s->ite;
}
#endif // _STACK_
/* ============================= VECTOR =============================== */
#ifndef _VECTOR_
#define _VECTOR_
typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*VectorFreeFunction)(void *elemAddr);
typedef struct {
int elemSize; //how many byte for each element
int elemNum; //number of current element in vector
int capacity; //maximum number of element vector can hold
void *elems; //pointer to data memory
VectorFreeFunction freefn; //pointer to the function used to free each element
} vector;
/**
* Reallocate a new memory of twice of original size
* return 1 if reallocation success, otherwise return -1.
*/
static void DoubleMemory(vector *v) {
void *tmp = realloc(v->elems, v->capacity * v->elemSize * 2);
assert(tmp != NULL);
v->elems = tmp;
v->capacity *= 2;
}
/**
* Constructor
*/
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
v->elems = malloc(initialAllocation * elemSize);
assert(v->elems != NULL);
v->elemSize = elemSize;
v->elemNum = 0;
v->capacity = initialAllocation;
v->freefn = freefn;
}
/**
* Frees up all the memory of the specified vector.
*/
void VectorDispose(vector *v) {
if (v->freefn != NULL) {
for (; v->elemNum > 0; v->elemNum--) {
void *elemAddr = (char *)v->elems + (v->elemNum - 1) * v->elemSize;
v->freefn(elemAddr);
}
}
free(v->elems);
v->elems = NULL;
}
/**
* Returns the logical length of the vector.
*/
int VectorLength(const vector *v) {
return v->elemNum;
}
/**
* Appends a new element to the end of the specified vector.
*/
void VectorAppend(vector *v, const void *elemAddr) {
/* double size if neccessary */
if (v->elemNum == v->capacity) DoubleMemory(v);
memcpy((char *)v->elems + v->elemNum * v->elemSize, elemAddr, v->elemSize);
v->elemNum++;
}
/**
* Search the specified vector for an element whose contents match the element passed as the key.
*/
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
assert(key && searchfn);
if (v->elemNum == 0) return -1;
assert(startIndex >= 0 && startIndex < v->elemNum);
if (isSorted == true) {
/* binary search */
void *startAddr = (char *)v->elems + startIndex * v->elemSize;
int size = v->elemNum - startIndex;
void *resAddr = bsearch(key, startAddr, size, v->elemSize, searchfn);
return (resAddr != NULL)? ((char *)resAddr - (char *)v->elems) / v->elemSize : -1;
} else {
/* linear search */
for (int i = 0; i < v->elemNum; i++) {
if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
return i;
}
}
return -1;
}
}
#endif // _VECTOR_
/* ============================= TOKENIZER =============================== */
/**
* Dump current string into vector as a new word.
* Strings are null-terminated.
*/
static void dumpstack(stack *s, vector *v) {
size_t len = stack_size(s);
char *word = (char *)malloc((len + 1) * sizeof(char)); // +1 for null-terminator
for (int i = len - 1; i >= 0; i--) {
pop_stack(s, word + i * sizeof(char));
}
word[len] = '\0';
VectorAppend(v, &word);
clear_stack(s);
}
static const size_t kTokenStackDefaultSize = 64;
static void tokenize(vector *words, char *stream) {
stack s;
new_stack(&s, kTokenStackDefaultSize, sizeof(char), NULL);
size_t len = strlen(stream);
bool begin = false;
char c;
for (int i = 0; i < len; i++) {
c = stream[i];
/* =============================== My printf() is here ============================== */
// printf("char c = [%c]\n", c);
/* =============================== My printf() is here ============================== */
if (isalpha(c) || isdigit(c)) {
if (begin == false) begin = true;
char lower = tolower(c);
push_stack(&s, &lower, sizeof(char));
} else if (c == '-') {
if (begin == true) { // case: covid-19
push_stack(&s, &c, sizeof(char));
} else {
if (i < len - 1 && isdigit(stream[i + 1])) { // case: -9
begin = true;
push_stack(&s, &c, sizeof(char));
} else {
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
}
}
} else if (c == '.' && begin == true) { // case: 3.14
if (isdigit(stream[i - 1])) {
push_stack(&s, &c, sizeof(char));
} else {
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
}
} else {
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
}
}
if (begin == true) {
dumpstack(&s, words);
begin = false;
}
dispose_stack(&s);
}
/* ============================= UNIT-TEST =============================== */
/**
* HashSetFreeFunction<char *>
*/
static void freestr(void *elemAddr) {
char *str = *(char **)elemAddr;
free(str);
}
/**
* HashSetCompareFunction<char *>
*/
static int compstr(const void *elemAddr1, const void *elemAddr2) {
char *str1 = *(char **)elemAddr1;
char *str2 = *(char **)elemAddr2;
return strcmp(str1, str2);
}
static void test_tokenize(void) {
printf("Testing Tokenizer.c::tokenize() ...\n");
char *sentence = "Covid-19: Top adviser warns France at 'emergency' virus moment - BBC News\nPi = 3.14\n-1 is negative.";
vector words;
VectorNew(&words, sizeof(char *), freestr, 256);
tokenize(&words, sentence);
char *musthave[] = {"covid-19", "top", "3.14", "-1"};
char *musthavenot[] = {"-", "1"};
assert(VectorLength(&words) == 16);
for (int i = 0; i < sizeof(musthave)/sizeof(char *); i++) {
assert(VectorSearch(&words, &musthave[i], compstr, 0, false) != -1);
}
for (int i = 0; i < sizeof(musthavenot)/sizeof(char *); i++) {
assert(VectorSearch(&words, &musthavenot[i], compstr, 0, false) == -1);
}
VectorDispose(&words);
printf("[ALL PASS]\n");
}
int main(void) {
test_tokenize();
}
我一开始有 segmentation fault
。
[1] 4685 segmentation fault testtokenizer
但是当我添加一个 printf()
进行调试时,segmentation fault
消失了并且测试通过了。注释掉 printf
后,该函数仍然有效。我很困惑。
回想一下,在这次测试之前,我测试了一些内存处理功能,并且可能在内存中留下了一些未释放的块。这会是短暂的分段错误的原因吗?谢谢兄弟。
更新:
现在我什至不能自己重现这个错误。上面的 tokenizer.c
可以通过单元测试。我认为这可能是由 makefile 先决条件规则引起的。当源代码改变时,gcc 没有重新编译一些目标文件。
谢谢@Steve Summit,您明确表示未释放的内存不会导致分段错误。 感谢 @schwern 进行代码审查,这真的很有帮助。
答案 0 :(得分:2)
但是当我添加 printf() 进行调试时,分段错误消失了,测试通过了。注释掉 printf 后,该函数仍然有效。我很困惑。
他们称之为undefined behavior,因为它的行为是未定义的。看似不相关的操作可能会稍微推动一些事情,使代码“工作”,但它们只是与问题的切线相关。
<块引用>我测试了一些内存处理函数,可能在内存中留下了一些未释放的块。这会是短暂的分段错误的原因吗?
没有。这确实意味着内存是不可引用的和 "leaked"。当进程退出时,内存将释放给操作系统。
问题一定出在别处。没有看到你的整个程序我们不能肯定,但有两件可疑的事情很突出。
您正在定义一个固定大小的堆栈,但您将其推入了不确定的次数。除非 push_stack
对此有保护措施,否则您将离开分配的内存。
您正在堆栈中存储对变量的引用。 lower
、c
char lower = tolower(c);
push_stack(&s, &lower, sizeof(char));
一旦 lower
超出范围,它将自动被释放并重新使用内存。 &lower
在 tokenize
返回后无效。如果您的堆栈只持续函数的长度,这似乎没问题,但值得注意。
而且可能 new_stack
、push_stack
或 dumpstack
做错了。