内存未释放会导致 C 中的分段错误吗?

时间:2021-02-08 21:46:20

标签: c macos segmentation-fault

我刚刚遇到了一个非常奇怪的错误。我正在为一个简单的函数进行单元测试,如下所示。

更新:谢谢@Bodo,这是最小的工作示例。您可以简单地编译并运行 tokenizer.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>

/* ============================= BOOL =============================== */
#ifndef _BOOL_
#define _BOOL_

/*
 * Minimal boolean type for this file.
 * `false` is listed first so that it has the value 0 and `true` the value 1.
 * That matches C's own truth convention, so `if (flag)` and `!flag` behave as
 * expected in addition to explicit `flag == true` comparisons.
 */
typedef enum {
    false, true
} bool;

#endif // _BOOL_



/* ============================= STACK =============================== */
#ifndef _STACK_
#define _STACK_

/* Callback used to release whatever one stored element owns; it receives the
 * address of the element inside the stack's buffer. */
typedef void (*stack_freefn)(void *elemAddr);

/* Generic stack of fixed-size elements, copied by value into one buffer. */
typedef struct {
    size_t size;                // capacity: number of elements the buffer can hold
    int ite;                    // number of elements stored; also the index of the next free slot
    size_t elemSize;            // size of each element, in bytes
    void *elems;                // storage for the elements
    stack_freefn freefn;        // called on each remaining element on clear/dispose; may be NULL
} stack;

/**
 * Constructor: initialise `s` as an empty stack.
 *
 * @param s        stack to initialise (must not be NULL)
 * @param size     initial capacity, in elements; 0 is bumped to 1 because the
 *                 capacity grows by doubling and 0 could never grow
 * @param elemSize size of one element, in bytes (must be non-zero)
 * @param freefn   called on each element still stored when the stack is
 *                 cleared or disposed; may be NULL
 *
 * Invalid arguments and allocation failure abort through assert().
 */
void new_stack(stack *s, const size_t size, const size_t elemSize, stack_freefn freefn) {
    assert(s != NULL);
    assert(elemSize > 0);

    const size_t capacity = (size > 0) ? size : 1;
    /* Refuse a request whose byte count would wrap around size_t. */
    assert(capacity <= (size_t)-1 / elemSize);

    s->size = capacity;
    s->ite = 0;
    s->elemSize = elemSize;
    s->freefn = freefn;
    s->elems = malloc(capacity * elemSize);
    assert(s->elems != NULL);
}

/**
 * Release everything the stack owns.
 * When a free function was registered, it is applied to every element still
 * stored, from the top of the stack down; then the element buffer is freed
 * and the buffer pointer is reset to NULL.
 */
void dispose_stack(stack *s) {
    if (s->freefn != NULL) {
        while (s->ite > 0) {
            s->ite -= 1;
            char *top = (char *)s->elems + s->ite * s->elemSize;
            s->freefn(top);
        }
    }
    free(s->elems);
    s->elems = NULL;
}

/**
 * Push a copy of one element on top of the stack.
 *
 * @param s        stack to push onto
 * @param value    address of the element; s->elemSize bytes are copied from it
 * @param elemSize unused: the element size fixed in new_stack() is what gets
 *                 copied; the parameter is kept so existing callers compile
 *
 * The buffer doubles when it is full. Allocation failure aborts through
 * assert(); the old buffer pointer is only replaced once realloc succeeded.
 */
void push_stack(stack *s, const void *value, const size_t elemSize) {
    (void)elemSize;

    if ((size_t)s->ite == s->size) {
        /* Doubling a capacity of 0 would stay 0, so grow to 1 in that case. */
        const size_t grown = (s->size > 0) ? s->size * 2 : 1;
        void *tmp = realloc(s->elems, grown * s->elemSize);
        assert(tmp != NULL);
        s->elems = tmp;
        s->size = grown;
    }
    void *elemAddr = (char *)s->elems + s->elemSize * (size_t)s->ite;
    memcpy(elemAddr, value, s->elemSize);
    s->ite++;
}

/**
 * Pop the top element: copy it into `res` and shrink the stack by one.
 * An empty stack is left alone and `res` is not written.
 */
void pop_stack(stack *s, void *res) {
    if (s->ite <= 0) {
        return;
    }
    s->ite--;
    const char *top = (const char *)s->elems + s->ite * s->elemSize;
    memcpy(res, top, s->elemSize);
}

/**
 * Empty the stack while keeping its buffer for reuse.
 * With a free function registered, every stored element is passed to it,
 * top first; without one, the element count is simply reset to zero.
 */
void clear_stack(stack *s) {
    if (s->freefn == NULL) {
        s->ite = 0;
        return;
    }
    while (s->ite > 0) {
        s->ite -= 1;
        s->freefn((char *)s->elems + s->ite * s->elemSize);
    }
}

/** Number of elements currently stored in the stack. */
size_t stack_size(stack *s) {
    const size_t count = (size_t)s->ite;
    return count;
}

#endif // _STACK_



/* ============================= VECTOR =============================== */
#ifndef _VECTOR_
#define _VECTOR_

/* Comparison callback: receives two addresses (two elements, or a key and an
 * element) and returns 0 when they match. */
typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);

/* Callback used to release whatever one stored element owns; it receives the
 * address of the element inside the vector's buffer. */
typedef void (*VectorFreeFunction)(void *elemAddr);

/* Growable array of fixed-size elements, copied by value into one buffer. */
typedef struct {
    int elemSize;               // size of each element, in bytes
    int elemNum;                // number of elements currently stored
    int capacity;               // number of elements the buffer can hold before it must grow
    void *elems;                // pointer to the element buffer
    VectorFreeFunction freefn;  // called on each stored element by VectorDispose(); may be NULL
} vector;

/**
 * Grow the element buffer of `v` to twice its current capacity
 * (or to one element when the capacity is 0, which doubling could not grow).
 *
 * Returns nothing: a failed reallocation aborts through assert(), and the
 * vector is only updated once realloc has succeeded.
 */
static void DoubleMemory(vector *v) {
    assert(v->capacity >= 0 && v->elemSize > 0);

    const size_t newCapacity = (v->capacity > 0) ? (size_t)v->capacity * 2 : 1;
    /* The byte count is computed in size_t rather than int; refuse a request
     * that would wrap around even there. */
    assert(newCapacity <= (size_t)-1 / (size_t)v->elemSize);

    void *tmp = realloc(v->elems, newCapacity * (size_t)v->elemSize);
    assert(tmp != NULL);
    v->elems = tmp;
    v->capacity = (int)newCapacity;
}

/**
 * Constructor: initialise `v` as an empty vector.
 *
 * @param v                 vector to initialise (must not be NULL)
 * @param elemSize          size of one element, in bytes (must be positive)
 * @param freefn            applied to each stored element by VectorDispose();
 *                          may be NULL
 * @param initialAllocation number of elements to reserve up front
 *                          (must not be negative)
 *
 * Invalid arguments and allocation failure abort through assert().
 */
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
    assert(v != NULL);
    assert(elemSize > 0);
    assert(initialAllocation >= 0);

    /* Compute the request in size_t instead of int. malloc(0) is allowed to
     * return NULL, which would trip the assert below for a valid empty
     * vector, so always ask for at least one byte. */
    size_t bytes = (size_t)initialAllocation * (size_t)elemSize;
    if (bytes == 0) {
        bytes = 1;
    }
    v->elems = malloc(bytes);
    assert(v->elems != NULL);
    v->elemSize = elemSize;
    v->elemNum = 0;
    v->capacity = initialAllocation;
    v->freefn = freefn;
}

/**
 * Release everything the vector owns.
 * When a free function was registered, it is applied to each stored element,
 * last element first; then the element buffer is freed and the buffer
 * pointer is reset to NULL.
 */
void VectorDispose(vector *v) {
    if (v->freefn != NULL) {
        while (v->elemNum > 0) {
            const int last = v->elemNum - 1;
            v->freefn((char *)v->elems + last * v->elemSize);
            v->elemNum--;
        }
    }
    free(v->elems);
    v->elems = NULL;
}

/**
 * Report how many elements the vector currently holds.
 */
int VectorLength(const vector *v) {
    const int length = v->elemNum;
    return length;
}

/**
 * Add one element at the end of the vector.
 * `elemSize` bytes are copied from `elemAddr`; a full vector is first grown
 * with DoubleMemory().
 */
void VectorAppend(vector *v, const void *elemAddr) {
    if (v->elemNum == v->capacity) {
        DoubleMemory(v);
    }
    char *slot = (char *)v->elems + v->elemNum * v->elemSize;
    memcpy(slot, elemAddr, v->elemSize);
    v->elemNum += 1;
}

/**
 * Look for an element matching `key`, starting at `startIndex`.
 *
 * @param v          vector to search
 * @param key        address of the value to look for (must not be NULL)
 * @param searchfn   comparison function returning 0 on a match (must not be NULL)
 * @param startIndex first index to examine; must satisfy
 *                   0 <= startIndex < length unless the vector is empty
 * @param isSorted   `true` to binary-search with bsearch(), which requires the
 *                   elements from startIndex on to be sorted consistently
 *                   with searchfn; `false` for a linear scan
 * @return index of a matching element, or -1 if there is none
 *
 * Argument order differs between the two modes: bsearch() calls
 * searchfn(key, element) while the linear scan calls searchfn(element, key),
 * so searchfn should treat both arguments alike.
 */
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
    assert(key && searchfn);
    if (v->elemNum == 0) {
        return -1;
    }
    assert(startIndex >= 0 && startIndex < v->elemNum);

    if (isSorted == true) {
        /* binary search over the elements from startIndex to the end */
        const char *startAddr = (const char *)v->elems + startIndex * v->elemSize;
        const size_t count = (size_t)(v->elemNum - startIndex);
        const char *hit = bsearch(key, startAddr, count, (size_t)v->elemSize, searchfn);
        if (hit == NULL) {
            return -1;
        }
        /* turn the byte offset from the start of the buffer into an index */
        return (int)((hit - (const char *)v->elems) / v->elemSize);
    }

    /* linear search; honours startIndex, so elements before it are never reported */
    for (int i = startIndex; i < v->elemNum; i++) {
        if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
            return i;
        }
    }
    return -1;
}

#endif // _VECTOR_



/* ============================= TOKENIZER =============================== */

/**
 * Move the characters held in the stack into the vector as one new word.
 *
 * The word is a freshly allocated, null-terminated string. The address of the
 * pointer is handed to VectorAppend(), so the vector is expected to hold
 * `char *` elements. The stack is left empty.
 * Allocation failure aborts through assert().
 */
static void dumpstack(stack *s, vector *v) {
    const size_t len = stack_size(s);
    char *word = malloc(len + 1); // +1 for null-terminator
    assert(word != NULL);
    /* The stack yields the last character first, so fill the word backwards. */
    for (size_t i = len; i > 0; i--) {
        pop_stack(s, &word[i - 1]);
    }
    word[len] = '\0';
    VectorAppend(v, &word);
    clear_stack(s);
}

/* Initial capacity, in characters, of the stack that collects one word. */
static const size_t kTokenStackDefaultSize = 64;

/**
 * Split `stream` into lower-cased words and append each one to `words`
 * through dumpstack().
 *
 * A word is a run of letters and digits. Two more characters can be part of
 * a word:
 *   - '-' inside a word ("covid-19"), or directly in front of a digit ("-9");
 *   - '.' directly after a digit inside a word ("3.14").
 * Any other character ends the word being collected; a '-' found outside a
 * word and not followed by a digit is ignored.
 *
 * @param words  vector receiving the words
 * @param stream null-terminated text to split
 */
static void tokenize(vector *words, char *stream) {
    stack s;
    new_stack(&s, kTokenStackDefaultSize, sizeof(char), NULL);
    const size_t len = strlen(stream);
    bool begin = false;     /* `true` while a word is being collected */

    for (size_t i = 0; i < len; i++) {
        char c = stream[i];
        /* <ctype.h> functions are only defined for values representable as
         * unsigned char (or EOF), so a possibly negative char is converted. */
        const unsigned char uc = (unsigned char)c;
/* =============================== My printf() is here ============================== */

// printf("char c = [%c]\n", c);

/* =============================== My printf() is here ============================== */
        if (isalnum(uc)) {
            begin = true;
            char lower = (char)tolower(uc);
            push_stack(&s, &lower, sizeof(char));
        } else if (c == '-') {
            /* case: covid-19 (inside a word) or -9 (sign in front of a digit) */
            if (begin == true || (i + 1 < len && isdigit((unsigned char)stream[i + 1]))) {
                begin = true;
                push_stack(&s, &c, sizeof(char));
            }
        } else if (c == '.' && begin == true && isdigit((unsigned char)stream[i - 1])) {
            /* case: 3.14 -- begin == true guarantees i >= 1 here */
            push_stack(&s, &c, sizeof(char));
        } else if (begin == true) {
            dumpstack(&s, words);
            begin = false;
        }
    }
    /* flush the word still being collected when the text ends */
    if (begin == true) {
        dumpstack(&s, words);
    }
    dispose_stack(&s);
}



/* ============================= UNIT-TEST =============================== */

/**
 * VectorFreeFunction for vectors whose elements are `char *`:
 * `elemAddr` points at the stored pointer, and the string it refers to is freed.
 */
static void freestr(void *elemAddr) {
    char **slot = elemAddr;
    free(*slot);
}

/**
 * VectorCompareFunction for `char *` elements: each argument points at a
 * string pointer, and the two strings are compared with strcmp().
 */
static int compstr(const void *elemAddr1, const void *elemAddr2) {
    const char *const *lhs = elemAddr1;
    const char *const *rhs = elemAddr2;
    return strcmp(*lhs, *rhs);
}

/**
 * Tokenize a sample sentence, then check the number of words found, that a
 * few expected words are present, and that a few fragments are absent.
 */
static void test_tokenize(void) {
    printf("Testing Tokenizer.c::tokenize() ...\n");

    char *sentence = "Covid-19: Top adviser warns France at 'emergency' virus moment - BBC News\nPi = 3.14\n-1 is negative.";
    char *musthave[] = {"covid-19", "top", "3.14", "-1"};
    char *musthavenot[] = {"-", "1"};
    const size_t expectedCount = sizeof(musthave) / sizeof(musthave[0]);
    const size_t forbiddenCount = sizeof(musthavenot) / sizeof(musthavenot[0]);

    vector words;
    VectorNew(&words, sizeof(char *), freestr, 256);
    tokenize(&words, sentence);

    assert(VectorLength(&words) == 16);
    for (size_t k = 0; k < expectedCount; k++) {
        assert(VectorSearch(&words, &musthave[k], compstr, 0, false) != -1);
    }
    for (size_t k = 0; k < forbiddenCount; k++) {
        assert(VectorSearch(&words, &musthavenot[k], compstr, 0, false) == -1);
    }

    VectorDispose(&words);
    printf("[ALL PASS]\n");
}

/* Program entry point: run the tokenizer unit test. */
int main(void) {
    test_tokenize();
    return 0;
}

我一开始有 segmentation fault

[1]    4685 segmentation fault  testtokenizer

但是当我添加一个 printf() 进行调试时,segmentation fault 消失了并且测试通过了。注释掉 printf 后,该函数仍然有效。我很困惑。

回想一下,在这次测试之前,我测试了一些内存处理函数,可能在内存中留下了一些未释放的块。这会是那个时有时无的分段错误的原因吗?谢谢大家。

更新:现在连我自己也无法重现这个错误了。上面的 tokenizer.c 可以通过单元测试。我认为这可能是由 Makefile 的依赖(prerequisite)规则引起的:源代码改变后,有些目标文件没有被 gcc 重新编译。

谢谢@Steve Summit,您明确表示未释放的内存不会导致分段错误。 感谢 @schwern 进行代码审查,这真的很有帮助。

1 个答案:

答案 0 :(得分:2)

<块引用>

但是当我添加 printf() 进行调试时,分段错误消失了,测试通过了。注释掉 printf 后,该函数仍然有效。我很困惑。

这就是所谓的未定义行为(undefined behavior):之所以这样叫,正是因为它的行为没有定义。看似不相关的改动可能会把某些东西稍微挪动一下,使代码碰巧“能工作”,但这些改动与真正的问题只有间接的关系。

<块引用>

我测试了一些内存处理函数,可能在内存中留下了一些未释放的块。这会是短暂的分段错误的原因吗?

不会。未释放只意味着那些内存再也无法被引用,也就是发生了“泄漏”(leaked)。进程退出时,这些内存会归还给操作系统。

问题一定出在别处。没有看到你的整个程序我们不能肯定,但有两件可疑的事情很突出。

您定义了一个固定大小的堆栈,却向其中压入了次数不确定的元素。除非 push_stack 对此有保护措施,否则您会越界写到已分配的内存之外。

您正在堆栈中存储对变量 lower 和 c 的引用。

char lower = tolower(c);
push_stack(&s, &lower, sizeof(char));

一旦 lower 超出作用域,它会被自动释放,其内存会被重新使用。&lower 在 tokenize 返回后就无效了。如果您的堆栈只在该函数执行期间存在,这看起来没有问题,但值得注意。

而且也可能是 new_stack、push_stack 或 dumpstack 中的某一个做错了什么。

相关问题