Question

我正在用C编写一个简单的程序，它基于在哈希表中查找它们来查找单词的字谜。我所拥有的是一个哈希表数组，其中的单词由它们的长度索引并在其字母的总和内进行散列，例如： a = 1，b = 2等。

我仍然熟悉C中的动态内存管理，所以这个问题可能很简单，但是我有一个为每个哈希表（及其内部数据）分配和释放内存的函数。

我最初使用单个哈希表编写了这个程序，在通过valgrind运行程序之后，我发现内存已经正确释放并且没有泄漏。但是，当扩展程序以使用一组哈希表时，valgrind开始发现可能的泄漏（尽管它报告它们仍然可以访问）。我很困惑为什么在散列表数组中没有正确释放内存，尽管数组中的每个散列表都是通过最初使用的deallocate函数运行的。

使用完整代码Full Code

typedef struct Bucket Bucket;
typedef struct HashTable HashTable;

struct Bucket {
  char* data;
  Bucket *next;
};

struct HashTable {
  int size;
  Bucket **buckets;
};

int main(int argc, char const *argv[])
{

  // Allocate memory for array of hash tables
  HashTable** hash_array = (HashTable**) malloc(sizeof(HashTable*) * WORD_SIZE);
  for(i = 0; i < WORD_SIZE; i++) {
    hash_alloc(&hash_array[i], BUCKET_COUNT);
  }

  // Main logic here...

  // Free memory
  for(i = 0; i < WORD_SIZE; i++) {
    hash_dealloc(hash_array[i]);
  }
  free(hash_array);
  return 0;
}

哈希表分配功能

void hash_alloc(HashTable** a, unsigned int size) {
  *a = (HashTable*) malloc(sizeof(HashTable));
  (*a)->buckets = (Bucket**) malloc(sizeof(Bucket*) * size);
  (*a)->size = size;
}

哈希表释放函数

void hash_dealloc(HashTable* a) {
  int i;
  Bucket* current, *temp;
  for(i = 0; i < a->size; i++) {
    current = a->buckets[i];
    while(current != NULL) {
      temp = current;
      free(temp->data);
      current = current->next;
      free(temp);
    }
    free(current);
  }
  free(a->buckets);
  free(a);
}

添加到哈希表函数

void add_to_hash_array(HashTable** a, char* data) {
    // Removed some other logic for readability...

    replace_str(data, "\n", "\0");
    newNode->data = strdup(data);
    newNode->next = currentTable->buckets[index];
    currentTable->buckets[index] = newNode;
  } else {
    return;
  }
}

Valgrind输出

==39817== 261,120 bytes in 128 blocks are still reachable in loss record 5 of 7
==39817==    at 0x4C2B6CD: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==39817==    by 0x400A38: hash_alloc (main.c:73)
==39817==    by 0x4008B0: main (main.c:39)
==39817== 
==39817== 286,936 bytes in 31,553 blocks are still reachable in loss record 6 of 7
==39817==    at 0x4C2B6CD: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==39817==    by 0x4EBAD71: strdup (strdup.c:43)
==39817==    by 0x400D4D: add_to_hash_array (main.c:141)
==39817==    by 0x400914: main (main.c:51)
==39817== 
==39817== 504,848 bytes in 31,553 blocks are still reachable in loss record 7 of 7
==39817==    at 0x4C2B6CD: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==39817==    by 0x400D16: add_to_hash_array (main.c:136)
==39817==    by 0x400914: main (main.c:51)

Answer 1

将我的评论移到答案中，因为这几乎可以解决。

在malloc之前不需要初始化为NULL，但是稍后释放的任何变量指针都应该设置为NULL并且如果不是NULL则只有空。还要将结构中的指针值初始化为NULL（更容易使用calloc并自动获取），以便您的警卫正常工作。我已经对你的代码进行了一些查看，你的哈希让我担心 - 你需要确保你的哈希函数只返回0到BUCKET_COUNT-1范围内的值。我做了一些这种快速更改，并修复了你的replace_str方法，它不再抱怨valgrind。

现在当我运行代码时，我发现它找到了要比较的正确单词，但是排序函数没有返回一个排序的字符串，所以它没有识别字谜。应该是一个简单的解决方案。

编辑：我刚刚粘贴了谷歌搜索找到的第一个字符串排序例程并运行以获得以下输出：

./a.out trips
trips: 82 size: 5
strip

stirp

sprit

spirt

Answer 2

在generate_anagrams

void generate_anagrams(HashTable* a, char* word) {
    int index = hash(word);
    char* word_dup = strdup(word);
    char* current_word;
    string_sort(word_dup);
    printf("%s: %i size: %zi\n", word, index, strlen(word));
    Bucket* current = a->buckets[index];
    while(current != NULL) {
        if(current->data) {
            current_word = strdup(current->data);
            string_sort(current_word);
            if(strcmp(word_dup, current_word) == 0) {
                printf("%s\n", current->data);
            }
        }
        current = current->next;
    }
    free(current_word);
    free(word_dup);
}

您正在泄露strdup ed current_word。将free(current_word);移至if (current->data)。

在add_to_hash_array

void add_to_hash_array(HashTable** a, char* data) {
    int index = hash(data);
    HashTable* currentTable = a[strlen(data)];
    Bucket* existingNode = hash_lookup(currentTable, data);
    if(existingNode == NULL) {
        Bucket *newNode = (Bucket*) malloc(sizeof(Bucket));
        if(newNode == NULL) {
            exit(1);
        }
        replace_str(data, "\n", "\0");
        newNode->data = strdup(data);
        newNode->next = currentTable->buckets[index];
        currentTable->buckets[index] = newNode;
    } else {
        return;
    }
}

您只需在查找后从data删除换行符，但在删除换行符后插入data，这样就不会检测到重复项。

在main，你不应该

 generate_anagrams(hash_array[strlen(arg_dup) + 1], arg_dup);

但如果您在hash_array[strlen(arg_dup)]开头删除换行符，请使用add_to_hash_array。

当然，您应确保hash不会产生超出范围的索引。

strncpy(str, str, p-str);是未定义的行为。

Answer 3

没有内存泄漏，但代码存在一些问题（replace_str函数需要用正确的版本替换）。我的linux盒子上的valgrind输出：

$ valgrind --leak-check=full --show-reachable=yes ./MEMORY_LEAK absurde
==13476== Memcheck, a memory error detector
==13476== Copyright (C) 2002-2010, and GNU GPL'd, by Julian Seward et al.
==13476== Using Valgrind-3.6.1-Debian and LibVEX; rerun with -h for copyright info
==13476== Command: ./MEMORY_LEAK absurde
==13476== 
==13476== Conditional jump or move depends on uninitialised value(s)
==13476==    at 0x8048A9A: hash_lookup (MEMORY_LEAK.c:132)
==13476==    by 0x80489BD: add_to_hash_array (MEMORY_LEAK.c:113)
==13476==    by 0x804871C: main (MEMORY_LEAK.c:50)
==13476== 
absurde: 70 size: 7
==13476== Conditional jump or move depends on uninitialised value(s)
==13476==    at 0x8048D1C: generate_anagrams (MEMORY_LEAK.c:169)
==13476==    by 0x8048795: main (MEMORY_LEAK.c:56)
==13476== 
==13476== Conditional jump or move depends on uninitialised value(s)
==13476==    at 0x8048890: hash_dealloc (MEMORY_LEAK.c:81)
==13476==    by 0x80487D8: main (MEMORY_LEAK.c:64)
==13476== 
==13476== Conditional jump or move depends on uninitialised value(s)
==13476==    at 0x4027BC2: free (vg_replace_malloc.c:366)
==13476==    by 0x804889C: hash_dealloc (MEMORY_LEAK.c:87)
==13476==    by 0x80487D8: main (MEMORY_LEAK.c:64)
==13476== 
==13476== 
==13476== HEAP SUMMARY:
==13476==     in use at exit: 0 bytes in 0 blocks
==13476==   total heap usage: 360 allocs, 360 frees, 133,424 bytes allocated
==13476== 
==13476== All heap blocks were freed -- no leaks are possible
==13476== 
==13476== For counts of detected and suppressed errors, rerun with: -v
==13476== Use --track-origins=yes to see where uninitialised values come from
==13476== ERROR SUMMARY: 65330 errors from 4 contexts (suppressed: 11 from 6)

释放函数中的内存

3 个答案: