Question

在我学习的课程中，有一个 pset 要求读取字典文件并据此创建一个哈希表，然后通过查找另一个文本文件中的每个单词是否存在于哈希表中来检查其拼写是否正确，最后把哈希表从内存中卸载。

程序由两个文件组成。其中一个文件不是我写的（肯定没有问题），它先运行，并调用我编写的 .c 文件中的函数。下面是我写的文件：

#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#include "dictionary.h"

#define DICSIZE 10000

/* One entry in a bucket's singly linked list. */
typedef struct node
{

    char* word;         // heap-allocated copy of the dictionary word (made with mystrdup() in load())
    struct node* next;  // next node in the same bucket, or NULL at the end of the chain

} 
node;

/* Hash table: an array of bucket heads plus a running word count. */
typedef struct table
{
    node** table;            // array of DICSIZE list heads, indexed by the value hashDJB2() returns
    unsigned int wordCount;  // incremented once for every word load() inserts
}
table;

// the single dictionary table shared by load(), check(), size() and unload();
// it stays NULL until load() allocates it
struct table* hashTable = NULL;

// declaration of the hash function: maps a NUL-terminated string to a
// bucket index in the range [0, DICSIZE)
unsigned long hashDJB2(unsigned char *str);
// declaration of the string duplication function: returns a malloc'd copy
// of s (the caller must free it)
char* mystrdup(const char* s);
// declaration of the lowercase function: returns a malloc'd lowercase copy
// of str (the caller must free it)
char* returnLowerCase(const char* str);

/**
 * Returns true if word is in dictionary else false.
 */
bool check(const char* word)
{
    node* list = NULL;
    char* wrd = returnLowerCase(word);
    unsigned long hashValue = hashDJB2((unsigned char*) wrd);
    for (list = hashTable->table[hashValue]; list != NULL; list = list->next) {

        if (strcmp(list->word, wrd) == 0) {
            return true;
        }
    }
    free(wrd);
    return false;
}

/**
 * Loads dictionary into memory.  Returns true if successful else false.
 */
bool load(const char* dictionary)
{

    // allocation of memory for the table structure
    if ((hashTable = malloc(sizeof(struct table))) == NULL) {

        printf("Error allocating memory for table pointer\n");
        return false;

    }

    // allocate pointers to table itself
    if ((hashTable->table = malloc((sizeof(node*) * DICSIZE) + sizeof(unsigned int))) == NULL) {

        printf("Error allocating memory for heads of list\n");
        return false;

    }

    // initialize head nodes to NULL
    for (int i = 0; i < DICSIZE; i++) {

        hashTable->table[i] = NULL;
    }

    // initialize wordCount to zero
    hashTable->wordCount = 0;

    /*
    * Till now, was the creation of the backbone structure.
    * Now, implementation of hash() function and loading the words
    * onto the table.
    */ 

    FILE* dp;
    dp = fopen(dictionary, "r");

    if (dp == NULL) {
        printf("Error in reading from dictionary\n");
        return false;
    }

    // reading words from dictionary
    char newWord[LENGTH + 1];
    while (fscanf(dp, "%s", newWord) == 1) {


        // determine the hash value of the 
        unsigned long hashValue = hashDJB2((unsigned char*) newWord);

        // in the pset, they say i don't have to check whether the word already exists
        // if (! check((const char*) newWord)) {

            // if not, insert a new node:

            // attempt to create a new node
            node* newNode = NULL;
            if ((newNode = malloc(sizeof(node))) == NULL) {

                printf("Error: couldnt allocate a new node\n");
                return false;
            }

            // insert new node to the beginning of the list
            newNode->word = mystrdup(newWord);
            newNode->next = hashTable->table[hashValue];
            hashTable->table[hashValue] = newNode;
            hashTable->wordCount++;

        //}
    }    
    //free(dp);
    return true;
}

/**
 * Returns number of words in dictionary if loaded else 0 if not yet loaded.
 */
unsigned int size(void)
{
    // hashTable is NULL until load() allocates it; honour the "0 if not
    // loaded" contract instead of dereferencing a null pointer
    if (hashTable == NULL) {
        return 0;
    }

    return hashTable->wordCount;
}

/**
 * Unloads dictionary from memory.  Returns true if successful else false.
 */
bool unload(void)
{

    node* head = NULL;
    long i = 0;
    for (head = hashTable->table[i]; head != NULL; head = hashTable->table[++i]) {

        node* list = NULL;
        for (list = head; list != NULL; list = head) {

            head = head->next;
            free(list->word);
            free(list);
        }
    }
    free(hashTable->table);
    free(hashTable);
    return true;
}

// djb2 string hash (Dan Bernstein): start from 5381 and fold each byte in
// as hash * 33 + byte, then reduce the result to a bucket index below DICSIZE
unsigned long hashDJB2(unsigned char *str)
{
    unsigned long acc = 5381;

    for (const unsigned char *cursor = str; *cursor != '\0'; cursor++) {
        acc = acc * 33 + *cursor;
    }

    return acc % DICSIZE;
}

// Returns a freshly malloc'd copy of the NUL-terminated string s, or NULL
// if the allocation fails. The caller owns the copy and must free() it.
char* mystrdup(const char* s)
{
    size_t bytes = strlen(s) + 1;  // characters plus the terminator
    char* copy = malloc(bytes);

    if (copy != NULL) {
        strcpy(copy, s);
    }

    return copy;
}

// Returns a freshly malloc'd copy of str with every character converted to
// lowercase, or NULL if the allocation fails. The caller owns the copy and
// must free() it.
char* returnLowerCase(const char* str) {

    size_t len = strlen(str);
    char* p = malloc(len + 1);
    if (p == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < len; i++) {
        // tolower() requires a value representable as unsigned char
        p[i] = (char) tolower((unsigned char) str[i]);
    }

    // valid indices are 0..len; the terminator belongs at len, not len + 1
    p[len] = '\0';

    return p;
}

首先，“speller.c”（pset 提供的文件）调用 load() 函数，然后调用 check() 函数几万次（取决于文本中的单词数），最后调用 unload()。我的 check 和 unload 部分有问题。当文本很小时，check() 运行正常，但当文本变大时，我会得到如下错误：

speller: malloc.c:2372: sysmalloc: Assertion `(old_top == (((mbinptr) (((char *) &((av)->bins[((1) - 1) * 2])) - __builtin_offsetof (struct malloc_chunk, fd)))) && old_size == 0) || ((unsigned long) (old_size) >= (unsigned long)((((__builtin_offsetof (struct malloc_chunk, fd_nextsize))+((2 * (sizeof(size_t))) - 1)) & ~((2 * (sizeof(size_t))) - 1))) && ((old_top)->size & 0x1) && ((unsigned long)old_end & pagemask) == 0)' failed. Aborted (core dumped)

第一个问题是，我分配内存的方式似乎存在问题。另一个问题是我无法释放我分配的所有内存。

出于某种原因，valgrind 的 --track-origins=yes 选项不起作用，所以我得到的输出只有这些：

==3852== 
==3852== HEAP SUMMARY:
==3852==     in use at exit: 101,650 bytes in 18,547 blocks
==3852==   total heap usage: 305,376 allocs, 286,829 frees, 2,730,448 bytes allocated
==3852== 
==3852== LEAK SUMMARY:
==3852==    definitely lost: 101,298 bytes in 18,546 blocks
==3852==    indirectly lost: 0 bytes in 0 blocks
==3852==      possibly lost: 0 bytes in 0 blocks
==3852==    still reachable: 352 bytes in 1 blocks
==3852==         suppressed: 0 bytes in 0 blocks
==3852== Rerun with --leak-check=full to see details of leaked memory
==3852== 
==3852== For counts of detected and suppressed errors, rerun with: -v
==3852== Use --track-origins=yes to see where uninitialised values come from
==3852== ERROR SUMMARY: 57191 errors from 5 contexts (suppressed: 0 from 0)

我很想得到一些帮助。

Answer 1

returnLowerCase()中的一个错误（可能还有其他错误）：

p[len + 1] = '\0';

应该是：

p[len] = '\0';

（您已分配 len + 1 个字符，因此有效索引为 0..len，其中实际字符串位于索引 0..len-1，\0 终止符位于索引 len。）

C在我编写的

1 个答案: