Question

我试图用C编写一个算法：读取一个大文件（超过750,000行），用指定的分隔符拆分每一行，把数据保存到一个结构体中，然后再存入哈希表（Hashtable）。一切顺利，直到我想打印哈希表中每一行的某个特定字段：某些行的输出是正确的，但对另一些行，控制台只打印出一些随机符号（这可能意味着内存泄漏？）。

我试图找出可能导致此问题的原因。如果我隔离了将行分开并将其保存到结构中的代码，并分别为每一行执行它，它工作正常，一切都按预期打印。

我也尝试过不使用动态分配来实现，但那样会得到臭名昭著的“分段错误”（Segmentation fault），所以使用动态分配时效果反而更好。

以下是拆分行并保存它的代码：

Hashtable

整个结构看起来像这样：

/*
 * Maps an identifier to a bucket index in [0, NB_CASES_HASH).
 *
 * Starting from the seed 5381, the loop folds in id % 10, id % 100,
 * id % 1000, ... for every power of ten strictly below id; each step
 * computes hashage * 65 + remainder in unsigned arithmetic.
 *
 * id: value to hash.
 * Returns the accumulated value reduced modulo NB_CASES_HASH.
 */
unsigned int hash(unsigned int id) {
    unsigned int hashage = 5381; /* arbitrary seed */
    unsigned int mdop = 10;      /* current power of ten used as the modulus */

    while (mdop < id) {
        hashage = ((hashage << 6) + hashage) + id % mdop;

        /*
         * Stop before mdop * 10 would wrap around. Without this guard a
         * very large id keeps looping on wrapped moduli and mdop can
         * eventually become 0, which makes id % mdop a division by zero.
         */
        if (mdop > (unsigned int)-1 / 10)
            break;
        mdop *= 10;
    }

    return hashage % NB_CASES_HASH;
}

/*
 * Marks every bucket of the table as empty by storing NULL in each of
 * its NB_CASES_HASH slots.
 *
 * hashtable: table to reset; it is modified in place.
 */
void initiate_hashtable(Hashtable hashtable) {
    int idx = 0;

    while (idx < NB_CASES_HASH) {
        hashtable[idx] = NULL;
        idx++;
    }
}

/*
 * Pushes a copy of *oeuvre at the front of the list.
 *
 * liste:  address of the list head; updated to point to the new node.
 * oeuvre: record to store. The struct is copied by value into the node,
 *         so any pointer members are shared with the source, not duplicated.
 *
 * Terminates the program with EXIT_FAILURE if the node cannot be allocated.
 */
void ajout_entete(Liste *liste, Oeuvre *oeuvre) {
    Liste nouvelle = malloc(sizeof(Cellule));

    if (nouvelle == NULL) {
        exit(EXIT_FAILURE);
    }

    nouvelle->suiv = *liste;
    nouvelle->oeuvre = *oeuvre;
    /* Author's note: printing the node here shows correct data. */
    *liste = nouvelle;
}

/*
 * Inserts a copy of *oeuvre into a list kept in ascending order of year
 * (sorted insertion, intended to make later searches faster).
 *
 * The record is placed in front of the first node whose year is greater
 * than or equal to its own, or at the end when no such node exists.
 *
 * liste:  address of the list head (or of any `suiv` link).
 * oeuvre: record to insert; ajout_entete copies it into the new node.
 */
void ajout_annee(Liste *liste, Oeuvre *oeuvre) {
    /*
     * Walk the links iteratively. The earlier recursive version passed
     * &oeuvre (an Oeuvre **) to itself, so every insertion past the head
     * copied unrelated stack bytes into the node instead of the record.
     */
    while (*liste && oeuvre->year > (*liste)->oeuvre.year) {
        liste = &(*liste)->suiv;
    }

    ajout_entete(liste, oeuvre);
}

Oeuvre peuple_oeuvre(char line[MAX_CHARS_LINE]) {
    int i = 0, j = 1, cmpt = 0;
    char strings[CHAMPS_OEUVRE][MAX_SIZE];
    char carac = *(line);
    char mot[MAX_SIZE];
    mot[0] = carac;
    bool isSuivi = false;
    Oeuvre oeuvre;

    while (carac != '\n') {
        if (carac == ',') {
            if(isSuivi) {
                mot[j - 1] = '\"';
                mot[j] = '\0';
                isSuivi = false;
            } else
                mot[j - 1] = '\0';

            strcpy(strings[i], mot);
            j = 0;
            i++;
        } else
        if (carac == '\"') {
            cmpt++;
            carac = *(line + cmpt);
            while (carac != '\"') {
                mot[j] = carac;
                j++;
                cmpt++;
                carac = *(line + cmpt);
            }
            isSuivi = true;
        }
        cmpt++;
        carac = *(line + cmpt);
        mot[j] = carac;
        j++;
    }
    mot[j] = '\0';
    strcpy(strings[i], mot);

    //Assignation des valeurs :

    oeuvre.id = atoi(strings[0]);
    oeuvre.accession_number = strdup(strings[1]);
    oeuvre.artiste.nomArtiste = strdup(strings[2]);
    oeuvre.artiste.artistRole = strdup(strings[3]);
    oeuvre.artiste.artistId = atoi(strings[4]);
    oeuvre.titre = strdup(strings[5]);
    oeuvre.url = strdup(strings[CHAMPS_OEUVRE]);
    oeuvre.year = atoi(strings[9]);

    return oeuvre;
}

/*
 * Fills the table from standard input (the data file is expected to be
 * redirected to stdin).
 *
 * The first line is read and discarded (presumably a header row - confirm).
 * Each following line is parsed with peuple_oeuvre, hashed on its artist
 * id and inserted into the matching bucket with ajout_annee.
 *
 * hashtable: table to populate; its buckets must already be initialised.
 */
void peuple_hashtable(Hashtable hashtable) {
    char ligne[MAX_CHARS_LINE];
    /*
     * Scratch record on the stack: ajout_entete copies the struct into
     * each node, so no heap buffer is needed. The previous malloc'd one
     * was neither checked for NULL nor freed.
     */
    Oeuvre oeuvre;

    /* Skip the first line; nothing to do on empty input. */
    if (!fgets(ligne, MAX_CHARS_LINE, stdin))
        return;

    while (fgets(ligne, MAX_CHARS_LINE, stdin)) {
        oeuvre = peuple_oeuvre(ligne);
        ajout_annee(&hashtable[hash(oeuvre.artiste.artistId)], &oeuvre);
    }
}

/*
 * Entry point: creates an empty hash table and fills it from the data
 * arriving on standard input.
 */
int main() {
    Hashtable table;

    initiate_hashtable(table);
    peuple_hashtable(table);

    return 0;
}

提前致谢。

Answer 1

您的代码中存在许多问题。

如果line不包含换行符或缺少双引号，则行为未定义。
您没有初始化字符串数组：如果描述缺少字段，则行为未定义。
在保存结构字段的部分，您的分配代码不正确：必须为字符串结尾的空字符多分配一个字节，即分配 strlen(strings[0]) + 1 个字节，而不是 strlen(strings[0]) * sizeof(char*)。

使用POSIX函数strdup()会更简单：

// Assigning the values: strdup() allocates a private copy of each string,
// sized strlen + 1, so no manual malloc/strcpy is needed.

oeuvre.id = atoi(strings[0]);
oeuvre.accession_number = strdup(strings[1]);
oeuvre.artiste.nomArtiste = strdup(strings[2]);
oeuvre.artiste.artistRole = strdup(strings[3]);
oeuvre.artiste.artistId = atoi(strings[4]);
oeuvre.titre = strdup(strings[5]);
// The last valid row is CHAMPS_OEUVRE - 1; strings[CHAMPS_OEUVRE] is out of bounds.
oeuvre.url = strdup(strings[CHAMPS_OEUVRE - 1]);
oeuvre.year = atoi(strings[9]);

Answer 2

我通过像下面这样声明整个结构体解决了我的问题：

/*
 * One record. The text members are fixed-size arrays of MAX_CHARS bytes
 * embedded in the struct, so they need no separate allocation.
 */
typedef struct oeuvre {
    unsigned int id;
    char accession_number[MAX_CHARS];
    Artiste artiste;
    char titre[MAX_CHARS];
    int year;
    char url[MAX_CHARS];
} Oeuvre;

MAX_CHARS是一个足够大的数字。因此我认为，之前我在使用这些字符串之前没有正确地为它们分配内存，使得指针指向随机地址，从而产生了那些奇怪的输出，却没有报错。我还认为，解决这个问题的另一种方法是在我的函数中为每个Oeuvre的字符串动态分配内存。

无法从Hashtable检索所有数据

2 个答案: