K-ary树(排序单词)

时间:2011-11-24 17:38:01

标签: c algorithm sorting tree

我正在努力克服C中的问题,我无法弄清楚发生了什么。 我正在根据字母的频率对树进行排序(例如“cab”为“1a 1b 1c”)。这是我的代码:

#define M 8 //maximal number of appearances of a letter
#define LAST_LETTER 25 //number of last letter
#define bzero(b,len) (memset((b), '\0', (len)), (void) 0)
static const char* alphabet = {"abcdefghijklmnopqrstuvwxyz"};


//TODO: define list structures

typedef struct word word;
struct word {
    char* _word;
    word *next;
};

typedef struct list list;
struct list {
    word *first;
};

typedef struct dict dict;
struct dict {
        dict *children[M];
        list *words[M];
};

//returns an empty list
list * list_new() {
    list *l = (list*) malloc(sizeof(list));
    word *w = (word*) malloc(sizeof(word));
    if (l == NULL || w == NULL) {
        printf("could not create list : memory allocation failed\n");
        return;
    }
    w->_word = NULL;
    w->next = NULL;
    l->first = w;
    return l;
}

//append word at end of list
void list_append(list *l, char *w) {
    // create a new word
    word *new_word = malloc(sizeof(word));
    if (l == NULL || new_word == NULL) {
        printf("could not append word to list : list is empty\n");
        return;
    }
    new_word->_word = malloc(strlen(w) + 1);
    strcpy(new_word->_word, w);
    new_word->next = NULL;
    //insert the word
    if (l->first->_word == NULL) {
        l->first->_word = new_word->_word;
    }
    else {
        //word *temp = malloc(sizeof(word));
        word *temp;
        temp = l->first;
        while(temp->next != NULL) {
            temp=temp->next;
        }
    temp->next = new_word;
    }
}

//print word list
void list_print(list *l) {
    if (l == NULL || l->first == NULL) {
        printf("could not print list : list is empty\n");
        return;
    }
    word *current = l->first;
    while (current != NULL) {
        printf("%s -> ", current->_word);
        current = current->next;
    }
    printf("NULL\n");
}

char *compute_signature(const char *word) {
        char *signature = (char*) malloc(26);
        memset((void*)signature, 0, 26);
        int i = 0, j = 0, n = 0;
        char current_letter, letter;
    for (i = 0; i < 26; i++) {
            current_letter = alphabet[i];
                n = 0;
                for (j = 0; j < (int) strlen(word); j++) {
                    letter = word[j];
                        if (letter == current_letter) {
                                n++;
                        }
                }
                signature[i] = (char) n;
        }
        return signature;
}


void dict_insert(dict *d, char *signature, unsigned int current_letter, char *w) {
    int j = 0;
    int steps = 0;
    int occur = 0;
    dict *temp = NULL;
    if (current_letter == strlen(w)-1) {
        printf("Word found : %s!\n",w);
        int i = 0;
        int different_letters = 0;
        for (i = 0; i < 26; i++) {
            if ((int) signature[i] > 0) {
                different_letters++;
            }
        }
        for (i = 0; i < 26; i++) {
            occur = (int) signature[i];
            if (occur > 0) {
                steps++;
                if (steps < different_letters) break;
            }
            else {
                if (temp == NULL) {
                    temp = d;
                }
                if (temp->children[occur] == NULL) {
                    temp->children[occur] = (dict*) malloc(sizeof(dict));
                    temp = temp->children[occur];
                }
            }
        }
        if (temp == NULL) {
            temp = d;
        }
        list *l = NULL;
        if (temp->words[occur] == NULL || temp->words[occur]->first == NULL) {
            temp->words[occur] =  list_new();
            l = temp->words[occur];
        }
        else {
            l = temp->words[occur];
        }
        char *new;
        new = malloc(strlen(w) + 1);
        strcpy(new, w);
        list_print(l);
        /*list_append(l,new);
        list_print(l);*/
    }
    else {
        printf("Current letter: %c.\n",w[current_letter]);
        dict_insert(d,signature,current_letter+1,w);
    }
}


dict * read_words(const char *file) {
    FILE* f = NULL;
    f = fopen(file, "r");
    if (f == NULL) {
        printf("Could not open file.\n");
        exit(EXIT_FAILURE);
    }
    char line[256];
    dict *d = (dict*) malloc(sizeof(dict));
    //bzero((void*)d, sizeof(dict));
    while (fgets(line, sizeof(line), f) != NULL) {
        if (line[strlen(line) - 1] == '\n') {
            line[strlen(line) - 1] = '\0';
        }
        char *new;
        new = malloc(strlen(line) + 1);
        strcpy(new, line);
        char *signature = compute_signature(new);
        dict_insert(d, signature, 0, new);
        free((void*)signature);
    }
    return d;
}


int main(int argc, const char* argv[]) {
    /*list *myList = list_new();
    list_print(myList);
    list_append(myList,"Word1");
    list_print(myList);
    list_append(myList,"Word2");
    list_print(myList);
    list_append(myList,"Word3");
    list_print(myList);*/
    dict *d = read_words("list");
    /*list_print(d->words[1]);
    list_print(d->children[0]->words[1]);
    list_print(d->children[0]->children[0]->words[1]);
    list_print(d->words[2]);
    list_print(d->children[0]->words[2]);*/
    return 0;
}

此代码几乎可以使用。但不是按照预期的:有些单词在树中放错了位置,有时列表似乎是重复的(我还没有弄清楚到底是怎么回事)。你能帮我清理一下这段代码吗(也许可以指出最大的错误^^)?

修改:部分修复了代码的代码。我在list_append()上的if (strcmp(l->first->_word, "") == 0)处有一个段错误。

编辑2 :当我在算法中传递一个单词时,我需要测试列表是否存在:例如我将“厄运”添加到对应于“1d 1m 2o”的列表中,当我传递“情绪”时,我希望算法将其添加到同一列表中。以下是我试图实现这一目标的方法:

if (temp == NULL) {
    temp = d;
}
list *l = NULL;
if (temp->words[occur] == NULL || temp->words[occur]->first == NULL) {
    temp->words[occur] =  list_new();
    l = temp->words[occur];
}
else {
    l = temp->words[occur];
}

我有temp->words[occur]->first == NULL ...

的段错误

编辑3 :我的代码正在编译,但这些字词未添加到右侧列表中。我认为问题出在dict_insert(),声明是:

void dict_insert(dict *d, char *signature, unsigned int current_letter, char *w) {
    int j = 0;
    int steps = 0;
    int occur = 0;
    dict *temp = NULL;
    if (current_letter == strlen(w)-1) {
        printf("Word found : %s!\n",w);
        int i = 0;
        int different_letters = 0;
        for (i = 0; i < 26; i++) {
            if ((int) signature[i] > 0) {
                different_letters++;
            }
        }
        for (i = 0; i < 26; i++) {
            occur = (int) signature[i];
            if (occur > 0) {
                steps++;
                if (steps == different_letters) break;
            }
            printf("%d RIGHT\n",occur);
            printf("1 DOWN\n");
            if (temp == NULL) {
                temp = d;
            }
            temp->children[occur] = (dict*) realloc(temp->children[occur], sizeof(dict));
            //temp = temp->children[occur]
        }
        printf("%d RIGHT\n",occur);
        if (temp == NULL) {
            temp = d;
        }
        if (temp->words[occur] == NULL) {
            list *l = list_new();
            temp->words[occur] = l;
        }
        char *new;
        new = malloc(strlen(w) + 1);
        strcpy(new, w);
        list_print(temp->words[occur]);
        list_append(temp->words[occur],new);
        list_print(temp->words[occur]);
    }
    else {
        printf("Current letter: %c.\n",w[current_letter]);
        dict_insert(d,signature,current_letter+1,w);
    }
}

编辑4 dict_insert()完全重做。

void dict_insert(dict *d, char *signature, unsigned int current_letter, char *w) {
    int occur;
    occur = (int) signature[current_letter];
    if (current_letter == LAST_LETTER) {
        if (d->words[occur] == NULL) {
            d->words[occur] = list_new();
        }
        char *new;
        new = malloc(strlen(w) + 1);
        strcpy(new, w);
        printf("word found : %s!\n",w);
        list_print(d->words[occur]);
        list_append(d->words[occur],new);
        list_print(d->words[occur]);
    }
    else {
        if (d->children[occur] == NULL) {
            d->children[occur] = malloc(sizeof(dict));
        }
        d = d->children[occur];
        dict_insert(d,signature,current_letter+1,w);
    }
}

仍然是if (d->children[occur] == NULL) ...

的段错误

1 个答案:

答案 0 :(得分:3)

compute_signature中,您试图将字母表的当前字母与单词的当前字母进行比较

strcmp((const char*) &letter, (const char*) &current_letter) == 0

lettercurrent_letterchar *。因此,您通过letter = alphabet[i]将它们指向内存,然后获取指针的地址并将其粘贴到strcmp中。我很惊讶它没有在原版中崩溃。

lettercurrent_letter应更改为char而不是char *,您的比较应为if (letter == current_letter)

其他一些评论:

  1. 您正在转换malloc的返回值。虽然这不一定会打破任何事情not a good idea
  2. 在计算签名并使用它时,您经常在intchar之间进行投射。虽然您不可能在同一个字母上出现超过127个单词,但您应该考虑将签名更改为shortint
  3. 的数组
  4. 您的代码中有很多强制转换。演员阵容是一种强迫&#34;强迫&#34;编译器接受你想要的类型,你应该只在必要时才这样做,因为它可能导致屏蔽错误。因此,每当你写一个演员时,问问自己:为什么我必须这样做,我可以避免吗?
  5. list_append进行

    word *temp = malloc(sizeof(word));
    temp = l->first;
    

    这会在您分配一些内存时产生内存泄漏,然后通过将temp设置为列表头来忘记它。只是摆脱malloc没有必要。 dict_insert

    中也是如此
    temp->words[occur] = (list*) malloc(sizeof(list));
    list *l = list_new();
    temp->words[occur] = l;
    

    摆脱malloc