Question

我试图创建一个读取文本文件的程序从标准输入开始，打印单词及其频率，按频率降低排序。为此，我将单词存储在链表中，如果链表已包含单词，则更新其频率。然而，我还没能按照单词的频率对链表进行排序。

使用的struct我看起来像这样：

struct list {
    char *word;
    struct list *previous;
    struct list *next;
    int count;
};

所以我理解我应该将每个节点的count值与其邻居的值进行比较，然后根据count值切换它们的位置，但是我不知道我是怎么做的可以保持函数循环，直到它排序。

我的代码的哪一部分如下：

struct list {
    char *word;
    struct list *previous;
    struct list *next;
    int count;
};

void list_add(struct list *l, char *word) {
    struct list *current = l->previous;
    struct list *prev;
    int already_in_list = 0;
    while (current != NULL) {
        if (strcmp(current->word, word) == 0) {
            current->count++;
            already_in_list = 1;
            // Compare new frequency with elements higher 
            // up in the list and sort*/ 
            **How do I do this?**
        }
        prev = current;
        current = current->next;
    }
    if (already_in_list != 1) list_add_new(l, word);
}

当前输出为：

word: bye   count: 1
word: is    count: 1
word: my    count: 1
word: hello count: 6
word: world count: 2
word: name  count: 1

我想要的输出：

word: hello count: 6
word: world count: 2
word: name  count: 1
word: bye   count: 1
word: is    count: 1
word: my    count: 1

Answer 1

这可以通过将您的列表分发到存储区来解决。如果你有一个足够大的数组来容纳最大的字数，那么你可以在（几乎）线性时间内重新排序列表：

创建一个list指针数组，并将它们全部初始化为NULL。将其称为“频率列表”。您所做的就是取消链接主列表中的每个节点，并将其放入适当的频率列表（按字数计算索引）。如果您反向遍历主列表，并插入频率列表的前面，则不需要额外的遍历，并保持自然字排序。

主列表为空后，遍历频率列表数组（反向）并将它们全部连接起来。现在，您只按字数对列表进行了排序。

唯一的问题是它不是相当线性时间，因为频率计数的分布可能非常宽。您可以使用其他一些结构（例如散列）来更有效地查找频率表。对于大多数实际用途，它仍然主要线性，并且肯定比对数更好。

警告：我不知道为什么我对线性时间这么大惊小怪，因为你的代码本质上是 O（N ^ 2），因为将每个单词添加到你的主要清单包括搜索整个清单，看它是否已经在那里。

Answer 2

如果你总是对列表进行排序，这个问题实际上比排序要简单得多：

您需要遍历链接列表才能找到您要查找的单词是否已存在。如果您找到它，请增加其计数，取消链接节点与列表的链接，并将其链接回适当计数所在的位置。（注意，在这种情况下，您不需要遍历整个列表以找到正确的插入位置 - 您已经知道节点需要在列表中向后移动）

如果单词未知，请创建一个新节点（确保为节点中的字符串指针分配内存！）并将其链接到列表的开头。

请注意，代码不是完整的实现。只是根据单词和单词计数搜索节点的一些片段（前者需要找出我们是否已经有这个单词，后者是在向后移动节点时使用，因为你增加了单词数）

struct list * findNode (char * word){
    struct list * tmp = root;

    do {
        if (strcmp (word, tmp->word) == 0) {
          return tmp;
        }
        tmp = tmp->next;
    } while (tmp != NULL)
    return NULL;
}

/* Finds the first node with a greater word count after start */
struct list * findCount (struct list * start, int count) {
    struct list * tmp = start;

    do {
        if (tmp->count > count) {
          /* return the last node that was smaller than count */
          return tmp->previous;
        }

        if (tmp->next = NULL)
            return tmp;

        tmp = tmp->next;
    } while (tmp != NULL)
    return NULL;
}

void moveAfter (struct list * from, struct list * to) {
    if (from == to)
        return;

    /* Unlink from where we are */
    if (from->previous != NULL) 
       from->previous->next = from->next;
    else
       root = from->next;
    /* from->next can't be NULL, otherwise we wouldn't move */
    from->next->previous = from->previous;

    /* link into new place */
    if (to->next != NULL)
       to->next->previous = from;
    from->next = to->next;
    to->next = from;
    from->previous = to;

}

Answer 3

您可以修改插入代码，以便按降低频率对列表进行排序。

我修改了代码，使列表stub next指针指向列表的开头，而previous成员指向列表的最后一个元素。第一个列表元素的previous指针指向列表存根，最后一个元素的next指针是NULL。

以下是修改后的代码：

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>

//#include "analyser_ll.h"

struct list {
    char *word;
    struct list *previous;
    struct list *next;
    int count;
};

typedef struct list list;

struct list *list_init(void);
void list_print(struct list *l);
void list_add(struct list *list, const char *word);
int list_cleanup(struct list *l);

list *list_init(void) {
    list *l = malloc(sizeof(struct list));
    l->word = NULL;
    l->previous = NULL;
    l->next = NULL;
    l->count = 1;
    return l;
}

/* list ordering function:
 - NULL word is less to keep list stub at the beginning
 - higher count is smaller
 - lexicographic order of word for same count
 */
int list_compare(list *a, list *b) {
    if (a->word == NULL) return -1;
    if (b->word == NULL) return 1;
    if (a->count > b->count) return -1;
    if (a->count < b->count) return 1;
    return strcmp(a->word, b->word);
}

void list_add(list *l, const char *word) {
    list *current = l->next;
    while (current != NULL) {
        if (strcmp(current->word, word) == 0) {
            current->count++;
            break;
        }
        current = current->next;
    }
    if (current == NULL) {
        /* insert new node at end */
        current = list_init();
        current->word = malloc(strlen(word) + 1);
        strcpy(current->word, word);

        if (l->next == NULL) {
            l->next = current;
            current->previous = l;
        } else {
            current->previous = l->previous;
            l->previous->next = current;
        }
        l->previous = current;
    }
    /* move current node to the proper position */
    while (list_compare(current->previous, current) > 0) {
        /* swap current and previous nodes */
        list *prev = current->previous;
        list *next = current->next;
        current->previous = prev->previous;
        current->next = prev;
        current->previous->next = current;
        prev->previous = current;
        prev->next = next;
        if (next) {
            next->previous = prev;
        } else {
            l->previous = prev;
        }
    }
}

void list_print(list *l) {
    list *current = l->next;

    // List is empty.
    if (current == NULL) printf("List is empty!\n");

    // While the linked list isn't empty, print
    // the word in every node.
    while (current != NULL && current->word != NULL && current->count > 0) {
        printf("word: %s\tcount: %d\n", current->word, current->count);
        current = current->next;
    } 
}

int list_cleanup(struct list *l) {
    list *current = l->next;

    // Free all list nodes and their contents
    while (current != NULL) {
        struct list *next = current->next;
        free(current->word);
        free(current);
        current = next;
    }

    // Free head of linked list and return.
    free(l);
    return 1;
}

int main(void) {
    list *test = list_init();
    list_add(test, "name");
    list_add(test, "world");
    list_add(test, "hello");
    list_add(test, "world");
    list_add(test, "my");
    list_add(test, "hello");
    list_add(test, "is");
    list_add(test, "bye");
    list_add(test, "hello");
    list_add(test, "hello");
    list_add(test, "hello");
    list_add(test, "hello");    
    list_print(test);
    list_cleanup(test);
}

在C中对双向链表进行排序

3 个答案: