链表插入速度很慢,需要使用多线程吗?

时间:2018-03-30 21:56:58

标签: c++ c data-structures linked-list

在下面的代码中,我正在寻找在处理数百万次插入时优化插入速度的方法。代码运行正常,但在执行大量插入时速度很慢。我已经尝试了一些想法,但始终很慢。我猜解决方案是使用多线程来执行插入,并使用全局变量“struct Node* nodeObj”。我几乎没有在C/C++中使用多线程和同步的经验,如果你能基于这段代码给我一个例子,我将非常感激。也欢迎任何其他想法。

规则是: 1 - 最后(在插入所有数字之后)链表必须是有序的,即各节点的 node->data 从最小的数字开始依次递增到最大的数字。 2 - 调用者使用 for 循环执行插入,这个 for 循环不能放到 Insert 函数内部。 3 - 调用者或 Insert 函数中的任何代码都可以优化,只要 Insert 函数不会自行追加额外的插入——发起插入是调用者的工作。

#include<stdio.h>
#include<stdlib.h>
#include<conio.h>

/* One element of the singly linked list built by Insert(). */
struct Node {
    int data;                /* value stored in this node */
    struct Node* nextAddr;   /* following node; Insert() stores NULL here for the last node */
};

struct Node* Insert(Node* p, int data);
void Print(struct Node* p);
void RevPrint(struct Node* p);

/*
 * Driver: appends the values 1..999999 one at a time through Insert(),
 * echoing each value, then prints the list forwards and backwards and
 * waits for a key press.
 */
int main() {
    struct Node* listHead = NULL;
    int value = 1;

    printf("---------------------------------\n"
        "Insert()\n---------------------------------\n");

    /* the caller owns the loop; Insert() adds exactly one node per call */
    while (value < 1000000) {
        listHead = Insert(listHead, value);
        printf(" %d", value);
        ++value;
    }

    printf("\n---------------------------------\n"
        "Print()\n---------------------------------\n");
    Print(listHead);

    printf("---------------------------------\n"
        "RevPrint()\n---------------------------------");
    RevPrint(listHead);

    printf("\nPress any key to continue...");
    _getch();

    return 0;
}

struct Node* Insert(Node* _pToNode, int _nbr)
{
    Node *newValue = (struct Node*)malloc(sizeof(struct Node));
    newValue->data = _nbr;
    newValue->nextAddr = NULL;

    if (_pToNode == NULL) _pToNode = newValue;
    else {
        Node* pLocal = _pToNode;
        while (pLocal->nextAddr != NULL) {
            pLocal = pLocal->nextAddr;
        }
        pLocal->nextAddr = newValue;
    }

    return _pToNode;
}

/*
 * Print every value from p to the end of the list, each preceded by a
 * space, followed by a newline. A NULL p prints only the newline.
 *
 * Implemented as a loop: the recursive form used one stack frame per node
 * and could exhaust the stack on lists with millions of nodes.
 */
void Print(struct Node* p) {
    for (; p != NULL; p = p->nextAddr) {
        printf(" %d", p->data);
    }
    printf("\n");
}

/*
 * Print a newline, then every value of the list in reverse order (last
 * node first), each preceded by a space. A NULL p prints only the newline.
 *
 * Works without recursion or extra memory: the links are reversed in
 * place, the reversed chain is printed, and the links are restored while
 * printing. The list is therefore temporarily modified during the call and
 * is back in its original state on return. The recursive form needed one
 * stack frame per node and overflowed the stack on very long lists.
 */
void RevPrint(struct Node* p) {
    struct Node* reversed = NULL;
    struct Node* next;

    /* pass 1: reverse the links; 'reversed' ends up at the last node */
    while (p != NULL) {
        next = p->nextAddr;
        p->nextAddr = reversed;
        reversed = p;
        p = next;
    }

    /* same output order as before: the newline comes first */
    printf("\n");

    /* pass 2: print from the last node while re-reversing the links */
    while (reversed != NULL) {
        next = reversed->nextAddr;
        printf(" %d", reversed->data);
        reversed->nextAddr = p;
        p = reversed;
        reversed = next;
    }
}

谢谢。

3 个答案:

答案 0 :(得分:2)

链表在现代机器上已经不受欢迎。L1和L2缓存喜欢向量和数组,而非常不适合链表。请使用std::vector,而不是链表(当然更不要用手写的链表)。尝试用.reserve()在向量中预留足够的内存来容纳所有新元素。只需将所有元素都push到向量末尾,然后使用并行执行对向量进行排序。C++17可以轻松地做到这一点。std::stable_sort的用法也非常相似。

答案 1 :(得分:1)

警告:这仅适用于尾部插入/附加:

// Sketch of the caller: besides the head it keeps a tail pointer that
// Insert() updates, and passes the tail's address on every call.
int
main()
{
    struct Node* nodeObj = NULL;
    struct Node* nodeTail = NULL;
    int i = 1;

    // your other stuff ...

    while (i < 1000000) {
        nodeObj = Insert(nodeObj, &nodeTail, i);
        printf(" %d", i);
        ++i;
    }

    // your other stuff ...

    return 0;
}

struct Node* Insert(Node* _pToNode, Node **tail,int _nbr)
{
    Node *newValue = (struct Node*)malloc(sizeof(struct Node));
    newValue->data = _nbr;
    newValue->nextAddr = NULL;

    if (_pToNode == NULL) {
        _pToNode = newValue;
    }
    else {
        (*tail)->nextAddr = newValue;
    }

    *tail = newValue;

    return _pToNode;
}

您可以使用一个同时包含头指针和尾指针的“list”结构(而不是单独的参数)来整理这段代码。

更新:

  

非常酷/聪明,但不幸的是仍然很慢......

malloc 等函数在进行大量的小块分配时可能会很慢。加快速度的一种方法是使用分配子池[如WeatherVane建议的那样]。

正如我所提到的,添加一个“list”结构可以使代码更整洁,我在两个地方使用了它。一旦采用了这种结构,你就可以添加除头/尾之外的其他东西,比如计数。此外,如果你愿意的话,它也让单链表更容易转换为双链表。

旁注:使用双向链表时,插入[稍微]更复杂,但在链表中间插入要快得多,因为你不必遍历链表来找到前一个指针(例如,节点中会有一个prev指针)。此外,RevPrintList会变得像PrintList一样简单。

请注意[在我的系统上],反向打印耗尽了堆栈空间[并发生了段错误],因此我将打印函数改写为非递归的。

这是一个整理过的版本,由于单独的malloc调用次数减少,它应该能更快地完成插入操作。

旁注:我没有为malloc等函数返回null的情况添加必要的检查。

#include <stdio.h>
#include <stdlib.h>
//#include <conio.h>

// One element of a singly linked list.
typedef struct Node_ {
    int data;               // value stored in this node
    struct Node_* next;     // following node; Insert() stores NULL here for the last node
} Node;

// List header. Insert() maintains all three fields for a user list; the
// global freelist reuses this type, with NewNode() using only count and head.
typedef struct List_ {
    int count;      // user list: nodes inserted so far; freelist: nodes left in the current chunk
    Node* head;     // user list: first node; freelist: next unused node in the current chunk
    Node* tail;     // user list: last node, so Insert() can append without walking the list
} List;

Node* NewNode(void);
Node* Insert(List* p, int data);
void Print(Node* p);
void PrintList(List *list);
void RevPrint(Node* p);
void RevPrintList(List *list);

// Pool state read and updated by NewNode(); zero-initialized as a file-scope object.
List freelist;

// Driver: appends 1..999999 to a list through Insert(), then prints the
// list forwards and backwards. The #if 0 branches keep the original
// per-insert echo, the recursive printers and _getch() available for
// comparison.
int
main()
{
    List numbers = { 0, NULL, NULL };
    int value = 1;

    printf("---------------------------------\n"
        "Insert()\n---------------------------------\n");

    while (value < 1000000) {
        Insert(&numbers, value);
#if 0
        printf(" %d", value);
#endif
        ++value;
    }

    printf("\n---------------------------------\n"
        "Print()\n---------------------------------\n");
#if 0
    Print(numbers.head);
#else
    PrintList(&numbers);
#endif

    printf("---------------------------------\n"
        "RevPrint()\n---------------------------------");
#if 0
    RevPrint(numbers.head);
#else
    RevPrintList(&numbers);
#endif

    printf("\nPress any key to continue...");
#if 0
    _getch();
#else
    getchar();
#endif

    return 0;
}

// Hand out one zero-initialized node from the global pool.
//
// Nodes are carved out of chunks obtained with a single calloc() call, so
// the allocator is called once per chunk rather than once per node.
// This version keeps no record of where each chunk starts, so chunks are
// never returned to the allocator.
//
// Returns the node, or NULL if a new chunk was needed and could not be
// allocated (the pool state is left unchanged in that case).
Node*
NewNode(void)
{
    // NOTE: adjust the chunk size to what ever value you want
    enum { NODES_PER_CHUNK = 1000 };
    Node *node;

    if (freelist.count <= 0) {
        Node *chunk = calloc(NODES_PER_CHUNK, sizeof *chunk);

        if (chunk == NULL)
            return NULL;

        freelist.head = chunk;
        freelist.count = NODES_PER_CHUNK;
    }

    // freelist.head is the cursor to the next unused node in the chunk
    node = freelist.head++;
    freelist.count -= 1;

    return node;
}

// Append a node holding _nbr to the end of *list in constant time.
//
// list  list header to update (head, tail and count are maintained here)
// _nbr  value to store
//
// Returns the new node, or NULL if no node could be obtained from
// NewNode(); the list is left unchanged in that case.
Node*
Insert(List* list,int _nbr)
{
    Node *node = NewNode();

    // allocation failed: do not touch the list
    if (node == NULL)
        return NULL;

    node->data = _nbr;
    node->next = NULL;

    if (list->head == NULL) {
        list->head = node;
    }
    else {
        list->tail->next = node;
    }

    list->tail = node;
    list->count += 1;

    return node;
}

// Print every value from p to the end of the chain, each preceded by a
// space, followed by a newline. A NULL p prints only the newline.
//
// Implemented as a loop: the recursive form used one stack frame per node
// and could exhaust the stack on very long lists.
void
Print(Node* p)
{
    for (; p != NULL; p = p->next)
        printf(" %d", p->data);

    printf("\n");
}

// Print all values of the list from head to tail, each preceded by a
// space, then a newline.
void
PrintList(List* list)
{
    Node *cur = list->head;

    while (cur != NULL) {
        printf(" %d", cur->data);
        cur = cur->next;
    }

    printf("\n");
}

// Print a newline, then every value of the chain in reverse order (last
// node first), each preceded by a space. A NULL p prints only the newline.
//
// Works without recursion or extra memory: the links are reversed in
// place, the reversed chain is printed, and the links are restored while
// printing. The chain is therefore temporarily modified during the call
// and is back in its original state on return. The recursive form needed
// one stack frame per node and overflowed the stack on very long lists.
void
RevPrint(Node* p)
{
    Node* reversed = NULL;
    Node* next;

    // pass 1: reverse the links; 'reversed' ends up at the last node
    while (p != NULL) {
        next = p->next;
        p->next = reversed;
        reversed = p;
        p = next;
    }

    // same output order as before: the newline comes first
    printf("\n");

    // pass 2: print from the last node while re-reversing the links
    while (reversed != NULL) {
        next = reversed->next;
        printf(" %d", reversed->data);
        reversed->next = p;
        p = reversed;
        reversed = next;
    }
}

// Print all values of the list from tail to head, each preceded by a
// space, then a newline.
//
// The node pointers are copied into a temporary array that is filled from
// the back, so the array ends up in reverse list order. If the array
// cannot be allocated, an error is reported on stderr and nothing else is
// printed.
void
RevPrintList(List *list)
{
    Node **rlist;
    Node *node;
    int ridx;

    // empty list: nothing to collect, same output as a plain newline
    if (list->count <= 0) {
        printf("\n");
        return;
    }

    rlist = malloc(sizeof *rlist * (size_t) list->count);
    if (rlist == NULL) {
        perror("RevPrintList: malloc");
        return;
    }

    // fill from the last slot down; the ridx bound keeps the writes inside
    // the array even if count is smaller than the real chain length
    ridx = list->count - 1;
    for (node = list->head;  node != NULL && ridx >= 0;  node = node->next, --ridx)
        rlist[ridx] = node;

    // ridx + 1 is the lowest slot that was filled (0 when count matches)
    for (ridx += 1;  ridx < list->count;  ++ridx) {
        node = rlist[ridx];
        printf(" %d",node->data);
    }
    printf("\n");

    free(rlist);
}

更新#2:

  

你可以使freeList成为一个“列表的列表”(使用另一种“node”结构,它保存的是指向列表的指针而不是数字),这样在程序结束时就可以释放内存。

以下是修改后的版本:

#include <stdio.h>
#include <stdlib.h>
//#include <conio.h>

// One element of a singly linked list. The first node of every pool chunk
// is not handed out; NewNode() uses its next field to chain the chunks.
typedef struct Node_ {
    int data;               // value stored in this node
    struct Node_* next;     // following node; Insert() stores NULL here for the last node
} Node;

// List header; Insert() maintains all three fields.
typedef struct List_ {
    int count;      // number of nodes appended by Insert()
    Node* head;     // first node, NULL while the list is empty
    Node* tail;     // last node, so Insert() can append without walking the list
} List;

// Node pool bookkeeping used by NewNode() and FreeAll().
typedef struct Freelist_ {
    int count;      // nodes still unused in the current chunk
    Node* head;     // first chunk; chunks are chained through their first node's next
    Node* tail;     // most recently allocated chunk
    Node* avail;    // next unused node inside the current chunk
} FreeList;

Node* NewNode(void);
Node* Insert(List* p, int data);
void Print(Node* p);
void PrintList(List *list);
void RevPrint(Node* p);
void RevPrintList(List *list);
void FreeAll(void);

// Global pool state; starts zeroed, so the first NewNode() call sees count <= 0 and allocates a chunk.
FreeList freelist = { 0 };

// Driver: appends 1..999999 to a list through Insert(), prints the list
// forwards and backwards, then releases the whole node pool. The #if 0
// branches keep the original per-insert echo, the recursive printers and
// _getch() available for comparison.
int
main()
{
    List numbers = { 0, NULL, NULL };
    int value = 1;

    printf("---------------------------------\n"
        "Insert()\n---------------------------------\n");

    while (value < 1000000) {
        Insert(&numbers, value);
        // this printf will radically slow things down
#if 0
        printf(" %d", value);
#endif
        ++value;
    }

    printf("\n---------------------------------\n"
        "Print()\n---------------------------------\n");
#if 0
    Print(numbers.head);
#else
    PrintList(&numbers);
#endif

    printf("---------------------------------\n"
        "RevPrint()\n---------------------------------");
#if 0
    RevPrint(numbers.head);
#else
    RevPrintList(&numbers);
#endif

    // release all nodes back to the malloc free pool
    FreeAll();

    printf("\nPress any key to continue...");
#if 0
    _getch();
#else
    getchar();
#endif

    return 0;
}

// Hand out one zero-initialized node from the global pool.
//
// Nodes are carved out of chunks obtained with a single calloc() call. The
// first node of every chunk is reserved as a link in the chain of chunks
// (freelist.head .. freelist.tail) so that FreeAll() can release them.
//
// Returns the node, or NULL if a new chunk was needed and could not be
// allocated (the pool state is left unchanged in that case).
Node*
NewNode(void)
{
    // NOTE: adjust the chunk size to what ever value you want (at least 2:
    // one placeholder node plus one usable node)
    enum { NODES_PER_CHUNK = 1000 };
    Node *node;

    if (freelist.count <= 0) {
        Node *chunk = calloc(NODES_PER_CHUNK, sizeof *chunk);

        if (chunk == NULL)
            return NULL;

        // maintain linked list of nodes that are at the _start_ of a
        // malloc area/arena
        if (freelist.head == NULL)
            freelist.head = chunk;
        else
            freelist.tail->next = chunk;
        freelist.tail = chunk;

        // burn the first node as a placeholder
        freelist.avail = chunk + 1;
        freelist.count = NODES_PER_CHUNK - 1;
    }

    node = freelist.avail++;
    freelist.count -= 1;

    return node;
}

// Release every chunk of the node pool and reset the pool to its initial
// empty state.
//
// All nodes previously returned by NewNode() become invalid, so any list
// built from them must not be used afterwards. Resetting the bookkeeping
// matters: without it, count and avail would still describe freed memory
// and a later NewNode() call would hand out dangling nodes.
void
FreeAll(void)
{
    Node* node;
    Node* next;

    // chunks are chained through the next field of their first node
    for (node = freelist.head;  node != NULL;  node = next) {
        next = node->next;
        free(node);
    }

    freelist.count = 0;
    freelist.head = NULL;
    freelist.tail = NULL;
    freelist.avail = NULL;
}

// Append a node holding _nbr to the end of *list in constant time.
//
// list  list header to update (head, tail and count are maintained here)
// _nbr  value to store
//
// Returns the new node, or NULL if no node could be obtained from
// NewNode(); the list is left unchanged in that case.
Node*
Insert(List* list,int _nbr)
{
    Node *node = NewNode();

    // allocation failed: do not touch the list
    if (node == NULL)
        return NULL;

    node->data = _nbr;
    node->next = NULL;

    if (list->head == NULL) {
        list->head = node;
    }
    else {
        list->tail->next = node;
    }

    list->tail = node;
    list->count += 1;

    return node;
}

// Print every value from p to the end of the chain, each preceded by a
// space, followed by a newline. A NULL p prints only the newline.
//
// Implemented as a loop: the recursive form used one stack frame per node
// and could exhaust the stack on very long lists.
void
Print(Node* p)
{
    for (; p != NULL; p = p->next)
        printf(" %d", p->data);

    printf("\n");
}

// Print all values of the list from head to tail, each preceded by a
// space, then a newline.
void
PrintList(List* list)
{
    Node *cur = list->head;

    while (cur != NULL) {
        printf(" %d", cur->data);
        cur = cur->next;
    }

    printf("\n");
}

// Print a newline, then every value of the chain in reverse order (last
// node first), each preceded by a space. A NULL p prints only the newline.
//
// Works without recursion or extra memory: the links are reversed in
// place, the reversed chain is printed, and the links are restored while
// printing. The chain is therefore temporarily modified during the call
// and is back in its original state on return. The recursive form needed
// one stack frame per node and overflowed the stack on very long lists.
void
RevPrint(Node* p)
{
    Node* reversed = NULL;
    Node* next;

    // pass 1: reverse the links; 'reversed' ends up at the last node
    while (p != NULL) {
        next = p->next;
        p->next = reversed;
        reversed = p;
        p = next;
    }

    // same output order as before: the newline comes first
    printf("\n");

    // pass 2: print from the last node while re-reversing the links
    while (reversed != NULL) {
        next = reversed->next;
        printf(" %d", reversed->data);
        reversed->next = p;
        p = reversed;
        reversed = next;
    }
}

// Print all values of the list from tail to head, each preceded by a
// space, then a newline.
//
// The node pointers are copied into a temporary array that is filled from
// the back, so the array ends up in reverse list order. If the array
// cannot be allocated, an error is reported on stderr and nothing else is
// printed.
void
RevPrintList(List *list)
{
    Node **rlist;
    Node *node;
    int ridx;

    // empty list: nothing to collect, same output as a plain newline
    if (list->count <= 0) {
        printf("\n");
        return;
    }

    rlist = malloc(sizeof *rlist * (size_t) list->count);
    if (rlist == NULL) {
        perror("RevPrintList: malloc");
        return;
    }

    // fill from the last slot down; the ridx bound keeps the writes inside
    // the array even if count is smaller than the real chain length
    ridx = list->count - 1;
    for (node = list->head;  node != NULL && ridx >= 0;  node = node->next, --ridx)
        rlist[ridx] = node;

    // ridx + 1 is the lowest slot that was filled (0 when count matches)
    for (ridx += 1;  ridx < list->count;  ++ridx) {
        node = rlist[ridx];
        printf(" %d",node->data);
    }
    printf("\n");

    free(rlist);
}

更新#3:

这个版本通过向FreeList添加reuse成员,以及一个在删除节点时可以调用的FreeOne函数,实现了节点重用。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#include <conio.h>

// One element of a singly linked list. The next field is also used by the
// pool: to chain chunk-start nodes in NewNode() and to chain released nodes
// in FreeOne().
typedef struct Node_ {
    int data;               // value stored in this node
    struct Node_ *next;     // following node; Insert() stores NULL here for the last node
} Node;

// List header; Insert() maintains all three fields.
typedef struct List_ {
    int count;      // number of nodes appended by Insert()
    Node *head;     // first node, NULL while the list is empty
    Node *tail;     // last node, so Insert() can append without walking the list
} List;

// Node pool bookkeeping used by NewNode(), FreeOne() and FreeAll().
typedef struct Freelist_ {
    int count;      // nodes still unused in the current chunk
    Node *head;     // first chunk; chunks are chained through their first node's next
    Node *tail;     // most recently allocated chunk
    Node *avail;    // next unused node inside the current chunk
    Node *reuse;    // stack of nodes released by FreeOne(), linked through next
} FreeList;

Node *NewNode(void);
Node *Insert(List *p,int data);
void Print(Node *p);
void PrintList(List *list);
void RevPrint(Node *p);
void RevPrintList(List *list);
void FreeOne(Node *p);
void FreeAll(void);

// Global pool state; starts zeroed, so the first NewNode() call finds no reusable node, sees count <= 0 and allocates a chunk.
FreeList freelist = { 0 };

// Driver: appends 1..999999 to a list through Insert(), prints the list
// forwards and backwards, then releases the whole node pool. The #if 0
// branches keep the original per-insert echo, the recursive printers and
// _getch() available for comparison.
int
main()
{
    List numbers = { 0, NULL, NULL };
    int value = 1;

    printf("---------------------------------\n" "Insert()\n---------------------------------\n");

    while (value < 1000000) {
        Insert(&numbers,value);
        // this printf will radically slow things down
#if 0
        printf(" %d",value);
#endif
        ++value;
    }

    printf("\n---------------------------------\n" "Print()\n---------------------------------\n");
#if 0
    Print(numbers.head);
#else
    PrintList(&numbers);
#endif

    printf("---------------------------------\n" "RevPrint()\n---------------------------------");
#if 0
    RevPrint(numbers.head);
#else
    RevPrintList(&numbers);
#endif

    // release all nodes back to the malloc free pool
    FreeAll();

    printf("\nPress any key to continue...");
#if 0
    _getch();
#else
    getchar();
#endif

    return 0;
}

// Hand out one node from the global pool.
//
// Order of preference:
//   1. a node previously released with FreeOne() (its next is cleared);
//   2. the next unused node of the current chunk;
//   3. the first usable node of a freshly calloc()ed chunk.
// The first node of every chunk is reserved as a link in the chain of
// chunks so that FreeAll() can release them.
//
// Returns the node, or NULL if a new chunk was needed and could not be
// allocated (the pool state is left unchanged in that case).
Node *
NewNode(void)
{
    // NOTE: adjust the chunk size to what ever value you want (at least 2:
    // one placeholder node plus one usable node)
    enum { NODES_PER_CHUNK = 1000 };
    FreeList *list = &freelist;
    Node *node;

    // try to reuse a node that has been released by FreeOne
    node = list->reuse;
    if (node != NULL) {
        list->reuse = node->next;
        node->next = NULL;
        return node;
    }

    if (list->count <= 0) {
        Node *chunk = calloc(NODES_PER_CHUNK,sizeof *chunk);

        if (chunk == NULL)
            return NULL;

        // maintain linked list of nodes that are at the _start_ of a
        // malloc area/arena
        if (list->head == NULL)
            list->head = chunk;
        else
            list->tail->next = chunk;
        list->tail = chunk;

        // burn the first node as a placeholder
        list->avail = chunk + 1;
        list->count = NODES_PER_CHUNK - 1;
    }

    // grab one from the current allocation array
    node = list->avail++;
    list->count -= 1;

    return node;
}

// Return a single node to the pool so that a later NewNode() call can hand
// it out again.
//
// node  node to release; NULL is ignored (like free(NULL)). The function
//       overwrites node->next and does not unlink the node from any list,
//       so the caller must have removed it from its list beforehand.
void
FreeOne(Node *node)
{
    FreeList *list = &freelist;

    if (node == NULL)
        return;

    // push this node onto the front of the reuse list (i.e. it's fast)
    node->next = list->reuse;
    list->reuse = node;
}

// Release every chunk of the node pool, then zero the pool bookkeeping so
// the pool is back in its initial empty state.
void
FreeAll(void)
{
    Node *chunk = freelist.head;

    // chunks are chained through the next field of their first node; read
    // the link before the chunk is freed
    while (chunk != NULL) {
        Node *following = chunk->next;

        free(chunk);
        chunk = following;
    }

    memset(&freelist,0,sizeof(FreeList));
}

// Append a node holding _nbr to the end of *list in constant time.
//
// list  list header to update (head, tail and count are maintained here)
// _nbr  value to store
//
// Returns the new node, or NULL if no node could be obtained from
// NewNode(); the list is left unchanged in that case.
Node *
Insert(List *list,int _nbr)
{
    Node *node = NewNode();

    // allocation failed: do not touch the list
    if (node == NULL)
        return NULL;

    node->data = _nbr;
    node->next = NULL;

    if (list->head == NULL) {
        list->head = node;
    }
    else {
        list->tail->next = node;
    }

    list->tail = node;
    list->count += 1;

    return node;
}

// Print every value from p to the end of the chain, each preceded by a
// space, followed by a newline. A NULL p prints only the newline.
//
// Implemented as a loop: the recursive form used one stack frame per node
// and could exhaust the stack on very long lists.
void
Print(Node *p)
{
    for (; p != NULL; p = p->next)
        printf(" %d",p->data);

    printf("\n");
}

// Print all values of the list from head to tail, each preceded by a
// space, then a newline.
void
PrintList(List *list)
{
    Node *cur = list->head;

    while (cur != NULL) {
        printf(" %d",cur->data);
        cur = cur->next;
    }

    printf("\n");
}

// Print a newline, then every value of the chain in reverse order (last
// node first), each preceded by a space. A NULL p prints only the newline.
//
// Works without recursion or extra memory: the links are reversed in
// place, the reversed chain is printed, and the links are restored while
// printing. The chain is therefore temporarily modified during the call
// and is back in its original state on return. The recursive form needed
// one stack frame per node and overflowed the stack on very long lists.
void
RevPrint(Node *p)
{
    Node *reversed = NULL;
    Node *next;

    // pass 1: reverse the links; 'reversed' ends up at the last node
    while (p != NULL) {
        next = p->next;
        p->next = reversed;
        reversed = p;
        p = next;
    }

    // same output order as before: the newline comes first
    printf("\n");

    // pass 2: print from the last node while re-reversing the links
    while (reversed != NULL) {
        next = reversed->next;
        printf(" %d",reversed->data);
        reversed->next = p;
        p = reversed;
        reversed = next;
    }
}

// Print all values of the list from tail to head, each preceded by a
// space, then a newline.
//
// The node pointers are copied into a temporary array that is filled from
// the back, so the array ends up in reverse list order. If the array
// cannot be allocated, an error is reported on stderr and nothing else is
// printed.
void
RevPrintList(List *list)
{
    Node **rlist;
    Node *node;
    int ridx;

    // empty list: nothing to collect, same output as a plain newline
    if (list->count <= 0) {
        printf("\n");
        return;
    }

    rlist = malloc(sizeof *rlist * (size_t) list->count);
    if (rlist == NULL) {
        perror("RevPrintList: malloc");
        return;
    }

    // fill from the last slot down; the ridx bound keeps the writes inside
    // the array even if count is smaller than the real chain length
    ridx = list->count - 1;
    for (node = list->head; node != NULL && ridx >= 0; node = node->next, --ridx)
        rlist[ridx] = node;

    // ridx + 1 is the lowest slot that was filled (0 when count matches)
    for (ridx += 1; ridx < list->count; ++ridx) {
        node = rlist[ridx];
        printf(" %d",node->data);
    }
    printf("\n");

    free(rlist);
}

答案 2 :(得分:0)

我假设你提到的元素顺序是通过比较确定的,即 A < B ⇒ 在容器中 A 排在 B 之前。

如果该假设成立,你所能期望的最好情况是每个元素的插入为 O(log N)。因此,如果想在数百万个元素中插入一个元素,大约需要进行20多次比较。如果要插入的数百万个元素本身已有某种顺序,你还可以做得更好。

而且,基于指针的结构从来不是快速数据结构的答案。std::vector应该是你的首选。

要使用for语句读取此快速插入结构,您需要一个非常奇特的迭代器,或者您需要将插入的内容重新排列到另一个向量中。

总之,您应该将内容插入到一个简单的二叉堆中。

多线程不会使这个更快,除非您可以为插入设计类似合并的方案。