在下面的代码中,我正在寻找在处理数百万次插入时优化插入速度的方法。代码运行正常,但在执行大量插入时速度很慢。我已经尝试了一些想法,但总是很慢。我猜解决方案是使用多线程来执行插入,并使用全局变量“struct Node * nodeObj”。我几乎没有在C/C++中使用多线程和同步的经验,如果你能给我一个基于这段代码的例子,我将非常感激。也欢迎任何其他想法。
规则是: 1 - 最后(在插入所有数字之后)链表必须是有序的,也就是说各节点的 node->data 从最小的数开始,直到最大的数。 2 - 调用者使用for循环进行插入,这个for循环不能放到Insert函数内部。 3 - 任何代码(调用者或Insert函数)都可以优化,只要Insert函数自己不额外添加插入操作——那是调用者的工作。
#include<stdio.h>
#include<stdlib.h>
#include<conio.h>
/* One element of a singly linked list of integers. */
struct Node {
    int data;                /* value stored in this node */
    struct Node* nextAddr;   /* following node, or NULL for the last node */
};

/*
 * Prototypes for the list operations defined below.
 * The parameter of Insert is spelled `struct Node*` (not bare `Node*`):
 * a struct tag alone is not a type name in C, so the bare form only
 * compiles as C++. This spelling is valid in both languages.
 */
struct Node* Insert(struct Node* p, int data);
void Print(struct Node* p);
void RevPrint(struct Node* p);
int main() {
    /* Head of the list; stays NULL until the first insertion. */
    struct Node* head = NULL;
    int value = 1;

    fputs("---------------------------------\n"
          "Insert()\n---------------------------------\n", stdout);

    /* Append 1..999999 in ascending order, echoing each value as it goes. */
    while (value < 1000000) {
        head = Insert(head, value);
        printf(" %d", value);
        value++;
    }

    fputs("\n---------------------------------\n"
          "Print()\n---------------------------------\n", stdout);
    Print(head);

    fputs("---------------------------------\n"
          "RevPrint()\n---------------------------------", stdout);
    RevPrint(head);

    /* Keep the console window open until a key is pressed. */
    fputs("\nPress any key to continue...", stdout);
    _getch();
}
struct Node* Insert(Node* _pToNode, int _nbr)
{
Node *newValue = (struct Node*)malloc(sizeof(struct Node));
newValue->data = _nbr;
newValue->nextAddr = NULL;
if (_pToNode == NULL) _pToNode = newValue;
else {
Node* pLocal = _pToNode;
while (pLocal->nextAddr != NULL) {
pLocal = pLocal->nextAddr;
}
pLocal->nextAddr = newValue;
}
return _pToNode;
}
/*
 * Print every value from node p to the end of the list, each preceded by
 * a space, followed by a single newline. A NULL p prints just the newline.
 *
 * Implemented as a loop: the recursive form needed one stack frame per
 * node and could exhaust the stack on lists with around a million nodes.
 */
void Print(struct Node* p) {
    for (; p != NULL; p = p->nextAddr) {
        printf(" %d", p->data);
    }
    printf("\n");
}
/*
 * Print a newline, then every value from the end of the list back to
 * node p, each preceded by a space. A NULL p prints just the newline.
 *
 * To avoid one stack frame per node (which exhausts the stack on very
 * long lists) the links are reversed in place, the reversed chain is
 * printed, and the links are restored while printing. The list is
 * therefore modified temporarily; it is back in its original shape when
 * the function returns.
 */
void RevPrint(struct Node* p) {
    struct Node* done = NULL;   /* part of the chain already re-linked */
    struct Node* cur = p;
    struct Node* next;

    /* Pass 1: reverse the links so the last node becomes the first. */
    while (cur != NULL) {
        next = cur->nextAddr;
        cur->nextAddr = done;
        done = cur;
        cur = next;
    }

    /* The recursive version emitted this newline before any value. */
    printf("\n");

    /* Pass 2: print along the reversed chain, restoring each link. */
    cur = done;
    done = NULL;
    while (cur != NULL) {
        printf(" %d", cur->data);
        next = cur->nextAddr;
        cur->nextAddr = done;
        done = cur;
        cur = next;
    }
}
谢谢。
答案 0 :(得分:2)
链表在现代机器上并不受欢迎。L1和L2缓存喜欢向量和数组,它们对链表非常不友好。请使用std::vector,而不是链表(当然更不要用手写的链表)。尝试用.reserve()为向量预留足够的内存来容纳所有新元素。只需把所有元素都追加到向量末尾,然后使用并行执行策略对向量进行排序。C++17可以轻松地做到这一点。std::stable_sort 的用法也非常相似。
答案 1 :(得分:1)
警告:这仅适用于尾部插入/附加:
int
main()
{
    // first node of the list; NULL while the list is empty
    struct Node* nodeObj = NULL;
    // last node of the list; maintained by Insert through the pointer below
    struct Node* nodeTail = NULL;
    int i = 1;

    // your other stuff ...

    while (i < 1000000) {
        nodeObj = Insert(nodeObj, &nodeTail, i);
        printf(" %d", i);
        i++;
    }

    // your other stuff ...
}
struct Node* Insert(Node* _pToNode, Node **tail,int _nbr)
{
Node *newValue = (struct Node*)malloc(sizeof(struct Node));
newValue->data = _nbr;
newValue->nextAddr = NULL;
if (_pToNode == NULL) {
_pToNode = newValue;
}
else {
(*tail)->nextAddr = newValue;
}
*tail = newValue;
return _pToNode;
}
您可以使用一个包含头指针和尾指针的“列表”结构(而不是把它们作为单独的参数传递)来整理这段代码。
更新:
非常酷/聪明,但不幸的是仍然很慢......
malloc 等函数在进行大量小块分配时可能会很慢。加快速度的一种方法是使用分配子池[如WeatherVane建议的那样]。
正如我所提到的,添加一个“列表”结构可以使事情更整洁,我在两个地方使用了它。一旦你采用这种做法,就可以在头/尾之外添加其他东西,比如计数。此外,如果你愿意的话,它也让单链表更容易转换为双向链表。
旁注:使用双向链表时,插入[稍微]更复杂,但在列表中间的插入速度要快得多,因为你不必遍历列表来找到前一个节点的指针(例如,节点中会有一个 prev 指针)。此外,RevPrintList 会变得像 PrintList 一样简单。
请注意[在我的系统上],反向打印耗尽了堆栈空间[并发生段错误],因此我把打印函数重新编写为非递归的。
这是一个整理过的版本,它的插入速度应该更快,因为单独的 malloc 调用次数减少了。
旁注:我没有为malloc
等添加必要的检查,返回null。
#include <stdio.h>
#include <stdlib.h>
//#include <conio.h>
typedef struct Node_ Node;
typedef struct List_ List;

// One element of a singly linked list of integers.
struct Node_ {
    int data;       // value stored in this node
    Node *next;     // following node, or NULL for the last node
};

// A list header: element count plus direct access to both ends.
struct List_ {
    int count;      // number of nodes appended so far
    Node *head;     // first node, or NULL when the list is empty
    Node *tail;     // last node; lets Insert append without walking the list
};

Node *NewNode(void);
Node *Insert(List *p, int data);
void Print(Node *p);
void PrintList(List *list);
void RevPrint(Node *p);
void RevPrintList(List *list);

// Allocation pool used by NewNode: `head` is the next unused slot of the
// current chunk and `count` is how many slots remain in it.
List freelist;
int
main()
{
    // The list under test; starts empty (count 0, no head, no tail).
    List nodelist = { 0, NULL, NULL };
    int value;

    fputs("---------------------------------\n"
          "Insert()\n---------------------------------\n", stdout);

    // Append 1..999999 in ascending order.
    for (value = 1; value < 1000000; value++) {
        Insert(&nodelist, value);
#if 0
        printf(" %d", value);
#endif
    }

    fputs("\n---------------------------------\n"
          "Print()\n---------------------------------\n", stdout);
#if 0
    Print(nodelist.head);
#else
    PrintList(&nodelist);
#endif

    fputs("---------------------------------\n"
          "RevPrint()\n---------------------------------", stdout);
#if 0
    RevPrint(nodelist.head);
#else
    RevPrintList(&nodelist);
#endif

    // Wait for input before exiting so the output stays visible.
    fputs("\nPress any key to continue...", stdout);
#if 0
    _getch();
#else
    getchar();
#endif
}
// Hand out one zero-initialized node from the allocation pool.
//
// Nodes are carved out of chunks obtained with a single calloc each, so
// most calls cost only a pointer bump. Chunks are never released by this
// version.
//
// Returns the node, or NULL if a new chunk was needed and could not be
// allocated (the pool is left empty so a later call tries again).
Node*
NewNode(void)
{
    // NOTE: adjust the chunk size to whatever value you want
    enum { NODES_PER_CHUNK = 1000 };
    Node *node;

    if (freelist.count <= 0) {
        Node *chunk = calloc(NODES_PER_CHUNK, sizeof *chunk);

        if (chunk == NULL) {
            freelist.count = 0;
            return NULL;
        }
        freelist.head = chunk;
        freelist.count = NODES_PER_CHUNK;
    }

    // take the next unused slot of the current chunk
    node = freelist.head++;
    freelist.count -= 1;

    return node;
}
// Append a node holding _nbr to the end of the list in constant time.
//
// list: list header to append to; its head, tail and count are updated.
// _nbr: value to store in the new node.
//
// Returns the new node, or NULL if NewNode could not supply one, in which
// case the list is left unchanged.
Node*
Insert(List* list,int _nbr)
{
    Node *node = NewNode();

    // without this check a failed allocation would be dereferenced below
    if (node == NULL)
        return NULL;

    node->data = _nbr;
    node->next = NULL;

    if (list->head == NULL) {
        list->head = node;
    }
    else {
        list->tail->next = node;
    }
    list->tail = node;
    list->count += 1;

    return node;
}
// Print every value from node p to the end of the chain, each preceded by
// a space, followed by one newline. A NULL p prints just the newline.
//
// Written as a loop: the recursive form used one stack frame per node and
// could exhaust the stack on lists with around a million nodes.
void
Print(Node* p)
{
    for (; p != NULL; p = p->next)
        printf(" %d", p->data);
    printf("\n");
}
// Print all values of the list from head to tail, each preceded by a
// space, and finish the line with a newline.
void
PrintList(List* list)
{
    Node *cur = list->head;

    while (cur != NULL) {
        printf(" %d", cur->data);
        cur = cur->next;
    }
    putchar('\n');
}
// Print a newline, then every value from the end of the chain back to
// node p, each preceded by a space. A NULL p prints just the newline.
//
// To avoid one stack frame per node (which exhausts the stack on very
// long lists) the links are reversed in place, the reversed chain is
// printed, and the links are restored while printing. The chain is
// therefore modified temporarily; it is back in its original shape when
// the function returns.
void
RevPrint(Node* p)
{
    Node *done = NULL;      // part of the chain already re-linked
    Node *cur = p;
    Node *next;

    // pass 1: reverse the links so the last node becomes the first
    while (cur != NULL) {
        next = cur->next;
        cur->next = done;
        done = cur;
        cur = next;
    }

    // the recursive version emitted this newline before any value
    printf("\n");

    // pass 2: print along the reversed chain, restoring each link
    cur = done;
    done = NULL;
    while (cur != NULL) {
        printf(" %d", cur->data);
        next = cur->next;
        cur->next = done;
        done = cur;
        cur = next;
    }
}
// Print all values of the list from tail to head, each preceded by a
// space, followed by one newline.
//
// Non-recursive: the node pointers are copied into a temporary array in
// reverse order and the array is then printed front to back. An empty
// list (or a NULL list pointer) prints just the newline. If the temporary
// array cannot be allocated, an error is reported on stderr and nothing
// is printed.
void
RevPrintList(List *list)
{
    Node **rlist;
    Node *node;
    int ridx;

    if (list == NULL || list->count <= 0) {
        printf("\n");
        return;
    }

    rlist = malloc(sizeof *rlist * (size_t) list->count);
    if (rlist == NULL) {
        perror("RevPrintList: malloc");
        return;
    }

    // fill from the back; stop at slot 0 even if the chain is longer than
    // count claims, so the array is never written out of bounds
    ridx = list->count - 1;
    for (node = list->head; node != NULL && ridx >= 0; node = node->next, --ridx)
        rlist[ridx] = node;

    // slots 0..ridx were never filled if the chain was shorter than count
    for (ridx = ridx + 1; ridx < list->count; ++ridx)
        printf(" %d", rlist[ridx]->data);
    printf("\n");

    free(rlist);
}
更新#2:
你可以把freeList做成一个“列表的列表”(使用另一种“节点”结构,其中保存的是指向列表的指针而不是数字),这样在程序结束时就可以释放内存。
以下是修改后的版本:
#include <stdio.h>
#include <stdlib.h>
//#include <conio.h>
typedef struct Node_ Node;
typedef struct List_ List;
typedef struct Freelist_ FreeList;

// One element of a singly linked list of integers.
struct Node_ {
    int data;       // value stored in this node
    Node *next;     // following node, or NULL for the last node
};

// A list header: element count plus direct access to both ends.
struct List_ {
    int count;      // number of nodes appended so far
    Node *head;     // first node, or NULL when the list is empty
    Node *tail;     // last node; lets Insert append without walking the list
};

// Bookkeeping for the chunked node allocator.
struct Freelist_ {
    int count;      // unused slots remaining in the current chunk
    Node *head;     // first chunk; chunks are chained through the `next`
                    // field of their first node
    Node *tail;     // most recently allocated chunk
    Node *avail;    // next unused slot in the current chunk
};

Node *NewNode(void);
Node *Insert(List *p, int data);
void Print(Node *p);
void PrintList(List *list);
void RevPrint(Node *p);
void RevPrintList(List *list);
void FreeAll(void);

// The single allocation pool shared by NewNode and FreeAll.
FreeList freelist = { 0 };
int
main()
{
    // The list under test; starts empty (count 0, no head, no tail).
    List nodelist = { 0, NULL, NULL };
    int value;

    fputs("---------------------------------\n"
          "Insert()\n---------------------------------\n", stdout);

    // Append 1..999999 in ascending order.
    for (value = 1; value < 1000000; value++) {
        Insert(&nodelist, value);
        // this printf will radically slow things down
#if 0
        printf(" %d", value);
#endif
    }

    fputs("\n---------------------------------\n"
          "Print()\n---------------------------------\n", stdout);
#if 0
    Print(nodelist.head);
#else
    PrintList(&nodelist);
#endif

    fputs("---------------------------------\n"
          "RevPrint()\n---------------------------------", stdout);
#if 0
    RevPrint(nodelist.head);
#else
    RevPrintList(&nodelist);
#endif

    // release all nodes back to the malloc free pool
    FreeAll();

    // Wait for input before exiting so the output stays visible.
    fputs("\nPress any key to continue...", stdout);
#if 0
    _getch();
#else
    getchar();
#endif
}
// Hand out one zero-initialized node from the allocation pool.
//
// Nodes are carved out of chunks obtained with a single calloc each. The
// first node of every chunk is not handed out: it links the chunk into
// the freelist.head chain so that FreeAll can release every chunk later.
//
// Returns the node, or NULL if a new chunk was needed and could not be
// allocated (the pool is left without free slots so a later call tries
// again).
Node*
NewNode(void)
{
    // NOTE: adjust the chunk size to whatever value you want
    enum { NODES_PER_CHUNK = 1000 };
    Node *node;

    if (freelist.count <= 0) {
        node = calloc(NODES_PER_CHUNK, sizeof *node);
        if (node == NULL) {
            freelist.count = 0;
            return NULL;
        }

        // maintain linked list of nodes that are at the _start_ of a
        // malloc area/arena
        if (freelist.head == NULL)
            freelist.head = node;
        else
            freelist.tail->next = node;
        freelist.tail = node;

        // burn the first node as a placeholder
        freelist.avail = node + 1;
        freelist.count = NODES_PER_CHUNK - 1;
    }

    // take the next unused slot of the current chunk
    node = freelist.avail++;
    freelist.count -= 1;

    return node;
}
// Release every chunk held by the allocation pool and reset the pool.
//
// All nodes previously returned by NewNode live inside these chunks, so
// they (and any list still linking them) become invalid after this call.
// The pool fields are cleared afterwards; otherwise `avail` and `count`
// would still describe freed memory and the next NewNode call would hand
// out a dangling pointer.
void
FreeAll(void)
{
    Node* node;
    Node* next;

    // walk the chain of chunk-start nodes; read `next` before freeing
    for (node = freelist.head; node != NULL; node = next) {
        next = node->next;
        free(node);
    }

    freelist.count = 0;
    freelist.head = NULL;
    freelist.tail = NULL;
    freelist.avail = NULL;
}
// Append a node holding _nbr to the end of the list in constant time.
//
// list: list header to append to; its head, tail and count are updated.
// _nbr: value to store in the new node.
//
// Returns the new node, or NULL if NewNode could not supply one, in which
// case the list is left unchanged.
Node*
Insert(List* list,int _nbr)
{
    Node *node = NewNode();

    // without this check a failed allocation would be dereferenced below
    if (node == NULL)
        return NULL;

    node->data = _nbr;
    node->next = NULL;

    if (list->head == NULL) {
        list->head = node;
    }
    else {
        list->tail->next = node;
    }
    list->tail = node;
    list->count += 1;

    return node;
}
// Print every value from node p to the end of the chain, each preceded by
// a space, followed by one newline. A NULL p prints just the newline.
//
// Written as a loop: the recursive form used one stack frame per node and
// could exhaust the stack on lists with around a million nodes.
void
Print(Node* p)
{
    for (; p != NULL; p = p->next)
        printf(" %d", p->data);
    printf("\n");
}
// Print all values of the list from head to tail, each preceded by a
// space, and finish the line with a newline.
void
PrintList(List* list)
{
    Node *cur = list->head;

    while (cur != NULL) {
        printf(" %d", cur->data);
        cur = cur->next;
    }
    putchar('\n');
}
// Print a newline, then every value from the end of the chain back to
// node p, each preceded by a space. A NULL p prints just the newline.
//
// To avoid one stack frame per node (which exhausts the stack on very
// long lists) the links are reversed in place, the reversed chain is
// printed, and the links are restored while printing. The chain is
// therefore modified temporarily; it is back in its original shape when
// the function returns.
void
RevPrint(Node* p)
{
    Node *done = NULL;      // part of the chain already re-linked
    Node *cur = p;
    Node *next;

    // pass 1: reverse the links so the last node becomes the first
    while (cur != NULL) {
        next = cur->next;
        cur->next = done;
        done = cur;
        cur = next;
    }

    // the recursive version emitted this newline before any value
    printf("\n");

    // pass 2: print along the reversed chain, restoring each link
    cur = done;
    done = NULL;
    while (cur != NULL) {
        printf(" %d", cur->data);
        next = cur->next;
        cur->next = done;
        done = cur;
        cur = next;
    }
}
// Print all values of the list from tail to head, each preceded by a
// space, followed by one newline.
//
// Non-recursive: the node pointers are copied into a temporary array in
// reverse order and the array is then printed front to back. An empty
// list (or a NULL list pointer) prints just the newline. If the temporary
// array cannot be allocated, an error is reported on stderr and nothing
// is printed.
void
RevPrintList(List *list)
{
    Node **rlist;
    Node *node;
    int ridx;

    if (list == NULL || list->count <= 0) {
        printf("\n");
        return;
    }

    rlist = malloc(sizeof *rlist * (size_t) list->count);
    if (rlist == NULL) {
        perror("RevPrintList: malloc");
        return;
    }

    // fill from the back; stop at slot 0 even if the chain is longer than
    // count claims, so the array is never written out of bounds
    ridx = list->count - 1;
    for (node = list->head; node != NULL && ridx >= 0; node = node->next, --ridx)
        rlist[ridx] = node;

    // slots 0..ridx were never filled if the chain was shorter than count
    for (ridx = ridx + 1; ridx < list->count; ++ridx)
        printf(" %d", rlist[ridx]->data);
    printf("\n");

    free(rlist);
}
更新#3:
这是一个支持节点重用的版本:它向 FreeList 添加了一个 reuse 成员,并添加了一个在删除节点时可以调用的 FreeOne 函数。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#include <conio.h>
typedef struct Node_ Node;
typedef struct List_ List;
typedef struct Freelist_ FreeList;

// One element of a singly linked list of integers.
struct Node_ {
    int data;               // value stored in this node
    Node *next;             // following node, or NULL for the last node
};

// A list header: element count plus direct access to both ends.
struct List_ {
    int count;              // number of nodes appended so far
    Node *head;             // first node, or NULL when the list is empty
    Node *tail;             // last node; lets Insert append without a walk
};

// Bookkeeping for the chunked node allocator.
struct Freelist_ {
    int count;              // unused slots remaining in the current chunk
    Node *head;             // first chunk; chunks are chained through the
                            // `next` field of their first node
    Node *tail;             // most recently allocated chunk
    Node *avail;            // next unused slot in the current chunk
    Node *reuse;            // stack of nodes handed back through FreeOne
};

// allocation pool
Node *NewNode(void);
void FreeOne(Node *p);
void FreeAll(void);

// list operations
Node *Insert(List *p,int data);
void Print(Node *p);
void PrintList(List *list);
void RevPrint(Node *p);
void RevPrintList(List *list);

// The single allocation pool shared by NewNode, FreeOne and FreeAll.
FreeList freelist = { 0 };
int
main()
{
    // The list under test; starts empty (count 0, no head, no tail).
    List nodelist = { 0, NULL, NULL };
    int value;

    fputs("---------------------------------\n"
          "Insert()\n---------------------------------\n",stdout);

    // Append 1..999999 in ascending order.
    for (value = 1; value < 1000000; value++) {
        Insert(&nodelist,value);
        // this printf will radically slow things down
#if 0
        printf(" %d",value);
#endif
    }

    fputs("\n---------------------------------\n"
          "Print()\n---------------------------------\n",stdout);
#if 0
    Print(nodelist.head);
#else
    PrintList(&nodelist);
#endif

    fputs("---------------------------------\n"
          "RevPrint()\n---------------------------------",stdout);
#if 0
    RevPrint(nodelist.head);
#else
    RevPrintList(&nodelist);
#endif

    // release all nodes back to the malloc free pool
    FreeAll();

    // Wait for input before exiting so the output stays visible.
    fputs("\nPress any key to continue...",stdout);
#if 0
    _getch();
#else
    getchar();
#endif
}
// Hand out one node from the allocation pool.
//
// Preference order:
//   1. a node previously released with FreeOne (its `next` is cleared,
//      its `data` still holds whatever value it had when released);
//   2. the next unused slot of the current chunk, allocating a fresh
//      zero-initialized chunk first when the current one is used up.
//
// The first node of every chunk is not handed out: it links the chunk
// into the list->head chain so that FreeAll can release every chunk.
//
// Returns the node, or NULL if a new chunk was needed and could not be
// allocated (the pool is left without free slots so a later call tries
// again).
Node *
NewNode(void)
{
    // NOTE: adjust the chunk size to whatever value you want
    enum { NODES_PER_CHUNK = 1000 };
    FreeList *list = &freelist;
    Node *node;

    // try to reuse a node that has been released by FreeOne
    node = list->reuse;
    if (node != NULL) {
        list->reuse = node->next;
        node->next = NULL;
        return node;
    }

    if (list->count <= 0) {
        node = calloc(NODES_PER_CHUNK,sizeof *node);
        if (node == NULL) {
            list->count = 0;
            return NULL;
        }

        // maintain linked list of nodes that are at the _start_ of a
        // malloc area/arena
        if (list->head == NULL)
            list->head = node;
        else
            list->tail->next = node;
        list->tail = node;

        // burn the first node as a placeholder
        list->avail = node + 1;
        list->count = NODES_PER_CHUNK - 1;
    }

    // grab one from the current allocation array
    node = list->avail++;
    list->count -= 1;

    return node;
}
// Return a single node to the pool so a later NewNode call can reuse it.
//
// node: a node obtained from NewNode; NULL is accepted and ignored, in
//       the same way free(NULL) is.
//
// The node's `next` field is overwritten to chain it into the reuse
// stack, so the node should no longer be linked into any list when it is
// released -- NOTE(review): callers are presumably expected to unlink it
// first; nothing here does that.
void
FreeOne(Node *node)
{
    FreeList *list = &freelist;

    if (node == NULL)
        return;

    // push this node onto the front of the reuse list (i.e. it's fast)
    node->next = list->reuse;
    list->reuse = node;
}
// Release every chunk held by the allocation pool, then zero the pool
// bookkeeping so it is back in its initial, empty state.
void
FreeAll(void)
{
    Node *chunk = freelist.head;

    // chunks are chained through their first node; fetch the link before
    // the chunk is freed
    while (chunk != NULL) {
        Node *following = chunk->next;

        free(chunk);
        chunk = following;
    }

    memset(&freelist,0,sizeof freelist);
}
// Append a node holding _nbr to the end of the list in constant time.
//
// list: list header to append to; its head, tail and count are updated.
// _nbr: value to store in the new node.
//
// Returns the new node, or NULL if NewNode could not supply one, in which
// case the list is left unchanged.
Node *
Insert(List *list,int _nbr)
{
    Node *node = NewNode();

    // without this check a failed allocation would be dereferenced below
    if (node == NULL)
        return NULL;

    node->data = _nbr;
    node->next = NULL;

    if (list->head == NULL) {
        list->head = node;
    }
    else {
        list->tail->next = node;
    }
    list->tail = node;
    list->count += 1;

    return node;
}
// Print every value from node p to the end of the chain, each preceded by
// a space, followed by one newline. A NULL p prints just the newline.
//
// Written as a loop: the recursive form used one stack frame per node and
// could exhaust the stack on lists with around a million nodes.
void
Print(Node *p)
{
    for (; p != NULL; p = p->next)
        printf(" %d",p->data);
    printf("\n");
}
// Print all values of the list from head to tail, each preceded by a
// space, and finish the line with a newline.
void
PrintList(List *list)
{
    Node *cur = list->head;

    while (cur != NULL) {
        printf(" %d",cur->data);
        cur = cur->next;
    }
    putchar('\n');
}
// Print a newline, then every value from the end of the chain back to
// node p, each preceded by a space. A NULL p prints just the newline.
//
// To avoid one stack frame per node (which exhausts the stack on very
// long lists) the links are reversed in place, the reversed chain is
// printed, and the links are restored while printing. The chain is
// therefore modified temporarily; it is back in its original shape when
// the function returns.
void
RevPrint(Node *p)
{
    Node *done = NULL;      // part of the chain already re-linked
    Node *cur = p;
    Node *next;

    // pass 1: reverse the links so the last node becomes the first
    while (cur != NULL) {
        next = cur->next;
        cur->next = done;
        done = cur;
        cur = next;
    }

    // the recursive version emitted this newline before any value
    printf("\n");

    // pass 2: print along the reversed chain, restoring each link
    cur = done;
    done = NULL;
    while (cur != NULL) {
        printf(" %d",cur->data);
        next = cur->next;
        cur->next = done;
        done = cur;
        cur = next;
    }
}
// Print all values of the list from tail to head, each preceded by a
// space, followed by one newline.
//
// Non-recursive: the node pointers are copied into a temporary array in
// reverse order and the array is then printed front to back. An empty
// list (or a NULL list pointer) prints just the newline. If the temporary
// array cannot be allocated, an error is reported on stderr and nothing
// is printed.
void
RevPrintList(List *list)
{
    Node **rlist;
    Node *node;
    int ridx;

    if (list == NULL || list->count <= 0) {
        printf("\n");
        return;
    }

    rlist = malloc(sizeof *rlist * (size_t) list->count);
    if (rlist == NULL) {
        perror("RevPrintList: malloc");
        return;
    }

    // fill from the back; stop at slot 0 even if the chain is longer than
    // count claims, so the array is never written out of bounds
    ridx = list->count - 1;
    for (node = list->head; node != NULL && ridx >= 0; node = node->next, --ridx)
        rlist[ridx] = node;

    // slots 0..ridx were never filled if the chain was shorter than count
    for (ridx = ridx + 1; ridx < list->count; ++ridx)
        printf(" %d",rlist[ridx]->data);
    printf("\n");

    free(rlist);
}
答案 2 :(得分:0)
我假设你提到的元素顺序是通过比较来确定的,即 A < B ⇒ A 在容器中排在 B 之前。
如果该假设成立,那么你所能期望的最好结果是每个元素的插入代价为 O(log N)。因此,如果你想在数百万个元素中插入一个元素,大约需要进行20次比较。如果要插入的这数百万个元素本身已经有序,你可以做得更好。
现在,基于指针的结构永远不是快速数据结构的答案。 std :: vector应该是你去的地方。
要使用for语句读取此快速插入结构,您需要一个非常奇特的迭代器,或者您需要将插入的内容重新排列到另一个向量中。
总之,您希望将内容插入到简单的二进制堆中。
多线程不会使这个更快,除非您可以为插入设计类似合并的方案。