Question

这个问题的灵感来自

我编写了一个程序，该程序通过调用malloc为链表中的各个节点分配内存。

在一些速度测试中，malloc被一个函数包装，该函数导致malloc调用花费比正常更多的时间。这可以使测试检测到malloc的频繁使用。

如何避免为每个节点调用malloc？

Answer 1

在一些速度测试中，malloc 被一个包装函数所取代，该包装函数被刻意写成在分配内存的同时花费更多时间。因此，每当我在程序中调用 malloc 时，实际被调用的是这个包装函数，它耗时更长，测试便可借此检测出对 malloc 的频繁使用。问题是我使用了链表，因此内存是为链表的每个节点分别分配的。我不知道该如何更改这种实现，因为在我的结构中使用链表确实很方便。

您也许可以改用数组。

举一个简单的例子：

#include <stdio.h>
#include <stdlib.h>

// One node of the singly linked list. Entries are taken from the static
// myArray pool instead of being allocated one by one with malloc.
struct list_entry {
    struct list_entry *next;  // next entry in whichever list (live or free) holds this one
    int foo;                  // payload value stored in the entry
};

// Capacity of the static pool: the maximum number of entries that can exist.
#define MAX_THINGS 1234567
// Backing storage for every list entry.
struct list_entry myArray[MAX_THINGS];
// Index of the next slot of myArray that allocEntry will carve out;
// freeEntry lowers it again when the topmost slot is returned.
int firstFreeEntry = 0;

// Head of the list of freed entries; allocEntry recycles from here first.
struct list_entry *freeListHead = NULL;

// Head of the live list that createEntry pushes new entries onto.
struct list_entry *listHead = NULL;

// Hand out one list entry without calling malloc.
//
// Previously freed entries are reused first; otherwise the next unused slot
// of myArray is taken. Returns NULL once both sources are exhausted.
struct list_entry *allocEntry(void) {
    struct list_entry *entry = freeListHead;

    if(entry == NULL) {
        // Nothing to recycle: carve a fresh slot out of the array, if any remain
        if(firstFreeEntry >= MAX_THINGS) {
            return NULL;
        }
        entry = &myArray[firstFreeEntry];
        firstFreeEntry += 1;
        return entry;
    }

    // Pop the most recently freed entry off the free list
    freeListHead = entry->next;
    return entry;
}

// Return an entry obtained from allocEntry() to the pool.
//
// entry: the entry to release. NULL is ignored, mirroring free(NULL).
//
// If the entry occupies the topmost used slot of myArray, the array's
// high-water mark is simply lowered. Otherwise the entry is pushed onto the
// free list so that allocEntry() can recycle it.
void freeEntry(struct list_entry *entry) {
    if(entry == NULL) {
        return;
    }

    // Compare addresses directly rather than computing `entry - myArray`:
    // the difference is a ptrdiff_t that does not necessarily fit in an int.
    if(firstFreeEntry > 0 && entry == &myArray[firstFreeEntry - 1]) {
        // Give it back to the array
        firstFreeEntry--;
        return;
    }

    // Put it on the list of freed things
    entry->next = freeListHead;
    freeListHead = entry;
}

// Obtain an entry from the pool, store `value` in it and push it onto the
// front of the live list. Returns the new entry, or NULL (leaving the list
// untouched) when allocEntry() has nothing left to hand out.
struct list_entry *createEntry(int value) {
    struct list_entry *fresh = allocEntry();

    if(fresh == NULL) {
        return NULL;
    }

    fresh->foo = value;
    fresh->next = listHead;
    listHead = fresh;
    return fresh;
}

// Demo driver: create one million entries, printing each address, then walk
// the list from its head, printing and releasing every entry. Because the
// entries are released in reverse allocation order, the second half of the
// output is the first half reversed.
int main(void) {
    const int node_count = 1000 * 1000;
    struct list_entry *head = NULL;
    for (int index = 0; index < node_count; index++) {
        struct list_entry *entry = createEntry(0xdeadbeef);
        if (entry == NULL) {
            // Pool exhausted: keep the list built so far instead of losing its head
            fprintf(stderr, "out of list entries after %d allocations\n", index);
            break;
        }
        head = entry;
        // %p requires a void * argument
        printf("address of head = %p\n", (void *)head);
    }
    while (head) {
        // Read the link before the entry is handed back to the pool
        struct list_entry *next = head->next;
        printf("address of head = %p\n", (void *)head);
        freeEntry(head);
        head = next;
    }
    return 0;
}

输出

address of head = 0x101d32040
address of head = 0x101d32050
address of head = 0x101d32060
...
address of head = 0x101d32040

验证

$ ./test > storage.txt
$ split -l 1000000 storage.txt
$ tac xab > xac
$ diff xaa xac

Answer 2

一个简单的解决方案是使用mmap()实现一组替代的动态内存分配函数。

/*
 * malloc() replacement backed by an anonymous private mapping.
 *
 * Layout of each mapping: a size_t header holding the requested size,
 * immediately followed by the user block. The returned pointer addresses
 * the user block, so the header sits at ((size_t*)ptr)[-1].
 *
 * size: number of usable bytes requested.
 * Returns a pointer to `size` writable bytes, or NULL when the request
 * overflows or mmap() fails.
 */
void* alt_malloc( size_t size )
{
    /* size + header must not wrap around, or a tiny mapping would be created */
    if( size > (size_t)-1 - sizeof(size_t) )
    {
        return NULL ;
    }

    void* mem = mmap( NULL, size + sizeof(size_t),
                      PROT_READ | PROT_WRITE, 
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0 ) ;

    /* Report failure as NULL; do not offset the pointer first */
    if( mem == MAP_FAILED )
    {
        return NULL ;
    }

    *(size_t*)mem = size ;

    /* Arithmetic on void* is a compiler extension; step over the header via char* */
    return (char*)mem + sizeof( size_t ) ;
}

/*
 * calloc() replacement built on alt_malloc().
 *
 * nitems: number of elements; size: size of each element in bytes.
 * Returns the block obtained from alt_malloc(), or NULL when
 * nitems * size does not fit in a size_t.
 *
 * No explicit clearing is done here: alt_malloc() takes its memory from a
 * fresh anonymous mapping, which the system provides zero-filled.
 */
void* alt_calloc( size_t nitems, size_t size)
{
    /* Reject products that would wrap and silently under-allocate */
    if( size != 0 && nitems > (size_t)-1 / size )
    {
        return NULL ;
    }

    return alt_malloc( nitems * size ) ;
}

/*
 * free() replacement for blocks whose size_t header (holding the user size)
 * sits immediately before the user pointer.
 *
 * mem: pointer previously returned by the allocator, or NULL (ignored).
 *
 * The mapping starts at the header, not at the user pointer, and is
 * header + user size bytes long. munmap() must be given exactly that
 * start address; passing the user pointer fails because it is not
 * page-aligned, and the memory would never be released.
 */
void alt_free( void* mem )
{
    if( mem == NULL )
    {
        return ;
    }

    size_t* base = (size_t*)mem - 1 ;
    munmap( base, *base + sizeof(size_t) ) ;
} 

/*
 * realloc() replacement: always allocates a new block, copies the smaller
 * of the old and new sizes, then releases the old block.
 *
 * old_mem:  block to resize, or NULL to simply allocate new_size bytes.
 * new_size: requested size of the new block in bytes.
 * Returns the new block, or NULL if it could not be allocated, in which
 * case old_mem is left untouched and still owned by the caller.
 */
void* alt_realloc( void *old_mem, size_t new_size )
{
    /* As with realloc(NULL, n): there is no old header to read or block to copy */
    if( old_mem == NULL )
    {
        return alt_malloc( new_size ) ;
    }

    void* new_mem = alt_malloc( new_size ) ;
    if( new_mem != NULL )
    {
        /* The old block's size is stored in the size_t just before it */
        size_t old_size = *((size_t*)old_mem - 1) ;
        size_t copy_size = old_size > new_size ? new_size : old_size ;
        memcpy( new_mem, old_mem, copy_size ) ;
        alt_free( old_mem ) ;
    }   

    return new_mem ;
}

以下测试：

/* Size of the initial test block. Parenthesized so the macro expands
   correctly inside any larger expression. */
#define ALLOC_SIZE (1024 * 1024)

/*
 * Smoke test: fill a block with 'X', print its first and last byte, grow it
 * to twice the size and print the same two bytes again to show that the
 * contents were carried over to the new block.
 */
int main(void)
{
    char* test = alt_malloc( ALLOC_SIZE ) ;
    if( test == NULL )
    {
        return 1 ;
    }
    memset( test, 'X', ALLOC_SIZE ) ;
    printf( "%p : %c %c\n", (void*)test, test[0], test[ALLOC_SIZE-1] ) ;

    /* Keep the old pointer until the resize is known to have succeeded */
    char* grown = alt_realloc( test, ALLOC_SIZE * 2 ) ;
    if( grown == NULL )
    {
        alt_free( test ) ;
        return 1 ;
    }
    test = grown ;
    printf( "%p : %c %c\n", (void*)test, test[0], test[ALLOC_SIZE-1] ) ;
    alt_free( test ) ;

    return 0;
}

输出：

0x7f102957d008 : X X
0x7f1028ea3008 : X X

证明memset()覆盖了初始块的范围，并且重新分配创建了一个新块并复制了数据。

一种更高效但稍微复杂的解决方案是使用mmap()分配一个备用堆，然后实现一个在该内存块上运行的堆管理器，用来替代标准的内存分配函数。堆管理器的示例并不少见。

例如，您可以使用名称经过修改的Newlib C库分配器，并使用mmap()来实现sbrk()系统调用（同样需要重命名以防止冲突），以便为备用堆提供内存。

Answer 3

此程序以连续块的形式为链表中的节点分配内存。当块中的内存用完时，将分配一个新的块。

#include <stdio.h>
#include <stdlib.h>

// An imaginary node because the original question did not provide one
struct node {
    struct node *next, *prev;   // list links; next is also used to chain nodes on the free list
    int numbers[1024];          // placeholder payload
    char string[1024];          // placeholder payload
};

// One contiguous chunk of pre-allocated nodes; chunks are chained together.
struct node_storage {
    int count;                  // nodes of this chunk not yet handed out
    int total;                  // capacity of this chunk, in nodes
    struct node *node_list;     // array of `total` nodes
    struct node_storage *next;  // next chunk in the chain, or NULL
};

// Allocate one storage chunk with room for `count` nodes.
//
// count: number of nodes in the chunk; must be positive.
// Returns the new chunk (all nodes unused, next == NULL), or NULL if count is
// not positive, the byte size would overflow, or memory is exhausted. Nothing
// is leaked on failure.
struct node_storage *add_storage(int count) {
    if (count <= 0) {
        return NULL;
    }
    // Guard the size computation passed to malloc against wrap-around
    if ((size_t)count > (size_t)-1 / sizeof(struct node)) {
        return NULL;
    }

    struct node_storage *pstorage = malloc(sizeof *pstorage);
    if (pstorage == NULL) {
        return NULL;
    }
    // We could avoid a malloc here by making node_list an array
    pstorage->node_list = malloc(sizeof *pstorage->node_list * (size_t)count);
    if (pstorage->node_list == NULL) {
        // Do not hand back a half-built chunk
        free(pstorage);
        return NULL;
    }
    pstorage->count = count;
    pstorage->total = count;
    pstorage->next = NULL;
    return pstorage;
}

// Release every chunk in the chain that starts at `storage`, together with
// the node array each chunk owns. Passing NULL does nothing.
void free_storage(struct node_storage *storage) {
    struct node_storage *following;

    for (struct node_storage *chunk = storage; chunk != NULL; chunk = following) {
        // Remember the successor before this chunk's memory goes away
        following = chunk->next;
        free(chunk->node_list);
        free(chunk);
    }
}

// Obtain a node, preferring recycled nodes over pre-allocated storage.
//
// free_list: in/out head of the list of freed nodes; popped when non-empty.
// storage:   in/out pointer to the chunk currently being carved up; advanced
//            to a newly added chunk when the current one is used up.
// Returns a node, or NULL when no node is available (no current chunk, or a
// further chunk was needed and could not be allocated).
struct node *new_node(struct node **free_list, struct node_storage **storage) {
    struct node *recycled = *free_list;
    // If there is a free node, get the new node from the free list
    if (recycled) {
        *free_list = recycled->next;
        return recycled;
    }

    struct node_storage *pstorage = *storage;
    if (pstorage == NULL) {
        return NULL;
    }
    // The current chunk is only ever empty if an earlier attempt to add the
    // next chunk failed; try again before giving up.
    if (0 == pstorage->count) {
        if (pstorage->next == NULL) {
            pstorage->next = add_storage(pstorage->total);
        }
        if (pstorage->next == NULL) {
            return NULL;
        }
        pstorage = pstorage->next;
        *storage = pstorage;
    }

    // Get the new node from the pre-allocated storage
    struct node *result = &pstorage->node_list[pstorage->total - pstorage->count];
    pstorage->count--;
    // If that was the last pre-allocated node
    if (0 == pstorage->count) {
        // Allocate the next chunk of nodes; on failure stay on this chunk so
        // that the caller's storage pointer never becomes NULL
        pstorage->next = add_storage(pstorage->total);
        if (pstorage->next != NULL) {
            *storage = pstorage->next;
        }
    }
    return result;
}

// Push `node` onto the front of the free list so new_node() can reuse it.
//
// free_list: in/out head of the free list.
// node:      node to recycle; NULL is ignored.
//
// The link is overwritten unconditionally. When the free list is empty the
// node must end up with next == NULL; leaving its old link in place would
// make the free list run on into nodes that are still in use.
void free_node(struct node **free_list, struct node *node) {
    if (node == NULL) {
        return;
    }
    node->next = *free_list;
    *free_list = node;
}

// Demo driver: allocate a million nodes into a list, free them all, then
// allocate a million more and print their addresses so the two address
// sequences can be compared to confirm the free list is being reused.
int main(void) {
    const int node_count = 1000 * 1000;
    struct node_storage *head;
    struct node_storage *curr;
    struct node *free_list = NULL;
    struct node *node_list = NULL;
    head = add_storage(100);
    if (head == NULL) {
        return EXIT_FAILURE;
    }
    curr = head;
    // Allocate a lot of nodes and put them in a list
    for (int index = 0; index < node_count; index++) {
        struct node *new = new_node(&free_list, &curr);
        if (new == NULL) {
            free_storage(head);
            return EXIT_FAILURE;
        }
        // %p requires a void * argument
        printf("address of new = %p\n", (void *)new);
        new->next = node_list;
        node_list = new;
    }
    // Free all of the allocated nodes
    while (node_list) {
        struct node *pnode = node_list;
        node_list = node_list->next;
        free_node(&free_list, pnode);
    }
    // Allocate a lot of nodes so that we can verify that they come from
    // the free list
    for (int index = 0; index < node_count; index ++) {
        struct node *new = new_node(&free_list, &curr);
        if (new == NULL) {
            free_storage(head);
            return EXIT_FAILURE;
        }
        printf("address of new = %p\n", (void *)new);
    }
    free_storage(head);
    return 0;
}

输出

address of new = 0x10f972000
address of new = 0x10f973410
...
address of new = 0x243570230

警告

此代码不进行错误检查，因此不应在生产环境中使用。

注意

我修改了代码，以便将释放的节点放置在空闲列表中。请求新节点时，首先检查该列表。我通过比较像这样的节点地址进行了测试：

$ ./test > storage.txt
$ split -l 1000000 storage.txt
$ diff xaa xab

Answer 4

这类似于@Brendan的answer，但是对节点数没有固定限制。它源自我已经编写的代码。

节点被释放后，会被放入一个以链表形式组织的回收池中。需要节点时，先从回收池中取（如果有）；否则从数组中取（如果还有空位）；再否则就扩展该数组——不是只扩展一个节点，而是一次扩展一大批节点。这样可以减少调用realloc的次数。

#include <stdio.h>
#include <stdlib.h>

// Number of node slots added each time getnode() needs more storage.
#define CHUNK 8192

// Singly linked list node.
typedef struct Node {
    int data;               // payload value
    struct Node *next;      // next node; also links freed nodes together in the pool
} node;

node *array;        // storage that getnode() carves fresh nodes from
node *pool;         // head of the list of recycled (freed) nodes
int nodes_avail;    // number of node slots available in array
int nodes_used;     // number of slots of array already handed out

/*
 * Hand out one node, calling the system allocator at most once per CHUNK
 * nodes.
 *
 * Order of preference: a recycled node from the pool, then an unused slot of
 * the current chunk, then the first slot of a newly allocated chunk.
 * Returns a valid node; the program exits with status 1 if memory runs out.
 *
 * Storage is grown by allocating a separate chunk rather than by realloc()ing
 * one big array: realloc() is allowed to move the block, which would leave
 * every node pointer already handed out (and every next link stored in those
 * nodes) dangling. Earlier chunks are deliberately never moved or freed,
 * because live nodes and pool entries still point into them.
 */
node *getnode(void)
{
    node *newnode;
    if(pool) {                              // any in the recycled pool?
        newnode = pool;
        pool = pool->next;
        return newnode;
    }

    if(nodes_used >= nodes_avail) {         // current chunk exhausted (or none yet)?
        node *chunk = malloc(CHUNK * sizeof *chunk);
        if(chunk == NULL) {
            exit(1);                        // or recovery
        }
        array = chunk;                      // array now refers to the newest chunk only
        nodes_avail = CHUNK;
        nodes_used = 0;
    }

    newnode = &array[nodes_used];
    nodes_used++;
    return newnode;
}

/*
 * Recycle a node by pushing it onto the front of the pool, from which
 * getnode() serves later requests.
 *
 * nptr: node to recycle; NULL is ignored, mirroring free(NULL).
 */
void freenode(node *nptr)
{
    if(nptr == NULL) {
        return;
    }
    nptr->next = pool;                      // add to recycled pool
    pool = nptr;
}

// Demo driver: build a list of a million nodes, recycle all of them, then
// build the list again. Each node's address is printed in both passes so the
// two halves of the output can be compared to confirm the pool is reused.
int main(void) {
    const int node_count = 1000 * 1000;
    node *head = NULL;
    for (int index = 0; index < node_count; index++) {
        node *new = getnode();
        new->next = head;
        head = new;
        // %p requires a void * argument
        printf("address of head = %p\n", (void *)head);
    }
    while (head) {
        // Read the link before freenode() overwrites it
        node *next = head->next;
        freenode(head);
        head = next;
    }
    for (int index = 0; index < node_count; index++) {
        node *new = getnode();
        new->next = head;
        head = new;
        printf("address of head = %p\n", (void *)head);
    }
    return 0;
}

输出

address of head = 0x100bc7000
address of head = 0x100bc7010
address of head = 0x100bc7020
...
address of head = 0x101b2a3f0

验证

$ ./test > storage.txt
$ split -l 1000000 storage.txt
$ diff xaa xab

Answer 5

您可以做的最简单的事情是继续为链表的各个节点调用malloc，但是当节点被释放时，将它们放在空闲节点列表中。

/* head of the list of released nodes awaiting reuse; null when empty */
node *free_list = 0;

node *node_alloc(void)
{
   /* get a node fast from the free list, if available */
   if (free_list != 0) {
      node *ret = free_list;
      free_list = free_list->next;
      return ret;
   } else {
      node *ret = malloc(sizeof *ret);
      /* check for non-null, initialize */
      return ret;
   }
}

/*
 * Release a node by pushing it onto the free list instead of returning it to
 * the system allocator, so that node_alloc can hand it out again.
 *
 * node: the node to release; a null pointer is ignored, mirroring free().
 */
void node_free(node *node)
{
   if (!node) {
      return;
   }
   node->next = free_list;
   free_list = node;
}

如果您的程序中有些对象在任何给定时间都只会位于一个链表中，则可以将链表节点直接放在这些对象的内部，这样就不需要为节点单独分配内存：

/* Example of an object that embeds its list node rather than having the
   node allocated separately. */
struct process {
   node queue_node; /* list links for putting process into queues */
   ...
};

如何避免为链表中的每个节点调用malloc

5 个答案: