Question

我有一个OOP实体组件系统，目前的工作原理如下：

// In the component system
// Abstract base class for every component attached to an Entity.
struct Component
{
    // Virtual destructor: components are owned and destroyed through
    // unique_ptr<Component>, so deleting via the base pointer must run the
    // derived class's destructor.
    virtual ~Component() = default;

    // Called once per entity update; implemented by concrete components.
    virtual void update() = 0;
};
// A game object: a liveness flag plus the components it owns.
struct Entity
{
    // Set to false to mark the entity for removal (see Manager::removeDead).
    bool alive{true};

    // Components are owned by the entity and destroyed together with it.
    vector<unique_ptr<Component>> components;

    // Forwards the update to every component, in insertion order.
    void update()
    {
        for(const auto& c : components) c->update();
    }
};

// In the user application
// Example of a user-defined component.
struct MyComp : Component
{
    void update() override { ... }
};

要创建新的实体和组件，我使用C++常规的new和delete：

// In the component system
struct Manager
{
    vector<unique_ptr<Entity>> entities;
    Entity& createEntity() 
    { 
        auto result(new Entity);
        entities.emplace_back(result);
        return *result;
    }
    template<typename TComp, typename... TArgs>
        TComp& createComponent(Entity& mEntity, TArgs... mArgs)
    {
        auto result(new TComp(forward<TArgs>(mArgs)...));
        mEntity.components.emplace_back(result);
        return result;
    }
    void removeDead() { /* remove all entities with 'alive == false' - 'delete' is called here by the 'unique_ptr' */ }
}

// In the user application
{
    Manager m;

    // Both factory functions return references to objects owned by the manager.
    auto& myEntity = m.createEntity();
    auto& myComp = m.createComponent<MyComp>(myEntity);
    // Do stuff with myEntity and myComp

    // Drop every entity that is no longer alive.
    m.removeDead();
}

系统运行正常，我喜欢它的语法和灵活性。但是，在向管理器不断添加和删除实体和组件时，内存分配/释放会降低应用程序的速度。（我已经做过性能分析，确定减速是由new和delete引起的。）

我最近读到可以在C++中预先分配堆内存——如何将其应用于我的情况？

期望的结果：

// In the user application
{
    // This manager can hold about 1000 entities with components.
    // (It may not be exactly 1000 because component sizes are dynamic:
    // the user can define their own components. That is fine for me.)
    Manager m{1000};

    auto& myEntity = m.createEntity();
    auto& myComp = m.createComponent<MyComp>(myEntity);
    // Do stuff with myEntity and myComp

    // No 'delete' is called here! Memory of the 'dead' entities can
    // be reused for new entity creation.
    m.removeDead();
}
// Manager goes out of scope: 'delete' is called here

Answer 1

您可以采取一些措施，让您的设计实现具有更好的可扩展性。

在您当前的实现中，每个Entity和Component都涉及两次内存分配。第一次是分配对象本身，第二次发生在将对象放入向量时：当向量空间不足时，它会分配一个更大的数组，并将旧元素移动到新数组中。

在这种情况下，您可以做的最好的事情就是使用侵入式链表。也就是说，让每个Entity和Component本身同时成为链表节点。这样，对象分配完成之后，将其放入链表就不再需要额外的内存分配。您可以使用Boost.Intrusive中的单向或双向链表，也可以自己编写链表。Linux内核就是用这种方式跟踪许多不同对象的。

下一步是预先分配Entity和Component元素。预分配可以简单到使用这些类型的全局数组，也可以更复杂，例如使用Boost.Pool。构建对象内存池的方法有很多。

预先分配Entity和Component并使用侵入式列表后，您就完成了。

使用boost组件的示例：

#include <boost/intrusive/list.hpp>
#include <boost/pool/pool_alloc.hpp>
#include <new>

namespace bi = boost::intrusive;

// api.h

//
// Object pooling support begin.
//
// Gives every type T its own boost::pool_allocator instance.
template<typename T>
struct Pool
{
    static boost::pool_allocator<T> pool;
};

// Out-of-class definition of the static member. Although it is defined in
// the header, the linkers make sure there is only one instance of it in the
// application. It is instantiated on demand when Pool<T> is used.
template<typename T>
boost::pool_allocator<T> Pool<T>::pool;

// Mixin that routes plain new/delete of Derived through Pool<Derived>.
// Use it on the most derived class only, not on intermediate base classes.
template<class Derived>
struct Pooled
{
    // Allocates storage for one Derived from the pool. A request of any other
    // size (e.g. from a class that inherits these operators) cannot be served
    // by the fixed-size pool, so it is forwarded to the global operator new.
    static void* operator new(std::size_t size)
    {
        if(size != sizeof(Derived))
            return ::operator new(size);
        return Pool<Derived>::pool.allocate(1);
    }

    // Returns storage to wherever operator new obtained it.
    static void operator delete(void* p, std::size_t size)
    {
        if(!p)
            return; // a delete-expression on a null pointer may still end up here
        if(size != sizeof(Derived)) {
            ::operator delete(p);
            return;
        }
        Pool<Derived>::pool.deallocate(static_cast<Derived*>(p), 1);
    }
};
//
// Object pooling support end.
//

// Using bi::list_base_hook<bi::link_mode<bi::auto_unlink> > because it automatically
// unlinks from the list when the object is destroyed. No need to manually
// remove the object from the list when an object is about to be destroyed.

// Polymorphic component interface. Deriving from the auto_unlink hook makes
// every component an intrusive list node.
struct Component
    : bi::list_base_hook<bi::link_mode<bi::auto_unlink> >
{
    // Virtual so that components can be deleted through a Component pointer.
    virtual ~Component() = default;

    // Implemented by concrete components.
    virtual void update() = 0;
};

// An entity: an intrusive list node that owns an intrusive list of components.
struct Entity
    : bi::list_base_hook<bi::link_mode<bi::auto_unlink> > // make it an intrusive list node
    , Pooled<Entity> // optional, make it allocated from the pool
{
    bool active = false;

    bi::list<Component, bi::constant_time_size<false> > components;

    // Destroys every component still linked into this entity.
    ~Entity() {
        // Each node is unlinked before the disposer runs, so deleting it here
        // never touches a stale link.
        components.clear_and_dispose([](Component* component) { delete component; });
    }

    // Updates the components in list order.
    void update() {
        for(auto it = components.begin(), last = components.end(); it != last; ++it)
            it->update();
    }
};

struct Manager
{
    bi::list<Entity, bi::constant_time_size<false> > entities;

    ~Manager() {
        for(auto i = entities.begin(), j = entities.end(); i != j;)
            delete &*i++; // i++ to make sure i stays valid after the object is destroyed
    }

    Entity& createEntity() {
        auto result = new Entity;
        entities.push_back(*result);
        return *result;
    }

    template<typename TComp, typename... TArgs>
    TComp& createComponent(Entity& mEntity, TArgs... mArgs)
    {
        auto result = new TComp(std::forward<TArgs>(mArgs)...);
        mEntity.components.push_back(*result);
        return *result;
    }

    void removeDead() {
        for(auto i = entities.begin(), j = entities.end(); i != j;) {
            auto& entity = *i++;
            if(!entity.active)
                delete &entity;
        }
    }
};

// user.cc
// Example user component: a Component whose storage comes from Pool<MyComp>.
struct MyComp
    : Component
    , Pooled<MyComp> // optional, make it allocated from the pool
{
    void update() override
    {
        // Nothing to do in this example.
    }
};

// Minimal usage example: create one entity with one component, then sweep.
int main() {
    Manager m;

    auto& myEntity = m.createEntity();
    auto& myComp = m.createComponent<MyComp>(myEntity);

    // Deletes every entity whose 'active' flag is false.
    m.removeDead();
}

在上面的示例中，boost::pool_allocator<T>实际上使用new来分配对象，之后它会不断重用已销毁的对象，而不是对它们调用delete。您可以通过预先分配所有对象来做得更好，但根据您的需求，实现方式有很多种，因此为了简单起见，我使用了boost::pool_allocator<T>，以免纠缠于细枝末节。您可以将Pooled<T>的实现改为Pooled<T, N>，其中N代表最大对象数；其余代码保持不变，因为它们使用的是普通的new/delete，只不过这两个运算符恰好被重载为从池中分配对象。

Answer 2

C++支持特定于类的内存池。通用的new/delete不可避免地要在以下几项开销之间进行权衡：

搜索适当大小的空闲块以满足每个请求所花费的时间
合并空闲块所花费的时间
维护和重组内部数据结构所花费的时间，以使上述两项操作更快。

获得速度的主要方法是使用自定义分配器来完全避免这些权衡——正如您所说——预先分配一大块内存，并将其视为由大小相同的空闲对象组成的简单数组。最初，这些块全部链接在一个空闲链表中，链接指针占据每个块的起始字节，“覆盖”在数据最终存放的位置上。分配只是从空闲链表的头部摘下一个块——一次大约需要2条指令的“弹出”（pop）操作。释放则是一次“压入”（push）操作：同样约两条指令。在许多情况下，可以设置内存硬件，使其在池耗尽时产生陷阱（trap），这样就不需要在每次分配时为检测这种错误情况付出开销。（在GC系统中，也使用同样的技巧在没有额外开销的情况下启动垃圾收集。）

在您的情况下，您需要两个池：一个用于实体，一个用于组件。

定义自己的池分配器并不是很难，特别是当您的应用程序是单线程时。有关教程式的讲解，请参阅this document。

Answer 3

您的其中一个问题可以通过在创建向量时为其分配足够的空间来解决。

对于

vector<unique_ptr<Entity>> entities;

在构造函数中提供足够的空间

Manager::Manager()
{
    // Reserve capacity only. Constructing the vector as entities(10000) would
    // instead fill it with 10000 null unique_ptr elements.
    entities.reserve(10000);
    //...
}

因此，您可以避免在后期阶段重新分配和复制。

第二个问题是创建unique_ptr<Entity>指针。在这里，由于您始终使用默认构造的对象，您也可以使用一个预先分配的对象池，并从中取得指针。这样就不再调用new，而是调用您自己的类：

// Hands out pointers to default-constructed entities taken from a
// pre-allocated vector.
class EntityPool
{
public:
     // Pre-constructs 'size' entities.
     EntityPool(unsigned int size = 10000) : pool(size), nextEntity(0)
     {
     }

     // Returns a pointer to the next unused entity, growing the pool by one
     // element once every pre-allocated entity has been handed out.
     // NOTE(review): growing the vector may reallocate its storage, which
     // invalidates every pointer returned earlier; size the pool so that this
     // never happens, or switch to a container with stable element addresses.
     // The returned pointer refers to storage owned by the pool and must not
     // be passed to 'delete'.
     Entity* getNext(void)
     {
         if (nextEntity == pool.size()) // if pool is exhausted create new
         {
             pool.emplace_back();
         }
         return &pool[nextEntity++];
     }
private:
     vector<Entity> pool;
     unsigned int nextEntity; // index into the vector to the next Entity
};

struct Manager
{
    vector<unique_ptr<Entity>> entities;
    // Takes the next entity from the pool, stores it and returns a reference to it.
    // NOTE(review): 'entities' holds unique_ptr<Entity>, whose default deleter
    // calls 'delete'; confirm that pool-owned entities are never destroyed that way.
    Entity& createEntity()
    {
        entities.emplace_back(entityPoolInstance.getNext());
        return *entities.back(); // the element just stored
    }
 //...

Answer 4

或者，您可以使用标准的“placement new”。它允许您分配一大块内存，并按照您的意愿在其中构造（放置）对象。这样，该内存块在您需要的期间会一直保留在堆上，您可以在其中分配多个短生命周期的对象，而不必执行代价高昂的分配和释放——后者最终会使堆碎片化。这其中涉及一些陷阱，但总的来说，这是一个非常简单的解决方案，无需走上自定义内存管理器的道路。这里有一篇excellent treatment of removing some of the pitfalls，详细介绍了placement new。我使用像栈这样简单的数据结构来跟踪下一个可分配的空闲块：释放时，将被删除块的地址压入栈；分配时，只需从栈中弹出下一个空闲块，并将其作为参数传给placement new。超级简单，超级快！

Answer 5

使用大多数答案和Google作为参考，我在SSVUtils library中实施了一些预分配工具。

Prealloc.h

示例：

using MemUnit = char; // Smallest addressable unit; sizeof(char) is always 1 byte
using MemUnitPtr = MemUnit*;
using MemSize = decltype(sizeof(MemUnit)); // The type yielded by sizeof, i.e. std::size_t

// Sketch of the raw memory block that the pre-allocators hand chunks out of.
// The constructor's initializer list and body are elided in this excerpt.
class MemBuffer
{
    Uptr<MemUnit[]> buffer; // presumably an owning pointer to the MemUnit array - Uptr is not shown here
    MemRange range; // presumably the memory range covered by 'buffer' - MemRange is not shown here

    // mSize: size of the buffer, in MemUnit elements.
    MemBuffer(MemSize mSize) : ... 
    { 
        // initialize buffer from mSize
    }
};

// Sketch of a pre-allocator that hands out equally sized chunks of one buffer.
// The constructor's initializer list and body are elided in this excerpt.
class PreAllocatorChunk
{
    protected:
        MemSize chunkSize; // presumably the size of one chunk - its initialization is elided
        MemBuffer buffer;
        std::stack<MemRange> available; // chunks that are currently free

    public:
        // mChunkSize: size of one chunk; mChunks: number of chunks to prepare.
        PreAllocatorChunk(MemSize mChunkSize, unsigned int mChunks) : ...
        {
            // Add "chunks" to available...
        }

        // Constructs a T from mArgs in the chunk on top of 'available' and
        // returns a pointer to it.
        // NOTE(review): nothing here checks that 'available' is non-empty, and
        // top()/pop() on an empty std::stack is undefined behaviour - confirm
        // that callers never exceed the number of chunks.
        template<typename T, typename... TArgs> T* create(TArgs&&... mArgs)
        {
            // create on first "chunk" using placement new
            auto toUse(available.top().begin); available.pop();
            return new (toUse) T{std::forward<TArgs>(mArgs)...};
        }
};

提供更多预分配实用程序：

PreAllocatorDynamic：预先分配一个大缓冲区，然后，在创建对象时，将缓冲区拆分为两部分：
- [buffer start, buffer start + obj size)
- [buffer start + obj size, buffer end)
当一个对象被销毁时，其占用的内存范围会被标记为“可用”。如果在创建新对象时找不到足够大的“块”，预分配器会先尝试合并相邻的内存块，然后才抛出运行时异常。这个预分配器有时比new/delete更快，但这在很大程度上取决于预分配缓冲区的大小。
PreAllocatorStatic<T>：继承自PreAllocatorChunk。块的大小等于sizeof(T)。最快的预分配器，灵活性较低。几乎总是比new/delete快。

实体组件系统的自定义堆预分配

5 个答案: