new[] 比 Win32 的 VirtualAlloc 更快吗?

时间:2013-02-01 00:52:01

标签: c++ winapi memory-management

我正在测试一些字符串池分配器的性能:一个是这里(here)介绍的那种,先调用 VirtualAlloc,再从中切分出子分配;另一个是使用标准 C++(不直接调用任何 Win32 API)和 new[] 的类似实现。

我原本期望 VirtualAlloc 版本更快,因为我认为它的开销应该比 C++ 的 new[] 更小;但我观察到的结果恰恰相反:使用 new[] 的代码似乎比使用更底层的 VirtualAlloc 的代码更快。

我运行了几次测试(代码是用VS2010 SP1编译的),输出是这样的:

String pool using VirtualAlloc: 1280.07 ms
String pool using new[]: 799.193 ms

这是为什么?为什么new[]似乎比VirtualAlloc更快?

测试源代码如下:

////////////////////////////////////////////////////////////////////////////
// Testing VirtualAlloc vs. new[].
////////////////////////////////////////////////////////////////////////////


#include <string.h>
#include <wchar.h>
#include <algorithm>
#include <exception>
#include <iostream>
#include <new>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
#include <windows.h>
using namespace std;


//--------------------------------------------------------------------------
// String pool allocator using VirtualAlloc, based on this:
// http://blogs.msdn.com/oldnewthing/archive/2005/05/19/420038.aspx
//--------------------------------------------------------------------------
class StringPoolUsingVirtualAlloc
{
public:

    StringPoolUsingVirtualAlloc()
        : m_pchNext(nullptr), 
          m_pchLimit(nullptr), 
          m_phdrCur(nullptr)
    {
        SYSTEM_INFO si;
        GetSystemInfo(&si);
        m_dwGranularity = static_cast<DWORD>( 
            RoundUp( sizeof(HEADER) + MIN_CBCHUNK, si.dwAllocationGranularity 
            ));
    }

    ~StringPoolUsingVirtualAlloc()
    {
        HEADER* phdr = m_phdrCur;
        while (phdr) 
        {
            HEADER * phdrPrev = phdr->m_phdrPrev;
            VirtualFree(phdr, 0, MEM_RELEASE);
            phdr = phdrPrev;
        }
    }

    wchar_t* DuplicateString(const wstring& source)
    {
        return AllocString(source.c_str(), source.c_str() + source.length());
    }

private:
    union HEADER 
    {
        struct 
        {
            HEADER* m_phdrPrev;
            SIZE_T  m_cb;
        };
        wchar_t alignment;
    };

    enum 
    { 
        MIN_CBCHUNK = 32000,
        MAX_CHARALLOC = 1024*1024
    };

    wchar_t*  m_pchNext;
    wchar_t*  m_pchLimit;
    HEADER*   m_phdrCur;
    DWORD     m_dwGranularity;

    static SIZE_T RoundUp(SIZE_T cb, SIZE_T units)
    {
        return ((cb + units - 1) / units) * units;
    }

    wchar_t* AllocString(const wchar_t* pchBegin, const wchar_t* pchEnd)
    {
        SIZE_T cchTotal = pchEnd - pchBegin + 1;
        if (cchTotal > MAX_CHARALLOC) 
            throw length_error("String too big.");

        wchar_t* psz = m_pchNext;
        if (m_pchNext + cchTotal <= m_pchLimit) 
        {
            m_pchNext += cchTotal;
            lstrcpynW(psz, pchBegin, static_cast<int>(cchTotal));
            return psz;
        }

        SIZE_T cbAlloc = RoundUp(cchTotal * sizeof(wchar_t) + sizeof(HEADER), m_dwGranularity);
        BYTE* pbNext = reinterpret_cast<BYTE*>(
            VirtualAlloc(nullptr, cbAlloc, MEM_COMMIT, PAGE_READWRITE));
        if (pbNext == nullptr) 
            throw bad_alloc();

        m_pchLimit = reinterpret_cast<wchar_t*>(pbNext + cbAlloc);
        HEADER* phdrCur = reinterpret_cast<HEADER*>(pbNext);
        phdrCur->m_phdrPrev = m_phdrCur;
        phdrCur->m_cb = cbAlloc;
        m_phdrCur = phdrCur;
        m_pchNext = reinterpret_cast<wchar_t*>(phdrCur + 1);
        return AllocString(pchBegin, pchEnd);
    }

    StringPoolUsingVirtualAlloc(const StringPoolUsingVirtualAlloc &);
    StringPoolUsingVirtualAlloc & operator=(const StringPoolUsingVirtualAlloc &);
};


//--------------------------------------------------------------------------
// String pool allocator that uses standard C++ (no Win32 stuff) and new[].
//--------------------------------------------------------------------------
// Bump-pointer string pool whose chunks are plain new[] arrays.
//
// Strings are copied one after another into the current chunk; a new chunk
// is created when the current one cannot hold the next string. All chunks
// are owned by m_chunks and destroyed together by the destructor.
class StringPoolUsingNew
{
public:

    // Creates an empty pool; the first chunk is allocated lazily.
    StringPoolUsingNew()
        : m_pchNext(nullptr), 
          m_pchLimit(nullptr)
    {
    }

    // Destroys every chunk (and therefore every string handed out).
    ~StringPoolUsingNew()
    {
        for (auto it = m_chunks.begin(); it != m_chunks.end(); ++it)
            delete *it;
    }

    // Copies `source` into the pool and returns a pointer to the
    // NUL-terminated copy. Throws length_error if the string (plus
    // terminator) exceeds kMaxCharAlloc characters, bad_alloc on
    // allocation failure.
    wchar_t* DuplicateString(const wstring& source)
    {
        return AllocString(source.c_str(), source.c_str() + source.length());
    }

private:

    // Owns one new[]-allocated character buffer.
    class Chunk
    {
    public:
        explicit Chunk(size_t maxCharCount)
        {
            m_data = new wchar_t[maxCharCount];
            m_maxCharCount = maxCharCount;
        }

        ~Chunk()
        {
            delete [] m_data;
        }

        wchar_t* Begin()             { return m_data; }
        const wchar_t* Begin() const { return m_data; }
        size_t Length() const        { return m_maxCharCount; }

    private:
        // Non-copyable: declared but intentionally not defined.
        Chunk(const Chunk&);
        Chunk& operator=(const Chunk&);

        wchar_t * m_data;
        size_t m_maxCharCount;
    };

    static const size_t kMinChunkCharCount = 16000;   // smallest chunk, in characters
    static const size_t kMaxCharAlloc = 1024*1024;    // largest accepted string, in characters

    wchar_t*  m_pchNext;     // first free character in the current chunk
    wchar_t*  m_pchLimit;    // one past the end of the current chunk
    vector<Chunk*> m_chunks; // every chunk allocated so far (owned)

    // Copies the characters [pchBegin, pchEnd) plus a terminator into the
    // pool, creating a new chunk first when the current one is too small.
    wchar_t* AllocString(const wchar_t* pchBegin, const wchar_t* pchEnd)
    {
        const size_t cchTotal = pchEnd - pchBegin + 1;
        if (cchTotal > kMaxCharAlloc) 
            throw length_error("String too big.");

        // Compare against the remaining room instead of forming
        // m_pchNext + cchTotal: before the first chunk exists both pointers
        // are null, and adding a non-zero offset to a null pointer is
        // undefined behaviour.
        const size_t cchAvailable = static_cast<size_t>(m_pchLimit - m_pchNext);
        if (cchTotal > cchAvailable)
        {
            // Copy the constant into a local so it is never bound to a
            // reference: the static member has no out-of-class definition.
            const size_t minChunkCharCount = kMinChunkCharCount;
            const size_t newChunkSize = max(cchTotal, minChunkCharCount);

            // Grow the vector first so that a failing push_back cannot leak
            // a freshly created chunk; roll the slot back if the chunk
            // itself cannot be created.
            Chunk* newChunk = nullptr;
            m_chunks.push_back(newChunk);
            try
            {
                newChunk = new Chunk(newChunkSize);
            }
            catch (...)
            {
                m_chunks.pop_back();
                throw;
            }
            m_chunks.back() = newChunk;

            m_pchNext = newChunk->Begin();
            m_pchLimit = newChunk->Begin() + newChunk->Length();
        }

        wchar_t* dest = m_pchNext;
        m_pchNext += cchTotal;
        const size_t copyCount = cchTotal - 1;
        if (copyCount != 0)
            wmemcpy(dest, pchBegin, copyCount);
        dest[copyCount] = L'\0';
        return dest;
    }

    // Non-copyable: declared but intentionally not defined.
    StringPoolUsingNew(const StringPoolUsingNew&);
    StringPoolUsingNew& operator=(const StringPoolUsingNew&);
};


//------------------------------------------------------------------------
//                          Perf Measurement
//------------------------------------------------------------------------

long long Counter() 
{
    LARGE_INTEGER li;
    QueryPerformanceCounter(&li);
    return li.QuadPart;
}

long long Frequency() 
{
    LARGE_INTEGER li;
    QueryPerformanceFrequency(&li);
    return li.QuadPart;
}

// Prints "<s>: <elapsed> ms" to cout, where the elapsed time is the
// difference between two Counter() readings converted to milliseconds.
void PrintTime(long long start, long long finish, const char * s) 
{
    const long long elapsedTicks = finish - start;
    const double elapsedMs = elapsedTicks * 1000.0 / Frequency();
    cout << s << ": " << elapsedMs << " ms" << endl;
}


//--------------------------------------------------------------------------
// Test
//--------------------------------------------------------------------------
// Benchmark driver: builds a shuffled list of test strings, then times how
// long each pool takes to duplicate all of them. Returns kExitOk on success
// and kExitError if any std::exception escapes.
int main()
{
    static const int kExitOk = 0;
    static const int kExitError = 1;
    try
    {
        long long start = 0;
        long long finish = 0;

        // Build the input once, outside the timed regions: 400,000 copies of
        // each lorem line, each suffixed with " (#i)", in shuffled order.
        const auto shuffled = []() -> vector<wstring> 
        {
            const wstring lorem[] = {
                L"Lorem ipsum dolor sit amet, consectetuer adipiscing elit.",
                L"Maecenas porttitor congue massa. Fusce posuere, magna sed",
                L"pulvinar ultricies, purus lectus malesuada libero,",
                L"sit amet commodo magna eros quis urna.",
                L"Nunc viverra imperdiet enim. Fusce est. Vivamus a tellus.",
                L"Pellentesque habitant morbi tristique senectus et netus et",
                L"malesuada fames ac turpis egestas. Proin pharetra nonummy pede.",
                L"Mauris et orci."
            };

            vector<wstring> v;
            for (long long i = 0; i < 400*1000; ++i) 
            {
                for (auto it = begin(lorem); it != end(lorem); ++it) 
                {
                    v.push_back((*it) + L" (#" + to_wstring(i) + L")");
                }
            }
            random_shuffle(v.begin(), v.end());

            return v;
        }();

        // Timed region 1. The inner scope ends before `finish` is read, so
        // the measurement includes destroying the pool and the vector.
        start = Counter();
        {
            StringPoolUsingVirtualAlloc pool;
            vector<const wchar_t*> v;
            for (auto it = shuffled.begin(); it != shuffled.end(); ++it)
            {
                v.push_back( pool.DuplicateString(*it) );
            }
        }
        finish = Counter();
        PrintTime(start, finish, "String pool using VirtualAlloc");

        // Timed region 2: same workload and same scoping, new[]-based pool.
        start = Counter();
        {
            StringPoolUsingNew pool;
            vector<const wchar_t*> v;
            for (auto it = shuffled.begin(); it != shuffled.end(); ++it)
            {
                v.push_back( pool.DuplicateString(*it) );
            }
        }
        finish = Counter();
        PrintTime(start, finish, "String pool using new[]");

        return kExitOk;
    }
    catch (const exception& e)
    {
        cerr << "*** ERROR: " << e.what() << endl;
        return kExitError;
    }
}

////////////////////////////////////////////////////////////////////////////

3 个答案:

答案 0 :(得分:13)

是的,反复调用new[]要比反复调用VirtualAlloc快得多。

首先,了解new T[N]的作用非常重要。 new运算符通过调用operator new[]来分配存储空间。至少从Visual C ++ 2010开始,operator new[]只调用malloc,它调用Windows API HeapAlloc从CRT堆中分配存储。在Visual C ++ 2012之前,每个CRT都有自己的堆,通过HeapCreate创建。在Visual C ++ 2012中,CRT使用通过GetProcessHeap获得的进程堆。从性能角度来看,使用哪个堆并不重要。

VirtualAlloc用于将内存页面映射到进程的虚拟地址空间。当您需要控制整个页面时,将使用此功能。例如,如果要分配存储以保存可执行代码,则需要使用VirtualAlloc,以便可以更改该存储的权限以允许执行。 VirtualAlloc未针对通用内存分配进行优化。

通用内存分配应该使用堆:堆一次映射一大块地址空间,然后从这块已映射的地址空间中满足各个分配请求。这样,堆就不必在每次请求分配时都映射和取消映射虚拟页面(同样重要的是,堆也不必在每次分配时都把内存清零)。

当我运行原始基准测试时,我得到以下结果:

String pool using VirtualAlloc: 1162.45 ms
String pool using new[]: 625.842 ms

我将VirtualAlloc的使用替换为HeapAlloc。为此,我使用HeapCreate(0, 0, 0)为分配器创建了一个私有堆,然后把对VirtualAlloc和VirtualFree的调用替换为针对这个私有堆的HeapAlloc和HeapFree调用。(请注意,我没有使用进程堆,因为正如我在上面解释的那样,new[]使用该堆,因此在这里使用该堆可能会改变new[]分配器的性能。)修改后的分配器的结果如下:

String pool using HeapAlloc: 919.853 ms
String pool using new[]: 636.515 ms

嗯,这非常令人失望!我们将自定义分配器的性能提高了21%,但它仍然比new[]慢得多。怎么了?

剖析器帮助指出了问题所在:您的基准测试是在拿苹果和橙子作比较。基于new[]的分配器使用wmemcpy复制字符串,而基于VirtualAlloc的分配器使用lstrcpyn。wmemcpy只是简单地调用memcpy,而memcpy具有内在(intrinsic)形式,因此可以被完全内联为速度极快的内在实现。lstrcpyn是Windows API函数,无法内联。基于VirtualAlloc的分配器根本没有胜算!

我将lstrcpyn替换为wmemcpy。结果如下:

String pool using HeapAlloc: 636.149 ms
String pool using new[]: 655.479 ms

这些是我们期望的结果:它们的表现大致相同,new[]只是稍慢一点,可能是因为经由operator new和malloc调用带来了少量开销。

答案 1 :(得分:7)

因为new会一次性调用VirtualAlloc(或者更有可能是HeapAlloc)申请相当大的一块内存,然后把这块内存用于多次new调用;而直接调用VirtualAlloc则完全按照您的要求执行,精确地分配您所请求的内容。同样,使用delete释放内存也比VirtualFree更快,因为一次释放的内存更多。

这与使用fgetc比ReadFile更快是同一个道理——当然,如果您一次读取一千兆字节,ReadFile可能比调用无数次fgetc快不少;但如果您一次只读取一个字节,ReadFile给系统带来的负担会比fgetc重得多,因为fgetc会一次读取一批(可能是4KB)数据,然后从该缓冲区中一次取出一个字符,直到缓冲区为空。

答案 2 :(得分:3)

因此,@JamesMcNellis 找到了主要问题:基于VirtualAlloc的池分配器使用的是lstrcpynW,而基于new[]的池分配器使用的是wmemcpy。

我使用wmemcpy统一修改原始代码,并多次运行测试并计算每次测试的平均执行时间(不包括第一次运行)。

我还在基准测试中添加了一个基于HeapAlloc的池分配器和一个简单的vector<wstring>。

现在的结果是:

--- Tests summary ---
VirtualAlloc : 781.671 ms
HeapAlloc    : 806.597 ms
new[]        : 889.792 ms
STL strings  : 1491.36 ms

所以, VirtualAlloc 似乎是最快的(正如预期的那样)。

随后是可编译代码(使用VS2010 SP1 / VC10构建):

////////////////////////////////////////////////////////////////////////////
// Testing VirtualAlloc vs. HeapAlloc vs. new[] vs. STL strings.
////////////////////////////////////////////////////////////////////////////


#include <string.h>
#include <wchar.h>
#include <algorithm>
#include <deque>
#include <exception>
#include <iostream>
#include <new>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
#include <windows.h>
using namespace std;


//--------------------------------------------------------------------------
// String pool allocator using VirtualAlloc, based on this:
// http://blogs.msdn.com/oldnewthing/archive/2005/05/19/420038.aspx
//--------------------------------------------------------------------------
class StringPoolUsingVirtualAlloc
{
public:

    StringPoolUsingVirtualAlloc()
        : m_pchNext(nullptr), 
        m_pchLimit(nullptr), 
        m_phdrCur(nullptr)
    {
        SYSTEM_INFO si;
        GetSystemInfo(&si);
        m_dwGranularity = static_cast<DWORD>( 
            RoundUp( sizeof(HEADER) + MIN_CBCHUNK, si.dwAllocationGranularity 
            ));
    }

    ~StringPoolUsingVirtualAlloc()
    {
        HEADER* phdr = m_phdrCur;
        while (phdr) 
        {
            HEADER * phdrPrev = phdr->m_phdrPrev;
            VirtualFree(phdr, 0, MEM_RELEASE);
            phdr = phdrPrev;
        }
    }

    const wchar_t* DuplicateString(const wstring& source)
    {
        return AllocString(source.c_str(), source.c_str() + source.length());
    }

private:
    union HEADER 
    {
        struct 
        {
            HEADER* m_phdrPrev;
            SIZE_T  m_cb;
        };
        wchar_t alignment;
    };

    enum 
    { 
        MIN_CBCHUNK = 32000,
        MAX_CHARALLOC = 1024*1024
    };

    wchar_t*  m_pchNext;
    wchar_t*  m_pchLimit;
    HEADER*   m_phdrCur;
    DWORD     m_dwGranularity;

    static SIZE_T RoundUp(SIZE_T cb, SIZE_T units)
    {
        return ((cb + units - 1) / units) * units;
    }

    wchar_t* AllocString(const wchar_t* pchBegin, const wchar_t* pchEnd)
    {
        SIZE_T cchTotal = pchEnd - pchBegin + 1;
        if (cchTotal > MAX_CHARALLOC) 
            throw length_error("String too big.");

        wchar_t* psz = m_pchNext;
        if (m_pchNext + cchTotal <= m_pchLimit) 
        {
            m_pchNext += cchTotal;
            wmemcpy(psz, pchBegin, cchTotal);
            return psz;
        }

        SIZE_T cbAlloc = RoundUp(cchTotal * sizeof(wchar_t) + sizeof(HEADER), m_dwGranularity);
        BYTE* pbNext = reinterpret_cast<BYTE*>(
            VirtualAlloc(nullptr, cbAlloc, MEM_COMMIT, PAGE_READWRITE));
        if (pbNext == nullptr) 
            throw bad_alloc();

        m_pchLimit = reinterpret_cast<wchar_t*>(pbNext + cbAlloc);
        HEADER* phdrCur = reinterpret_cast<HEADER*>(pbNext);
        phdrCur->m_phdrPrev = m_phdrCur;
        phdrCur->m_cb = cbAlloc;
        m_phdrCur = phdrCur;
        m_pchNext = reinterpret_cast<wchar_t*>(phdrCur + 1);
        return AllocString(pchBegin, pchEnd);
    }

    StringPoolUsingVirtualAlloc(const StringPoolUsingVirtualAlloc &);
    StringPoolUsingVirtualAlloc & operator=(const StringPoolUsingVirtualAlloc &);
};


//--------------------------------------------------------------------------
// String pool allocator using HeapAlloc, 
// based on the VirtualAlloc allocator.
//--------------------------------------------------------------------------
class StringPoolUsingHeapAlloc
{
public:

    StringPoolUsingHeapAlloc()
        : m_pchNext(nullptr), 
        m_pchLimit(nullptr), 
        m_phdrCur(nullptr)
    {
        m_heap = HeapCreate(0, 0, 0);
        if (m_heap == nullptr)
            throw runtime_error("Can't create an heap with HeapCreate().");

        SYSTEM_INFO si;
        GetSystemInfo(&si);
        m_dwGranularity = static_cast<DWORD>( 
            RoundUp( sizeof(HEADER) + MIN_CBCHUNK, si.dwAllocationGranularity 
            ));
    }

    ~StringPoolUsingHeapAlloc()
    {
        HEADER* phdr = m_phdrCur;
        while (phdr) 
        {
            HEADER * phdrPrev = phdr->m_phdrPrev;
            HeapFree(m_heap, 0, phdr);
            phdr = phdrPrev;
        }
        HeapDestroy(m_heap);
    }

    const wchar_t* DuplicateString(const wstring& source)
    {
        return AllocString(source.c_str(), source.c_str() + source.length());
    }

private:
    union HEADER 
    {
        struct 
        {
            HEADER* m_phdrPrev;
            SIZE_T  m_cb;
        };
        wchar_t alignment;
    };

    enum 
    { 
        MIN_CBCHUNK = 32000,
        MAX_CHARALLOC = 1024*1024
    };

    HANDLE    m_heap;
    wchar_t*  m_pchNext;
    wchar_t*  m_pchLimit;
    HEADER*   m_phdrCur;
    DWORD     m_dwGranularity;

    static SIZE_T RoundUp(SIZE_T cb, SIZE_T units)
    {
        return ((cb + units - 1) / units) * units;
    }

    wchar_t* AllocString(const wchar_t* pchBegin, const wchar_t* pchEnd)
    {
        SIZE_T cchTotal = pchEnd - pchBegin + 1;
        if (cchTotal > MAX_CHARALLOC) 
            throw length_error("String too big.");

        wchar_t* psz = m_pchNext;
        if (m_pchNext + cchTotal <= m_pchLimit) 
        {
            m_pchNext += cchTotal;
            wmemcpy(psz, pchBegin, cchTotal);
            return psz;
        }

        SIZE_T cbAlloc = RoundUp(cchTotal * sizeof(wchar_t) + sizeof(HEADER), m_dwGranularity);
        BYTE* pbNext = static_cast<BYTE*>( HeapAlloc(m_heap, 0, cbAlloc) );
        if (pbNext == nullptr) 
            throw bad_alloc();

        m_pchLimit = reinterpret_cast<wchar_t*>(pbNext + cbAlloc);
        HEADER* phdrCur = reinterpret_cast<HEADER*>(pbNext);
        phdrCur->m_phdrPrev = m_phdrCur;
        phdrCur->m_cb = cbAlloc;
        m_phdrCur = phdrCur;
        m_pchNext = reinterpret_cast<wchar_t*>(phdrCur + 1);
        return AllocString(pchBegin, pchEnd);
    }

    StringPoolUsingHeapAlloc(const StringPoolUsingHeapAlloc &);
    StringPoolUsingHeapAlloc & operator=(const StringPoolUsingHeapAlloc &);
};


//--------------------------------------------------------------------------
// String pool allocator that uses standard C++ (no Win32 stuff) and new[].
//--------------------------------------------------------------------------
// Bump-pointer string pool whose chunks are plain new[] arrays.
//
// Strings are copied one after another into the current chunk; a new chunk
// is created when the current one cannot hold the next string. All chunks
// are owned by m_chunks and destroyed together by the destructor.
class StringPoolUsingNew
{
public:

    // Creates an empty pool; the first chunk is allocated lazily.
    StringPoolUsingNew()
        : m_pchNext(nullptr), 
        m_pchLimit(nullptr)
    {
    }

    // Destroys every chunk (and therefore every string handed out).
    ~StringPoolUsingNew()
    {
        for (auto it = m_chunks.begin(); it != m_chunks.end(); ++it)
            delete *it;
    }

    // Copies `source` into the pool and returns a pointer to the
    // NUL-terminated copy. Throws length_error if the string (plus
    // terminator) exceeds kMaxCharAlloc characters, bad_alloc on
    // allocation failure.
    const wchar_t* DuplicateString(const wstring& source)
    {
        return AllocString(source.c_str(), source.c_str() + source.length());
    }

private:

    // Owns one new[]-allocated character buffer.
    class Chunk
    {
    public:
        explicit Chunk(size_t maxCharCount)
        {
            m_data = new wchar_t[maxCharCount];
            m_maxCharCount = maxCharCount;
        }

        ~Chunk()
        {
            delete [] m_data;
        }

        wchar_t* Begin()             { return m_data; }
        const wchar_t* Begin() const { return m_data; }
        size_t Length() const        { return m_maxCharCount; }

    private:
        // Non-copyable: declared but intentionally not defined.
        Chunk(const Chunk&);
        Chunk& operator=(const Chunk&);

        wchar_t * m_data;
        size_t m_maxCharCount;
    };

    static const size_t kMinChunkCharCount = 16000;   // smallest chunk, in characters
    static const size_t kMaxCharAlloc = 1024*1024;    // largest accepted string, in characters

    wchar_t*  m_pchNext;     // first free character in the current chunk
    wchar_t*  m_pchLimit;    // one past the end of the current chunk
    vector<Chunk*> m_chunks; // every chunk allocated so far (owned)

    // Copies the characters [pchBegin, pchEnd) plus a terminator into the
    // pool, creating a new chunk first when the current one is too small.
    wchar_t* AllocString(const wchar_t* pchBegin, const wchar_t* pchEnd)
    {
        const size_t cchTotal = pchEnd - pchBegin + 1;
        if (cchTotal > kMaxCharAlloc) 
            throw length_error("String too big.");

        // Compare against the remaining room instead of forming
        // m_pchNext + cchTotal: before the first chunk exists both pointers
        // are null, and adding a non-zero offset to a null pointer is
        // undefined behaviour.
        const size_t cchAvailable = static_cast<size_t>(m_pchLimit - m_pchNext);
        if (cchTotal > cchAvailable)
        {
            // Copy the constant into a local so it is never bound to a
            // reference: the static member has no out-of-class definition.
            const size_t minChunkCharCount = kMinChunkCharCount;
            const size_t newChunkSize = max(cchTotal, minChunkCharCount);

            // Grow the vector first so that a failing push_back cannot leak
            // a freshly created chunk; roll the slot back if the chunk
            // itself cannot be created.
            Chunk* newChunk = nullptr;
            m_chunks.push_back(newChunk);
            try
            {
                newChunk = new Chunk(newChunkSize);
            }
            catch (...)
            {
                m_chunks.pop_back();
                throw;
            }
            m_chunks.back() = newChunk;

            m_pchNext = newChunk->Begin();
            m_pchLimit = newChunk->Begin() + newChunk->Length();
        }

        wchar_t* dest = m_pchNext;
        m_pchNext += cchTotal;
        const size_t copyCount = cchTotal - 1;
        if (copyCount != 0)
            wmemcpy(dest, pchBegin, copyCount);
        dest[copyCount] = L'\0';
        return dest;
    }

    // Non-copyable: declared but intentionally not defined.
    StringPoolUsingNew(const StringPoolUsingNew&);
    StringPoolUsingNew& operator=(const StringPoolUsingNew&);
};


//--------------------------------------------------------------------------
// This is just a simple standard container of wstring, to compare 
// performance of this simple and easy approach vs. the other pool 
// allocators.
//--------------------------------------------------------------------------
class StringPoolVectorOfString
{
public:

    StringPoolVectorOfString()
    {
    }

    ~StringPoolVectorOfString()
    {
    }

    // Stores a copy of `source` and returns a pointer to the stored copy's
    // character data. The pointer stays valid until the pool is destroyed.
    const wchar_t* DuplicateString(const wstring& source)
    {
        m_strings.push_back(source);
        return m_strings.back().c_str();
    }

private:
    // A deque rather than a vector: the pointers handed out by
    // DuplicateString point into the stored strings, and a vector would
    // move its elements on reallocation, leaving earlier pointers dangling
    // (always for short strings held inline). deque::push_back never
    // invalidates references to existing elements.
    deque<wstring> m_strings;

    // Non-copyable: declared but intentionally not defined.
    StringPoolVectorOfString(const StringPoolVectorOfString&);
    StringPoolVectorOfString& operator=(const StringPoolVectorOfString&);
};


//------------------------------------------------------------------------
//                          Perf Measurement
//------------------------------------------------------------------------

long long Counter() 
{
    LARGE_INTEGER li;
    QueryPerformanceCounter(&li);
    return li.QuadPart;
}

long long Frequency() 
{
    LARGE_INTEGER li;
    QueryPerformanceFrequency(&li);
    return li.QuadPart;
}


//--------------------------------------------------------------------------
// Tests
//--------------------------------------------------------------------------

// Prints the first N strings in a vector-like container.
// Writes the first `firstN` elements of `c` (or all of them, if `c` is
// shorter) to wcout, one per line as "#<1-based index>: <element>",
// followed by an empty line and a flush.
template <typename Container>
void PrintFirst(const Container & c, const size_t firstN)
{
    const size_t count = min(firstN, c.size());
    size_t index = 0;
    while (index < count)
    {
        const size_t ordinal = index + 1;
        wcout << "#" << ordinal << ": " << c[index] << '\n';
        ++index;
    }
    wcout << endl;
}

// Prints the first N strings using the specified allocator.
// Duplicates the first `firstN` strings of `source` through a fresh
// `Allocator` instance and prints the copies under the heading
// "<allocatorName> :", so the output can be compared by eye with the input.
template <typename Allocator>
void VerifyAllocator(const vector<wstring>& source, const size_t firstN, const char* allocatorName)
{
    const size_t count = min(firstN, source.size());

    // The pool must outlive the pointer list: the pointers refer to memory
    // it owns.
    Allocator pool;
    vector<const wchar_t*> copies;

    size_t index = 0;
    while (index < count)
    {
        const wchar_t* const copy = pool.DuplicateString(source[index]);
        copies.push_back(copy);
        ++index;
    }

    wcout << allocatorName << " :\n";
    PrintFirst(copies, count);
}

// Tests a given allocator, returning the execution time in ms.
// Times one full run of `Allocator`: constructing it, duplicating every
// string in `source`, and destroying both the pointer list and the
// allocator. Prints "Testing <allocatorName> : <time> ms" and returns the
// elapsed time in milliseconds.
template <typename Allocator>
double TestAllocator(const vector<wstring>& source, const char* allocatorName)
{
    wcout << "Testing " << allocatorName << " : ";

    const long long ticksBefore = Counter();
    {
        // Both objects die at the closing brace, inside the timed region.
        Allocator pool;
        vector<const wchar_t*> copies;

        auto current = source.begin();
        while (current != source.end())
        {
            copies.push_back( pool.DuplicateString(*current) );
            ++current;
        }
    }
    const long long ticksAfter = Counter();

    const long long elapsedTicks = ticksAfter - ticksBefore;
    const double time = elapsedTicks * 1000.0 / Frequency(); // ms

    wcout << time << " ms\n";
    return time;
}

// Calculates the average in a vector of doubles.
// Returns the arithmetic mean of `data`.
// Throws invalid_argument when `data` is empty.
double Average(const vector<double>& data)
{
    if (data.empty())
        throw invalid_argument("Can't compute average of empty vector.");

    // Seed the running total with the first element, then add the rest in
    // order.
    auto it = data.begin();
    double total = *it;
    for (++it; it != data.end(); ++it)
        total += *it;

    const size_t count = data.size();
    return (total / count);
}

// App entry-point ("test driver").
// App entry-point ("test driver").
// Builds a shuffled list of test strings, prints a short sanity check for
// every allocator, runs one discarded warm-up pass, then times each
// allocator kTestCount times and prints the average per allocator.
// Returns kExitOk on success and kExitError if a std::exception escapes.
int main()
{
    static const int kExitOk = 0;
    static const int kExitError = 1;
    try
    {
        wcout << '\n';
        wcout << "Testing VirtualAlloc vs. HeapAlloc vs. new[] allocators vs STL strings.\n";
        wcout << "-----------------------------------------------------------------------\n\n"; 

        wcout << "Preparing some strings for testing...\n";

        // Build the input once, before any timing: kLoopCount copies of each
        // lorem line, each suffixed with " (#i)", in shuffled order.
        const auto shuffled = []() -> vector<wstring> 
        {
            const wstring lorem[] = {
                L"Lorem ipsum dolor sit amet, consectetuer adipiscing elit.",
                L"Maecenas porttitor congue massa. Fusce posuere, magna sed",
                L"pulvinar ultricies, purus lectus malesuada libero,",
                L"sit amet commodo magna eros quis urna.",
                L"Nunc viverra imperdiet enim. Fusce est. Vivamus a tellus.",
                L"Pellentesque habitant morbi tristique senectus et netus et",
                L"malesuada fames ac turpis egestas. Proin pharetra nonummy pede.",
                L"Mauris et orci."
            };

            vector<wstring> v;
            // Debug builds use a much smaller data set.
#ifdef _DEBUG
            static const int kLoopCount = 10;
#else
            static const int kLoopCount = 400*1000;
#endif
            for (long long i = 0; i < kLoopCount; ++i) 
            {
                for (auto it = begin(lorem); it != end(lorem); ++it) 
                {
                    v.push_back((*it) + L" (#" + to_wstring(i) + L")");
                }
            }
            random_shuffle(v.begin(), v.end());

            return v;
        }();

        wcout << "Total string count: " << shuffled.size() << "\n\n";
        wcout << "Some verification output ...\n\n";
        wcout << "Original array of strings :\n";
        PrintFirst(shuffled, 5);

        // Sanity check: each allocator should echo the same first 5 strings.
        VerifyAllocator<StringPoolUsingVirtualAlloc>(shuffled, 5, "VirtualAlloc");
        VerifyAllocator<StringPoolUsingHeapAlloc>(shuffled, 5, "HeapAlloc");
        VerifyAllocator<StringPoolUsingNew>(shuffled, 5, "new[]");
        VerifyAllocator<StringPoolVectorOfString>(shuffled, 5, "vector<wstring>");

        // One timing sample per test loop, per allocator.
        vector<double> timeVirtualAlloc;
        vector<double> timeHeapAlloc;
        vector<double> timeNew;
        vector<double> timeStlString;

        static const int kTestCount = 10;

        // First execution tests are discarded.
        wcout << "\nWarm up... discard first tests execution.\n";
        TestAllocator<StringPoolUsingVirtualAlloc>(shuffled, "VirtualAlloc");
        TestAllocator<StringPoolUsingHeapAlloc>(shuffled, "HeapAlloc");
        TestAllocator<StringPoolUsingNew>(shuffled, "new[]");
        TestAllocator<StringPoolVectorOfString>(shuffled, "vector<wstring>");

        // Run the tests several times and compute the average for each test.
        for (int i = 0; i < kTestCount; i++)
        {
            wcout << "\nTest loop #" << (i+1) << ":\n";
            timeVirtualAlloc.push_back( TestAllocator<StringPoolUsingVirtualAlloc>(shuffled, "VirtualAlloc") );
            timeHeapAlloc.push_back( TestAllocator<StringPoolUsingHeapAlloc>(shuffled, "HeapAlloc") );
            timeNew.push_back( TestAllocator<StringPoolUsingNew>(shuffled, "new[]") );
            timeStlString.push_back( TestAllocator<StringPoolVectorOfString>(shuffled, "vector<wstring>") );
        }

        // Print average times
        wcout << "\n\n--- Tests summary ---\n";
        wcout << "VirtualAlloc : " << Average(timeVirtualAlloc) << " ms\n";
        wcout << "HeapAlloc    : " << Average(timeHeapAlloc) << " ms\n";
        wcout << "new[]        : " << Average(timeNew) << " ms\n";
        wcout << "STL strings  : " << Average(timeStlString) << " ms\n";
        wcout << '\n';

        return kExitOk;
    }
    catch (const exception& e)
    {
        wcerr << "\n*** ERROR: " << e.what() << '\n';
        return kExitError;
    }
}


////////////////////////////////////////////////////////////////////////////