我有一个用于处理数组分配的类。这个类很简单,定义如下:
DimArray.hpp:
#ifndef DIMARRAY_HPP_INCLUDED
#define DIMARRAY_HPP_INCLUDED
#include <vector>
/// Rectangular 2-D array of `T` stored in a single contiguous std::vector.
///
/// operator[] returns a pointer to a run of `Height` consecutive elements, so
/// `a[i][j]` addresses element `i * Height + j` (0 <= i < Width, 0 <= j < Height).
/// No bounds checking is performed.
template<typename T>
class DimArray
{
private:
    typedef typename std::vector<T>::size_type size_type;
    typedef typename std::vector<T>::difference_type difference_type;

    int Width, Height;
    std::vector<T> Data;

    /// Number of elements for the given dimensions, computed in the vector's
    /// size type so the product is not evaluated in `int`.
    static size_type count(int w, int h)
    {
        return static_cast<size_type>(w) * static_cast<size_type>(h);
    }

public:
    /// Creates a Width x Height array of value-initialized elements
    /// (zeros for arithmetic types).
    DimArray(int Width, int Height);

    /// Copies Width * Height elements from the flat buffer `Data`.
    DimArray(T* Data, int Width, int Height);

    /// Copies Width * Height elements starting at `Data[0]`.
    /// Only `Data[0]` is dereferenced, so all rows must live in one contiguous
    /// buffer; separately allocated rows are NOT supported by this overload.
    DimArray(T** Data, int Width, int Height);

    DimArray(const DimArray &da) = default;

    // noexcept so containers of DimArray move elements instead of copying them.
    DimArray(DimArray &&da) noexcept = default;

    inline int size() {return Width * Height;}
    inline int size() const {return Width * Height;}
    inline int width() {return Width;}
    inline int width() const {return Width;}
    inline int height() {return Height;}
    inline int height() const {return Height;}

    /// Pointer to the first of the `Height` elements belonging to `Index`.
    inline T* operator [](int Index) {return Data.data() + static_cast<difference_type>(Height) * Index;}
    inline const T* operator [](int Index) const {return Data.data() + static_cast<difference_type>(Height) * Index;}

    /// Unified copy/move assignment: the argument is taken by value and its
    /// storage is swapped into this object.
    DimArray& operator = (DimArray da) noexcept;
};

template<typename T>
DimArray<T>::DimArray(int Width, int Height)
    : Width(Width), Height(Height), Data(count(Width, Height)) {}

// The vector is built directly from the source range; the elements are written
// once instead of being zero-filled first and then overwritten.
template<typename T>
DimArray<T>::DimArray(T* Data, int Width, int Height)
    : Width(Width), Height(Height), Data(Data, Data + count(Width, Height)) {}

template<typename T>
DimArray<T>::DimArray(T** Data, int Width, int Height)
    : Width(Width), Height(Height), Data(Data[0], Data[0] + count(Width, Height)) {}

template<typename T>
DimArray<T>& DimArray<T>::operator = (DimArray<T> da) noexcept
{
    Width = da.Width;
    Height = da.Height;
    Data.swap(da.Data);
    return *this;
}
#endif // DIMARRAY_HPP_INCLUDED
为了给基准测试计时,我使用以下代码:
Timer.hpp:
#ifndef TIME_HPP_INCLUDED
#define TIME_HPP_INCLUDED
#include <chrono>
#if defined _WIN32 || defined _WIN64
#include <windows.h>
/// Interval timer for Windows built on QueryPerformanceCounter.
///
/// `T` is the std::chrono duration type in which elapsed time is reported;
/// End() returns the count of `T` units since the last Start().
template<typename T>
class Timer
{
private:
    typedef T duration;
    typedef typename T::rep rep;
    typedef typename T::period period;
    typedef std::chrono::time_point<Timer, duration> time_point;

    time_point Time;
    static const bool is_steady = true;

    // Performance-counter ticks per second, queried once per Timer object.
    const long long g_Frequency = []() -> long long
    {
        LARGE_INTEGER frequency;
        QueryPerformanceFrequency(&frequency);
        return frequency.QuadPart;
    }();

    /// Current counter value converted to `duration` units.
    /// NOTE: only period::den is applied, so this assumes T::period::num == 1
    /// (true for nanoseconds, microseconds, milliseconds and seconds).
    inline time_point now()
    {
        LARGE_INTEGER count;
        QueryPerformanceCounter(&count);

        // Convert whole seconds and the sub-second remainder separately.
        // Multiplying the raw counter by period::den in one step overflows a
        // 64-bit rep once the counter is large enough (e.g. den = 1e9).
        const long long whole = count.QuadPart / g_Frequency;
        const long long part = count.QuadPart % g_Frequency;
        const rep den = static_cast<rep>(period::den);

        return time_point(duration(static_cast<rep>(whole) * den
                                   + static_cast<rep>(part) * den / static_cast<rep>(g_Frequency)));
    }

public:
    /// Records the current counter value as the start of the interval.
    inline void Start() {this->Time = this->now();}

    /// Returns the time elapsed since Start(), as a count of `T` units.
    inline rep End() {return std::chrono::duration_cast<T>(this->now() - this->Time).count();}
};
#else
/// Portable interval timer.
///
/// `T` is the std::chrono duration type in which elapsed time is reported;
/// End() returns the count of `T` units since the last Start().
template<typename T>
class Timer
{
private:
    // steady_clock is monotonic, which is what interval measurement needs;
    // high_resolution_clock is allowed to be an alias of the adjustable system clock.
    typedef std::chrono::steady_clock clock;

    static const bool is_steady = clock::is_steady;
    clock::time_point Time;

public:
    /// Records the current time as the start of the interval.
    inline void Start() {this->Time = clock::now();}

    /// Returns the time elapsed since Start(), as a count of `T` units.
    /// `typename` is required here: T::rep is a dependent type name.
    inline typename T::rep End() {return std::chrono::duration_cast<T>(clock::now() - this->Time).count();}
};
#endif
#endif // TIME_HPP_INCLUDED
我的基准如下:
// Benchmark: 100 rounds of allocating and releasing a 10000 x 10000 grid of int,
// first as an array of separately allocated rows (int**), then as a DimArray<int>.
//
// Note that the two halves do different work: `new int[n]` leaves the elements
// uninitialized, whereas DimArray's (Width, Height) constructor fills its
// vector with zeros.
int main()
{
    const int Rounds = 100;
    const int Dim = 10000;

    Timer<std::chrono::nanoseconds> T;

    T.Start();
    for (int round = 0; round < Rounds; ++round)
    {
        int** T2DArray = new int*[Dim];
        for (int row = 0; row < Dim; ++row)
        {
            T2DArray[row] = new int[Dim];
        }
        for (int row = 0; row < Dim; ++row)
        {
            delete[] T2DArray[row];
        }
        delete[] T2DArray;
    }
    // The timer is instantiated with std::chrono::nanoseconds, so the printed
    // count is in nanoseconds.
    std::cout<<T.End()<<" ns\n\n";

    T.Start();
    for (int round = 0; round < Rounds; ++round)
    {
        DimArray<int> TwoDArray(Dim, Dim);
    }
    std::cout<<T.End()<<" ns\n\n";
}
它打印的结果是:
4.9599256 seconds //for int**
42.9303941 seconds //for DimArray<int>
这是一个巨大的差异!我无法弄清楚为什么?!
所以我把它改成了:
// Benchmark: 100 rounds of allocating and releasing a 10000 x 10000 grid of int,
// first as an array of separately allocated rows (int**), then as one flat
// `new int[]` block. Neither variant initializes the elements.
int main()
{
    const int Rounds = 100;
    const int Dim = 10000;

    Timer<std::chrono::nanoseconds> T;

    T.Start();
    for (int round = 0; round < Rounds; ++round)
    {
        int** T2DArray = new int*[Dim];
        for (int row = 0; row < Dim; ++row)
        {
            T2DArray[row] = new int[Dim];
        }
        for (int row = 0; row < Dim; ++row)
        {
            delete[] T2DArray[row];
        }
        delete[] T2DArray;
    }
    // The timer is instantiated with std::chrono::nanoseconds, so the printed
    // count is in nanoseconds.
    std::cout<<T.End()<<" ns\n\n";

    T.Start();
    for (int round = 0; round < Rounds; ++round)
    {
        int* TwoDArray = new int[Dim * Dim];
        delete[] TwoDArray;
    }
    std::cout<<T.End()<<" ns\n\n";
}
结果是:
4.6085725 seconds //for int**
0.1142958 seconds //for int*
与使用原始指针相比,我这个使用 std::vector 的类为什么会慢这么多?
答案 0 :(得分:7)
vector 会将它分配的内存清零,而您使用 new 的代码得到的是“垃圾初始化”的内存。因此,构造 TwoDArray(10000, 10000) 会得到一个全为零的数组,而 new int[10000 * 10000] 得到的是一个内容未定义的数组。(仅仅读取这些内容就会导致未定义行为。)
请注意,这意味着在使用 vector 的情况下,您的程序实际上会写入全部 100000000 个 int,而在使用 new 的情况下,您的程序只是为这么多个 int 预留了地址空间。
为了使测量结果具有可比性,您必须先将 new 出来的数组清零;例如使用 int* TwoDArray = new int[10000 * 10000](); 代替 int* TwoDArray = new int[10000 * 10000];。
答案 1 :(得分:3)
为了说明Billy ONeal的建议以及戏剧性的结果,我做到了这一点:
/// Allocator that behaves like std::allocator<T> except that construct() does
/// nothing, so containers using it leave their elements uninitialized.
///
/// Intended only to demonstrate the cost of element initialization: every
/// construct() call is a no-op, including calls that pass a value, so reading
/// an element that was never assigned yields an indeterminate value.
template <class T>
class no_init_alloc
    : public std::allocator<T>
{
public:
    using std::allocator<T>::allocator;

    // Without this member, rebinding resolves to the inherited
    // std::allocator<T>::rebind (where the library still provides it) and
    // yields std::allocator<U>; containers that rebind their allocator would
    // then silently use the ordinary initializing construct().
    template <class U> struct rebind { using other = no_init_alloc<U>; };

    // Deliberately constructs nothing; all arguments are ignored.
    template <class U, class... Args> void construct(U*, Args&&...) {}
};
template<typename T>
class DimArray
{
private:
int Width, Height;
std::vector<T, no_init_alloc<T>> Data;
public:
…
我的结果从:
2959854464 us
28734347029 us
变为:
2980402236 us
850190 us
答案 2 :(得分:1)
我在新年前夜派对上有点迟了。
除了提供自定义分配器之外,还可以先调用 reserve(),然后在实际数据可用时再调用 push_back() / emplace_back()。从未定义行为的角度来看,这样做也是安全的:您只会访问已经初始化的数据。不幸的是,这种方法可能不适合您的应用。
必须非常小心这种基准测试。例如,Linux做懒惰分配;如果我运行下面的代码,我会得到以下结果:
$ /usr/bin/time --verbose ./a.out
int[]: 0 ms
vector: 0 ms
...
Maximum resident set size (kbytes): 1004
如果我把下面代码中任意一行被注释掉的代码取消注释,最大驻留集大小就会变为 391 MB。
#include <chrono>
#include <cstdio>
#include <vector>
// Number of ints requested by each allocation below.
constexpr std::size_t SIZE = 100000000;

// Times two allocations of SIZE ints that each touch only the first element:
// a raw `new int[]` and a vector that reserves capacity and pushes one value.
// Uncommenting either alternative line makes the program write every element.
int main(int /*argc*/, char* /*argv*/[]) {
    // steady_clock is monotonic, which is what interval measurement needs.
    using clock = std::chrono::steady_clock;
    using std::chrono::duration_cast;
    using std::chrono::milliseconds;

    const auto t1 = clock::now();
    {
        int* pInt = new int[SIZE];
        //int* pInt = new int[SIZE]();
        pInt[0] = 0;
        delete[] pInt;
    }
    const auto t2 = clock::now();
    {
        std::vector<int> v;
        v.reserve(SIZE);
        v.push_back(0);
        //v.resize(SIZE, 0);
    }
    const auto t3 = clock::now();

    // milliseconds::rep is not guaranteed to be `long`; cast explicitly so the
    // argument always matches the %lld conversion.
    std::printf("int[]: %lld ms\n", static_cast<long long>(duration_cast<milliseconds>(t2 - t1).count()));
    std::printf("vector: %lld ms\n", static_cast<long long>(duration_cast<milliseconds>(t3 - t2).count()));
}