#include <algorithm>
#include <chrono>
#include <cstring>
#include <iostream>
#include <numeric>
#include <vector>
using namespace std;
// Micro-benchmark: times one memcpy between two raw int arrays and one
// std::copy between two vector<int>, printing each duration in nanoseconds.
int main() {
// Number of int elements copied by each variant.
constexpr size_t elems = 500000;
{
// Variant 1: raw arrays with automatic storage, copied with memcpy.
int array_source[elems];
int array_destination[elems];
// Fill the source with 0, 1, 2, ..., elems - 1.
iota(begin(array_source),end(array_source),0);
auto start = chrono::high_resolution_clock::now();
// NOTE(review): array_destination is never read after this call, so an
// optimizing compiler may discard the copy; the measured time may then not
// reflect any actual copying.
memcpy (array_destination, array_source, elems * sizeof(int));
auto stop = chrono::high_resolution_clock::now();
auto duration = stop - start;
auto nanoseconds = chrono::duration_cast<chrono::nanoseconds>(duration);
cout << "Duration of old style copy: " << nanoseconds.count() << " ns." << endl;
}
{
// Variant 2: vectors of the same element count, copied with std::copy.
vector<int> vector_source(elems);
vector<int> vector_destination(elems);
// Fill the source with 0, 1, 2, ..., elems - 1.
iota(begin(vector_source),end(vector_source),0);
auto start = chrono::high_resolution_clock::now();
// vector_destination is likewise not read after the copy.
copy(begin(vector_source), end(vector_source), begin(vector_destination));
auto stop = chrono::high_resolution_clock::now();
auto duration = stop - start;
auto nanoseconds = chrono::duration_cast<chrono::nanoseconds>(duration);
cout << "Duration of stl style copy: " << nanoseconds.count() << " ns." << endl;
}
}
输出是:
Duration of old style copy: 280 ns.
Duration of stl style copy: 931438 ns.
我原本预期这两种方法在优化构建下会生成几乎相同的指令。为什么复制 vector 的速度慢了 1000 倍以上?难道根本没有进行任何优化吗?
答案 0 :(得分:4)
这是编译器优化造成的。您应该实际使用 array_destination
中的内容。我将您的代码修改为
memcpy (array_destination, array_source, elems * sizeof(int));
// Pick an index derived from the process id and print that element, so the
// copied data is actually read after the memcpy.
unsigned ix = getpid () % elems;
// This print runs before `stop` is taken, so its I/O time is part of the
// measured duration.
cout << "ix#" << ix << " @" << array_destination [ix] << endl;
auto stop = chrono::high_resolution_clock::now();
auto duration = stop - start;
auto nanoseconds = chrono::duration_cast<chrono::nanoseconds>(duration);
当然,我还添加了相应的 #include(例如 getpid 所需的 <unistd.h>)
...
并得到了更合理的耗时(实际上其中也包含了 IO 的时间):
ix#4640 @4640
Duration of old style copy: 1925353 ns.
Duration of stl style copy: 910400 ns.
编译器把您的代码优化掉是合理的:array_destination
从未被真正使用过。
当然,您可以把输出语句移到计时区间之外。我在自己的机器上这样做了(我对 ideone 不太熟悉),得到:
Duration of old style copy: 675192 ns.
Duration of stl style copy: 228392 ns.
ix#1877 x=1877
顺便说一下,可以设想一个非常聪明的优化器,它甚至不会构造 array_source
和 array_destination,
并且会把输出语句优化为
cout << "ix#" << ix << " @" << ix << endl;
因为可以证明,对于所有索引 i
都有 array_destination[i] == i
,但目前的编译器还没有这么聪明。