我分别在不启用矢量化(-O2)和启用矢量化(-O3 -march=native)的情况下编译以下代码,并针对三种不同的矢量长度(通过取消注释相应的 #define SIZE 来选择)比较两者的运行时间。
对于长度分别为 10000、100000 和 1000000 的矢量,得到的结果依次为 29::9、247::145 和 4866::4884。
#include <iostream>
#include <random>
#include<chrono>
#include<cmath>
using namespace std;
using namespace std::chrono;
//#define SIZE (10000) // 29::9
//#define SIZE (100000) // 247::145
#define SIZE (1000000) // 4866::4884
// Stores the doubled value of each of the SIZE elements of v2 into the
// corresponding element of v1.
void vector_op_2(int * __restrict__ v1, int * __restrict__ v2) {
    unsigned idx = 0;
    while (idx < SIZE) {
        v1[idx] = v2[idx] * 2;
        ++idx;
    }
}
// Benchmark driver: fills v with 0..SIZE-1, calls vector_op_2(w, v) 5000 times,
// prints the elapsed time in milliseconds, then checks that w[i] == 2 * v[i]
// for every element (throwing 1 on the first mismatch).
int main() {
    int* v = new int[SIZE];
    int* w = new int[SIZE];
    for (int i = 0; i < SIZE; i++) {
        v[i] = i;
    }

    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by adjustments to the system clock while the benchmark is running.
    const auto start = std::chrono::steady_clock::now();
    for (int k = 0; k < 5000; k++) {
        vector_op_2(w, v);
    }
    const auto end = std::chrono::steady_clock::now();
    std::cout << "Time "
              << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
              << std::endl;

    // Both sides are ints, so an exact comparison is sufficient.
    for (int i = 0; i < SIZE; i++) {
        if (w[i] != 2 * v[i]) {
            throw 1;
        }
    }

    // Memory obtained with new[] must be released with delete[]; both buffers
    // are freed.
    delete[] v;
    delete[] w;
    return 0;
}
[缩短]
// Returns the sum of the first SIZE elements of v.
// `noexcept` replaces the dynamic exception specification `throw()`, which is
// deprecated and was removed in C++20; in C++17 the two are equivalent.
long vector_op_1(int v[SIZE]) noexcept
{
    long s = 0;
    for (unsigned i = 0; i < SIZE; i++) s += v[i];
    return s;
}
[...我在 Ubuntu 16.04 上使用 g++ 7 ...]
[...对于长度为 1000 的短矢量,我得到了 6:1 的比例! ...]