#include <Eigen/Dense>
#include <ctime>
#include <iostream>
using namespace std;
using namespace Eigen;
// Accumulates the outer product a * b into out with a single matrix-product
// expression.
//   a   - 20x1 column vector of std::complex<float>
//   b   - 1x2 row vector of real float
//   out - 20x2 std::complex<float> accumulator, updated in place (+=)
void slow(const Matrix<std::complex<float>, 20, 1> &a, const Matrix<float, 1, 2> &b, Matrix<std::complex<float>, 20, 2> &out)
{
// noalias() states that out does not overlap a or b, so the product may be
// accumulated directly into out instead of going through a temporary.
// NOTE(review): the product mixes complex and real scalar types; which Eigen
// kernel this expression selects is not visible here - confirm before relying
// on its speed.
out.noalias() += a * b;
}
void fast(const Matrix<std::complex<float>, 20, 1> &a, const Matrix<float, 1, 2> &b, Matrix<std::complex<float>, 20, 2> &out)
{
for (size_t i = 0; i < 2; ++i)
{
out.col(i).noalias() += a * b[i];
}
}
// Converts an interval between two clock() readings to milliseconds.
// Multiplying before dividing avoids the truncation (or division by zero) that
// the integer expression CLOCKS_PER_SEC / 1000 produces when CLOCKS_PER_SEC is
// not a multiple of 1000.
static double elapsedMs(std::clock_t begin, std::clock_t end)
{
    return 1000.0 * static_cast<double>(end - begin) / static_cast<double>(CLOCKS_PER_SEC);
}

// Benchmark driver: runs slow() and then fast() N times each on the same random
// inputs, printing the norm of the accumulated result and the elapsed time in
// milliseconds for each variant.
int main(int, const char**)
{
    const size_t N = 10000000;

    Matrix<std::complex<float>, 20, 2> out;
    Matrix<std::complex<float>, 20, 1> a;
    Matrix<float, 1, 2> b;
    a.setRandom();
    b.setRandom();

    // --- single-expression variant ---
    out.setZero();
    std::clock_t start = std::clock();
    for (size_t i = 0; i < N; ++i)
    {
        slow(a, b, out);
    }
    // Read the clock before computing/printing the norm so that only the
    // benchmarked loop is timed.
    std::clock_t stop = std::clock();
    // Printing the norm also makes the accumulated result observable, so the
    // loop cannot be discarded as dead code.
    cout << "Matrix norm: " << out.norm() << endl;
    cout << "Slow: " << elapsedMs(start, stop) << " ms" << endl;

    // --- column-by-column variant ---
    out.setZero();
    start = std::clock();
    for (size_t i = 0; i < N; ++i)
    {
        fast(a, b, out);
    }
    stop = std::clock();
    cout << "Matrix norm: " << out.norm() << endl;
    cout << "Fast: " << elapsedMs(start, stop) << " ms" << endl;
    return 0;
}
我使用 Visual Studio 2017 和 Eigen 3.3.7,并通过以下编译器标志编译了以上代码:
/O2 /fp:fast /arch:AVX2
这是程序的输出:
Matrix norm: 3.07615e+07
Slow: 6707 ms
Matrix norm: 3.07615e+07
Fast: 230 ms
函数 “fast” 和 “slow” 计算出的结果相同。为什么 “slow” 函数比 “fast” 函数慢这么多?