Question

#include <Eigen/Dense>
#include <ctime>
#include <iostream>

using namespace std;
using namespace Eigen;

// Accumulates the product a * b into out using a single Eigen matrix-product
// expression.
//
// a   : 20x1 column of std::complex<float>.
// b   : 1x2 row of float, so a * b is a 20x2 outer product that mixes complex
//       and real scalars.
// out : 20x2 std::complex<float> accumulator; it is updated in place with +=
//       and is not cleared here.
//
// In the measurements quoted alongside this code, this is the variant that was
// reported as far slower than fast().
void slow(const Matrix<std::complex<float>, 20, 1> &a, const Matrix<float, 1, 2> &b, Matrix<std::complex<float>, 20, 2> &out)
{
    // noalias() tells Eigen that out does not overlap the operands, so the
    // product can be accumulated into out without an intermediate temporary.
    out.noalias() += a * b;
}

// Performs the same accumulation as slow(), but one output column at a time:
// column i of out is incremented by the vector a scaled by the real
// coefficient b[i].
//
// a   : 20x1 column of std::complex<float>.
// b   : 1x2 row of float; each coefficient scales a for one output column.
// out : 20x2 std::complex<float> accumulator; it is updated in place with +=
//       and is not cleared here.
void fast(const Matrix<std::complex<float>, 20, 1> &a, const Matrix<float, 1, 2> &b, Matrix<std::complex<float>, 20, 2> &out)
{
    // The bound 2 is the number of columns of b (and of out).
    for (size_t i = 0; i < 2; ++i)
    {
        // Vector-times-scalar rather than a matrix product: b[i] is a single float.
        out.col(i).noalias() += a * b[i];
    }
}

// Benchmark driver: applies slow() and then fast() N times each to the same
// random inputs, and prints the norm of the accumulated result followed by the
// elapsed time in milliseconds for each variant.
//
// The clock is read immediately after each loop, so the reported time covers
// only the N calls and not the norm computation or the console output.
// Printing the norm also keeps the accumulated result observable, so the
// loops cannot be discarded as dead code.
//
// Returns 0.
int main(int, const char**)
{
    Matrix<std::complex<float>, 20, 2> out;
    Matrix<std::complex<float>, 20, 1> a;
    Matrix<float, 1, 2> b;
    a.setRandom();
    b.setRandom();

    const size_t N = 10000000;

    // --- slow(): single matrix-product expression ---
    out.setZero();
    clock_t start = clock();
    for (size_t i = 0; i < N; ++i)
    {
        slow(a, b, out);
    }
    const clock_t slowTicks = clock() - start;
    cout << "Matrix norm: " << out.norm() << endl;
    // Convert ticks to milliseconds in floating point; dividing
    // CLOCKS_PER_SEC by 1000 as integers would truncate on platforms where it
    // is not a multiple of 1000.
    cout << "Slow: " << 1000.0 * slowTicks / CLOCKS_PER_SEC << " ms" << endl;

    // --- fast(): column-by-column scaling ---
    out.setZero();
    start = clock();
    for (size_t i = 0; i < N; ++i)
    {
        fast(a, b, out);
    }
    const clock_t fastTicks = clock() - start;
    cout << "Matrix norm: " << out.norm() << endl;
    cout << "Fast: " << 1000.0 * fastTicks / CLOCKS_PER_SEC << " ms" << endl;

    return 0;
}

我使用 Visual Studio 2017 和 Eigen 3.3.7，并以下列编译器标志编译了上述代码：

/O2 /fp:fast /arch:AVX2

这是程序的输出：

Matrix norm: 3.07615e+07
Slow: 6707 ms
Matrix norm: 3.07615e+07
Fast: 230 ms

函数 “fast” 和 “slow” 计算出相同的结果。为什么函数 “slow” 比函数 “fast” 慢这么多？

为什么逐列计算矩阵会比直接使用矩阵乘积更快？

0 个答案: