在使用 Eigen3 寻找最快的矩阵-矩阵乘法方式(并希望能从 SIMD 支持中获益)时,我写了以下测试:
#include <iostream>
#include <Eigen/Dense>
#include <ctime>
using namespace Eigen;
using namespace std;
// Edge length of the square matrices being benchmarked (an odd number).
const int test_size= 13;
// test_size rounded up by one. With double entries, 14 elements per column
// occupy 112 bytes, a multiple of 16 -- presumably what "16b" refers to.
const int test_size_16b= test_size+1;
// Dynamic-size matrix whose 5th/6th template arguments cap the rows/columns
// at test_size_16b at compile time.
typedef Matrix<double, Dynamic, Dynamic, ColMajor, test_size_16b, test_size_16b> TestMatrix_dyn16b_t;
// Fully dynamic matrix with no compile-time bound on its dimensions.
typedef Matrix<double, Dynamic, Dynamic> TestMatrix_dynalloc_t;
// Fixed-size test_size x test_size matrix.
typedef Matrix<double, test_size, test_size> TestMatrix_t;
// Fixed-size test_size_16b x test_size_16b matrix.
typedef Matrix<double, test_size_16b, test_size_16b> TestMatrix_fix16b_t;
/// Benchmarks `n` products of two random `m_size` x `m_size` matrices of type
/// `MatrixT` and prints the accumulated clock() time spent in the products.
///
/// Only the statement `m3.noalias() = m1 * m2` sits between the two clock()
/// calls; creating the random operands is outside the measured region.
///
/// @tparam MatrixT Matrix type under test. Deliberately not called
///                 TestMatrix_t, so it does not shadow the file-level typedef
///                 of that name.
/// @param msg      Label inserted into the printed result line.
/// @param m_size   Row and column count passed to MatrixT::Random(). Element
///                 (1, 1) of the result is read, so this must be at least 2.
/// @param n        Number of products to compute and time.
template<typename MatrixT> EIGEN_DONT_INLINE void test(const char * msg, int m_size= test_size, int n= 10000) {
    double s = 0.0;
    std::clock_t elapsed = 0;
    MatrixT m3;
    for (int i = 0; i < n; i++) {
        // Fresh random operands for every iteration (not timed).
        MatrixT m1 = MatrixT::Random(m_size, m_size);
        MatrixT m2 = MatrixT::Random(m_size, m_size);
        std::clock_t begin = std::clock();
        m3.noalias() = m1 * m2;
        std::clock_t end = std::clock();
        elapsed += end - begin;
        // Consume one entry of every product so the result feeds into `s`.
        s += m3(1, 1);
    }
    // `s` is otherwise never read; without an observable use the optimizer is
    // free to discard the accumulation and, with it, the products being
    // timed. A volatile store must be emitted, which keeps the chain alive.
    volatile double sink = s;
    (void)sink;
    double elapsed_secs = double(elapsed) / CLOCKS_PER_SEC;
    std::cout << "Elapsed time " << msg << ": " << elapsed_secs << " size " << m3.cols() << ", " << m3.rows() << std::endl;
}
// Entry point: reports whether Eigen was built with vectorization enabled,
// then runs the multiplication benchmark once per matrix flavour.
int main() {
#ifdef EIGEN_VECTORIZE
    std::cout << "EIGEN_VECTORIZE on " << std::endl;
#endif
    // Fixed-size, odd dimension.
    test<TestMatrix_t>("normal ");
    // Dynamic size with compile-time maximum dimensions.
    test<TestMatrix_dyn16b_t>("dyn 16b ");
    // Dynamic size without compile-time bounds.
    test<TestMatrix_dynalloc_t>("dyn alloc");
    // Fixed-size with the dimension rounded up by one; Random() is given the
    // matching size.
    test<TestMatrix_fix16b_t>("fix 16b ", test_size_16b);
    return 0;
}
使用 `g++ -msse3 -O2 -DEIGEN_DONT_PARALLELIZE test.cpp` 编译,
并在 Athlon II X2 255 上运行。结果让我感到惊讶:
EIGEN_VECTORIZE on
Elapsed time normal : 0.019193 size 13, 13
Elapsed time dyn 16b : 0.025226 size 13, 13
Elapsed time dyn alloc: 0.018648 size 13, 13
Elapsed time fix 16b : 0.018221 size 14, 14
将 `test_size` 换成其他奇数也能得到类似的结果。令我困惑的是:
我的问题很简短:为什么 `Matrix<double, Dynamic, Dynamic, ColMajor, test_size_16b, test_size_16b>` 这么慢?
你能证实我的观察结果,甚至给出解释吗?
答案 0 :(得分:1)
常见问题解答(FAQ)中的说法已过时。从 Eigen 3.3 版本开始,未对齐的向量和矩阵也会被矢量化。
至于为什么 `Matrix<double, Dynamic, Dynamic, ColMajor, test_size_16b, test_size_16b>` 较慢,
这只是在编译期选择首选矩阵乘积实现时存在的一个缺陷。该修复将包含在 Eigen 3.3.1 中。