在 MSVC v120 上使用 Eigen:已启用矢量化,但求值器(evaluator)的效率似乎不佳

时间:2019-01-16 18:10:40

标签: c++ visual-studio-2013 vectorization eigen eigen3

我正在将 Eigen 3.3.5 与 MSVC(Visual C++,VS2013)一起使用。

我已经阅读了很多有关 MSVC 和其他编译器下 Eigen 与矢量化的文章。

与编译器相关的选项:

  • SSE2(已确认生效:查看 Eigen 文档提供的测试函数的汇编,可以看到代码已被矢量化)
  • O2,快速代码
  • 内联(已测试各种选项)
  • 全局代码优化

我承认我是 MSVC 编译器家族的新手,它对模板代码的处理可能不如 gcc 或 clang 高效。

// Test function used to compare the code generated by MSVC and GCC.
// It computes the product t*u three times and stores the results in u, v
// and w. The first statement overwrites u, so the products assigned to v
// and w read the already-updated u.
// The EIGEN_ASM_COMMENT calls bracket the region of interest with marker
// strings (presumably emitted as comments in the assembly output, as the
// GCC listing suggests; verify against the Eigen headers).
void fooa(Vector3f& u, Vector3f& v, Vector3f& w, Affine3f & t){
  EIGEN_ASM_COMMENT("begin mul v*t");
  u = t*u;
  v = t*u;
  w = t*u;
  EIGEN_ASM_COMMENT("end mul v*t");
}

上述代码在 MSVC 下会产生对以下函数的调用:

Eigen::internal::evaluator<Eigen::Block<Eigen::Matrix<float,4,1,0,4,1>,3,1,0> >::evaluator<Eigen::Block<Eigen::Matrix<float,4,1,0,4,1>,3,1,0> >

和:

Eigen::DenseStorage<float,4,4,1,0>::DenseStorage<float,4,4,1,0> (013F222370h)  

而 gcc 不会产生这些调用。虽然这对性能没有明显影响,但我的应用程序大量使用此类操作。

这段代码是否只被部分矢量化了?这是模板问题,还是优化问题?最重要的是,我能否做些什么,让 MSVC 生成类似于下面 gcc 的输出?

MSVC:

        EIGEN_ASM_COMMENT("inizio prova mul v*t");
    u = t*u;
000000013F221431  test        al,0Fh  
000000013F221433  je          fooa+4Fh (013F22144Fh)  
000000013F221435  lea         rdx,[string L"c:\\users\\user\\docume"... (013F224550h)]  
000000013F22143C  lea         rcx,[string L"(internal::UIntPtr(a"... (013F2245F0h)]  
000000013F221443  mov         r8d,6Dh  
000000013F221449  call        qword ptr [__imp__wassert (013F224160h)]  
000000013F22144F  movss       xmm0,dword ptr [rbx]  
000000013F221453  movss       xmm4,dword ptr [__real@3f800000 (013F224990h)]  
000000013F22145B  lea         rax,[rbp+27h]  
000000013F22145F  movss       dword ptr [rbp-39h],xmm0  
000000013F221464  movss       xmm1,dword ptr [rbx+4]  
000000013F221469  movss       dword ptr [rbp-35h],xmm1  
000000013F22146E  movss       xmm3,dword ptr [rbx+8]  
000000013F221473  movss       dword ptr [rbp-2Dh],xmm4  
000000013F221478  movss       dword ptr [rbp-31h],xmm3  
000000013F22147D  test        al,0Fh  
000000013F22147F  je          fooa+0AFh (013F2214AFh)  
000000013F221481  lea         rdx,[string L"c:\\users\\user\\docume"... (013F224550h)]  
000000013F221488  lea         rcx,[string L"(internal::UIntPtr(a"... (013F2245F0h)]  
000000013F22148F  mov         r8d,6Dh  
000000013F221495  call        qword ptr [__imp__wassert (013F224160h)]  
000000013F22149B  movss       xmm4,dword ptr [rbp-2Dh]  
000000013F2214A0  movss       xmm3,dword ptr [rbp-31h]  
000000013F2214A5  movss       xmm1,dword ptr [rbp-35h]  
000000013F2214AA  movss       xmm0,dword ptr [rbp-39h]  
000000013F2214AF  movss       xmm2,xmm0  
000000013F2214B3  movaps      xmm0,xmm1  
000000013F2214B6  movaps      xmm1,xmm3  
    v = t*u;
000000013F2214B9  lea         rcx,[rbp-39h]  
    EIGEN_ASM_COMMENT("inizio prova mul v*t");
    u = t*u;
000000013F2214BD  shufps      xmm0,xmm0,0  
000000013F2214C1  shufps      xmm2,xmm2,0  
000000013F2214C5  shufps      xmm1,xmm1,0  
000000013F2214C9  mulps       xmm0,xmmword ptr [rdi+10h]  
000000013F2214CD  mulps       xmm1,xmmword ptr [rdi+20h]  
000000013F2214D1  mulps       xmm2,xmmword ptr [rdi]  
000000013F2214D4  addps       xmm2,xmm0  
000000013F2214D7  movss       xmm0,xmm4  
000000013F2214DB  addps       xmm2,xmm1  
000000013F2214DE  shufps      xmm0,xmm0,0  
000000013F2214E2  mulps       xmm0,xmmword ptr [rdi+30h]  
000000013F2214E6  addps       xmm2,xmm0  
000000013F2214E9  movaps      xmm0,xmm2  
000000013F2214EC  movaps      xmm1,xmm2  
000000013F2214EF  shufps      xmm0,xmm2,55h  
000000013F2214F3  shufps      xmm1,xmm2,0AAh  
    EIGEN_ASM_COMMENT("inizio prova mul v*t");
    u = t*u;
000000013F2214F7  movss       dword ptr [rbp-25h],xmm0  
000000013F2214FC  movss       dword ptr [rbp-21h],xmm1  
000000013F221501  movss       dword ptr [rbx],xmm2  
000000013F221505  movss       xmm0,dword ptr [rbp-25h]  
000000013F22150A  movss       dword ptr [rbx+4],xmm0  
000000013F22150F  movss       xmm1,dword ptr [rbp-21h]  
000000013F221514  movss       dword ptr [rbx+8],xmm1  
    v = t*u;
000000013F221519  call        Eigen::DenseStorage<float,4,4,1,0>::DenseStorage<float,4,4,1,0> (013F222370h)  
000000013F22151E  movss       xmm0,dword ptr [rbx]  
000000013F221522  lea         rcx,[rbp+27h]  
000000013F221526  movss       dword ptr [rbp-39h],xmm0  
000000013F22152B  movss       xmm1,dword ptr [rbx+4]  
000000013F221530  movss       dword ptr [rbp-35h],xmm1  
000000013F221535  movss       xmm0,dword ptr [rbx+8]  
000000013F22153A  mov         dword ptr [rbp-2Dh],3F800000h  
000000013F221541  movss       dword ptr [rbp-31h],xmm0  
000000013F221546  call        Eigen::DenseStorage<float,4,4,1,0>::DenseStorage<float,4,4,1,0> (013F222370h)  
000000013F22154B  lea         rax,[rbp-39h]  
000000013F22154F  lea         rdx,[rbp+37h]  
000000013F221553  mov         qword ptr [rbp-11h],rax  
000000013F221557  lea         rax,[rbp-39h]  
000000013F22155B  lea         rcx,[rbp-19h]  
000000013F22155F  xor         r8d,r8d  
000000013F221562  mov         qword ptr [rbp-19h],rdi  
000000013F221566  mov         qword ptr [rbp-9],rdi  
000000013F22156A  mov         qword ptr [rbp+7],rax  
000000013F22156E  mov         qword ptr [rbp+17h],4  
000000013F221576  call        Eigen::internal::product_evaluator<Eigen::Product<Eigen::Matrix<float,4,4,0,4,4>,Eigen::Matrix<float,4,1,0,4,1>,1>,3,Eigen::DenseShape,Eigen::DenseShape,float,float>::packet<16,__m128> (013F222420h)  
    w = t*u;
000000013F22157B  lea         rcx,[rbp-39h]  
    v = t*u;
000000013F22157F  movaps      xmm2,xmmword ptr [rax]  
000000013F221582  movaps      xmm0,xmm2  
000000013F221585  movaps      xmm1,xmm2  
000000013F221588  shufps      xmm0,xmm2,55h  
000000013F22158C  shufps      xmm1,xmm2,0AAh  
000000013F221590  movss       dword ptr [rbp-25h],xmm0  
000000013F221595  movss       dword ptr [rbp-21h],xmm1  
000000013F22159A  movss       dword ptr [r14],xmm2  
000000013F22159F  movss       xmm0,dword ptr [rbp-25h]  
000000013F2215A4  movss       dword ptr [r14+4],xmm0  
000000013F2215AA  movss       xmm1,dword ptr [rbp-21h]  
000000013F2215AF  movss       dword ptr [r14+8],xmm1  
    w = t*u;
000000013F2215B5  call        Eigen::DenseStorage<float,4,4,1,0>::DenseStorage<float,4,4,1,0> (013F222370h)  
000000013F2215BA  lea         rax,[rbp-39h]  
000000013F2215BE  xor         r14d,r14d  
000000013F2215C1  mov         qword ptr [rbp-19h],rax  
000000013F2215C5  lea         rax,[rbp-39h]  
000000013F2215C9  lea         rdx,[rbp-19h]  
000000013F2215CD  lea         rcx,[rbp+27h]  
000000013F2215D1  mov         qword ptr [rbp-1],r14  
000000013F2215D5  mov         qword ptr [rbp+0Fh],4  
000000013F2215DD  mov         qword ptr [rbp-9],rax  
000000013F2215E1  call        Eigen::internal::evaluator<Eigen::Block<Eigen::Matrix<float,4,1,0,4,1>,3,1,0> >::evaluator<Eigen::Block<Eigen::Matrix<float,4,1,0,4,1>,3,1,0> > (013F2223B0h)  
000000013F2215E6  mov         eax,dword ptr [rbx]  
000000013F2215E8  mov         rcx,qword ptr [rbp+27h]  
000000013F2215EC  mov         dword ptr [rcx],eax  
000000013F2215EE  mov         eax,dword ptr [rbx+4]  
000000013F2215F1  mov         dword ptr [rcx+4],eax  
000000013F2215F4  mov         eax,dword ptr [rbx+8]  
000000013F2215F7  mov         dword ptr [rcx+8],eax  
000000013F2215FA  mov         dword ptr [rbp-2Dh],3F800000h  
000000013F221601  lea         rcx,[rbp+37h]  
000000013F221605  call        Eigen::DenseStorage<float,4,4,1,0>::DenseStorage<float,4,4,1,0> (013F222370h)  
000000013F22160A  lea         rax,[rbp-39h]  
000000013F22160E  lea         rdx,[rbp+47h]  
000000013F221612  mov         qword ptr [rbp-11h],rax  
000000013F221616  lea         rax,[rbp-39h]  
000000013F22161A  lea         rcx,[rbp-19h]  
000000013F22161E  xor         r8d,r8d  
000000013F221621  mov         qword ptr [rbp-19h],rdi  
000000013F221625  mov         qword ptr [rbp-9],rdi  
000000013F221629  mov         qword ptr [rbp+7],rax  
000000013F22162D  mov         qword ptr [rbp+17h],4  
000000013F221635  call        Eigen::internal::product_evaluator<Eigen::Product<Eigen::Matrix<float,4,4,0,4,4>,Eigen::Matrix<float,4,1,0,4,1>,1>,3,Eigen::DenseShape,Eigen::DenseShape,float,float>::packet<16,__m128> (013F222420h)  
000000013F22163A  lea         rdx,[rbp-19h]  
000000013F22163E  lea         rcx,[rbp+27h]  
000000013F221642  mov         qword ptr [rbp-1],r14  
000000013F221646  movaps      xmm0,xmmword ptr [rax]  
000000013F221649  movaps      xmmword ptr [rbp+37h],xmm0  
000000013F22164D  lea         rax,[rbp+37h]  
000000013F221651  mov         qword ptr [rbp+0Fh],4  
000000013F221659  mov         qword ptr [rbp-19h],rax  
000000013F22165D  lea         rax,[rbp+37h]  
000000013F221661  mov         qword ptr [rbp-9],rax  
000000013F221665  call        Eigen::internal::evaluator<Eigen::Block<Eigen::Matrix<float,4,1,0,4,1>,3,1,0> >::evaluator<Eigen::Block<Eigen::Matrix<float,4,1,0,4,1>,3,1,0> > (013F2223B0h)  
000000013F22166A  mov         rax,qword ptr [rbp+27h]  
    EIGEN_ASM_COMMENT("fine prova mul v*t");
}
000000013F22166E  lea         r11,[rsp+0B0h]  
    w = t*u;
000000013F221676  movss       xmm2,dword ptr [rax]  
000000013F22167A  movss       dword ptr [rbp-29h],xmm2  
000000013F22167F  movss       xmm0,dword ptr [rax+4]  
000000013F221684  movss       dword ptr [rbp-25h],xmm0  
000000013F221689  movss       xmm1,dword ptr [rax+8]  
    EIGEN_ASM_COMMENT("fine prova mul v*t");
}
000000013F22168E  mov         rbx,qword ptr [r11+10h]  
000000013F221692  mov         rdi,qword ptr [r11+20h]  
000000013F221696  mov         r14,qword ptr [r11+28h]  
    w = t*u;
000000013F22169A  movss       dword ptr [rbp-21h],xmm1  
000000013F22169F  movss       dword ptr [rsi],xmm2  
000000013F2216A3  movss       xmm0,dword ptr [rbp-25h]  
000000013F2216A8  movss       dword ptr [rsi+4],xmm0  
000000013F2216AD  movss       xmm1,dword ptr [rbp-21h]  
000000013F2216B2  movss       dword ptr [rsi+8],xmm1  
    EIGEN_ASM_COMMENT("fine prova mul v*t");
}

GCC:

# 18 "main.cpp" 1
#inizio prova mul v*t
# 0 "" 2
#NO_APP
movss   4(%rdi), %xmm0
shufps  $0, %xmm0, %xmm0
movss   (%rdi), %xmm1
shufps  $0, %xmm1, %xmm1
movaps  (%rcx), %xmm2
mulps   16(%rcx), %xmm0
mulps   %xmm1, %xmm2
movaps  %xmm0, %xmm1
movss   8(%rdi), %xmm0
shufps  $0, %xmm0, %xmm0
addps   %xmm2, %xmm1
mulps   32(%rcx), %xmm0
addps   %xmm1, %xmm0
addps   48(%rcx), %xmm0
movaps  %xmm0, (%rsp)
movss   (%rsp), %xmm0
movss   4(%rsp), %xmm1
movss   8(%rsp), %xmm3
movss   %xmm0, (%rdi)
shufps  $0, %xmm0, %xmm0
movss   %xmm1, 4(%rdi)
shufps  $0, %xmm1, %xmm1
movss   %xmm3, 8(%rdi)
movaps  (%rcx), %xmm2
mulps   16(%rcx), %xmm1
mulps   %xmm0, %xmm2
movaps  %xmm3, %xmm0
shufps  $0, %xmm0, %xmm0
addps   %xmm2, %xmm1
mulps   32(%rcx), %xmm0
addps   %xmm1, %xmm0
addps   48(%rcx), %xmm0
movaps  %xmm0, (%rsp)
movss   (%rsp), %xmm0
movss   %xmm0, (%rsi)
movss   4(%rsp), %xmm0
movss   %xmm0, 4(%rsi)
movss   8(%rsp), %xmm0
movss   %xmm0, 8(%rsi)
movss   4(%rdi), %xmm0
shufps  $0, %xmm0, %xmm0
movss   (%rdi), %xmm1
shufps  $0, %xmm1, %xmm1
movaps  (%rcx), %xmm2
mulps   16(%rcx), %xmm0
mulps   %xmm1, %xmm2
movaps  %xmm0, %xmm1
movss   8(%rdi), %xmm0
shufps  $0, %xmm0, %xmm0
addps   %xmm2, %xmm1
mulps   32(%rcx), %xmm0
addps   %xmm1, %xmm0
addps   48(%rcx), %xmm0
movaps  %xmm0, (%rsp)
movss   (%rsp), %xmm0
movss   %xmm0, (%rdx)
movss   4(%rsp), %xmm0
movss   %xmm0, 4(%rdx)
movss   8(%rsp), %xmm0
movss   %xmm0, 8(%rdx)
#APP
# 22 "main.cpp" 1
#fine prova mul v*t
# 0 "" 2

0 个答案:

没有答案