下一个代码(使用-O3 -ffast-math):
#include <cmath>
float a[4];
void sin1() {
for(unsigned i = 0; i < 4; i++) a[i] = sinf(a[i]);
}
编译sinf的矢量化版本(_ZGVbN4v_sinf):
sin1():
sub rsp, 8
movaps xmm0, XMMWORD PTR a[rip]
call _ZGVbN4v_sinf
movaps XMMWORD PTR a[rip], xmm0
add rsp, 8
ret
但是当我使用c ++版本的sinf(std :: sin)时,没有矢量化发生:
void sin2() {
for(unsigned i = 0; i < 4; i++) a[i] = std::sin(a[i]);
}
sin2():
sub rsp, 8
movss xmm0, DWORD PTR a[rip]
call sinf
movss DWORD PTR a[rip], xmm0
movss xmm0, DWORD PTR a[rip+4]
call sinf
movss DWORD PTR a[rip+4], xmm0
movss xmm0, DWORD PTR a[rip+8]
call sinf
movss DWORD PTR a[rip+8], xmm0
movss xmm0, DWORD PTR a[rip+12]
call sinf
movss DWORD PTR a[rip+12], xmm0
add rsp, 8
ret