我用SSE2编写了一个计算两个大型浮点向量点积的函数。但后来我发现结果并不正确,尽管差异很小。下面的例子演示了这种奇怪的行为:
// Demonstration: compute the sum of squares of a float array (the dot product
// of the array with itself) once with SSE intrinsics and once with a scalar
// loop, then print the difference between the two results.

// Seed the C random generator from the current time, so every run uses
// different data.
srand(static_cast<unsigned int>(time(NULL)));
const int size = 1000;
float array[size];
int i = 0;
// Fill with values 0.00 .. 9.99. `100.` is a double literal, so the division
// is carried out in double and the quotient is narrowed to float on assignment.
for (; i < size; ++i)
array[i] = rand() % 1000 / 100.;
// `ps` is scratch space for the four lanes of the SSE accumulator.
float sum = 0.0f, ps[4];
// Four-lane accumulator, all lanes start at 0.0f.
__m128 p = _mm_setzero_ps();
// Vector pass: consume four floats per iteration. Lane j of `p` accumulates
// array[j]^2 + array[j+4]^2 + array[j+8]^2 + ..., i.e. four independent
// partial sums are built side by side.
for (i = 0; i < size; i += 4)
{
// Unaligned load: `array` is not declared with 16-byte alignment.
__m128 p0 = _mm_loadu_ps(array + i);
p = _mm_add_ps(p, _mm_mul_ps(p0, p0));
}
// Holds only when `size` is a multiple of 4 (true for 1000). NOTE(review): if
// it were not, the final load above would already have read past the end of
// `array` before this assert gets a chance to fire.
assert(i == size);
// Horizontal reduction: spill the four partial sums and add them left to right.
_mm_storeu_ps(ps, p);
sum = ps[0] + ps[1] + ps[2] + ps[3];
// Scalar pass: subtract every square from the vector result, one element at a
// time in index order. Mathematically this brings `sum` back to exactly 0, but
// float addition rounds after every operation and is not associative: the
// vector pass groups the terms as four interleaved partial sums, while this
// loop processes them strictly sequentially. The rounding errors of the two
// orderings differ, so the residual is generally a small nonzero value.
// NOTE(review): the exact residual may also depend on compiler flags (e.g.
// whether the scalar expression is evaluated in extended precision) — confirm
// for the toolchain in use.
for (i = 0; i < size; ++i)
sum -= array[i] * array[i];
// Print the residual between the vector and scalar computations.
std::cout << sum << std::endl;
return 0;
为什么会这样?