/*
 * Asker's partial SSE attempt: steps through b1.c and b2.c four floats at a
 * time and computes their element-wise differences.
 *
 * NOTE(review): the function is incomplete as written -- v_b3 is overwritten
 * on every iteration and never accumulated, and there is no return statement
 * even though the return type is float.
 * The definition of P is not visible here; the code only relies on a member
 * `c` that can be used as a pointer to float.
 */
float myfunction ( P b1, P b2, int dimention )
{
__m128 v_b1,v_b2,v_b3;
/* Element offset into b1.c / b2.c; advanced by 4 at the end of each iteration. */
int j=0;
/* Runs dimention/4 times (integer division), so a trailing dimention % 4
 * elements are never visited. */
for (int i=0; i<dimention/4; i++) {
/* _mm_load_ps is the aligned-load intrinsic; assumes b1.c + j and b2.c + j
 * are 16-byte aligned -- TODO confirm against P's definition. */
v_b1=_mm_load_ps(b1.c +j);
v_b2=_mm_load_ps(b2.c +j);
/* Four differences b1.c[j..j+3] - b2.c[j..j+3]; the result is not used afterwards. */
v_b3=_mm_sub_ps(v_b1,v_b2);
j+=4;
}
}
我真正想做的是:
/* Scalar reference behaviour: add every element-wise difference
 * b1.c[i] - b2.c[i] for i in [0, dimention) into `result`.
 * `result` is not declared in this fragment. */
for (int i=0; i<dimention; i++ ) {
result += b1.c[i] - b2.c[i];
}
然后返回 result。你能帮帮我吗?
答案 0 :(得分:2)
我假设您是想把所有差值累加起来,然后将总和作为函数结果返回:
#include <pmmintrin.h> // SSE3
/**
 * Sum of the element-wise differences b1.c[i] - b2.c[i] for i in [0, dimention).
 *
 * Full groups of four floats are processed with SSE; the remaining
 * dimention % 4 elements are added by a scalar loop, so no element at or
 * beyond index `dimention` is ever read.
 *
 * @param b1        first operand; b1.c is read as an array of float
 * @param b2        second operand; b2.c is read as an array of float
 * @param dimention number of elements to process; values <= 0 yield 0.0f
 * @return          the sum of all differences
 *
 * Requires SSE3 (<pmmintrin.h>) for _mm_hadd_ps.
 */
float myfunction ( P b1, P b2, int dimention )
{
    __m128 v_sum = _mm_setzero_ps();                 // four running partial sums
    // Largest multiple of 4 not exceeding dimention (rounds toward zero, so it
    // cannot overflow and is <= 0 for non-positive dimention).
    const int vec_end = dimention - (dimention % 4);
    int j = 0;
    float f;

    for (j = 0; j < vec_end; j += 4)
    {
        // Unaligned loads: the definition of P is not visible here, so 16-byte
        // alignment of b1.c / b2.c cannot be assumed. They are also valid on
        // aligned data.
        const __m128 v_b1 = _mm_loadu_ps(b1.c + j);  // load 4 floats from b1.c[j]
        const __m128 v_b2 = _mm_loadu_ps(b2.c + j);  // load 4 floats from b2.c[j]
        const __m128 v_diff = _mm_sub_ps(v_b1, v_b2); // calc 4 differences
        v_sum = _mm_add_ps(v_sum, v_diff);            // accumulate 4 differences
    }

    v_sum = _mm_hadd_ps(v_sum, v_sum);               // sum horizontally
    v_sum = _mm_hadd_ps(v_sum, v_sum);               // (needed twice to sum all 4 elements)
    _mm_store_ss(&f, v_sum);                         // extract sum of the vectorised part

    // Scalar tail: the last dimention % 4 elements that do not fill a vector.
    for (; j < dimention; j++)
    {
        f += b1.c[j] - b2.c[j];
    }

    return f;                                        // return sum
}
如果这不是您要执行的操作,请更新您的问题并提供更多详细信息,我会相应更新代码。