我正在尝试使用SSE优化来编码图像分段代码。我有一个奇怪的结果。
这是我的代码:
void binaire_sse(unsigned int * img, long h,long l, long seuil ,unsigned int * out)
{
__m128i vThreshold = _mm_set1_epi8(seuil);
int i, j;
for (i = 0; i < h; ++i)
{
const __m128i * p_in = (__m128i *)&img[i * l];
uint16_t * p_out = (uint16_t *)&out[i * l / CHAR_BIT];
for (j = 0; j < l; j += 16)
{
__m128i v = _mm_load_si128(p_in);
uint16_t b;
v = _mm_add_epi8(v, vThreshold);
b = _mm_movemask_epi8(v);
*p_out = b;
p_in++;
p_out++;
}
}
}