我的任务是将 SIMD SSE 指令转换为等效的 C 代码。原始代码如下(I1_block_addr 和 I2_block_addr 是 uint8_t 指针,desc_offset_1 是 32 位整数):
// Load 16 bytes from each block at the same byte offset.
// _mm_load_si128 is the aligned 128-bit load, so both addresses are expected to be 16-byte aligned.
__m128i xmm1 = _mm_load_si128((__m128i*)(I1_block_addr+desc_offset_1));
__m128i xmm6 = _mm_load_si128((__m128i*)(I2_block_addr+desc_offset_1));
// Sum of absolute differences of the bytes, treated as unsigned 8-bit values:
// bytes 0..7 are summed into the low 16 bits of the lower 64-bit lane, bytes 8..15 into
// the low 16 bits of the upper 64-bit lane; all remaining bits of the result are zero.
xmm6 = _mm_sad_epu8(xmm1,xmm6);
我做的是
// Scalar emulation attempt of the two 16-byte loads and the _mm_sad_epu8 shown above.
char *c1;
// NOTE(review): plain char may be signed (implementation-defined). If it is, bytes >= 128
// read back as negative values and the differences below no longer match an unsigned SAD.
char xmm1[16],xmm6[16];
// NOTE(review): this declaration is missing its terminating ';'. Only elements 0..7 are written below.
short xmm6s[16]
// load 16 bytes from each block
c1=( char* )( I1_block_addr + desc_offset_1 );
memcpy( xmm1 , c1 , 16 );
c1=( char* )( I2_block_addr + desc_offset_1 );
memcpy( xmm6 , c1 , 16 );
// Sum of absolute differences over bytes 0..7, stored in element 0.
xmm6s[0] = (short) ( abs(xmm1[0]-xmm6[0]) + abs(xmm1[1]-xmm6[1]) + abs(xmm1[2]-xmm6[2]) + abs(xmm1[3]-xmm6[3]) + abs(xmm1[4]-xmm6[4]) + abs(xmm1[5]-xmm6[5]) +
abs(xmm1[6]-xmm6[6]) + abs(xmm1[7]-xmm6[7]) );
// Elements 1..3 are cleared.
xmm6s[1] = 0; xmm6s[2] = 0; xmm6s[3] = 0;
// Sum of absolute differences over bytes 8..15, stored in element 4.
xmm6s[4] = (short) ( abs(xmm1[8]-xmm6[8]) + abs(xmm1[9]-xmm6[9]) + abs(xmm1[10]-xmm6[10]) + abs(xmm1[11]-xmm6[11]) + abs(xmm1[12]-xmm6[12]) +
abs(xmm1[13]-xmm6[13]) + abs(xmm1[14]-xmm6[14]) + abs(xmm1[15]-xmm6[15]) );
// Elements 5..7 are cleared.
xmm6s[5] = 0; xmm6s[6] = 0; xmm6s[7] = 0;
我没有收到任何错误,但图像质量却下降了。请告诉我代码的哪一部分是错的以及如何纠正。
答案 0 :(得分:1)
代码看起来基本没有问题——我认为你只需要把 char / short 类型改为相应的无符号类型:
#include <stdint.h>
uint8_t *c1;
uint8_t xmm1[16], xmm6[16];
uint16_t xmm6s[8];
c1 = I1_block_addr + desc_offset_1;
memcpy(xmm1, c1, 16);
c1 = I2_block_addr + desc_offset_1;
memcpy(xmm6, c1, 16);
xmm6s[0] = (uint16_t) ( abs(xmm1[0]-xmm6[0]) + abs(xmm1[1]-xmm6[1]) + abs(xmm1[2]-xmm6[2]) + abs(xmm1[3]-xmm6[3]) +
abs(xmm1[4]-xmm6[4]) + abs(xmm1[5]-xmm6[5]) + abs(xmm1[6]-xmm6[6]) + abs(xmm1[7]-xmm6[7]) );
xmm6s[1] = xmm6s[2] = xmm6s[3] = 0;
xmm6s[4] = (uint16_t) ( abs(xmm1[8]-xmm6[8]) + abs(xmm1[9]-xmm6[9]) + abs(xmm1[10]-xmm6[10]) + abs(xmm1[11]-xmm6[11]) +
abs(xmm1[12]-xmm6[12]) + abs(xmm1[13]-xmm6[13]) + abs(xmm1[14]-xmm6[14]) + abs(xmm1[15]-xmm6[15]) );
xmm6s[5] = xmm6s[6] = xmm6s[7] = 0;