Question

我正在尝试对一段代码进行矢量化。思路是：我们有一个像素（__m128 in），如果它的任意一个分量大于或等于 upper，就用另一个像素（__m128 upper_color）替换整个像素。

未矢量化的（标量）代码：

/* Scalar reference: when any of the first three components reaches `upper`,
   all three output components are overwritten with the replacement color. */
if(inp[0] >= upper || inp[1] >= upper || inp[2] >= upper)
{
  outp[0] = upper_color[0];
  outp[1] = upper_color[1];
  outp[2] = upper_color[2];
}

到目前为止，我写出了下面的代码，但（我认为）它不会替换整个像素，而只会替换那些大于或等于 upper 的分量：

  /* Aligned load of the four-float input pixel. */
  const __m128 pixel = _mm_load_ps(in);
  /* Per-lane mask: all bits set in each lane where pixel >= upper, zero elsewhere. */
  const __m128 isoe = _mm_cmpge_ps(pixel, upper);
  /* Lane-wise select: a lane takes upper_color only where its OWN mask is set,
     so lanes below the threshold keep their input value. */
  __m128 result = _mm_or_ps(_mm_andnot_ps(isoe, pixel),
                            _mm_and_ps(isoe, upper_color));
  _mm_stream_ps(out, result);

假设upper = 1,1,1和upper_color = 1,0,0

第四个通道是 alpha，所以我不关心它。

结果：

IN:   0.5 0.3 0.7
OUT:  0.5 0.3 0.7 (Expected)
OUT:  0.5 0.3 0.7 (Received)

IN:   1.5 1.1 0.7
OUT:  1   0   0   (Expected)
OUT:  1   0   0.7 (Received)

也许有人可以帮助我？这有可能吗？

Answer 1

您需要计算水平OR。 SSE中没有水平OR指令，但可以使用2x UNPACK +垂直OR模拟此类操作。

const __m128 pixel = _mm_load_ps(in);
/* (p3, p2, p1, p0 ) */
__m128 isoe = _mm_cmpge_ps(pixel, upper);
/* (p3|p1, p2|p0, p3|p1, p2|p0) */
isoe = _mm_or_ps(_mm_unpacklo_ps(isoe, isoe), _mm_unpackhi_ps(isoe, isoe));
/* (p3|p2|p1|p0, p3|p2|p1|p0, p3|p2|p1|p0, p3|p2|p1|p0) */
isoe = _mm_or_ps(_mm_unpacklo_ps(isoe, isoe), _mm_unpackhi_ps(isoe, isoe));
__m128 result = _mm_or_ps(_mm_andnot_ps(isoe, pixel), _mm_and_ps(isoe, upper_color));
_mm_stream_ps(out, result);

Answer 2

您可以使用_mm_movemask_epi8进行横向OR。

#include <stdio.h>
#include <emmintrin.h>

/*
 * Print a four-float pixel after conditional replacement: when any of the
 * first three components of `ina` is >= 1.0, the whole pixel becomes
 * (1, 0, 0, 0); otherwise the pixel is printed unchanged.
 *
 * ina: pointer to four floats; no particular alignment is required.
 */
void foo(float ina[]) {
    const __m128 upper = _mm_set1_ps(1.0f);
    const __m128 upper_color = _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f);
    float out[4];

    /* Unaligned load: a plain float array is not guaranteed to be 16-byte aligned. */
    const __m128 pixel = _mm_loadu_ps(ina);
    const __m128 isoe = _mm_cmpge_ps(pixel, upper);

    /* movemask_epi8 yields one bit per byte; the low 12 bits belong to
       lanes 0..2, so masking with 0x0FFF keeps the fourth lane from
       triggering a replacement. */
    if (_mm_movemask_epi8(_mm_castps_si128(isoe)) & 0x0FFF) {
        _mm_storeu_ps(out, upper_color);
    } else {
        _mm_storeu_ps(out, pixel);
    }
    printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]);
}

/* Run foo on one pixel that stays below the threshold and one that exceeds it. */
int main(void) {
    /* 16-byte alignment keeps both arrays valid for aligned SSE loads
       such as _mm_load_ps. */
    _Alignas(16) float ina1[] = {0.5f, 0.3f, 0.7f, 0.0f}; // output 0.5 0.3 0.7 0.0
    _Alignas(16) float ina2[] = {0.5f, 1.1f, 0.7f, 0.0f}; // output 1.0 0.0 0.0 0.0
    foo(ina1);
    foo(ina2);
    return 0;
}

SSE：有条件地替换像素

2 个答案: