使用ARM NEON将8位图像缩小为原来的1/4

时间:2014-08-11 00:33:28

标签: image image-processing arm simd neon

我想使用ARM Neon将8位灰度图像从1280x960调整到320x240。

作为一个例子,我已经实现了缩小2倍(从640x480到320x240)的版本:

/* Declared here because resizeline2() is defined after its first use. */
void resizeline2(uint8_t * __restrict src1, uint8_t * __restrict src2, uint8_t * __restrict dest);

/*
 * Downscale an 8-bit image by a factor of 2 in each dimension.
 *
 * src:  source image, 640 x 480, one byte per pixel, rows stored back to back.
 * dest: destination image, 320 x 240, same layout.
 *
 * Every destination row is produced by resizeline2() from two adjacent
 * source rows (rows 2*h and 2*h + 1).
 */
void divideimageby2(uint8_t * src, uint8_t * dest) {
    //src is 640 x 480
    //dst is 320 x 240
    int h;
    for (h = 0; h < 240; h++) {
        resizeline2(src + 640 * (h * 2 + 0), src + 640 * (h * 2 + 1), dest + 320 * h);
    }
}

void resizeline2(uint8_t * __restrict src1, uint8_t * __restrict src2, uint8_t * __restrict dest) {
    int w;
    for (w = 0; w < 640; w += 16) {
        uint16x8_t a = vpaddlq_u8(vld1q_u8(src1));
        uint16x8_t b = vpaddlq_u8(vld1q_u8(src2));
        uint16x8_t ab = vaddq_u16(a, b);
        vst1_u8(dest, vshrn_n_u16(ab, 2));
        src1 += 16;
        src2 += 16;
        dest += 8;
    }
}   

如果我想做类似的事情,在resizeline4中可以使用哪些NEON指令来聚合4行?

/* Declared here because resizeline4() is defined after its first use. */
void resizeline4(uint8_t * __restrict src1, uint8_t * __restrict src2, uint8_t * __restrict src3, uint8_t * __restrict src4, uint8_t * __restrict dest);

/*
 * Downscale an 8-bit image by a factor of 4 in each dimension.
 *
 * src:  source image, 1280 x 960, one byte per pixel, rows stored back to back.
 * dest: destination image, 320 x 240, same layout.
 *
 * Every destination row is produced by resizeline4() from four adjacent
 * source rows (rows 4*h .. 4*h + 3).
 */
void divideimageby4(uint8_t * src, uint8_t * dest) {
    //src is 1280 x 960
    //dst is 320 x 240
    int h;
    for (h = 0; h < 240; h++) {
        /* The source row stride is 1280 bytes, not 640 as in the 2x version. */
        resizeline4(src + 1280 * (h * 4 + 0), src + 1280 * (h * 4 + 1), src + 1280 * (h * 4 + 2), src + 1280 * (h * 4 + 3), dest + 320 * h);
    }
}

void resizeline4(uint8_t * __restrict src1, uint8_t * __restrict src2, uint8_t * __restrict src3, uint8_t * __restrict src4, uint8_t * __restrict dest) {
    int w;
    for (w = 0; w < 1280; w += 16) {
        //What to put here?
        src1 += 16;
        src2 += 16;
        src3 += 16;
        src4 += 16;
        dest += 4;
    }
}   

1 个答案:

答案 0 :(得分:2)

您应该将vpaddl与vpadal结合使用。

将32 * 4的矩阵加载到q寄存器line1a、line1b……line4b中:

vpaddl.u8 line1a,line1a

vpaddl.u8 line1b,line1b

vpadal.u8 line1a,line2a

vpadal.u8 line1b,line2b

vpadal.u8 line1a,line3a

vpadal.u8 line1b,line3b

vpadal.u8 line1a,line4a

vpadal.u8 line1b,line4b

vpadd.u16 d0,line1alow,line1ahigh

vpadd.u16 d1,line1blow,line1bhigh

vrshrn.u16 d0,q0,#4

vst1.8 {d0},[pDst]!