我正在尝试重新构建我的MJPEG解码代码,以便我可以以异构方式实现它。我开始寻找我认为可以从并行处理中受益的代码段。我从this链接获得了单线程CPU代码。
以下代码可以正常运行。请注意,为了更清楚地说明问题,我省略了代码的某些部分。
// Working version (excerpt): pass 1 unpacks every MCU, pass 2 runs
// iqzz/idct/upsampling/colour conversion per MCU and copies each block to the
// screen immediately after converting it.
//Original MCU blocks
MCU_main AllMCUs[nb_MCU_X * nb_MCU_Y];
// Shared 64-entry scratch buffer; AllMCUs[x].MCU is pointed at it below.
int32_t MCU[64];
//All RGB blocks
RGB_struct AllRGBs[nb_MCU_X * nb_MCU_Y];
//Counter for the main MCUs
uint32_t x;
x=0;
///////--------Unpack-blocks for 3 color components
//process 8x8 blocks horizontally
for (index_X = 0; index_X < nb_MCU_X; index_X++) {
//process 8x8 blocks vertically
for (index_Y = 0; index_Y < nb_MCU_Y; index_Y++) {
AllMCUs[x].block_id=x;
component_index = component_order[0];
AllMCUs[x].MCU= MCU;
// Unpack component 0 into the scratch buffer, then keep a private copy in MCU_Y.
unpack_block(movie, &scan_desc,0, AllMCUs[x].MCU);
for (int i=0; i < 64; i++)
{
AllMCUs[x].MCU_Y[i]= AllMCUs[x].MCU[i];
}
// Same for component 1 -> MCU_Cb.
unpack_block(movie, &scan_desc,1, AllMCUs[x].MCU);
for (int i=0; i < 64; i++)
{
AllMCUs[x].MCU_Cb[i]= AllMCUs[x].MCU[i];
}
// Same for component 2 -> MCU_Cr.
unpack_block(movie, &scan_desc,2, AllMCUs[x].MCU);
for (int i=0; i < 64; i++)
{
AllMCUs[x].MCU_Cr[i]= AllMCUs[x].MCU[i];
}
x+= 1;
}
}
///////-----------IQZZ, IDCT, Upsampling and Color-conversion---------///////
uint32_t y=0;
x=0;
for (index_X = 0; index_X < nb_MCU_X; index_X++) {
for (index_Y = 0; index_Y < nb_MCU_Y; index_Y++) {
// x and y are both advanced only inside this branch, i.e. only when the
// stored block id matches the expected sequence number.
if(AllMCUs[x].block_id == y){
// (arguments of the calls below were omitted by the author of the excerpt)
//iqzz, idct and upsampling for Y
iqzz_block();
idct();
upsampler();
//iqzz, idct and upsampling for Cb
iqzz_block();
idct();
upsampler();
//iqzz, idct and upsampling for Cr
iqzz_block();
idct();
upsampler();
AllRGBs[x].block_id= x;
//color-conversion
if(color && (SOF_section.n > 1)){
// NOTE(review): &AllRGBs[x].RGB_MCU is the address of the pointer member
// itself, not the buffer it points to, so the converter writes its pixels
// starting at that member's storage inside AllRGBs. No buffer is assigned
// to RGB_MCU anywhere in this excerpt -- confirm against the full source.
YCbCr_to_ARGB(YCbCr_MCU, &AllRGBs[x].RGB_MCU, max_ss_h, max_ss_v);
}
else{
// NOTE(review): this branch writes to RGB_MCU (declared outside the excerpt)
// while the copy below reads from &AllRGBs[x].RGB_MCU -- verify intent.
to_NB(YCbCr_MCU, RGB_MCU, max_ss_h, max_ss_v);
}
// The block is copied right away, from the same address the converter just
// wrote to, before any later iteration can write there again.
screen_cpyrect(index_Y * MCU_sy * max_ss_h, index_X * MCU_sx * max_ss_v, MCU_sy * max_ss_h, MCU_sx * max_ss_v, &AllRGBs[x].RGB_MCU);
x+=1;
y+=1;
}
}
}
正确的输出如下:
我的RGB_struct结构如下:
/*
 * One converted block: its sequence number and a pointer to its pixels.
 * NOTE(review): RGB_MCU is only a pointer; it presumably has to be pointed at
 * a buffer large enough for one converted MCU before use -- confirm at the
 * call sites.
 */
typedef struct{
    uint32_t block_id;   /* index of the block this entry belongs to */
    uint32_t *RGB_MCU;   /* 32-bit pixels of the block (terminating ';' was missing) */
}RGB_struct;
我的MCU_main结构(虽然不是立即相关)是:
/*
 * Per-MCU working data: a sequence number, a pointer to raw coefficients and
 * one 64-entry coefficient array per colour component.
 */
typedef struct {
    uint32_t block_id;      /* index of this MCU */
    /* NOTE(review): the decode loops in this post access a member named
     * `MCU`; confirm whether that is this field under another name. */
    int32_t *MCU_original;
    int32_t MCU_Cb[64];     /* coefficients of the Cb component */
    int32_t MCU_Cr[64];     /* coefficients of the Cr component */
    int32_t MCU_Y[64];      /* coefficients of the Y component */
} MCU_main;
screen_cpyrect
根据偏移值复制屏幕上的每个8x8块。它的代码如下(与this链接中的代码相同):
void screen_cpyrect(uint32_t x, uint32_t y, uint32_t w, uint32_t h, void *ptr)
{
void *dest_ptr;
void *src_ptr;
uint32_t line;
uint32_t w_internal = w, h_internal = h;
int w_length;
SDL_LockSurface(screen);
w_length = screen->pitch / (screen->format->BitsPerPixel / 8 );
#ifdef DEBUG
if ((y) > screen->h) {
printf("[%s] : block can't be copied, "
"not in the screen (too low)\n", __func__);
exit(1);
}
if ((x) > screen->w) {
printf("[%s] : block can't be copied, "
"not in the screen (right border)\n", __func__);
exit(1);
}
#endif
if ((x+w) > screen->w) {
w_internal = screen->w -x;
}
if ((y+h) > screen->h) {
h_internal = screen->h -y;
}
for(line = 0; line < h_internal ; line++)
{
// Positionning src and dest pointers
// _ src : must be placed at the beginning of line "line"
// _ dest : must be placed at the beginning
// of the corresponding block :
//(line offset + current line + position on the current line)
// We assume that RGB is 4 bytes
dest_ptr = (void*)((uint32_t *)(screen->pixels) +
((y+line)*w_length) + x);
src_ptr = (void*)((uint32_t *)ptr + ((line * w)));
memcpy(dest_ptr,src_ptr,w_internal * sizeof(uint32_t));
}
SDL_UnlockSurface(screen);
}
我希望函数screen_cpyrect
保留在CPU上。但是,在我可以正常工作的代码中,它与其余的JPEG函数耦合在一起。为了提高代码的并行度,我试图将它与其他JPEG函数分开。
以下代码无法生成正确的输出:
// Split version (excerpt, reported as producing distorted output): all MCUs
// are converted first, and only afterwards copied to the screen in a
// separate loop.
///////--------Unpack-blocks for 3 color components
//process 8x8 blocks horizontally
for (index_X = 0; index_X < nb_MCU_X; index_X++) {
//process 8x8 blocks vertically
for (index_Y = 0; index_Y < nb_MCU_Y; index_Y++) {
// NOTE(review): unlike the working version, this excerpt shows neither the
// block_id assignment nor the copies into MCU_Y/MCU_Cb/MCU_Cr between the
// calls -- presumably omitted for brevity; confirm.
//For Y
unpack_block(movie, &scan_desc,0, AllMCUs[x].MCU);
//For Cb
unpack_block(movie, &scan_desc,1, AllMCUs[x].MCU);
//For Cr
unpack_block(movie, &scan_desc,2, AllMCUs[x].MCU);
x+= 1;
}
}
///////-----------IQZZ, IDCT, Upsampling and Color-conversion---------///////
uint32_t y=0;
x=0;
for (index_X = 0; index_X < nb_MCU_X; index_X++) {
for (index_Y = 0; index_Y < nb_MCU_Y; index_Y++) {
// x and y advance together, only when the stored id matches.
if(AllMCUs[x].block_id == y){
//iqzz, idct and upsampling for Y
iqzz_block();
idct();
upsampler();
//iqzz, idct and upsampling for Cb
iqzz_block();
idct();
upsampler();
//iqzz, idct and upsampling for Cr
iqzz_block();
idct();
upsampler();
AllRGBs[x].block_id= x;
//color-conversion
if(color && (SOF_section.n > 1)){
// NOTE(review): the destination is the address of the pointer member, so
// the converter writes a whole MCU of pixels starting inside the AllRGBs
// array. The addresses printed later in this post are only 0x10 apart, so
// each conversion presumably overwrites the entries that follow it; by the
// time the copy loop below runs, earlier blocks would no longer hold their
// own pixels. Likely fix: give every entry its own pixel buffer and pass
// AllRGBs[x].RGB_MCU (without '&') here and to screen_cpyrect -- confirm.
YCbCr_to_ARGB(YCbCr_MCU, &AllRGBs[x].RGB_MCU, max_ss_h, max_ss_v);
}
else{
// NOTE(review): writes to RGB_MCU (declared outside the excerpt), not to
// the per-block entry that the copy loop reads -- verify intent.
to_NB(YCbCr_MCU, RGB_MCU, max_ss_h, max_ss_v);
}
x+=1;
y+=1;
}
}
}
//COPYING ALL THE PROCESSED BLOCKS TO THE SCREEN SEPARATELY
x=0;
for (index_X = 0; index_X < nb_MCU_X; index_X++) {
for (index_Y = 0; index_Y < nb_MCU_Y; index_Y++) {
// Reads each block back from the same address the converter was given.
screen_cpyrect(index_Y * MCU_sy * max_ss_h, index_X * MCU_sx * max_ss_v, MCU_sy * max_ss_h, MCU_sx * max_ss_v, &AllRGBs[x].RGB_MCU);
x+=1;
}
}
// A return value of 1 from screen_refresh() ends playback.
if (screen_refresh() == 1)
{
end_of_file = 1;
}
我得到一个完全扭曲的输出。处理帧但它们未正确显示。 不正确的输出如下所示:
颜色转换代码如下(从this链接复制):
/* Clamp a colour channel to the 0..255 range. */
static uint32_t ycbcr_saturate_channel(int value)
{
    if (value > 255) {
        return 255u;
    }
    if (value < 0) {
        return 0u;
    }
    return (uint32_t)value;
}

/*
 * Convert one MCU from planar YCbCr to packed 0x00RRGGBB pixels.
 *
 * YCbCr_MCU : the Y, Cb and Cr planes, in that order; each is read at
 *             indices 0 .. (8 * nb_MCU_V) * (8 * nb_MCU_H) - 1.
 * RGB_MCU   : destination buffer; must have room for
 *             (8 * nb_MCU_V) * (8 * nb_MCU_H) uint32_t pixels.
 * nb_MCU_H  : block width in units of 8 pixels.
 * nb_MCU_V  : block height in units of 8 pixels.
 *
 * The alpha byte of every output pixel is left at 0.
 */
void YCbCr_to_ARGB(uint8_t *YCbCr_MCU[3], uint32_t *RGB_MCU, uint32_t nb_MCU_H, uint32_t nb_MCU_V)
{
    const uint8_t *plane_y = YCbCr_MCU[0];
    const uint8_t *plane_cb = YCbCr_MCU[1];
    const uint8_t *plane_cr = YCbCr_MCU[2];
    const uint32_t width = 8 * nb_MCU_H;
    const uint32_t height = 8 * nb_MCU_V;

    /*
     * Counters are 32 bits wide: with 8-bit counters the read index wrapped
     * as soon as a block held more than 256 pixels.
     */
    for (uint32_t row = 0; row < height; row++) {
        for (uint32_t col = 0; col < width; col++) {
            const uint32_t index = row * width + col;
            const int luma = plane_y[index];
            const int cb = plane_cb[index] - 128;
            const int cr = plane_cr[index] - 128;

            /* JFIF conversion; the blue coefficient is 1.772 (was mistyped 1.7772). */
            const int R = (int)(cr * 1.402f + luma);
            const int B = (int)(cb * 1.772f + luma);
            const int G = (int)(luma - cb * 0.34414f - cr * 0.71414f);

            RGB_MCU[index] = (ycbcr_saturate_channel(R) << 16) |
                             (ycbcr_saturate_channel(G) << 8) |
                             ycbcr_saturate_channel(B);
        }
    }
}
我不明白为什么我修改的解码帧代码不能正常工作。我尝试以各种方式调试它,我在备用实现中找不到明显的问题。 AllRGBs
数组的所有值都正确存储。我打印出了值,它们是相同的。
RGB before cpy_rect:0x7fff8f868bf8 RGB before cpy_rect:0x7fff8f868c08 RGB before cpy_rect:0x7fff8f868c18 RGB before cpy_rect:0x7fff8f868c28.......
RGB after cpy_rect:0x7fff8f868bf8 RGB after cpy_rect:0x7fff8f868c08 RGB after cpy_rect:0x7fff8f868c18 RGB after cpy_rect:0x7fff8f868c28
您是否有任何想法为什么一个版本的代码提供正确的输出而不是另一个?
编辑:
为了测试我的代码,我编写了代码,以便在将每个8x8块复制到屏幕后立即显示它们。我注意到,在我正确的代码中,块被正确复制。但是,在我的错误代码中,8x8块未正确显示(正如您在我附上的照片中看到的那样)。
既然两个程序中的RGB值相同,为什么会出现这种情况?为什么当我把所有RGB块一次性复制到屏幕上时,得到的输出就不正确?
修改
在以我能想到的各种方式测试代码之后,我开始认为这个问题是由于void指针而引起的。
screen_cpyrect
的输入参数之一是void指针:
/* Prototype of the block-copy routine; ptr is an untyped pointer to the source pixels. */
extern void screen_cpyrect(uint32_t x, uint32_t y, uint32_t w, uint32_t h, void *ptr);
RGB_MCU
被用作ptr
位置的函数参数。但是,ptr
是可以指向任意类型的通用(void)指针,而RGB_MCU
是指向类型uint32_t
的指针,如此声明中所示:
/*
 * One converted block: its sequence number and a pointer to its pixels.
 * NOTE(review): RGB_MCU is only a pointer; it presumably has to be pointed at
 * a buffer large enough for one converted MCU before use -- confirm at the
 * call sites.
 */
typedef struct{
    uint32_t block_id;   /* index of the block this entry belongs to */
    uint32_t *RGB_MCU;   /* 32-bit pixels of the block (terminating ';' was missing) */
}RGB_struct;
我在this回答的评论中读到,当错误地使用void指针时,它可能导致程序输出错误或者只是崩溃。
如何确认我遇到的问题是由void指针引起的?