Profiler说,这个功能占总时间的50%。你会如何优化它? 它将BMP颜色方案转换为YUV。谢谢!
更新:平台是ARMV6(为iPhone写作)
#define Y_FROM_RGB(_r_,_g_,_b_) ( ( 66 * _b_ + 129 * _g_ + 25 * _r_ + 128) >> 8) + 16
#define V_FROM_RGB(_r_,_g_,_b_) ( ( 112 * _b_ - 94 * _g_ - 18 * _r_ + 128) >> 10) + 128
#define U_FROM_RGB(_r_,_g_,_b_) ( ( -38 * _b_ - 74 * _g_ + 112 * _r_ + 128) >> 10) + 128
/*!
* \brief
* Converts 24 bit image to YCrCb image channels
*
* \param source
* Source 24bit image pointer
*
* \param source_width
* Source image width
*
* \param dest_Y
* destination image Y component pointer
*
* \param dest_scan_size_Y
* destination image Y component line size
*
* \param dest_U
* destination image U component pointer
*
* \param dest_scan_size_U
* destination image U component line size
*
* \param dest_V
* destination image V component pointer
*
* \param dest_scan_size_V
* destination image V component line size
*
* \param dest_width
* Destination image width = source_width
*
* \param dest_height
* Destination image height = source image height
*
* Convert 24 bit image (source) with width (source_width)
* to YCrCb image channels (dest_Y, dest_U, dest_V) with size (dest_width)x(dest_height), and line size
* (dest_scan_size_Y, dest_scan_size_U, dest_scan_size_V) (in bytes)
*
*/
void ImageConvert_24_YUV420P(unsigned char * source, int source_width,
unsigned char * dest_Y, int dest_scan_size_Y,
unsigned char * dest_U, int dest_scan_size_U,
unsigned char * dest_V, int dest_scan_size_V,
int dest_width, int dest_height)
{
int source_scan_size = source_width*3;
int half_width = dest_width/2;
//Y loop
for (int y = 0; y < dest_height/2; y ++)
{
//Start of line
unsigned char * source_scan = source;
unsigned char * source_scan_next = source+source_scan_size;
unsigned char * dest_scan_Y = dest_Y;
unsigned char * dest_scan_U = dest_U;
unsigned char * dest_scan_V = dest_V;
//Do all pixels
for (int x = 0; x < half_width; x++)
{
int R = source_scan[0];
int G = source_scan[1];
int B = source_scan[2];
//Y
int Y = Y_FROM_RGB(B, G, R);
*dest_scan_Y = Y;
source_scan += 3;
dest_scan_Y += 1;
int R1 = source_scan[0];
int G1 = source_scan[1];
int B1 = source_scan[2];
//Y
Y = Y_FROM_RGB(B1, G1, R1);
R += (R1 + source_scan_next[0] + source_scan_next[3]);
G += (G1 + source_scan_next[1] + source_scan_next[4]);
B += (B1 + source_scan_next[2] + source_scan_next[5]);
//YCrCb
*dest_scan_Y = Y;
*dest_scan_V = V_FROM_RGB(B, G, R);
*dest_scan_U = U_FROM_RGB(B, G, R);
source_scan += 3;
dest_scan_Y += 1;
dest_scan_U += 1;
dest_scan_V += 1;
source_scan_next += 6;
};
//scroll to next line
source += source_scan_size;
dest_Y += dest_scan_size_Y;
dest_U += dest_scan_size_U;
dest_V += dest_scan_size_V;
//Start of line
source_scan = source;
dest_scan_Y = dest_Y;
//Do all pixels
for (int x = 0; x < half_width; x ++)
{
int R = source_scan[0];
int G = source_scan[1];
int B = source_scan[2];
//Y
int Y = Y_FROM_RGB(B, G, R);
*dest_scan_Y = Y;
source_scan += 3;
dest_scan_Y += 1;
R = source_scan[0];
G = source_scan[1];
B = source_scan[2];
//Y
Y = Y_FROM_RGB(B, G, R);
*dest_scan_Y = Y;
source_scan += 3;
dest_scan_Y += 1;
};
source += source_scan_size;
dest_Y += dest_scan_size_Y;
};
};
答案 0 :(得分:5)
除非我遗漏了某些内容,否则以下代码似乎会在两个循环中重复出现,那么,为什么不经历这个循环呢?这可能需要对算法进行一些更改,但这会提高性能。
for (int x = 0; x < half_width; x ++)
{
int R = source_scan[0];
int G = source_scan[1];
int B = source_scan[2];
//Y
int Y = Y_FROM_RGB(B, G, R);
*dest_scan_Y = Y;
source_scan += 3;
dest_scan_Y += 1;
R = source_scan[0];
G = source_scan[1];
B = source_scan[2];
但是,在做任何事情之前,将两个内部循环移动到单独的函数中,然后运行你的探查器,看看你是否在一个函数上花费的时间多于另一个函数。
此功能中有三个循环,您不知道哪个部分实际上是您花费时间的地方。因此,在进行任何优化之前确定,否则您可能会发现您正在修复错误的部分。
答案 1 :(得分:0)
答案 2 :(得分:0)
假设他们指向的内存不重叠,您应该使用source
限定符声明dest_Y
,dest_U
,dest_V
和restrict
指针,告诉编译器这个并允许它更好地进行优化。