我想用 gcc 的内联汇编(inline asm)把一个 32 位值写入按下标索引的指针所指向的内存。
它应该实现与下面这行代码相同的效果:
this->screenPtr[ x + drawY * this->w ] = col;
这是我的一个尝试:
// Attempt to perform screenPtr[x + drawY * w] = col with extended asm.
// Operands: %0 = *this->screenPtr (output), %1 = x, %2 = drawY, %3 = this->w, %4 = col.
asm (
"movl %2, %%eax;" // eax = drawY
"imull %3;" // edx:eax = eax * w (the one-operand form also writes edx)
"addl %1, %%eax;" // eax += x
"movl %4, (%0 , %%eax, 4);" // store col at address %0 + eax * 4
// NOTE(review): %0 is declared as a write-only output bound to the value
// *this->screenPtr, not to the pointer this->screenPtr, so its register is not
// loaded with the buffer address before the store above uses it as a base.
: "=r" (*this->screenPtr)
: "r" (x), "r" (drawY), "r" (this->w), "r" (col)
// NOTE(review): edx is modified by imull but is not listed here, and there is no
// "memory" clobber although the asm writes to memory -- confirm against the GCC docs.
: "%eax"
);
应该怎么做?谢谢。
哦,我可以在这里编辑:)(有人要求提供更多代码)
class raycaster{
public:
int isInit;
long* screenPtr;
int w;
int h;
int mapWidth;
int mapHeight;
cam_t camera;
...
(xLoop)
(calculations)
for (int drawY = drawStart; drawY < drawEnd; drawY ++ ) {
int texY;
long col;
// Computes the flat texture index texX + texY * TEX_HEIGHT and stores it in texY.
// Operands: %0 = texY (output), %1 = drawY, %2 = h, %3 = lineHeight,
// %4 = TEX_HEIGHT, %5 = texX.
asm volatile(
// int d = (drawY * 2 - h + lineHeight) * 128;
"movl %1, %%eax;" // eax = drawY
"shll $1, %%eax;" // * 2
"subl %2, %%eax;" // - h
"addl %3, %%eax;" // + lineHeight
"movl $128, %%ebx;"
"imull %%ebx;" // * 128 (product in edx:eax)
// int texY = ((d * TEX_HEIGHT) / lineHeight) / 256;
"imull %4;" // * TEX_HEIGHT (product in edx:eax)
"movl %3, %%ebx;"
"idivl %%ebx;" // edx:eax / lineHeight, quotient in eax
// Clear the high half of the dividend for the next division.
// NOTE(review): zeroing edx instead of sign-extending eax gives a wrong
// dividend if eax is negative here -- confirm it never is.
"movl $0, %%edx;"
"movl $256, %%ebx;"
"idivl %%ebx;" // / 256
// [texX + texY * TEX_HEIGHT];
"movl %4, %%ebx;"
"imull %%ebx;" // * TEX_HEIGHT
"addl %5, %%eax;" // + texX
"movl %%eax, %0;" // texY = eax
: "=r" (texY)
: "r" (drawY), "m" (h), "r" (lineHeight), "r" (TEX_HEIGHT), "m" (texX)
: "%eax", "%ebx", "%edx"
);
col = this->textureMemory[textureNumber - 1][texY];
以上就是我计算 col 的方法;现在我想把两部分合并起来,让汇编代码直接把结果写入缓冲区。
/*
asm (
"movl %2, %%eax;"
"imull %3;"
"addl %1, %%eax;"
"movl %4, (%0 , %eax, 4);"
//"movl %%eax, %0;"
: "=r" (*screenPtr)
: "r" (x), "r" (drawY), "r" (this->w), "r" (col)
: "%eax"
);
*/
this->screenPtr[ x + drawY * this->w ] = col;
}
这些“额外”的代码可能反而更令人困惑,请重点关注第一个代码片段和问题本身。
编辑2:
如果有人看过评论就会知道,我们一直在寻找速度慢的原因。下面这段代码效果更好,达到了与汇编代码相同的速度。我所做的改动是去掉了临时变量,以避免内存访问。
// Plain C++ equivalent of the inline-asm version, written as one expression
// without temporaries. Despite its name, texY holds the complete flat index
// texX + (((d * TEX_HEIGHT) / lineHeight) / 256) * TEX_HEIGHT,
// where d = (drawY * 2 - h + lineHeight) * 128.
int texY = texX + (((drawY * 2 - h + lineHeight) * 128* TEX_HEIGHT) / lineHeight) / 256 * TEX_HEIGHT;
// Read the texel at that index from texture number textureNumber (1-based index into textureMemory).
long col = this->textureMemory[textureNumber - 1][texY];
// Write the texel to element x + drawY * w of the screen buffer.
this->screenPtr[ x + drawY * this->w ] = col;