修改
我从memcmp切换到家庭酿造的13字节比较功能,而自制软件没有额外的指令。所以我只能猜测额外的程序集只是优化器中的一个缺陷。
if (!EQ13(&ti, &m_ti)) { // in 2014, memcmp was not being optimzied here
000007FEF91B2CFE mov rdx,qword ptr [rsp]
000007FEF91B2D02 movzx eax,byte ptr [rsp+0Ch]
000007FEF91B2D07 mov ecx,dword ptr [rsp+8]
000007FEF91B2D0B cmp rdx,qword ptr [r10+28h]
000007FEF91B2D0F jne TSccIter::SetTi+9Dh (7FEF91B2D1Dh)
000007FEF91B2D11 cmp ecx,dword ptr [r10+30h]
000007FEF91B2D15 jne TSccIter::SetTi+9Dh (7FEF91B2D1Dh)
000007FEF91B2D17 cmp al,byte ptr [r10+34h]
000007FEF91B2D1B je TSccIter::SetTi+0B1h (7FEF91B2D31h)
我的自制软件在这种情况下并不完美,因为它在开始时会执行3个mov,即使它不太可能检查过第一个mov。我需要在那部分工作。
原始问题
这是来自msvc 2010的asm代码,展示了它如何优化一个小的,固定大小的memcmp(在这种情况下,13个字节)。我在代码中经常看到这种类型的优化,但从未使用过去的6行。任何人都可以告诉我为什么最后6行组装在那里? TransferItem是13个字节,因此解释了QWORD,DWORD,然后是BYTE cmps。
struct TransferItem {
char m_szCxrMkt1[3];
char m_szCxrOp1[3];
char m_chDelimiter;
char m_szCxrMkt2[3];
char m_szCxrOp2[3];
};
...
if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
2B8E lea rax,[rsp]
2B92 mov rdx,qword ptr [rax]
2B95 cmp rdx,qword ptr [r10+28h]
2B99 jne TSccIter::SetTi+0A2h (7FEF9302BB2h)
2B9B mov edx,dword ptr [rax+8]
2B9E cmp edx,dword ptr [r10+30h]
2BA2 jne TSccIter::SetTi+0A2h (7FEF9302BB2h)
2BA4 movzx edx,byte ptr [rax+0Ch]
2BA8 cmp dl,byte ptr [r10+34h]
2BAC jne TSccIter::SetTi+0A2h (7FEF9302BB2h)
2BAE xor eax,eax
2BB0 jmp TSccIter::SetTi+0A7h (7FEF9302BB7h)
2BB2 sbb eax,eax
2BB4 sbb eax,0FFFFFFFFh
2BB7 test eax,eax
2BB9 je TSccIter::SetTi+0CCh (7FEF9302BDCh)
另外xor eax的重点是什么,我们知道eax为零,然后测试那条在2bb7线上已知为零的那个?
这是整个功能
// fWildCard means match certain fields to '**' in the db
// szCxrMkt1,2 are required and cannot be null, ' ', or '\0\0'.
// szCxrOp1,2 can be null, ' ', or '\0\0'.
TSccIter& SetTi(bool fWildCard, LPCSTR szCxrMkt1, LPCSTR szCxrOp1, LPCSTR szCxrMkt2, LPCSTR szCxrOp2) {
if (m_fSkipSet)
return *this;
m_iSid = -1; // resets the iterator to search from the start
// Pad the struct to 16 bytes so we can clear it with 2 QWORDS
// We use a temp, ti, to detect if the new transferitem has changed
class TransferItemPadded : public TransferItem {
char padding[16 - sizeof(TransferItem)]; // get us to 16 bytes
} ti;
U8(&ti) = U8(BUMP(&ti, 8)) = 0x2020202020202020; // 8 spaces
// copy in the params
CPY2(ti.m_szCxrMkt1, szCxrMkt1);
if (szCxrOp1 && *szCxrOp1)
CPY2(ti.m_szCxrOp1, szCxrOp1);
ti.m_chDelimiter = (fWildCard) ? '*' : ':'; // this controls wild card matching
CPY2(ti.m_szCxrMkt2, szCxrMkt2);
if (szCxrOp2 && *szCxrOp2)
CPY2(ti.m_szCxrOp2, szCxrOp2);
// see if different
if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
memcpy(&m_ti, &ti, sizeof(TransferItem));
m_fQryChanged = true;
}
return *this;
}
typedef unsigned __int64 U8;
#define CPY2(a,b) ((*(WORD*)a) = (*(WORD*)b))
这就是整个asm
TSccIter& SetTi(bool fWildCard, LPCSTR szCxrMkt1, LPCSTR szCxrOp1, LPCSTR szCxrMkt2, LPCSTR szCxrOp2) {
2B10 sub rsp,18h
if (m_fSkipSet)
2B14 cmp byte ptr [rcx+0EAh],0
2B1B mov r10,rcx
return *this;
2B1E jne TSccIter::SetTi+0CCh (7FEF9302BDCh)
m_iSid = -1;
class TransferItemPadded : public TransferItem {
char padding[16 - sizeof(TransferItem)];
} ti;
U8(&ti) = U8(BUMP(&ti, 8)) = 0x2020202020202020;
2B24 mov rax,2020202020202020h
2B2E mov byte ptr [rcx+36h],0FFh
2B32 mov qword ptr [rsp],rax
2B36 mov qword ptr [rsp+8],rax
CPY2(ti.m_szCxrMkt1, szCxrMkt1);
2B3B movzx eax,word ptr [r8]
2B3F mov word ptr [rsp],ax
if (szCxrOp1 && *szCxrOp1)
2B43 test r9,r9
2B46 je TSccIter::SetTi+47h (7FEF9302B57h)
2B48 cmp byte ptr [r9],0
2B4C je TSccIter::SetTi+47h (7FEF9302B57h)
CPY2(ti.m_szCxrOp1, szCxrOp1);
2B4E movzx eax,word ptr [r9]
2B52 mov word ptr [rsp+3],ax
ti.m_chDelimiter = (fWildCard) ? '*' : ':';
2B57 mov eax,3Ah
2B5C mov ecx,2Ah
2B61 test dl,dl
2B63 cmovne eax,ecx
2B66 mov byte ptr [rsp+6],al
CPY2(ti.m_szCxrMkt2, szCxrMkt2);
2B6A mov rax,qword ptr [szCxrMkt2]
2B6F movzx ecx,word ptr [rax]
if (szCxrOp2 && *szCxrOp2)
2B72 mov rax,qword ptr [szCxrOp2]
2B77 mov word ptr [rsp+7],cx
2B7C test rax,rax
2B7F je TSccIter::SetTi+7Eh (7FEF9302B8Eh)
2B81 cmp byte ptr [rax],0
2B84 je TSccIter::SetTi+7Eh (7FEF9302B8Eh)
CPY2(ti.m_szCxrOp2, szCxrOp2);
2B86 movzx eax,word ptr [rax]
2B89 mov word ptr [rsp+0Ah],ax
if (memcmp(&ti, &m_ti, sizeof(TransferItem))) {
2B8E lea rax,[rsp]
2B92 mov rdx,qword ptr [rax]
2B95 cmp rdx,qword ptr [r10+28h]
2B99 jne TSccIter::SetTi+0A2h (7FEF9302BB2h)
2B9B mov edx,dword ptr [rax+8]
2B9E cmp edx,dword ptr [r10+30h]
2BA2 jne TSccIter::SetTi+0A2h (7FEF9302BB2h)
2BA4 movzx edx,byte ptr [rax+0Ch]
2BA8 cmp dl,byte ptr [r10+34h]
2BAC jne TSccIter::SetTi+0A2h (7FEF9302BB2h)
2BAE xor eax,eax
2BB0 jmp TSccIter::SetTi+0A7h (7FEF9302BB7h)
2BB2 sbb eax,eax
2BB4 sbb eax,0FFFFFFFFh
2BB7 test eax,eax
2BB9 je TSccIter::SetTi+0CCh (7FEF9302BDCh)
memcpy(&m_ti, &ti, sizeof(TransferItem));
2BBB mov rax,qword ptr [rsp]
m_fQryChanged = true;
2BBF mov byte ptr [r10+0E9h],1
2BC7 mov qword ptr [r10+28h],rax
2BCB mov eax,dword ptr [rsp+8]
2BCF mov dword ptr [r10+30h],eax
2BD3 movzx eax,byte ptr [rsp+0Ch]
2BD8 mov byte ptr [r10+34h],al
}
return *this;
2BDC mov rax,r10
}
答案 0 :(得分:0)
最后6行返回eax == 0中的值以进行匹配,并设置SF和ZF条件代码。
答案 1 :(得分:0)
2bb7
可以通过不同的代码路径访问:通过2b99
,2ba2
和2bac
的跳转,以及在没有条件跳转时直接跳转。 xor eax,eax
仅在最后一条路径上执行,它确保eax
为0 - 否则显然不是这样。
答案 2 :(得分:-1)
测试eax,eax将测试eax和eax == 0.如果为零,以下je将跳转。
而xor eax,eax是一种编码“eax = 0”的有效方法。它比mov eax,0
更有效编辑:最初误读了这个问题。看起来会发生“TSccIter :: SetTi + 0A7h”会发生什么变化值?
此外,SBB解释进位(2BB2-2BB4)的技巧在这里解释:
http://compgroups.net/comp.lang.asm.x86/trick-with-sbb-instruction/20164