我正在尝试优化几个汇编过程的代码大小,不关心执行速度。我已经熟悉如下这类优化:
;the following two lines (3 + 4 = 7 bytes)
mov rbp, rsp
add rbp, 50h
;can be changed to one instruction (5 bytes); note that, unlike ADD, LEA does not modify the flags
lea rbp, [rsp+50h]
还有哪些其他优化可以减少下面这个过程的字节数?我并不是要求别人替我把这个过程完全优化好,只希望有人指出我可以改进的地方。
;get procedure address
;in:  rcx - value compared with the asmHsh result of each export name
;     NOTE(review): the comments originally called this a "pointer to function name",
;     but the loop compares it with a hash - confirm with the callers
;     rdx - DllBase address (IMAGE_DOS_HEADER pointer)
;out: rax - DllBase + AddressOfFunctions entry of the export whose name hash matched
;every other register used here is pushed on entry and popped before returning
;NOTE(review): the search loop has no exit for "no match"; r9 just keeps decrementing below 0
asmGetProc proc
push rcx ;input value (see header), restored on exit
push rdx ;DllBase address (IMAGE_DOS_HEADER pointer)
push r8 ;will hold the pointer to IMAGE_EXPORT_DIRECTORY
push r9 ;will hold IMAGE_EXPORT_DIRECTORY->NumberOfNames (used as the name index),
;later IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals[r9]
push rbx ;will hold the saved copy of rcx
push r10 ;will hold, in turn, the pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNames,
;the pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals and
;the pointer to IMAGE_EXPORT_DIRECTORY->AddressOfFunctions
mov rbx, rcx ;save the input value in rbx (rcx is reused below as the asmHsh argument)
mov r8d, [rdx+3ch] ;IMAGE_DOS_HEADER->e_lfanew (DWORD) (Offset to IMAGE_NT_HEADERS64)
add r8, rdx ;add DllBase to the e_lfanew offset
add r8, 88h ;18h - offset of OptionalHeader (IMAGE_OPTIONAL_HEADER64) inside IMAGE_NT_HEADERS64
;70h - offset of the first IMAGE_DATA_DIRECTORY entry (the export entry) inside the optional header
;r8 points to the IMAGE_DATA_DIRECTORY structure
mov r8d, [r8] ;IMAGE_DATA_DIRECTORY->VirtualAddress (DWORD)
add r8, rdx ;add DllBase to VirtualAddress (IMAGE_EXPORT_DIRECTORY)
mov r9d, [r8+18h] ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
mov r10d, [r8+20h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNames (DWORD)
add r10, rdx ;add DllBase to AddressOfNames (DWORD)
for_each_function:
;decrement function name counter (names are visited from the last one down to the first)
dec r9
;load the address of the current AddressOfNames entry into rcx
lea rcx, [r10 + 4 * r9] ;AddressOfNames[i] - function string RVA (relative virtual address)
mov ecx, [rcx] ;ecx is the AddressOfNames[r9] RVA (DWORD)
add rcx, rdx ;add DllBase to string RVA DWORD
call asmHsh ;hash the function name
cmp rax, rbx ;compare the function name hash with the passed hash
jnz for_each_function ;jump to top of loop if not a match
;r8 - export directory
;r9 - function name counter (index of the matching name)
;r10 - AddressOfNameOrdinals / AddressOfFunctions array
;rax - final pointer to function
mov r10d, [r8+24h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals (DWORD)
add r10, rdx ;add DllBase to AddressOfNameOrdinals DWORD
mov r9w, [r10+2*r9] ;AddressOfNameOrdinals[r9] (WORD entries); only the low 16 bits of r9 are written
mov r10d, [r8+1ch] ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions (DWORD)
add r10, rdx ;add DllBase to AddressOfFunctions DWORD
mov eax, [r10+r9*4] ;AddressOfFunctions[r9] (DWORD entries, r9 = function ordinal) - function RVA
add rax, rdx ;add DllBase to the function RVA
pop r10
pop rbx
pop r9
pop r8
pop rdx
pop rcx
ret ;return from procedure
asmGetProc endp
编辑:补充 asmHsh 的代码(是我疏忽了)
;hash function (djb2 with XOR): h = 5381, then h = h * 33 XOR byte for every byte
;in:  rcx - null terminated function name
;out: rax - hash value
;rcx and rdx are saved and restored
;NOTE(review): the first byte is hashed before any terminator check, so an empty string is not handled
asmHsh proc
push rcx
push rdx
mov rax, 5381d ;initial hash value
hl:
mov rdx, rax ;rdx = h
shl rax, 5 ;rax = h * 32
add rax, rdx ;rax = h * 33
xor al, [rcx] ;mix the current byte into the low 8 bits of the hash
inc rcx ;advance to the next byte
;check for null termination
mov dl, [rcx]
cmp dl, 00h
jne short hl ;loop until the next byte is the terminator
pop rdx
pop rcx
ret
asmHsh endp
答案 0 :(得分:1)
在 64 位模式下针对代码大小优化汇编时,应当:(1)在 32 位足够的情况下使用 DWORD 宽度的操作(前缀更少);(2)尽量使用传统的 x86 寄存器 eax–edx / esi / edi / ebp(编码更紧凑)。
希望下面的代码能说明这个思路。用 ML64 汇编后,原始例程为 135 字节,修改后的版本为 103 字节。
所做更改的示例:(1)使用 rbp / rsi / rdi 代替 r8 / r9 / r10;(2)把可以用多分量寻址模式一次完成的指令序列合并为一条指令;(3)在已知数据为 32 位的地方使用 DWORD 宽度的 dec;(4)用 IMUL 代替移位加加法(shift / add)。
被删除的行前面加上 ";-";新增的行末尾附加 ";## delta",其中 delta 是新代码带来的字节数变化。注释没有随代码一并调整。
;-----------------------------------------------------------------------
; asmHsh - hash function (djb2, XOR variant):
;          h = 5381; for every byte c of the name: h = h * 33 XOR c
; In:    rcx = pointer to a null-terminated name (may be empty)
; Out:   rax = 64-bit hash; an empty name yields 5381
; Clobb: flags only (rcx is saved and restored, rdx is not touched)
; Lines starting with ";-" are the replaced original instructions;
; ";## delta" is the byte difference produced by the new line.
;-----------------------------------------------------------------------
asmHsh proc
        push    rcx
;-push rdx ;## -1
;-mov rax, 5381d
        mov     eax, 5381               ;## -2  a 32-bit write zero-extends into rax
        jmp     short hl_check          ;## +2  test first, so an empty name never reads past its terminator
hl:
;- mov rdx, rax
;- shl rax, 5
;- add rax, rdx
        imul    rax, rax, 33            ;## -6  h = h * 33
        xor     al, [rcx]               ; h ^= current byte (only the low 8 bits can change)
        inc     rcx                     ; advance to the next byte
hl_check:
;check for null termination
;-mov dl, [rcx]
;-cmp dl, 00h
        cmp     byte ptr [rcx], 00h     ;## -2  compare in memory, no scratch register needed
        jne     short hl
;-pop rdx ;## -1
        pop     rcx
        ret
asmHsh endp
;-----------------------------------------------------------------------
; asmGetProc - get procedure address: find an export of a loaded 64-bit
;              PE image by the hash of its name
; In:    rcx = hash to look for (compared with asmHsh of every export name)
;        rdx = DllBase address (IMAGE_DOS_HEADER pointer)
; Out:   rax = DllBase + RVA of the matching export,
;              or 0 when no export name has that hash
; Clobb: flags only; rcx, rdx, rbp, rsi, rbx and rdi are saved and restored
; Regs:  rbx = wanted hash, rbp = IMAGE_EXPORT_DIRECTORY,
;        rsi = name index (later the function ordinal), rdi = current table
; Notes: the image is assumed to have an export directory (not validated).
;        Names are scanned from the last one down to the first.
;        Lines starting with ";-" are the replaced original instructions;
;        ";## delta" is the byte difference produced by the new line.
;-----------------------------------------------------------------------
asmGetProc proc
        push    rcx                     ; wanted hash, restored on exit
        push    rdx                     ; DllBase address (IMAGE_DOS_HEADER pointer)
;-push r8 ;pointer to IMAGE_EXPORT_DIRECTORY
        push    rbp                     ;## -1
;-push r9 ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
        push    rsi                     ;## -1
        push    rbx
;-push r10 ;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNames
        push    rdi                     ;## -1
        mov     rbx, rcx                ; keep the wanted hash; rcx is reused as the asmHsh argument
;-mov r8d, [rdx+3ch] ;IMAGE_DOS_HEADER->e_lfanew (DWORD) (Offset to IMAGE_NT_HEADERS64)
        mov     ebp, [rdx+3ch]          ;## -1  IMAGE_DOS_HEADER->e_lfanew
;-add r8, rdx ;add DllBase to the e_lfanew offset
;-add r8, 88h
;-mov r8d, [r8] ;IMAGE_DATA_DIRECTORY->VirtualAddress (DWORD)
        mov     ebp, [rbp+rdx+88h]      ;## -5  export IMAGE_DATA_DIRECTORY->VirtualAddress
                                        ;       (88h = 18h to OptionalHeader + 70h to its first data directory)
;-add r8, rdx ;add DllBase to VirtualAddress (IMAGE_EXPORT_DIRECTORY)
        add     rbp, rdx                ;## 0   rbp = IMAGE_EXPORT_DIRECTORY
;-mov r9d, [r8+18h] ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
        mov     esi, [rbp+18h]          ;## -1  NumberOfNames
;-mov r10d, [r8+20h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNames (DWORD)
        mov     edi, [rbp+20h]          ;## -1  AddressOfNames RVA
;-add r10, rdx ;add DllBase to AddressOfNames (DWORD)
        add     rdi, rdx                ;## 0   rdi = AddressOfNames table
for_each_function:
        xor     eax, eax                ;## +2  preset the "not found" result (rax = 0)
;- dec r9
        dec     esi                     ;## -1  next lower name index
        js      short gp_done           ;## +2  index dropped below 0: every name was tried
;- lea rcx, [r10 + 4 * r9]
;- mov ecx, [rcx]
        mov     ecx, [rdi + 4 * rsi]    ;## -3  AddressOfNames[rsi] - name string RVA
        add     rcx, rdx                ; add DllBase to the string RVA
        call    asmHsh                  ; rax = hash of the export name
        cmp     rax, rbx                ; same as the wanted hash?
        jnz     for_each_function       ; no - try the previous name
;rbp - export directory
;rsi - index of the matching name
;-mov r10d, [r8+24h] ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals (DWORD)
        mov     edi, [rbp+24h]          ;## -1  AddressOfNameOrdinals RVA
;-add r10, rdx ;add DllBase to AddressOfNameOrdinals DWORD
        add     rdi, rdx                ;## 0
;-mov r9w, [r10+2*r9]
        movzx   esi, word ptr [rdi+2*rsi] ;## -1  ordinal = AddressOfNameOrdinals[rsi] (WORD entries);
                                        ;       zero-extended so no stale upper bits reach the index below
;-mov r10d, [r8+1ch] ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions (DWORD)
        mov     edi, [rbp+1ch]          ;## -1  AddressOfFunctions RVA
;-add r10, rdx ;add DllBase to AddressOfFunctions DWORD
        add     rdi, rdx                ;## 0
;-mov eax, [r10+r9*4]
        mov     eax, [rdi+rsi*4]        ;## -1  AddressOfFunctions[ordinal] - function RVA
        add     rax, rdx                ; add DllBase to the function RVA
gp_done:
;-pop r10
        pop     rdi                     ;## -1
        pop     rbx
;-pop r9
        pop     rsi                     ;## -1
;-pop r8
        pop     rbp                     ;## -1
        pop     rdx
        pop     rcx
        ret                             ; rax = export address, or 0 if not found
asmGetProc endp