我是昨天的汇编新手,所以我决定编写简单的(我希望的)函数 - memcpy和memset。
Memcpy运行良好,但memset不是,有时候。一定有小错误,但我找不到。
memset的:
; NASM syntax
%include 'types.asm'
SEGMENT .CODE
section .text
global trn_memset
global _trn_memset
; ==========================================================
; memset
; void trn_memset(void* dest, char val, unsigned plong sz)
_trn_memset:
trn_memset:
%ifdef TRN_64BIT
getarg tax, 1
getarg tcx, 2
getarg tbx, 3
%else
mov tax, [tsp + ((2 * sizeof_LONG) + sizeof_INT)] ; size
mov tcx, [tsp + ( sizeof_LONG + sizeof_INT)] ; tcx = char
mov tbx, [tsp + ( sizeof_LONG )] ; tbx = destination
%endif
; dest = [tsp + (1 * sizeof_LONG)]
; src = [tsp + (2 * sizeof_LONG)]
; sz = [tsp + (3 * sizeof_LONG)]
mov tdx, 0 ; remainder
mov tcx, 8 ; tax / tcx
mov tax, [tsp + (3 * sizeof_LONG)] ; size
div tcx
;
mov tbx, [tsp + (1 * sizeof_LONG)] ; tbx = destination
mov tcx, [tsp + (2 * sizeof_LONG)] ; tcx = val
push tax ; save it to stack, we will use al
mov al, cl
; create 32bit int from 8bit value
; ((((((val << 8) | val) << 8) | val) << 8) | val)
mov cl, al
shl tcx, 8
mov cl, al
shl tcx, 8
mov cl, al
shl tcx, 8
mov cl, al
.cmp_remainder:
;cmp tdx, 0
or tdx, tdx
jz .aftercmp_remainder
dec tdx
mov [tbx], cl
inc tbx
jmp .cmp_remainder
.aftercmp_remainder:
pop tdx ; restore result from stack to tdx
; because now value in tdx is useless
.cmp_result:
;cmp tdx, 0
or tdx, tdx
jz .aftercmp_result
dec tdx
; 2x 32bit set
mov [tbx], ecx
add tbx, 4
;
mov [tbx], ecx
add tbx, 4
;
jmp .cmp_result
.aftercmp_result:
mov tax, [tsp + (1 * sizeof_LONG)] ; tax = destination
ret
types.asm :
%ifdef WINDOWS
%define TRN_WIN
%else
; TODO make it better
%define TRN_NIX
%endif
; C Variable types
%ifdef TRN_64BIT
%define CHAR BYTE
%define SHORT WORD
%define INT DWORD
%define LONG QWORD
%define LLONG QWORD
%define DCHAR db
%define DSHORT dw
%define DINT dd
%define DLONG dq
%define DLLONG dq
%define sizeof_CHAR 1
%define sizeof_SHORT 2
%define sizeof_INT 4
%define sizeof_LONG 8
%define sizeof_LLONG 8
%define sizeof_DCHAR 1
%define sizeof_DSHORT 2
%define sizeof_DINT 4
%define sizeof_DLONG 8
%define sizeof_DLLONG 8
%else
%define CHAR BYTE
%define SHORT WORD
%define INT DWORD
%define LONG DWORD
%define LLONG QWORD
%define DCHAR db
%define DSHORT dw
%define DINT dd
%define DLONG dd
%define DLLONG dq
%define sizeof_CHAR 1
%define sizeof_SHORT 2
%define sizeof_INT 4
%define sizeof_LONG 4
%define sizeof_LLONG 8
%define sizeof_DCHAR 1
%define sizeof_DSHORT 2
%define sizeof_DINT 4
%define sizeof_DLONG 4
%define sizeof_DLLONG 8
%endif
; Registers
%ifdef TRN_64BIT
%define tax rax
%define tbx rbx
%define tcx rcx
%define tdx rdx
%define tsp rsp
%else
%define tax eax
%define tbx ebx
%define tcx ecx
%define tdx edx
%define tsp esp
%endif
; Getting arguments
; - 64bit nix: rdi, rsi, rdx, rcx, r8, r9
; - 64bit win: RCX, RDX, R8, R9
; getarg(out, arg_index)
; - out can be memory or register
; - arg_index is starting from 1
; - bytes_sz - 1,2,4 or 8 bytes
%ifdef TRN_64BIT
%ifdef TRN_WIN
%macro getarg 2
%if %2 = 1
mov %1, rcx
%elif %2 = 2
mov %1, rdx
%elif %2 = 3
mov %1, r8
%elif %2 = 4
mov %1, r9
%elif %2 > 4
mov %1, [tsp + ((%2 - 4) * 8)]
%endif
%endmacro
%else
%macro getarg 2
%if %2 = 1
mov %1, rdi
%elif %2 = 2
mov %1, rsi
%elif %2 = 3
mov %1, rdx
%elif %2 = 4
mov %1, rcx
%elif %2 = 5
mov %1, r8
%elif %2 = 6
mov %1, r9
%elif %2 > 6
mov %1, [tsp + ((%2 - 6) * 8)]
%endif
%endmacro
%endif
%endif
; Custom commands
%macro pushsz 1
sub tsp, %1
%endmacro
%macro popsz 1
add tsp, %1
%endmacro
%macro pushreg 0
push LONG tdx
push LONG tcx
push LONG tbx
push LONG tax
%endmacro
%macro popreg 0
pop LONG tax
pop LONG tbx
pop LONG tcx
pop LONG tdx
%endmacro
%macro print 1
push LONG %1
call printf
pop LONG eax
%endmacro
修改
现在,我真的不知道自己做错了什么。我有错吗? 32位
_trn_memset:
trn_memset:
mov eax, DWORD [esp + 12] ; size
mov ecx, DWORD [esp + 8] ; tcx = val
mov ebx, DWORD [esp + 4] ; tbx = destination
.memset_loop:
or eax, eax
jz .aftermemset_loop
mov [ebx], BYTE cl
dec eax
inc ebx
.aftermemset_loop:
mov eax, DWORD [esp + 4] ; Return destionation
ret
当我编译内核&amp;使用C的memset和汇编编写的memcpy测试程序,它有效,但是当我包含汇编memset版本时,打印时格式错误。
答案 0 :(得分:0)
代码是好的,除了在开始时所有使用的寄存器(第一个eax / rax除外)必须push
进行堆叠,并且在返回之前它们必须被pop
编辑回来。
因此,最简单的32位memset看起来像:
_trn_memset:
trn_memset:
push ebp
mov ebp, esp
add ebp, 4 ; We pushed one register to stack, count it
push ebx ; Save used registers
push ecx
mov eax, DWORD [ebp + 12] ; size
mov ecx, DWORD [ebp + 8] ; tcx = val
mov ebx, DWORD [ebp + 4] ; tbx = destination
.memset_loop:
or eax, eax ; Fast compare to 0
jz .aftermemset_loop
mov [ebx], BYTE cl
dec eax
inc ebx
.aftermemset_loop:
mov eax, DWORD [ebp + 4] ; Return destionation
pop ecx ; Restore used registers
pop ebx
pop ebp
ret