我是昨天的汇编新手,所以我决定编写简单的(我希望的)函数 - memcpy和memset。



; NASM syntax

%include 'types.asm'

section .text

global  trn_memset
global _trn_memset

; ==========================================================
; memset
; void trn_memset(void* dest, char val, unsigned plong sz)
    %ifdef TRN_64BIT
        getarg tax, 1
        getarg tcx, 2
        getarg tbx, 3
        mov tax, [tsp + ((2 * sizeof_LONG) + sizeof_INT)]    ; size
        mov tcx, [tsp + (     sizeof_LONG  + sizeof_INT)]    ; tcx = char
        mov tbx, [tsp + (     sizeof_LONG              )]    ; tbx = destination

    ; dest = [tsp + (1 * sizeof_LONG)]
    ; src = [tsp + (2 * sizeof_LONG)]
    ; sz = [tsp + (3 * sizeof_LONG)]
    mov tdx, 0                         ; remainder
    mov tcx, 8                         ; tax / tcx
    mov tax, [tsp + (3 * sizeof_LONG)]     ; size
    div tcx
    mov tbx, [tsp + (1 * sizeof_LONG)]     ; tbx = destination
    mov tcx, [tsp + (2 * sizeof_LONG)]     ; tcx = val
    push tax                           ; save it to stack, we will use al
    mov al, cl
    ; create 32bit int from 8bit value
    ; ((((((val << 8) | val) << 8) | val) << 8) | val)
    mov cl, al
    shl tcx, 8
    mov cl, al
    shl tcx, 8
    mov cl, al
    shl tcx, 8
    mov cl, al
    ;cmp tdx, 0
    or tdx, tdx
    jz .aftercmp_remainder
    dec tdx
    mov [tbx], cl
    inc tbx
    jmp .cmp_remainder
    pop tdx                            ; restore result from stack to tdx
                                       ; because now value in tdx is useless
    ;cmp tdx, 0
    or tdx, tdx
    jz .aftercmp_result
    dec tdx
    ; 2x 32bit set
    mov [tbx], ecx
    add tbx, 4
    mov [tbx], ecx
    add tbx, 4
    jmp .cmp_result
    mov tax, [tsp + (1 * sizeof_LONG)]     ; tax = destination


%ifdef WINDOWS
    %define TRN_WIN
    ; TODO make it better
    %define TRN_NIX

; C Variable types
%ifdef TRN_64BIT
    %define CHAR BYTE
    %define SHORT WORD
    %define INT DWORD
    %define LONG QWORD
    %define LLONG QWORD

    %define DCHAR db
    %define DSHORT dw
    %define DINT dd
    %define DLONG dq
    %define DLLONG dq

    %define sizeof_CHAR 1
    %define sizeof_SHORT 2
    %define sizeof_INT 4
    %define sizeof_LONG 8
    %define sizeof_LLONG 8

    %define sizeof_DCHAR 1
    %define sizeof_DSHORT 2
    %define sizeof_DINT 4
    %define sizeof_DLONG 8
    %define sizeof_DLLONG 8
    %define CHAR BYTE
    %define SHORT WORD
    %define INT DWORD
    %define LONG DWORD
    %define LLONG QWORD

    %define DCHAR db
    %define DSHORT dw
    %define DINT dd
    %define DLONG dd
    %define DLLONG dq

    %define sizeof_CHAR 1
    %define sizeof_SHORT 2
    %define sizeof_INT 4
    %define sizeof_LONG 4
    %define sizeof_LLONG 8

    %define sizeof_DCHAR 1
    %define sizeof_DSHORT 2
    %define sizeof_DINT 4
    %define sizeof_DLONG 4
    %define sizeof_DLLONG 8

; Registers
%ifdef TRN_64BIT
    %define tax rax
    %define tbx rbx
    %define tcx rcx
    %define tdx rdx
    %define tsp rsp
    %define tax eax
    %define tbx ebx
    %define tcx ecx
    %define tdx edx
    %define tsp esp

; Getting arguments
; - 64bit nix: rdi, rsi, rdx, rcx, r8, r9
; - 64bit win: RCX, RDX, R8, R9
; getarg(out, arg_index)
; - out can be memory or register
; - arg_index is starting from 1
; - bytes_sz - 1,2,4 or 8 bytes

%ifdef TRN_64BIT
    %ifdef TRN_WIN
        %macro getarg 2
            %if %2 = 1
                mov %1, rcx
            %elif %2 = 2
                mov %1, rdx
            %elif %2 = 3
                mov %1, r8
            %elif %2 = 4
                mov %1, r9
            %elif %2 > 4
                mov %1, [tsp + ((%2 - 4) * 8)]
        %macro getarg 2
            %if %2 = 1
                mov %1, rdi
            %elif %2 = 2
                mov %1, rsi
            %elif %2 = 3
                mov %1, rdx
            %elif %2 = 4
                mov %1, rcx
            %elif %2 = 5
                mov %1, r8
            %elif %2 = 6
                mov %1, r9
            %elif %2 > 6
                mov %1, [tsp + ((%2 - 6) * 8)]

; Custom commands
%macro pushsz 1
    sub tsp, %1

%macro popsz 1
    add tsp, %1

%macro pushreg 0
    push LONG tdx
    push LONG tcx
    push LONG tbx
    push LONG tax

%macro popreg 0
    pop LONG tax
    pop LONG tbx
    pop LONG tcx
    pop LONG tdx

%macro print 1
    push LONG %1
    call printf
    pop LONG eax


现在,我真的不知道自己做错了什么。我有错吗? 32位

    mov eax, DWORD [esp + 12]     ; size
    mov ecx, DWORD [esp + 8]      ; tcx = val
    mov ebx, DWORD [esp + 4]      ; tbx = destination

    or eax, eax
    jz .aftermemset_loop
    mov [ebx], BYTE cl
    dec eax
    inc ebx
    mov eax, DWORD [esp + 4]            ; Return destionation


代码是好的,除了在开始时所有使用的寄存器(第一个eax / rax除外)必须push进行堆叠,并且在返回之前它们必须被pop编辑回来。


    push ebp
    mov ebp, esp
    add ebp, 4 ; We pushed one register to stack, count it
    push ebx ; Save used registers
    push ecx
    mov eax, DWORD [ebp + 12]     ; size
    mov ecx, DWORD [ebp + 8]      ; tcx = val
    mov ebx, DWORD [ebp + 4]      ; tbx = destination

    or eax, eax ; Fast compare to 0
    jz .aftermemset_loop
    mov [ebx], BYTE cl
    dec eax
    inc ebx
    mov eax, DWORD [ebp + 4]            ; Return destionation
    pop ecx ; Restore used registers
    pop ebx
    pop ebp