Question

我目前正在做一些练习题，下面是我卡住的一道。先说明一下，我是初学者，所以非常欢迎任何帮助。

问题：

按降序排列二进制文件的内容。文件名作为命令行参数传递。文件内容被解释为四字节正整数，其中找到的值0不会写入文件中。结果必须写入已读取的同一文件中。

据我了解，我需要有一个二进制文件，打开它并读取其内容。处理数据时要记住它们是四字节的正整数：找到其中的零，把零去掉，然后对其余的数字进行排序。

我们被允许使用glibc，所以这是我的尝试：

; Asker's attempt (NASM, x86-64, linked against glibc): takes a file name on
; the command line, opens it with fopen, reads from it with fgets and then
; tries to "erase" zero values with fputc.
; NOTE(review): the listing is kept exactly as posted; the NOTE(review)
; comments below mark the places that look wrong and should be confirmed
; before reusing any of this code.
section .data
    ; NUL-terminated messages, each ending in a newline (10).
    ; NOTE(review): `warning` is not referenced anywhere in the code shown.
    warning db 'File does not exist!', 10, 0
    argument db 'Enter your argument.', 10, 0

    ; fopen mode string. NASM pads a string constant used with `dd` to a
    ; dword multiple, so this is 'r', '+', 0, 0 - the padding terminates it.
    mode dd 'r+'
    opened db 'File is open. Time to read.', 10, 0


section .bss
    content resd 10     ; read buffer: 10 dwords = 40 bytes
    counter resb 1      ; one byte, incremented once per dword examined in .loop

section .text

    extern printf, fopen, fgets, fputc

global main
; main - on entry rdi and rsi hold the first two integer arguments
; (argc and argv for a C `main`).
main:
    push rbp
    mov rbp, rsp
    push rsi
    push rdi
    push rbx

    ;location of argument's address
    ; (a second copy of rsi, popped into rbx at .openfile)
    push rsi 

    ; Exactly one command-line argument is expected (argc == 2).
    cmp rdi, 2
    je .openfile
    mov rdi, argument
    mov rax, 0
    call printf
    ; NOTE(review): this path reaches .end with the extra `push rsi` above
    ; still on the stack, so the three pops there read shifted slots.
    jmp .end

.openfile:
    pop rbx
    ;First real argument of command line
    mov rdi, [rbx + 8]
    mov rsi, mode
    mov rax, 0
    call fopen
    ; NOTE(review): fopen returns a 64-bit pointer in rax; testing only al
    ; treats any pointer whose low byte is 0 as a failure.
    cmp al, 0
    je .end

    ; Keep the value returned by fopen on the stack across the printf call.
    push rax

    mov rdi, opened
    mov rax, 0
    call printf

.readfromfile:
    ; fgets(content, 12, <value returned by fopen>)
    ; NOTE(review): fgets is a text-line function - it stores at most 11 bytes
    ; here and stops after a newline byte (10), so it does not look suitable
    ; for reading 10 binary dwords (40 bytes); fread is presumably what is
    ; wanted - confirm.
    mov rdi, content
    mov rsi, 12 ;I wrote 10 numbers in my file
    pop rdx
    mov rax, 0
    call fgets
    ; NOTE(review): same low-byte-only pointer test as after fopen.
    cmp al, 0
    je .end

    push rax


    ; NOTE(review): `tekst` is not defined anywhere in the code shown;
    ; presumably `content` was meant.
    mov rsi, tekst
    ; rdi receives the value fgets returned (pushed just above), not the
    ; stream: the stream pointer was popped into rdx before the fgets call
    ; and is not saved anywhere afterwards.
    pop rdi
.loop:
    ; Load the next dword from [rsi] into eax and advance rsi by 4.
    ; NOTE(review): nothing bounds this loop to the size of the buffer.
    lodsd
    inc byte[counter]
    ; NOTE(review): '0' is the ASCII character 0x30, not the numeric value 0
    ; that the task asks to drop.
    cmp eax, '0'
    jne .loop

    ;this is the part where I am not sure what to do. 
    ;I am trying to delete the zero with backspace, then use space and 
    ;backspace again - I saw it here somewhere as a solution
    ; NOTE(review): C declares fputc(int c, FILE *stream), i.e. character in
    ; edi and stream in rsi; here the character is placed in esi instead.
    ; Writing 0x08/0x20 to a file stores those bytes; it does not remove data.
    mov esi, 0x08
    call fputc  
    mov esi, 0x20
    call fputc
    mov esi, 0x08
    call fputc
    ; Tests the value returned by the last fputc call.
    cmp eax, 0
    je .end
    ; NOTE(review): rsi was overwritten with 0x08 above, so the next lodsd
    ; no longer reads from the buffer.
    jmp .loop

.end:
    ; NOTE(review): the pushes were rsi, rdi, rbx, but the pops are
    ; rdi, rsi, rbx - not the reverse order - so the caller's rbx is not
    ; restored on any path. `mov rsp, rbp` is what rebalances rsp.
    pop rdi
    pop rsi
    pop rbx
    mov rsp, rbp
    pop rbp
    ret

因此，我的想法是打开文件，找到零，使用退格和空格将其删除，然后再次退格；继续直到我到达文件末尾，然后对其进行排序。可以看出，我没有尝试对内容进行排序，因为我无法让程序为我做第一部分。我已经尝试了几天，一切都变得模糊了。

如果有人可以帮助我，我将非常感激。如果已经有类似的问题，请随时把链接发给我。只要是有帮助的资料，我都愿意阅读和学习。

我也不确定必须提供多少信息。如果不清楚，请向我指出。

谢谢

Answer 1

纯粹出于我自己的兴趣，这里给出一个示例：每当检测到值为零的 dword 时，就把内存区域“折叠”起来（丢弃零值，把后面的非零值向前移动）：

使用NASM在Linux中构建目标ELF64可执行文件：

nasm -f elf64 so_64b_collapseZeroDword.asm -l so_64b_collapseZeroDword.lst -w+all
ld -b elf64-x86-64 -o so_64b_collapseZeroDword so_64b_collapseZeroDword.o

调试器我使用的是 edb（从源代码构建）。该可执行文件不会做任何用户可以观察到的事情；确认它能正常运行后，应当在调试器中单步执行指令，并查看 .data 段的内存，观察各个值是如何在内存中移动的。

源文件so_64b_collapseZeroDword.asm

    segment .text

collapseZeroDwords:
;-----------------------------------------------------------------------
; Removes every zero dword from the range [rsi, rdx) by moving the
; non-zero dwords that follow a zero down towards the start of the range.
;
; Custom register-based convention (meant for callers written in assembly):
;   in:   rsi = address of the first dword
;         rdx = address just past the last dword (an "end()" pointer)
;   out:  rdi = address just past the last kept dword (the new "end()")
;   clobbers: rax, rsi, rdi
; Memory between the new end and rdx is left as it was (not cleared).
; Uses lodsd/stosd, so the direction flag decides which way rsi/rdi step.
;-----------------------------------------------------------------------

; Phase 1: walk forward until the first zero; nothing has to move before it.
.scanForZero:
    cmp     rsi, rdx
    jae     .nothingToCollapse      ; reached end() without meeting a zero
    lodsd                           ; eax = [rsi], rsi += 4
    test    eax, eax
    jnz     .scanForZero

; Phase 2: rdi = slot of that first zero = where the next kept value goes.
    lea     rdi, [rsi-4]
    jmp     .compactNext

.keepValue:
    stosd                           ; [rdi] = eax, rdi += 4
.compactNext:
    cmp     rsi, rdx
    jae     .done                   ; source exhausted, rdi is the new end()
    lodsd                           ; eax = [rsi], rsi += 4
    test    eax, eax
    jnz     .keepValue              ; non-zero: store it in the compacted area
    jmp     .compactNext            ; zero: drop it

.nothingToCollapse:
    mov     rdi, rdx                ; range unchanged, return the original end()
.done:
    ret

global _start

; RUN_CASE array, expectedKept
;   Runs collapseZeroDwords over the 4-dword array %1 and compares the
;   returned end pointer (rdi) with %1 + expectedKept*4. The comparison
;   result is not acted upon - it is there to be inspected in a debugger.
%macro RUN_CASE 2
    lea     rsi, [%1]
    lea     rdx, [%1+4*4]
    call    collapseZeroDwords
    cmp     rdi, %1+%2*4
%endmacro

; Entry point: a fixed sequence of hard-coded test calls, then exit(0).
_start:
    RUN_CASE test1, 4       ; no zero collapsed
    RUN_CASE test2, 3       ; one zero
    RUN_CASE test3, 3       ; one zero
    RUN_CASE test4, 2       ; two zeros
    RUN_CASE test5, 2       ; two zeros
    RUN_CASE test6, 0       ; four zeros

    ; Linux x86-64 system call: eax = 60 (exit), edi = status 0
    mov     eax, 60
    xor     edi, edi
    syscall

    segment .data
    ; Six test arrays of four dwords each. A canary dword sits before the
    ; first array and after every array, so an over-read or over-write
    ; shows up when the memory is inspected in a debugger.
%define CANARY 0xCCCCCCCC

        dd CANARY
test1:  dd 71, 72, 73, 74       ; no zero
        dd CANARY
test2:  dd 71, 72, 73,  0       ; zero at the end
        dd CANARY
test3:  dd  0, 71, 72, 73       ; zero at the start
        dd CANARY
test4:  dd  0, 71,  0, 72       ; zeros at positions 0 and 2
        dd CANARY
test5:  dd 71,  0, 72,  0       ; zeros at positions 1 and 3
        dd CANARY
test6:  dd  0,  0,  0,  0       ; only zeros
        dd CANARY

我尽量为代码写了详细的注释，以说明它做了什么、为什么这样做以及如何做到；如果对某个具体部分有疑问，欢迎随时提问。这段代码以简单为首要目标，因此没有使用任何激进的性能优化手段（例如用向量化的方式搜索第一个零值等）。

打开文件，删除零，对其进行排序-NASM

1 个答案: