我目前正在处理一些问题,这是我遇到的问题。为了清楚起见,我是一个初学者,所以任何帮助都非常值得欢迎。
问题:
按降序排列二进制文件的内容。文件名作为命令行参数传递。文件内容被解释为四字节正整数,其中找到的值0不会写入文件中。结果必须写入已读取的同一文件中。
据我了解,我必须拥有一个二进制文件。打开它。获取其内容。在查找所有字符的同时要牢记正号,四字节整数,找到零,去除零,对其余数字进行排序。
我们被允许使用glibc,所以这是我的尝试:
section .data
warning db 'File does not exist!', 10, 0
argument db 'Enter your argument.', 10, 0
mode dd 'r+'
opened db 'File is open. Time to read.', 10, 0
section .bss
content resd 10
counter resb 1
section .text
extern printf, fopen, fgets, fputc
global main
main:
push rbp
mov rbp, rsp
push rsi
push rdi
push rbx
;location of argument's address
push rsi
cmp rdi, 2
je .openfile
mov rdi, argument
mov rax, 0
call printf
jmp .end
.openfile:
pop rbx
;First real argument of command line
mov rdi, [rbx + 8]
mov rsi, mode
mov rax, 0
call fopen
cmp al, 0
je .end
push rax
mov rdi, opened
mov rax, 0
call printf
.readfromfile:
mov rdi, content
mov rsi, 12 ;I wrote 10 numbers in my file
pop rdx
mov rax, 0
call fgets
cmp al, 0
je .end
push rax
mov rsi, tekst
pop rdi
.loop:
lodsd
inc byte[counter]
cmp eax, '0'
jne .loop
;this is the part where I am not sure what to do.
;I am trying to delete the zero with backspace, then use space and
;backspace again - I saw it here somewhere as a solution
mov esi, 0x08
call fputc
mov esi, 0x20
call fputc
mov esi, 0x08
call fputc
cmp eax, 0
je .end
jmp .loop
.end:
pop rdi
pop rsi
pop rbx
mov rsp, rbp
pop rbp
ret
因此,我的想法是打开文件,找到零,使用退格和空格将其删除,然后再次退格;继续直到我到达文件末尾,然后对其进行排序。可以看出,我没有尝试对内容进行排序,因为我无法让程序为我做第一部分。我已经尝试了几天,一切都变得模糊了。
如果有人可以帮助我,我将非常感激。如果有类似的问题,请随时将其链接至我。任何有帮助的地方,我都准备阅读和学习。
我也不确定必须提供多少信息。如果不清楚,请向我指出。
谢谢
答案 0 :(得分:2)
为了我自己的私心,当检测到dword零值时,内存区域被“折叠”的示例:
使用NASM在Linux中构建目标ELF64可执行文件:
nasm -f elf64 so_64b_collapseZeroDword.asm -l so_64b_collapseZeroDword.lst -w+all
ld -b elf64-x86-64 -o so_64b_collapseZeroDword so_64b_collapseZeroDword.o
对于调试器,我使用的是edb
(从源代码构建)(该可执行文件不会执行用户可观察到的任何操作,当它正常运行时,应该在调试器中单步执行指令,然后在.data
段上查看内存,以查看值如何在内存中移动。
源文件so_64b_collapseZeroDword.asm
segment .text
collapseZeroDwords:
; input (custom calling convention, suitable only for calls from assembly):
; rsi - address of first element
; rdx - address beyond last element ("vector::end()" pointer)
; return: rdi - new "beyond last element" address
; modifies: rax, rsi, rdi
; the memory after new end() is not cleared (the zeroes are just thrown away)!
; search for first zero (up till that point the memory content will remain same)
cmp rsi, rdx
jae .noZeroFound ; if the (rsi >= end()), no zero was in the memory
lodsd ; eax = [rsi], rsi += 4
test eax, eax ; check for zero
jne collapseZeroDwords
; first zero found, from here on, the non-zero values will be copied to earlier area
lea rdi, [rsi-4] ; address where the non-zero values should be written
.moveNonZeroValues:
cmp rsi, rdx
jae .wholeArrayCollapsed ; if (rsi >= end()), whole array is collapsed
lodsd ; eax = [rsi], rsi += 4
test eax, eax ; check for zero
jz .moveNonZeroValues ; zero detected, skip the "store" value part
stosd ; [rdi] = eax, rdi += 4 (pointing beyond last element)
jmp .moveNonZeroValues
.noZeroFound:
mov rdi, rdx ; just return the original "end()" pointer
.wholeArrayCollapsed: ; or just return when rdi is already set as new end()
ret
global _start
_start: ; run some hardcoded simple tests, verify in debugger
lea rsi, [test1]
lea rdx, [test1+4*4]
call collapseZeroDwords
cmp rdi, test1+4*4 ; no zero collapsed
lea rsi, [test2]
lea rdx, [test2+4*4]
call collapseZeroDwords
cmp rdi, test2+3*4 ; one zero
lea rsi, [test3]
lea rdx, [test3+4*4]
call collapseZeroDwords
cmp rdi, test3+3*4 ; one zero
lea rsi, [test4]
lea rdx, [test4+4*4]
call collapseZeroDwords
cmp rdi, test4+2*4 ; two zeros
lea rsi, [test5]
lea rdx, [test5+4*4]
call collapseZeroDwords
cmp rdi, test5+2*4 ; two zeros
lea rsi, [test6]
lea rdx, [test6+4*4]
call collapseZeroDwords
cmp rdi, test6+0*4 ; four zeros
; exit back to linux
mov eax, 60
xor edi, edi
syscall
segment .data
; all test arrays are 4 elements long for simplicity
dd 0xCCCCCCCC ; debug canary value to detect any over-read or over-write
test1 dd 71, 72, 73, 74, 0xCCCCCCCC
test2 dd 71, 72, 73, 0, 0xCCCCCCCC
test3 dd 0, 71, 72, 73, 0xCCCCCCCC
test4 dd 0, 71, 0, 72, 0xCCCCCCCC
test5 dd 71, 0, 72, 0, 0xCCCCCCCC
test6 dd 0, 0, 0, 0, 0xCCCCCCCC
我尝试对其进行广泛评论,以显示其功能/原因/方式,但是随时可以询问任何特定部分。该代码的编写考虑到了简单性,因此不使用任何激进的性能优化方法(例如对第一个零值进行矢量化搜索等)。