如何创建/处理简单的bmp文件,只使用intel 64位汇编和nasm汇编器填充一种颜色?
答案 0 :(得分:4)
包括此类操作的步骤如下:
关于第 2 步:这有点棘手——如果每行的字节数不能被4整除,程序必须用填充字节(本例中使用0xFF)把每行补齐到4的倍数。在这里,我特意创建了一张201x201的图片。在这个例子中,每行有3 * 201 = 603个字节,不能被4整除,这意味着每行需要额外的1个字节。因此,图像缓冲区所需的大小为604 * 201 = 121404。
回答问题的源代码:
section .text
global _start                           ; entry symbol must be visible to the linker (ld)

;#######################################################################
;### This program creates empty bmp file - 64 bit version ##############
;#######################################################################
; Target: x86-64 Linux, NASM syntax, raw syscalls (no libc).
; Syscall convention: rax = number, args in rdi, rsi, rdx; result in rax
; (negative = -errno); the syscall instruction clobbers only rax, rcx, r11.
;
; Flow: creat(msg) -> write 54-byte header -> fill pixel buffer ->
;       write pixel buffer -> close -> exit(0).  Any failure ends in exit(1).
;
; Register roles:
;   r8  = file descriptor of the output file
;   rdx = write cursor into empty_space while the pixels are generated
;   ebx = rows remaining, ecx = pixels remaining in the current row

SYS_WRITE       equ 1
SYS_CLOSE       equ 3
SYS_EXIT        equ 60
SYS_CREAT       equ 85

; These must agree with the hard-coded header bytes in bmpheadline and with
; the size reserved for empty_space.
IMG_WIDTH       equ 201                 ; pixels per row
IMG_HEIGHT      equ 201                 ; rows
HEADER_BYTES    equ 54                  ; file header (14) + image header (40)
PIXEL_BYTES     equ IMG_HEIGHT * (IMG_WIDTH * 3 + 1) ; 603 data bytes + 1 pad byte per row

;#######################################################################
;### main ##############################################################
;#######################################################################
_start:
        ; fd = creat(msg, 0777)
        ; creat takes only (path, mode); there is no flags argument.
        mov     eax, SYS_CREAT
        lea     rdi, [rel msg]          ; file name
        mov     esi, 111111111b         ; mode (0777), masked by the umask
        syscall
        test    rax, rax
        js      .fail                   ; negative result = -errno
        mov     r8, rax                 ; keep the fd; r8 survives syscalls

        ; write(fd, bmpheadline, HEADER_BYTES)
        mov     eax, SYS_WRITE
        mov     rdi, r8
        lea     rsi, [rel bmpheadline]
        mov     edx, HEADER_BYTES
        syscall
        cmp     rax, rdx                ; rdx is preserved by the kernel
        jne     .fail_close             ; error or short write

        ; Fill the pixel buffer row by row with one colour (BGR byte order).
        lea     rdx, [rel empty_space]
        mov     ebx, IMG_HEIGHT
.row:                                   ; do {
        mov     ecx, IMG_WIDTH
.pixel:                                 ;   do {
        mov     byte [rdx + 0], 0x00    ;     blue
        mov     byte [rdx + 1], 0xFF    ;     green
        mov     byte [rdx + 2], 0xFF    ;     red
        add     rdx, 3                  ;     1 pixel = 3 bytes
        dec     ecx
        jnz     .pixel                  ;   } while (--x != 0)

        mov     byte [rdx], 0xFF        ;   603 -> 604: one pad byte per row
        inc     rdx
        dec     ebx
        jnz     .row                    ; } while (--y != 0)

        ; write(fd, empty_space, PIXEL_BYTES)
        mov     eax, SYS_WRITE
        mov     rdi, r8
        lea     rsi, [rel empty_space]
        mov     edx, PIXEL_BYTES
        syscall
        cmp     rax, rdx
        jne     .fail_close

        ; close(fd)
        mov     eax, SYS_CLOSE
        mov     rdi, r8
        syscall

        ; exit(0) - the status must be set explicitly, otherwise rdi still
        ; holds the fd and the process reports a non-zero exit code.
        xor     edi, edi
        mov     eax, SYS_EXIT
        syscall

.fail_close:
        ; Best-effort close before reporting the failure.
        mov     eax, SYS_CLOSE
        mov     rdi, r8
        syscall
.fail:
        ; exit(1)
        mov     edi, 1
        mov     eax, SYS_EXIT
        syscall
section .data
msg:            db 'filename.bmp', 0x00 ; output file name; 0x00 terminates the string

; 54-byte BMP header for a 201x201, 24 bits-per-pixel, uncompressed image.
; BMP is little-endian, so dw/dd emit every field directly in file byte order.
bmpheadline:
        ; --- file header, 14 bytes ---
        db      0x42, 0x4D              ; bfType          = "BM"
        dd      121458                  ; bfSize          = 54 + 121404
        dw      0                       ; bfReserved1
        dw      0                       ; bfReserved2
        dd      54                      ; bfOffBits       = offset of the pixel data
        ; --- image header, 40 bytes ---
        dd      40                      ; biSize
        dd      201                     ; biWidth
        dd      201                     ; biHeight
        dw      1                       ; biPlanes
        dw      24                      ; biBitCount
        dd      0                       ; biCompression   = uncompressed
        dd      121404                  ; biSizeImage     = 201 rows * 604 bytes
        dd      0                       ; biXPelsPerMeter
        dd      0                       ; biYPelsPerMeter
        dd      0                       ; biClrUsed
        dd      0                       ; biClrImportant
section .bss                            ; fixed-size blocks of reserved (uninitialised) memory
; Pixel buffer: 201 rows, each holding 201 * 3 = 603 data bytes plus 1 pad byte.
empty_space:
        resb    201 * (201 * 3 + 1)     ; = 121404 bytes
这里是bmp标题的解释(更多在此链接下:http://www.dragonwins.com/domains/getteched/bmp/bmpfileformat.htm)
;### File Header - 14 bytes
;#######################################################################
;### bfType, 2 bytes, The characters "BM"
;### 0x42,0x4D = "B","M"
;###
;### bfSize, 4 bytes, The size of the file in bytes
;### 0x72,0xDA,0x01,0x00 => 0x00,0x01,0xDA,0x72 = 0x1DA72 = 121458 bytes
;### 121458 = 54 + 201 * (201 * 3 + 1)
;###
;### Comment:
;### We want to create file 201x201, that means 201 rows and 201 columns
;### meaning each row will take 201*3 = 603 bytes
;###
;### According to BMP file specification each such row must be padded
;### so its size is divisible by 4, this gives us plus 1 byte for each
;### row.
;###
;###
;### bfReserved1, 2 bytes, Unused - must be zero
;### 0x00,0x00
;###
;### bfReserved2, 2 bytes, Unused - must be zero
;### 0x00,0x00
;###
;### bfOffBits, 4 bytes, Offset to start of Pixel Data
;### 0x36,0x00,0x00,0x00 = 54 bytes
;###
;### Image Header - 40 bytes
;#######################################################################
;### biSize 4 Header Size - Must be at least 40
;### 0x28,0x00,0x00,0x00 = 40
;###
;### biWidth 4 Image width in pixels
;### 0xC9,0x00,0x00,0x00 = 201
;###
;### biHeight 4 Image height in pixels
;### 0xC9,0x00,0x00,0x00 = 201
;###
;### biPlanes 2 Must be 1
;### 0x01,0x00
;###
;### biBitCount 2 Bits per pixel - 1, 4, 8, 16, 24, or 32
;### 0x18,0x00 = 24
;###
;### biCompression 4 Compression type (0 = uncompressed)
;### 0x00,0x00,0x00,0x00
;###
;### biSizeImage 4 Image Size - may be zero for uncompressed images
;### 0x3C,0xDA,0x01,0x00 => 0x00,0x01,0xDA,0x3C = 121404 bytes
;###
;### biXPelsPerMeter 4 Preferred resolution in pixels per meter
;### 0x00,0x00,0x00,0x00
;###
;### biYPelsPerMeter 4 Preferred resolution in pixels per meter
;### 0x00,0x00,0x00,0x00
;###
;### biClrUsed 4 Number Color Map entries that are actually used
;### 0x00,0x00,0x00,0x00
;###
;### biClrImportant 4 Number of significant colors
;### 0x00,0x00,0x00,0x00
;###
答案 1 :(得分:0)
这是rbraun答案的改进版本。这其实更应该是codereview.SE上的一个问答 >.<
我决定发布一个单独的答案而不是编辑原答案,但如果你愿意,可以随意将其中任何内容复制回你的答案中。我已经针对几种不同的行/列大小对此进行了测试,它可以正常工作。
我改进了注释,并做了一些优化。像"call kernel"这样的注释太显而易见,不值得写;它们只是噪音。我修改了系统调用的注释,以便更清楚地说明发生了什么。例如,原代码看起来像是在调用sys_open,但实际上使用的是sys_creat。这意味着即使注释中提到了flags,实际上也没有flags参数。
我还把BMP标头和循环参数化了,因此它适用于BMPcols和BMProws的任何汇编时取值,并且在运行时没有额外的开销。如果每行的字节数本身就是4字节的倍数(不需要填充),它会把用于填充的存储指令和加法指令完全省略。
对于非常大的缓冲区,在结束于行尾的缓冲区上使用多个write()调用会很有意义,因此可以重用它。例如lcm(4096, row_bytes)
的任何倍数都是好的,因为它包含整数行。大约128kiB可能是一个很好的大小,因为自Nehalem以来Intel CPU中的L2高速缓存大小为256kiB,所以数据有望在L2中保持热,而内核会反复将其发送到页面缓存中。您肯定希望缓冲区明显小于最后一级缓存大小。
原始版本的更改:
将静态常量数据放在.rodata
中。我们根本不需要.data
部分/部分。
尽可能使用32位操作数,特别是在寄存器中放置小常量。 (请注意,mov-immediate实际上不是"加载")。
改进循环习语:dec / jnz,没有单独的CMP。
在BMProws / BMPcols上进行参数化,并定义了各种大小的汇编时常数,而不是硬编码。汇编程序可以为您做数学运算,因此可以利用它。
使用dd等伪指令按字段定义BMP标头,而不是用db定义一串看不出含义的字节。
DEFAULT REL                     ; default to RIP-relative addressing for static data

;#######################################################################
;### This program creates empty bmp file - 64 bit version ##############

section .rodata                 ; read-only data is the right place for these, not .data

; Image geometry. Everything below is derived from these two at assembly time.
BMPcols         equ 2019
BMProws         equ 2011

; 24 bpp = 3 bytes per pixel. The padding rule applies to the row's BYTE
; count: each row is rounded up to a multiple of 4 bytes. (Rounding the
; pixel count up to a multiple of 4 first over-allocates whenever
; BMPcols % 4 is 1 or 2 and makes the header sizes disagree with the data.)
BMProwbytes     equ (3 * BMPcols + 3) & ~0x3
BMPpixbytes     equ BMProws * BMProwbytes

ALIGN 16                        ; for efficient rep movs
bmpheader:
;; BMP is a little-endian format, so we can use dw/dd directly instead of
;; encoding the bytes ourselves.
;; --- file header, 14 bytes ---
bfType:          dw "BM"
bfSize:          dd BMPpixbytes + bmpheader_len  ; size of file in bytes
                 dd 0                            ; bfReserved1 + bfReserved2
bfOffBits:       dd bmpheader_len                ; yes we can refer to stuff that's defined later.
;; --- image header, 40 bytes ---
biSize:          dd 40                           ; header size, min = 40
biWidth:         dd BMPcols
biHeight:        dd BMProws
biPlanes:        dw 1                            ; must be 1
biBitCount:      dw 24                           ; bits per pixel: 1, 4, 8, 16, 24, or 32
biCompression:   dd 0                            ; uncompressed = 0
biSizeImage:     dd BMPpixbytes                  ; Image Size - may be zero for uncompressed images
biXPelsPerMeter: dd 0                            ; Preferred resolution in pixels per meter
biYPelsPerMeter: dd 0                            ; Preferred resolution in pixels per meter
biClrUsed:       dd 0                            ; Number Color Map entries that are actually used
biClrImportant:  dd 0                            ; Number of significant colors
bmpheader_len   equ $ - bmpheader                ; Let the assembler calculate this for us. Should be 54. `$` is the current position

; output filename is hard-coded. Checking argc / argv is left as an exercise for the reader.
; Of course it would be even easier to be more Unixy and just always write to stdout, so the user could redirect
fname           db 'filename.bmp',0x00           ; name of out file, 0x00 = end of string
section .bss                    ; this section is responsible for fixed size preallocated blocks
; Static buffer big enough to hold the whole file (header followed by pixels).
;
; The fill loop in _start writes pixels and row padding with overlapping
; dword stores, so the very last store reaches up to 3 bytes beyond the final
; row. BMPBUF_SLACK reserves room for that spill so it never lands outside
; the buffer. The slack is NOT part of the file: bmpbuf_len is the exact
; number of bytes to write out.
BMPBUF_SLACK    equ 4
bmpbuf:         resb bmpheader_len + BMPpixbytes + BMPBUF_SLACK
bmpbuf_len      equ bmpheader_len + BMPpixbytes
section .text
global _start                   ; make the symbol externally visible

;#######################################################################
;### main ##############################################################
; Builds the complete BMP file (header + pixels) in bmpbuf, writes it to
; fname with raw Linux syscalls, and exits with status 0 on success or 1
; on failure. Nothing is printed; run under strace to see what failed.
;
; Syscall convention: eax = number, args in rdi, rsi, rdx; result in rax
; (negative = -errno); only rax, rcx and r11 are clobbered.
;
; Register roles:
;   r8d = output file descriptor (untouched by the syscalls and the loops)
;   rdi = write cursor into bmpbuf while the image is generated
;   ebx = rows remaining, ecx = pixels remaining in the current row
;
; All static data is addressed RIP-relative (explicit `rel`), so the code
; links as a position-independent executable as well as a static one.
_start:                         ; The linker looks for this symbol to set the entry point
        ; fd = creat(fname, 0666)
        ; creat doesn't take flags. It's equivalent to
        ; open(path, O_CREAT|O_WRONLY|O_TRUNC, mode).
        mov     eax, 85                 ; SYS_creat from asm/unistd_64.h
        lea     rdi, [rel fname]        ; file name
        mov     esi, 666o               ; mode in octal, to be masked by the user's umask
        syscall                         ; eax = fd or -ERRNO
        test    eax, eax
        js      .handle_error
        mov     r8d, eax                ; park the fd in a register nothing below touches

        ; memcpy the BMP header to the start of the buffer.
        lea     rdi, [rel bmpbuf]
        lea     rsi, [rel bmpheader]
        mov     ecx, bmpheader_len
        rep movsb
        ; rdi now points to the first byte after the header, where pixels go.

;######### main loop
        mov     ebx, BMProws
.LOOPY:                                 ; do {
        mov     ecx, BMPcols
.LOOPX:                                 ;   do {
        ; One dword store per pixel: RED=FF, GREEN=FF, BLUE=00. The 4th byte
        ; spills into the next pixel and is overwritten by the next store.
        mov     dword [rdi], (0xFF << 16) | (0xFF << 8) | 0x00
        add     rdi, 3                  ;     1 pixel = 3 bytes
        dec     ecx
        jnz     .LOOPX                  ;   } while (--x != 0)

%if ((BMPcols * 3) % 4) != 0
        ; Pad the row to a multiple of 4B. Only 1..3 bytes are needed; the
        ; rest of this dword is overwritten by the next row's first pixel.
        mov     dword [rdi], 0xFFFFFFFF
        add     rdi, 4 - (BMPcols * 3) % 4      ; advance to a 4B boundary
%endif
        ; NOTE(review): on the last row the final dword store above reaches up
        ; to 3 bytes beyond the pixel data, so bmpbuf must have at least that
        ; many spare bytes after bmpbuf_len.
        dec     ebx
        jnz     .LOOPY                  ; } while (--y != 0)

;##### Write out the buffer to the file
        ; write(fd, bmpbuf, bmpbuf_len), repeated until everything is out:
        ; write may transfer fewer bytes than requested.
        mov     edi, r8d                ; fd; rdi, rsi and rdx survive the syscall
        lea     rsi, [rel bmpbuf]
        mov     edx, bmpbuf_len
.write_more:
        mov     eax, 1                  ; SYS_write
        syscall
        test    rax, rax
        jle     .close_and_fail         ; -errno, or 0 = no progress
        add     rsi, rax                ; skip what was written
        sub     rdx, rax
        jnz     .write_more

        ; close(fd) - fd is still in edi
        mov     eax, 3                  ; SYS_close
        syscall

        ; exit(0)
        xor     edi, edi
        mov     eax, 60                 ; SYS_exit
        syscall

.close_and_fail:
        ; Best-effort close(fd) before reporting the failure; fd is in edi.
        mov     eax, 3                  ; SYS_close
        syscall
.handle_error:
        ; exit(1)
        mov     edi, 1
        mov     eax, 60                 ; SYS_exit
        syscall
我有时使用RIP相对的LEA,有时又对静态数据使用绝对寻址(mov r32, imm32)。这很傻;我真的应该选定一种方式并在所有地方统一使用。(如果我选择绝对寻址的非PIC方式,那么我就知道地址一定位于虚拟地址空间的低31位内,可以利用这一点,到处使用add edi,3这样的32位指令,而不必操作RDI。)
更多优化建议见my comments on the original answer。除了把三个字节存储合并成一个双字存储这一最基本的优化之外,我没有实现其他任何优化。展开循环以便使用更宽的存储指令会有很大帮助,但这仍留给读者作为练习。