NASM:如何使用intel 64位汇编创建/处理基本bmp文件?

时间:2016-12-07 18:08:41

标签: linux assembly x86 nasm x86-64

如何创建/处理简单的bmp文件,只使用intel 64位汇编和nasm汇编器填充一种颜色?

2 个答案:

答案 0 :(得分:4)

包括此类操作的步骤如下:

  1. 使用固定值创建bmp文件头(下面特定字段的说明)
  2. 创建包含足够空间的缓冲区 - 每个像素三个字节(一个颜色=红色+绿色+蓝色)
  3. 打开/创建文件
  4. 填充缓冲区
  5. 将标题写入文件
  6. 将缓冲区写入文件
  7. 关闭文件
  8. 退出程序
  9. 关于第2步的补充说明:这有点棘手——如果每行的字节数不能被4整除,程序必须把该行补齐到4字节的倍数(本例中用0xFF填充缺少的字节)。这里我特意创建了一张201x201的图片。在这个例子中,每行有3 * 201 = 603个字节,这意味着每行需要一个额外的填充字节。因此,图像缓冲区所需的大小为604 * 201 = 121404字节。

    回答问题的源代码:

    section     .text
    global      _start                              ;must be declared for linker (ld)

    ;#######################################################################
    ;### Creates 'filename.bmp': a 201x201, 24-bit BMP filled with one colour
    ;### Target: Linux x86-64, raw system calls, no libc (64 bit version)
    ;###
    ;### Register roles:
    ;###   r8  = file descriptor returned by creat
    ;###   r9  = process exit status (stays 1 until every step has succeeded)
    ;###   rbx = rows left, rcx = pixels left in current row
    ;###   rdx = write pointer into empty_space while filling
    ;### The syscall instruction only clobbers rax, rcx and r11, so r8/r9
    ;### survive across the system calls below.
    ;#######################################################################
    ;### main ##############################################################
    ;#######################################################################

    _start:                                         ;tell linker entry point

        mov     r9, 1                               ;assume failure until the whole file has been written

        ; create file: creat(msg, 0777)
        mov     rax,85                              ;system call number - creat (it takes no flags argument)
        mov     rdi,msg                             ;file name
        mov     rsi,111111111b                      ;mode = 0777, further restricted by the process umask
        syscall                                     ;rax = file descriptor, or negative errno on failure
        cmp     rax, 0
        jl      EXIT                                ;file was not created, so there is nothing to close

        ; save file descriptor
        mov     r8, rax

        ; write headline to file
        mov     rax, 1                              ;system call number - write
        mov     rdi, r8                             ;load file desc
        mov     rsi, bmpheadline                    ;load adress of buffer to write
        mov     rdx, 54                             ;load number of bytes (size of the BMP header)
        syscall                                     ;rax = bytes written, or negative errno
        cmp     rax, 54
        jne     CLOSE                               ;error or short write: give up, but still close the file

        ; fill the pixel buffer: 201 rows of (201 pixels * 3 bytes + 1 padding byte)
            mov         rbx, 201                    ;LOOPY counter
            mov         rdx, empty_space            ;load address of buffer (space allocated for picture pixels)
    LOOPY:
            mov         rcx, 201                    ;LOOPX counter

    LOOPX:
            mov         byte [rdx+0], 0x00          ;BLUE
            mov         byte [rdx+1], 0xFF          ;GREEN
            mov         byte [rdx+2], 0xFF          ;RED

            dec         rcx                         ;decrease counter_x
            add         rdx, 3                      ;move address pointer by 3 bytes (1 pixel = 3 bytes, which we just have written)
            cmp         rcx, 0                      ;check if counter is 0
            jne         LOOPX                       ;if not jump to LOOPX

            dec         rbx                         ;decrease counter_y
            mov         byte [rdx], 0xFF            ;additional byte per row (603 -> 604, a multiple of 4)
            inc         rdx                         ;increase address
            cmp         rbx, 0                      ;check if counter is 0
            jne         LOOPY                       ;if not jump to LOOPY

        ; write content to file
        mov     rax, 1                              ;system call number - write
        mov     rdi, r8                             ;load file desc
        mov     rsi, empty_space                    ;load adress of buffer to write
        mov     rdx, 121404                         ;load number of bytes (604 * 201)
        syscall                                     ;rax = bytes written, or negative errno
        cmp     rax, 121404
        jne     CLOSE                               ;error or short write: leave exit status at 1

        mov     r9, 0                               ;header and pixels were written completely

    CLOSE:
        ; close file
        mov     rax, 3                              ;system call number - close
        mov     rdi, r8                             ;load file desc
        syscall

    EXIT:
        ; exit program
        mov     rax,60                              ;system call number - exit
        mov     rdi, r9                             ;exit status: 0 = success, 1 = failure
        syscall
    
    section     .data

        ; NUL-terminated path of the output file
        msg         db  'filename.bmp',0x00

        ; 54-byte BMP header, one line per field; multi-byte fields are little-endian.
        ; File header (14 bytes):
        bmpheadline db  0x42,0x4D                   ; bfType          = "BM"
                    db  0x72,0xDA,0x01,0x00         ; bfSize          = 0x0001DA72 = 121458
                    db  0x00,0x00                   ; bfReserved1     = 0
                    db  0x00,0x00                   ; bfReserved2     = 0
                    db  0x36,0x00,0x00,0x00         ; bfOffBits       = 54
        ; Image header (40 bytes):
                    db  0x28,0x00,0x00,0x00         ; biSize          = 40
                    db  0xC9,0x00,0x00,0x00         ; biWidth         = 201
                    db  0xC9,0x00,0x00,0x00         ; biHeight        = 201
                    db  0x01,0x00                   ; biPlanes        = 1
                    db  0x18,0x00                   ; biBitCount      = 24
                    db  0x00,0x00,0x00,0x00         ; biCompression   = 0 (uncompressed)
                    db  0x3C,0xDA,0x01,0x00         ; biSizeImage     = 0x0001DA3C = 121404
                    db  0x00,0x00,0x00,0x00         ; biXPelsPerMeter = 0
                    db  0x00,0x00,0x00,0x00         ; biYPelsPerMeter = 0
                    db  0x00,0x00,0x00,0x00         ; biClrUsed       = 0
                    db  0x00,0x00,0x00,0x00         ; biClrImportant  = 0
    
    section .bss                                    ; uninitialised storage of fixed size, reserved when the program is loaded

        ; pixel buffer: 201 rows of 604 bytes each (603 pixel bytes + 1 padding byte) = 121404 bytes
        empty_space: resb 201 * 604
    

    这里是bmp文件头的解释(更多信息见此链接:http://www.dragonwins.com/domains/getteched/bmp/bmpfileformat.htm):

    ;### File Header - 14 bytes
    ;#######################################################################
    ;### bfType,        2 bytes,    The characters "BM"
    ;### 0x42,0x4D = "B","M"
    ;### 
    ;### bfSize,        4 bytes,    The size of the file in bytes
    ;### 0x72,0xDA,0x01,0x00 => 0x00,0x01,0xDA,0x72 = 0x1DA72 = 121458 bytes
    ;### 121458 = 54 + 201 * (201 * 3 + 1)
    ;### 
    ;### Comment:
    ;### We want to create file 201x201, that means 201 rows and 201 columns
    ;### meaning each row will take 201*3 = 603 bytes
    ;### 
    ;### According to BMP file specification each such row must be adjusted
    ;### so its size is dividable by 4, this gives us plus 1 byte for each 
    ;### row.
    ;###
    ;###
    ;### bfReserved1,   2 bytes,    Unused - must be zero
    ;### 0x00,0x00
    ;### 
    ;### bfReserved2,   2 bytes,    Unused - must be zero
    ;### 0x00,0x00
    ;### 
    ;### bfOffBits,     4 bytes,    Offset to start of Pixel Data
    ;### 0x36,0x00,0x00,0x00 = 54 bytes
    ;### 
    
    ;### Image Header - 40 bytes
    ;#######################################################################
    ;### biSize             4   Header Size - Must be at least 40
    ;### 0x28,0x00,0x00,0x00 = 40
    ;### 
    ;### biWidth            4   Image width in pixels
    ;### 0xC9,0x00,0x00,0x00 = 201
    ;### 
    ;### biHeight           4   Image height in pixels
    ;### 0xC9,0x00,0x00,0x00 = 201
    ;### 
    ;### biPlanes           2   Must be 1
    ;### 0x01,0x00
    ;### 
    ;### biBitCount         2   Bits per pixel - 1, 4, 8, 16, 24, or 32
    ;### 0x18,0x00 = 24
    ;### 
    ;### biCompression      4   Compression type (0 = uncompressed)
    ;### 0x00,0x00,0x00,0x00
    ;### 
    ;### biSizeImage        4   Image Size - may be zero for uncompressed images
    ;### 0x3C,0xDA,0x01,0x00 => 0x00,0x01,0xDA,0x3C = 121404 bytes
    ;### 
    ;### biXPelsPerMeter    4   Preferred resolution in pixels per meter
    ;### 0x00,0x00,0x00,0x00
    ;### 
    ;### biYPelsPerMeter    4   Preferred resolution in pixels per meter
    ;### 0x00,0x00,0x00,0x00
    ;### 
    ;### biClrUsed          4   Number Color Map entries that are actually used
    ;### 0x00,0x00,0x00,0x00
    ;### 
    ;### biClrImportant     4   Number of significant colors
    ;### 0x00,0x00,0x00,0x00
    ;### 
    

答案 1 :(得分:0)

这是rbraun答案的改进版本。这其实更应该是codereview.SE上的一个问答 >.<

我决定发布一个单独的答案而不是编辑,但如果你愿意,可以随意将其中任何一个复制回答案中。我已经针对几种不同的行/列大小对此进行了测试,并且它可以正常工作。

我改进了注释,并做了一些优化。像"call kernel"(调用内核)这样的注释过于显而易见,不值得写;它们只是噪音。我修改了系统调用处的注释,以更清楚地说明发生了什么。例如,原代码看起来像是在调用sys_open,但实际使用的是sys_creat。这意味着根本没有flags参数,尽管原注释中提到了它。

我还对BMP文件头和循环做了参数化,因此它适用于BMPcols和BMProws的任意汇编时取值,并且在运行时没有额外开销。如果每行的字节数本身就是4B的倍数、不需要填充,它会完全省略用于填充的store和add指令。

对于非常大的缓冲区,在结束于行尾的缓冲区上使用多个write()调用会很有意义,因此可以重用它。例如lcm(4096, row_bytes)的任何倍数都是好的,因为它包含整数行。大约128kiB可能是一个很好的大小,因为自Nehalem以来Intel CPU中的L2高速缓存大小为256kiB,所以数据有望在L2中保持热,而内核会反复将其发送到页面缓存中。您肯定希望缓冲区明显小于最后一级缓存大小。

原始版本的更改:

  • 固定文件创建模式:不设置执行位,只读/写。像正常人一样使用八进制。
  • 如上所述,改进注释:更明确地说明我们正在进行的系统调用。避免重复说明从asm指令本身就已经很清楚的内容。
  • 演示静态对象的RIP相对寻址
  • 将静态常量数据放在.rodata中。我们根本不需要.data部分/部分。

  • 尽可能使用32位操作数,特别是在寄存器中放置小常量。 (请注意,mov-immediate实际上不是"加载")。

  • 改进循环习语:dec / jnz,没有单独的CMP。

  • 在BMProws / BMPcols上进行参数化,并定义了各种大小的汇编时常数,而不是硬编码。汇编程序可以为您做数学运算,因此可以利用它。

  • 使用单独命名的dd项定义BMP标头,而不是使用db定义不再有意义的字节块。
  • 只进行一次write()系统调用:首先将BMP头复制到缓冲区中。 54字节的memcpy比额外的系统调用快得多。
  • 节省一些指令:当系统调用的参数已经在寄存器中时,不再重复设置。
  • 将像素分量的三个字节存储合并为一个双字(dword)存储。这些存储会相互重叠,但没关系。

DEFAULT REL            ; default to RIP-relative addressing for static data

;#######################################################################
;### This program creates empty bmp file - 64 bit version ##############

section     .rodata                  ; read-only data is the right place for these, not .data

    ; Image dimensions in pixels.  Everything below is derived from these two
    ; assemble-time constants.
    BMPcols   equ  2019
    BMProws   equ  2011

    ; 24 bits per pixel = 3 bytes per pixel.  BMP requires the BYTE length of
    ; each row (not the pixel count) to be rounded up to a multiple of 4.
    BMProwbytes equ (3 * BMPcols + 3) & ~0x3
    BMPpixbytes equ BMProws * BMProwbytes       ; size of the pixel array that follows the header

    ALIGN 16   ; for efficient rep movs
bmpheader:
;; BMP is a little-endian format, so we can use dw / dd directly instead of encoding the bytes ourselves

;; File header - 14 bytes
bfType:  dw "BM"                          ; magic: the characters 'B','M'
bfSize:  dd bmpheader_len + BMPpixbytes   ; size of the whole file in bytes
         dd 0                             ; bfReserved1 + bfReserved2, both must be zero
bfOffBits: dd bmpheader_len               ; offset of the pixel data (forward reference to an equ defined below)

;; Image header - 40 bytes
biSize:   dd 40    ; size of this image header, min = 40
biWidth:  dd BMPcols
biHeight: dd BMProws
biPlanes:       dw 1     ; must be 1
biBitCount:     dw 24    ; bits per pixel: 1, 4, 8, 16, 24, or 32
biCompression:  dd 0     ; uncompressed = 0
biSizeImage:    dd BMPpixbytes  ; Image Size - may be zero for uncompressed images

biXPelsPerMeter: dd 0   ;  Preferred resolution in pixels per meter
biYPelsPerMeter: dd 0   ;  Preferred resolution in pixels per meter
biClrUsed:       dd 0   ;  Number Color Map entries that are actually used
biClrImportant:  dd 0   ;  Number of significant colors

    bmpheader_len   equ   $ - bmpheader         ; Let the assembler calculate this for us (14 + 40 = 54).  `$` is the current position

    ; output filename is hard-coded.  Checking argc / argv is left as an exercise for the reader.
    ; Of course it would be even easier to be more Unixy and just always write to stdout, so the user could redirect
    fname         db  'filename.bmp',0x00         ;name of out file, 0x00 = end of string


section .bss                                    ;this section is responsible for fixed size preallocated blocks

    ; The fill loop in _start uses overlapping dword stores, so its last store
    ; can touch up to 3 bytes past the end of the pixel data.  Reserve a few
    ; spare bytes so that store stays inside the buffer.
    BMPbuf_slack  equ  4

    bmpbuf: resb bmpheader_len + BMPpixbytes + BMPbuf_slack   ; static buffer holding the whole file (header + pixels) plus slack
    bmpbuf_len  equ  bmpheader_len + BMPpixbytes              ; number of bytes that belong to the file (slack excluded)


section     .text
global      _start                              ;make the symbol externally visible

;-----------------------------------------------------------------------
; _start - process entry point (Linux x86-64, raw system calls, no libc)
;
; Builds a complete BMP file (header + pixels, every pixel the same colour)
; in bmpbuf, writes it to the file named by fname, and exits.
;
; Exit status: 0 if the file was created and written completely, 1 otherwise.
;              Nothing is printed, so run under strace to see what failed.
;
; Register roles:
;   eax = fd returned by creat; stays live until it is moved to edi for write
;   rdi = write pointer into bmpbuf while the buffer is being filled
;   ebx = rows left, ecx = pixels left in the current row
;   rbx = result of write(), kept across close()
;-----------------------------------------------------------------------
_start:                                         ;The linker looks for this symbol to set the entry point

    ; fd = creat(fname, 0666)
    ; creat takes no flags; it is equivalent to open(path, O_CREAT|O_WRONLY|O_TRUNC, mode).
    mov     eax, 85                             ; SYS_creat from /usr/include/x86_64-linux-gnu/asm/unistd_64.h
    lea     rdi, [fname]                        ; file name, RIP-relative because of DEFAULT REL
    mov     esi, 666o                           ; mode in octal, to be masked by the user's umask
    syscall                                     ; eax = fd or -ERRNO

    test    eax, eax                            ; negative result means creat failed
    js      .handle_error


    ;;; memcpy the BMP header to the start of our buffer.
    ;;; SSE loads/stores would probably be more efficient for such a small copy
    ; NOTE: these two are 32-bit absolute addresses, which only works in a
    ; position-dependent (non-PIE) executable.
    mov     edi, bmpbuf
    mov     esi, bmpheader
    mov     ecx, bmpheader_len
    rep movsb

    ; rdi now points to the first byte after the header, where pixels should be stored


;#########  main loop
        mov         ebx, BMProws
.LOOPY:                                         ; do{

        mov         ecx, BMPcols                ; the %if below decides at assemble time whether rows need padding

.LOOPX:                                         ; do{
        ; One dword store per pixel: bytes in memory are BLUE=00, GREEN=FF, RED=FF, then 00.
        ; The 4th byte is overwritten by the next store.
        mov         dword [rdi],  (0xFF <<16) | (0xFF <<8) | 0x00

        add         rdi, 3                      ; advance by one pixel (3 bytes), not by the 4 bytes stored
        dec         ecx
        jnz         .LOOPX                      ; } while(--x != 0)
    ; end of inner loop


%if    ((BMPcols * 3) % 4) != 0
        ; Pad the row to a multiple of 4B.  Only 1..3 padding bytes are needed; the
        ; dword store writes more than that and the excess is overwritten by the
        ; next row.  After the last row the excess (up to 3 bytes) lands past the
        ; pixel data, so bmpbuf must provide that much slack.
        mov         dword [rdi], 0xFFFFFFFF

        add         rdi, 4 - (BMPcols * 3) % 4  ; advance to a 4B boundary
%endif

        dec         ebx
        jnz         .LOOPY                      ; } while(--y != 0)


;##### Write out the buffer to the file

    ; fd is still where creat left it in RAX: nothing above writes to rax.
    ; write and close both take it as the first arg, and the SYSCALL ABI only
    ; clobbers RAX, RCX, and R11, so we can just put it in EDI once.
    mov     edi, eax                            ; fd

    ; write(fd, bmpbuf, bmpbuf_len)
    mov     eax, 1                              ; SYS_write
    lea     rsi, [bmpbuf]                       ; buffer
    mov     edx, bmpbuf_len
    syscall                                     ; rax = bytes written or -ERRNO
    mov     rbx, rax                            ; keep the result across close()

    ; close(fd)
    mov     eax, 3                              ; SYS_close
    ; fd is still in edi
    syscall

    cmp     rbx, bmpbuf_len                     ; an error (negative) or a short write both count as failure
    jne     .handle_error

    xor     edi, edi                            ; exit status 0 = success
.exit:
    mov     eax, 60                             ; SYS_exit
    syscall

.handle_error:
    mov     edi, 1                              ; exit status 1 = failure
    jmp     .exit

我有时使用RIP相对的LEA,有时又对静态数据使用绝对寻址(mov r32, imm32)。这很傻;我真的应该选定一种并在所有地方使用它。(如果我选择绝对寻址的非PIC方式,那么地址肯定位于虚拟地址空间的低31位中,就可以利用这一点,例如用add edi,3代替对RDI的操作。)

更多优化建议请参见我对原答案的评论。除了把三个字节存储合并为一个双字存储这种最基本的优化之外,我没有实现其他任何东西。把循环展开以便使用更宽的存储会有很大帮助,但这仍留给读者作为练习。