汇编程序处理逃逸字符

时间:2016-10-02 00:49:33

标签: assembly

编辑10/2/16 - 它不再是segfaulting但小写字符没有转换 - 这是我在小写标签中更改的内容:

lowercase:
        cmp     ax, 'a'                  ; is it a?
        jb      slashprint               ; jump out if it's below a
        cmp     ax, 'z'                  ; is it z?
        ja      slashprint               ; jump out if it's above z
        mov     eax, [esi]               ; move the char to eax to index
        mov     ebx, mappings            ; point to the mappings array
        sub     eax, 97                  ; convert the lower case letter to decimal
        mov     ax, [ebx + eax]          ; indexed addressing done here
        jmp     done  

我正在开发一个程序,允许我从用户那里获取一个字符串,检测转义序列并正确输出转换后的消息。我已经知道我需要清理子程序handle_ESC中的一些逻辑,但我试图一次测试和调试一种情况。我的问题是,每次从子程序返回并将我修改的字符添加到存储在edi中的新字符串(在标签L1_cont的第一行)时,我得到一个段错误。到目前为止,我一直专注于让它接受小写的转义序列。编辑:改变了一些事情。现在,当我输入2个斜杠时,我并没有分裂,但它也没有做我想做的事情。它不会打印任何东西。在查看同一行的小写字母时仍然是segfaulting。随着我所做的改变,它永远不应该达到那个指示。

%define STDIN 0
%define STDOUT 1
%define SYSCALL_EXIT  1
%define SYSCALL_READ  3
%define SYSCALL_WRITE 4
%define BUFLEN 256


        SECTION .data                   ; initialized data section

msg1:   db "Enter string: "             ; user prompt
len1:   equ $-msg1                      ; length of first message

msg2:   db "Original: "                 ; original string label
len2:   equ $-msg2                      ; length of second message

msg3:   db "Convert:  "                 ; converted string label
len3:   equ $-msg3

msg4:   db 10, "Read error", 10         ; error message
len4:   equ $-msg4

msg5:   db 10, "Error: unknown escape sequence", 10 ; unknown escape error (not sure this works)
len5:   equ $-msg5

msg6:   db 10, "Error: octal value overflow!", 10 ; overflow error
len6:   equ $-msg6

; Declare the array using mappings.
mappings: db 7, 8, -1, -1, -1, 12, -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, -1, 13, -1, 9, -1, 11, -1, -1, -1, -1


        SECTION .bss                    ; uninitialized data section
buf:    resb BUFLEN                     ; buffer for read
newstr: resb BUFLEN                     ; converted string
rlen:   resb 4                          ; length


        SECTION .text                   ; Code section.
        global  _start                  ; let loader see entry point

_start: nop                             ; Entry point.
start:                                  ; address for gdb

        ; prompt user for input
        ;
        mov     eax, SYSCALL_WRITE      ; write function
        mov     ebx, STDOUT             ; Arg1: file descriptor
        mov     ecx, msg1               ; Arg2: addr of message
        mov     edx, len1               ; Arg3: length of message
        int     080h                    ; ask kernel to write

        ; read user input
        ;
        mov     eax, SYSCALL_READ       ; read function
        mov     ebx, STDIN              ; Arg 1: file descriptor
        mov     ecx, buf                ; Arg 2: address of buffer
        mov     edx, BUFLEN             ; Arg 3: buffer length
        mov     [ecx + edx], byte 0     ; null terminate the read buffer
        sub     edx, 1                  ; make sure the buffer length is 1 less than BUFLEN
        int     080h

        ; error check
        ;
        mov     [rlen], eax             ; save length of string read
        cmp     eax, 0                  ; check if any chars read
        jg      read_OK                 ; >0 chars read = OK
        mov     eax, SYSCALL_WRITE      ; ow print error mesg
        mov     ebx, STDOUT
        mov     ecx, msg4
        mov     edx, len4
        int     080h
        jmp     exit                    ; skip over rest
read_OK:


        ; Loop for iterating over string

L1_init:
        mov     esi, buf                ; point to start of buffer
        mov     edi, newstr             ; point to start of new string
        mov     bx, [mappings]          ; point to the mappings array

L1_top:
        mov     al, [esi]               ; get a character
        inc     esi                     ; point to the next character
        cmp     al, '\'                 ; is current char a backslash?
        je      escapectrl              ; if so, run the subroutine
        jmp     L1_cont                 ; if not, jump to L1_cont and add the char

escapectrl:
        call    handle_ESC              ; run subroutine
        inc     edi
        cmp     [esi], byte 0
        je      L1_end
        jmp     L1_top

L1_cont:
        mov     [edi], al               ; store char in new string
        inc     edi                     ; update dest pointer
        cmp     [esi], byte 0           ; if not null, keep looping
        je      L1_end                  ; end loop if null
        jmp     L1_top                  ; loop if there are more chars



L1_end:


        ; print out user input for feedback
        ;
        mov     eax, SYSCALL_WRITE      ; write message
        mov     ebx, STDOUT
        mov     ecx, msg2
        mov     edx, len2
        int     080h

        mov     eax, SYSCALL_WRITE      ; write user input
        mov     ebx, STDOUT
        mov     ecx, buf
        mov     edx, [rlen]
        int     080h

        ; print out converted string
        ;
        mov     eax, SYSCALL_WRITE      ; write message
        mov     ebx, STDOUT
        mov     ecx, msg3
        mov     edx, len3
        int     080h

        mov     eax, SYSCALL_WRITE      ; write out string
        mov     ebx, STDOUT
        mov     ecx, newstr
        mov     edx, [rlen]
        int     080h


        ; final exit
        ;
exit:   mov     eax, SYSCALL_EXIT       ; exit function
        mov     ebx, 0                  ; exit code, 0=normal
        int     080h                    ; ask kernel to take over

        ; Subroutine begins here.

handle_ESC:

        mov     di, [esi]               ; take a copy of the current char
        inc     esi                     ; point to the next char

        cmp     di, '7'                 ; is it above 7?
        ja      lowercase               ; if so, skip octal check

        cmp     di, '0'                 ; is it below zero?
        jb      lowercase               ; if so, skip octal check

octalinit:
        mov     bl, 0                   ; holds the total for the digit
        mov     cl, 3                   ; count for how many times the loop should run if it's octal
        mov     dh, 8                   ; holds the octal multiple

octalloop:
        mov     ah, [di]                ; store the new char in ah for operations
        sub     ah, 48                  ; convert char to integer
        mul     dl                      ; multiply the octal value by 8
        add     ax, dx                  ; update the total value
        mov     di, [esi]               ; store the next character in di for consideration
        inc     esi                     ; point to the character after that
        cmp     di, '0'                 ; is it below 0?
        jb      lowercase               ; if below, jump out
        cmp     di, '7'                 ; is it above 7?
        ja      lowercase               ; if above, jump out
        dec     cl                      ; count down
        jnz     octalloop               ; loop if needed
        dec     esi                     ; undo the prefetch if no longer looping
        jo      octaloverflow           ; if the overflow flag was set, handle the overflow
        mov     al, bl                  ; stores the finalized value in al
        jmp     done                    ; exit


octaloverflow:
        mov     [edi], byte 32          ; makes the second char blank due to error
        jmp     done                     ; not sure if this part is gonna work right

lowercase:
        cmp     di, 'a'                  ; is it a?
        jb      slashprint               ; jump out if it's below a
        cmp     di, 'z'                  ; is it z?
        ja      slashprint               ; jump out if it's above z
        sub     di, 97                   ; convert the lower case letter to decimal
        mov     cl, [bx + di]            ; indexed addressing done here
        cmp     cl, -1                   ; is it -1?
        mov     [edi], cl                ; store the converted char in the string
        inc     esi                      ; point to the spot after
        inc     edi                      ; point to the next entry in the new string
        mov     [edi], byte 32           ; add a blank space in the new string where the lowercase letter was
        jmp     done                  

slashprint:
        cmp     di, '\'                  ; is it another slash?
        jne     errormsg                 ; jump and print an error message
        mov     [edi], byte 92           ; store a slash in the new string
        inc     edi                      ; move to the next position of the new string
        mov     [edi], byte 32           ; make the second char blank since both are slashes
        jmp     done                     ; return

errormsg:
        inc     esi                     ; skip over the character after the slash


done:
        ret

         ; end Subroutine.

1 个答案:

答案 0 :(得分:0)

此代码中存在许多错误,但我会突出显示可能导致崩溃的错误:

  • handle_ESC clobbers EDI所以它或主程序会写入一些随机存储位置
  • 我假设这是32位Linux应用程序 - 使用16位寄存器进行寻址是错误的