输入一串字符并输出为大写

时间:2012-07-03 01:27:01

标签: linux assembly io x86 nasm

我试图编写一个程序,将一个小写的字符串转换为大写,使用缓冲区来存储初始字符串。我遇到的问题是我的程序会打印出一个无限循环的字符,这些字符必须与我给它的字符串相似。

我认为代码中存在的其他问题如下:

  • 某些子程序在通话结束时使用ret。我遇到问题的问题是找出这些子程序中哪些实际上不需要ret,并且最好与jmp一起使用。说实话,我在这两者的语义之间有点困惑。例如,在通话结束时,使用ja调用的子程序是否需要ret

  • 我还尝试打印出用于转换值的循环的每次迭代中发生的迭代次数。无论出于何种原因,我inc计数器并决定使用PrintNumIter例程打印它,唉,不幸的是,它没有做任何事情。

完整的程序如下。

Codez

bits 32

[section .bss]

        buf: resb 1024                  ;allocate 1024 bytes of memory to buf

[section .data]

        ;*************
        ;* CONSTANTS *
        ;*************

        ;ASCII comparison/conversion

        LowercaseA:     equ 0x61
        LowercaseZ:     equ 0x7A
        SubToUppercase: equ 0x20

        ;IO specifiers/descriptors

        EOF:            equ 0x0

        sys_read:       equ 0x3
        sys_write:      equ 0x4

        stdin:          equ 0x0
        stdout:         equ 0x1
        stderr:         equ 0x2

        ;Kernel Commands/Program Directives

        _exit:          equ 0x1
        exit_success:   equ 0x0
        execute_cmd:    equ 0x80

        ;Memory Usage

        buflen:         equ 0x400   ;1KB of memory


        ;*****************
        ;* NON-CONSTANTS *
        ;*****************

        iteration_count:    db 0
        query :             db "Please enter a string of lowercase characters, and I will output them for you in uppercase ^.^: ", 10   
        querylen :          equ $-query

[section .text]

    global _start
;===========================================
;             Entry Point
;===========================================

_start:
        nop                                         ;keep GDB from complaining
        call    AskUser 
        call    Read
        call    SetupBuf
        call    Scan
        call    Write
        jmp     Exit

;===========================================
;           IO Instructions
;===========================================

Read:
        mov     eax, sys_read                       ;we're going to read in something
        mov     ebx, stdin                          ;where we obtain this is from stdin
        mov     ecx, buf                            ;read data into buf
        mov     edx, buflen                         ;amount of data to read

        int     execute_cmd                         ;invoke kernel to do its bidding
        ret

Write:
        mov     eax, sys_write                      ;we're going to write something
        mov     ebx, stdout                         ;where we output this is going to be in stdout
        mov     ecx, buf                            ;buf goes into ecx; thus, whatever is in ecx gets written out to
        mov     edx, buflen                         ;write the entire buf

        int     execute_cmd                         ;invoke kernel to do its bidding
        ret

AskUser:
        mov     eax, sys_write
        mov     ebx, stdout
        mov     ecx, query
        mov     edx, querylen   

        int     execute_cmd
        ret

PrintNumIter:
        mov     eax, sys_write
        mov     ebx, stdout
        push    ecx                                 ;save ecx's address
        mov     ecx, iteration_count                ;print the value of iteration_count
        mov     edx, 4                              ;print 4 bytes of data

        int     execute_cmd
        pop     ecx                                 ;grab the value back in
        ret
;===========================================
;           Program Preperation
;===========================================

SetupBuf:
        mov     ecx, esi                        ;place the number of bytes read into ecx
        mov     ebp, buf                        ;place the address of buf into ebp
        dec     ebp                             ;decrement buf by 1 to prevent "off by one" error
        ret                                         

;===========================================
;           Conversion Routines     
;===========================================

ToUpper:
        sub     dword [ebp + ecx], SubToLowercase   ;grab the address of buf and sub its value to create uppercase character


Scan:
        call    PrintNumIter                        ;print the current iteration within the loop

        cmp     dword [ebp + ecx], LowercaseA       ;Test input char against lowercase 'a'
        jb      ToUpper                             ;If below 'a' in ASCII, then is not lowercase - goto ToLower

        cmp     dword [ebp + ecx], LowercaseZ       ;Test input char against lowercase 'z'
        ja      ToUpper                             ;If above 'z' in ASCII, then is not lowercase - goto ToLower

        dec     ecx                                 ;decrement ecx by one, so we can get the next character
        inc     byte [iteration_count]              ;increment the __value__ in iteration count by 1
        jnz     Scan                                ;if ecx != 0, then continue the process
        ret

;===========================================

;Next:
;       dec     ecx                             ;decrement ecx by one
;       jnz     Scan                            ;if ecx != 0 scan
;       ret

;===========================================

Exit:
        mov     eax, _exit
        mov     ebx, exit_success

        int     execute_cmd

1 个答案:

答案 0 :(得分:4)

你的问题直接归结于这样一个事实,即你完成处理后你永远不会在你的字符串缓冲区的末尾附加一个nul终结符(从我记忆中,read系统调用不回读一个空)。

不幸的是,由于你的奇怪的控制流程,这有点难以做,但改变SetupBuf应该做的伎俩(注意,你应该检查你没有溢出buf,但是有1KB,我怀疑你需要担心学习计划):

SetupBuf:
        mov     ecx, esi                        
        mov     ebp, buf
        mov     [ebp+ecx],0  ;make sure the string is nul terminated            
        dec     ebp                             
        ret

请注意

另一个似乎困扰你的代码的问题(你已经注意到了),你的奇怪的控制流程。如此简单的指导方针(注意:不是规则,只是指导方针),希望能帮助您减少spagetti代码:

  • JMP(以及条件跳转)应仅用于在同一过程中转换为标签,否则您将开始进入绑定,因为您无法放松。唯一可以使用跳跃的时间是尾部调用,但在这个阶段你不应该担心,它更混乱。

  • 在进行其他程序时始终使用CALL,这样您就可以使用RETN / RET指令正确返回呼叫站点,从而使控制更加流畅更符合逻辑。

一个简单的例子:

print_num: ;PROC: num to print in ecx, ecx is caller preserved
    push ecx
    push num_format ; "%d\n" 
    call _printf
    sub esp,8 ;cleanup for printf
    retn

print_loop_count: ;PROC: takes no args
    mov ecx,0x10 ;loop 16 times

do_loop: ;LABEL: used as a jump target for the loop
         ;good idea to prefix jump lables with "." to differentiate them
   push ecx ;save ecx
   call print_num ;value to print is already in ecx
   pop ecx ;restore ecx
   dec ecx
   jnz do_loop ;again?

   retn