在Assembly Segfaults中调用C函数

时间:2013-05-07 08:02:29

标签: c arrays assembly

我正在尝试编写一个汇编程序,它调用c中的函数,该函数将使用预定义字符替换字符串中的某些字符,前提是char数组中的当前字符符合某些条件。

我的c档案:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

//display *((char *) $edi)
// These functions will be implemented in assembly:
//

int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;


int isvowel (int c) {

   if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u') 
      return 1 ;

   if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U') 
      return 1 ;

   return 0 ;
}

int main(){
    char *str1;
    int r;
// I ran my code through a debugger again, and it seems that when displaying 
// the character stored in ecx is listed as "A" (correct) right before the call
// to "add ecx, 1" at which point ecx somehow resets to 0 when it should be "B"

    str1 = strdup("ABC 123 779 Hello World") ;
    r = strrepl(str1, '#', &isdigit) ;
    printf("str1 = \"%s\"\n", str1) ;
    printf("%d chararcters were replaced\n", r) ;
    free(str1) ;
    return 0;
}

我的.asm文件:

; File: strrepl.asm
; Implements a C function with the prototype:
;
;   int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
;
; 
; Result: chars in string are replaced with the replacement character and string is returned.

    SECTION .text
    global  strrepl


_strrepl:   nop
strrepl:
    push    ebp         ; set up stack frame
    mov ebp, esp

    push    esi         ; save registers
    push    ebx
    xor eax, eax
    mov ecx, [ebp + 8]      ;load string (char array) into ecx
    jecxz   end         ;jump if [ecx] is zero
    mov esi, [ebp + 12]     ;move the replacement character into esi
    mov edx, [ebp + 16]     ;move function pointer into edx
    xor bl, bl          ;bl will be our counter


firstLoop:
    add bl, 1           ;inc bl would work too
    add ecx, 1
    mov eax, [ecx]  
    cmp eax, 0
    jz  end
    push    eax         ; parameter for (*isinsubset)
    ;BREAK
    call    edx         ; execute (*isinsubset)

    add esp, 4          ; "pop off" the parameter
    mov ebx, eax        ; store return value




end:
    pop ebx         ; restore registers
    pop esi
    mov esp, ebp        ; take down stack frame
    pop ebp
    ret

当通过gdb运行并在断点处设置断点时; BREAK,在我采取步骤调试命令后出现以下错误:

Program received signal SIGSEGV, Segmentation fault.
0x0081320f in isdigit () from /lib/libc.so.6

isdigit是我在c文件中包含的标准c库的一部分,所以我不知道该怎么做。

编辑:我已经编辑了我的firstLoop并包含了一个secondLoop,它应该用“#”替换任何数字,但它似乎取代了整个数组。

firstLoop:

    xor eax, eax

    mov edi, [ecx]
    cmp edi, 0
    jz  end

    mov edi, ecx        ; save array


    movzx   eax, byte [ecx]     ;load single byte into eax  
    mov ebp, edx        ; save function pointer
    push    eax         ; parameter for (*isinsubset)           
    call    edx         ; execute (*isinsubset)

    ;cmp    eax, 0
    ;jne    end

    mov ecx, edi        ; restore array
    cmp eax, 0
    jne secondLoop  
    mov edx, ebp        ; restore function pointer
    add esp, 4          ; "pop off" the parameter
    mov ebx, eax        ; store return value
    add ecx, 1
    jmp firstLoop

secondLoop:
    mov [ecx], esi
    mov edx, ebp
    add esp, 4
    mov ebx, eax
    add ecx, 1
    jmp     firstLoop

使用gdb,当代码进入secondloop时,一切都是正确的。 ecx显示为“1”,这是从.c文件传入的字符串中的第一个数字。 Esi正在显示为“#”。然而,在我做了[ecx]后,esi似乎崩溃了。 ecx在这一点上显示为“#”,但是一旦我增加1来到达数组中的下一个字符,它就会被显示为“/ 000”。 1后面的每个字符都替换为“#”,显示为“/ 000”。在我让secondLoop尝试用“#”替换字符之前,我只是用自己的firstLoop循环来查看它是否可以通过整个数组而不会崩溃。确实如此,并且每次增量后ecx都显示为正确的字符。我不确定为什么做mov [ecx],esi会将ecx的其余部分设置为null。

2 个答案:

答案 0 :(得分:1)

firstLoop:中,您使用以下字符加载字符串中的字符:

mov eax, [ecx]

在一个平局而不是一个字节上加载4个字节。因此,您传递给int的{​​{1}}可能远远超出其处理范围(它可能使用简单的表查找)。

您可以使用以下Intel asm语法加载单个字节:

isdigit()

其他一些事情:

  • 它还会产生这样的效果:它可能无法正确检测字符串的结尾,因为空终结符可能不会跟随其他三个零字节。
  • 我不确定为什么在处理字符串
  • 中的第一个字符之前增加movzx eax, byte ptr [ecx]
  • 您发布的汇编代码似乎没有实际循环字符串

答案 1 :(得分:0)

我在您的代码中添加了一些注释: -

  ; this is OK: setting up the stack frame and saving important register
  ; on Win32, the registers that need saving are: esi, edi and ebx
  ; the rest can be used without needing to preserve them
  push    ebp
  mov ebp, esp
  push    esi
  push    ebx

  xor eax, eax
  mov ecx, [ebp + 8]

  ; you said that this checked [ecx] for zero, but I think you've just written
  ; that wrong, this checks the value of ecx for zero, the [reg] form usually indicates
  ; the value at the address defined by reg
  ; so this is effectively doing a null pointer check (which is good)
  jecxz   end

  mov esi, [ebp + 12]
  mov edx, [ebp + 16]
  xor bl, bl

firstLoop:
  add bl, 1
  ; you increment ecx before loading the first character, this means
  ; that the function ignores the first character of the string
  ; and will therefore produce an incorrect result if the string
  ; starts with a character that needs replacing
  add ecx, 1
  ; characters are 8 bit, not 32 bit (mentioned in comments elsewhere)
  mov eax, [ecx]  
  cmp eax, 0
  jz  end
  push    eax
  ; possibly segfaults due to character out of range
  ; also, as mentioned elsewhere, the function you call here must conform to the 
  ; the standard calling convention of the system (e.g, preserve esi, edi and ebx for
  ; Win32 systems), so eax, ecx and edx can change, so next time you call
  ; [edx] it might be referencing random memory
  ; either save edx on the stack (push before pushing parameters, pop after add esp)
  ; or just load edx with [ebp+16] here instead of at the start
  call    edx

  add esp, 4
  mov ebx, eax

  ; more functionality required here!



end:
  ; restore important values, etc
  pop ebx
  pop esi
  mov esp, ebp
  pop ebp
  ; the result of the function should be in eax, but that's not set up properly yet
  ret

关于内循环的评论: -

firstLoop:

    xor eax, eax

    ; you're loading a 32 bit value and checking for zero,
    ; strings are terminated with a null character, an 8 bit value,
    ; not a 32 bit value, so you're reading past the end of the string
    ; so this is unlikely to correctly test the end of string

    mov edi, [ecx]
    cmp edi, 0
    jz  end

    mov edi, ecx        ; save array


    movzx   eax, byte [ecx]     ;load single byte into eax  
    ; you need to keep ebp! its value must be saved (at the end, 
    ; you do a mov esp,ebp)
    mov ebp, edx        ; save function pointer
    push    eax         ; parameter for (*isinsubset)           
    call    edx         ; execute (*isinsubset)

    mov ecx, edi        ; restore array
    cmp eax, 0
    jne secondLoop  
    mov edx, ebp        ; restore function pointer
    add esp, 4          ; "pop off" the parameter
    mov ebx, eax        ; store return value
    add ecx, 1
    jmp firstLoop

secondLoop:
    ; again, your accessing the string using a 32 bit value, not an 8 bit value
    ; so you're replacing the matched character and the three next characters
    ; with the new value
    ; the upper 24 bits are probably zero so the loop will terminate on the
    ; next character
    ; also, the function seems to be returning a count of characters replaced,
    ; but you're not recording the fact that characters have been replaced
    mov [ecx], esi
    mov edx, ebp
    add esp, 4
    mov ebx, eax
    add ecx, 1
    jmp     firstLoop

您似乎遇到了内存工作方式的问题,您在8位和32位内存访问之间感到困惑。